ML Lab9 : Neelmani- 224161019

Question 1

In [5]:
import numpy as np
import pandas as pd
from scipy.stats import multivariate_normal

# Load training data
train_data = pd.read_csv("Train_file.csv")
X_train = train_data.iloc[:, :-1].values
y_train = train_data.iloc[:, -1].values

# Calculate class statistics
X0_train = X_train[y_train == 0]
X1_train = X_train[y_train == 1]

mu0 = np.mean(X0_train, axis=0)
mu1 = np.mean(X1_train, axis=0)
C0 = np.cov(X0_train.T)
C1 = np.cov(X1_train.T)

# Estimate prior probabilities
prior0 = np.sum(y_train == 0) / len(y_train)
prior1 = np.sum(y_train == 1) / len(y_train)

# Define discriminant function
def discriminant_function(x):
    # Compute Gaussian densities
    p_x_given_0 = multivariate_normal.pdf(x, mean=mu0, cov=C0)
    p_x_given_1 = multivariate_normal.pdf(x, mean=mu1, cov=C1)
    
    # Compute discriminant score
    g_x = np.log(p_x_given_1/p_x_given_0) \
          - 0.5 * (mu1.T.dot(np.linalg.inv(C1)).dot(mu1) \
                   - mu0.T.dot(np.linalg.inv(C0)).dot(mu0)) \
          - 0.5 * np.log(np.linalg.det(C1)/np.linalg.det(C0)) \
          + x.T.dot(np.linalg.inv(C1) - np.linalg.inv(C0)).dot(mu1 - mu0)
    
    return g_x

# Load test data
test_data = pd.read_csv("Test_file.csv")
X_test = test_data.iloc[:, :-1].values
y_test = test_data.iloc[:, -1].values

# Evaluate model on test data
y_pred = []
for i in range(len(X_test)):
    g_x = discriminant_function(X_test[i])
    y_pred.append(1 if g_x >= 0 else 0)

# Compute accuracy
accuracy = np.sum(y_pred == y_test) / len(y_test)
print("Accuracy:", accuracy)


Accuracy: 0.7897280966767372


In [1]:
import numpy as np
import pandas as pd

# Load train data from csv file
train_data = pd.read_csv('Train_file.csv', header=None).values

# Separate feature vectors and labels
X_train = train_data[:, :-1]
y_train = train_data[:, -1]

# Compute class means
mu0 = np.mean(X_train[y_train == 0], axis=0)
mu1 = np.mean(X_train[y_train == 1], axis=0)

# Compute class covariance matrices
C0 = np.cov(X_train[y_train == 0].T)
C1 = np.cov(X_train[y_train == 1].T)

# Compute inverse and determinant of covariance matrices
C0_inv = np.linalg.inv(C0)
C1_inv = np.linalg.inv(C1)
C0_det = np.linalg.det(C0)
C1_det = np.linalg.det(C1)

# Define discriminant function
def discriminant(x):
    term1 = np.log(C1_det / C0_det)
    term2 = 0.5 * np.dot(np.dot((mu1 - mu0).T, C1_inv), (mu1 - mu0))
    term3 = -0.5 * np.dot(np.dot(x.T, C1_inv - C0_inv), x)
    return term1 + term2 + term3

# Load test data from csv file
test_data = pd.read_csv('Test_file.csv', header=None).values

# Separate feature vectors and labels
X_test = test_data[:, :-1]
y_test = test_data[:, -1]

# Classify test instances using discriminant function
y_pred = np.zeros_like(y_test)
for i in range(X_test.shape[0]):
    x = X_test[i]
    y_pred[i] = 1 if discriminant(x) >= 0 else 0

# Compute accuracy of classification
accuracy = np.mean(y_pred == y_test)
print('Accuracy:', accuracy)


Accuracy: 0.518962632459565


Question 2

In [4]:
import numpy as np
import csv

# Define sigmoid function
def sigmoid(x):
    return 1 / (1 + np.exp(-x))

# Define function to train logistic regression model
def train_logistic_regression(X_train, Y_train, learning_rate=0.1, num_iterations=100):
    # Initialize weights and bias
    n_features = X_train.shape[1]
    weights = np.zeros(n_features)
    bias = 0
    
    # Gradient descent
    for i in range(num_iterations):
        # Compute linear combination of inputs and weights
        z = np.dot(X_train, weights) + bias
        
        # Apply sigmoid function to compute predicted probabilities
        predictions = sigmoid(z)
        
        # Compute gradients
        dw = np.dot(X_train.T, (predictions - Y_train)) / Y_train.size
        db = np.sum(predictions - Y_train) / Y_train.size
        
        # Update weights and bias
        weights -= learning_rate * dw
        bias -= learning_rate * db
    
    return weights, bias

# Define function to predict labels
def predict_logistic_regression(X_test, weights, bias):
    # Compute linear combination of inputs and weights
    z = np.dot(X_test, weights) + bias
    
    # Apply sigmoid function to compute predicted probabilities
    predictions = sigmoid(z)
    
    # Round probabilities to obtain predicted labels
    labels = np.round(predictions)
    
    return labels

# Load training data
X_train = []
Y_train = []
with open('Train_file.csv') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        X_train.append([float(i) for i in row[:-1]])
        Y_train.append(float(row[-1]))
X_train = np.array(X_train)
Y_train = np.array(Y_train)

# Train logistic regression model
weights, bias = train_logistic_regression(X_train, Y_train)

# Load testing data
X_test = []
Y_test = []
with open('Test_file.csv') as csvfile:
    reader = csv.reader(csvfile)
    for row in reader:
        X_test.append([float(i) for i in row[:-1]])
        Y_test.append(float(row[-1]))
X_test = np.array(X_test)
Y_test = np.array(Y_test)

# Predict labels for testing data
predicted_labels = predict_logistic_regression(X_test, weights, bias)

# Compute class-wise and overall accuracy measures
n0 = np.sum(Y_test == 0)
n1 = np.sum(Y_test == 1)
n00 = np.sum(np.logical_and(Y_test == 0, predicted_labels == 0))
n01 = np.sum(np.logical_and(Y_test == 0, predicted_labels == 1))
n10 = np.sum(np.logical_and(Y_test == 1, predicted_labels == 0))
n11 = np.sum(np.logical_and(Y_test == 1, predicted_labels == 1))
p0 = n00 / n0
p1 = n11 / n1
p = (n00 + n11) / Y_test.size

# Print accuracy measures
print('Class 0 accuracy:', p0)
print('Class 1 accuracy:', p1)
print('Overall accuracy:', p)


Class 0 accuracy: 0.892222845391662
Class 1 accuracy: 0.9014605287483823
Overall accuracy: 0.8968674474809444
