In [None]:
# Mount Google Drive at /content/drive (Colab-only); later cells read their
# data files from paths under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import pandas as pd
import cv2
from collections import Counter
from sklearn.metrics import accuracy_score
from scipy.stats import binom
import random

# PROBLEM 1 KNN classifier on noisy images

In [None]:
# Read the two PB1 splits from Drive; each is parsed into a 2-D float array.
train_images = np.loadtxt('/content/drive/MyDrive/exm_23S/exam2023S/trainPB1.txt')
test_images = np.loadtxt('/content/drive/MyDrive/exm_23S/exam2023S/testPB1.txt')

# Column 0 is used as the class label; every remaining column is kept as
# that row's pixel vector.
train_labels, train_pixels = train_images[:, 0], train_images[:, 1:]
test_labels, test_pixels = test_images[:, 0], test_images[:, 1:]

In a Gaussian blur, the image is convolved with a Gaussian kernel instead of a box filter. The Gaussian filter is a low-pass filter: it attenuates the high-frequency components of the image (such as pixel-level noise) while preserving the low-frequency structure.

In [None]:
def blur_flat_images(flat_images, side=28, ksize=(5, 5), sigma=0):
    """Gaussian-blur a batch of flattened square images.

    Parameters
    ----------
    flat_images : 2-D array, one flattened ``side * side`` image per row.
    side : edge length used to reshape each row into a square image.
    ksize : Gaussian kernel size passed to ``cv2.GaussianBlur``.
    sigma : third positional argument of ``cv2.GaussianBlur`` (sigmaX).

    Returns
    -------
    np.ndarray with one blurred, re-flattened image per row.
    """
    return np.array([
        cv2.GaussianBlur(row.reshape(side, side), ksize, sigma).ravel()
        for row in flat_images
    ])


# Applying Gaussian blur to train and test pixels.
# The blur is always computed from the pixel columns of the loaded arrays
# rather than from train_pixels/test_pixels themselves, so re-running this
# cell does not blur already-blurred images a second time.
train_pixels = blur_flat_images(train_images[:, 1:])
test_pixels = blur_flat_images(test_images[:, 1:])

In this cell, the `cv2.GaussianBlur` function applies a Gaussian blur with a kernel size of (5, 5) to every image in the train and test data. Each image is stored as a 1D array of 784 pixel values, so `reshape` first converts it to a 2D array of shape (28, 28) for blurring, and `ravel` then flattens the blurred image back to a 1D array of length 784.

In [None]:
class KNNClassifier:
    """Brute-force k-nearest-neighbours classifier with majority voting.

    Parameters
    ----------
    n_neighbors : number of nearest training samples that vote.
    distance_func : ``'euclidean'`` or ``'manhattan'``.
    """

    def __init__(self, n_neighbors, distance_func):
        self.n_neighbors = n_neighbors
        self.distance_func = distance_func

    def fit(self, X, y):
        """Store the training samples ``X`` (one per row) and their labels ``y``."""
        # Cast to float so unsigned-integer inputs (e.g. uint8 pixels) cannot
        # wrap around when differences are taken and squared in predict().
        self.X_train = np.asarray(X, dtype=float)
        self.y_train = np.asarray(y)

    def predict(self, X):
        """Return the majority label among the nearest neighbours of each row of ``X``.

        Neighbours are ordered by distance, with equal distances ordered by
        label. A tied vote goes to the label that appears first in that
        ordering.

        Raises
        ------
        ValueError
            If ``distance_func`` is not supported or ``n_neighbors`` < 1.
        """
        # Validate once up front instead of once per (query, training row) pair.
        if self.distance_func not in ('euclidean', 'manhattan'):
            raise ValueError('Unsupported distance function')
        if self.n_neighbors < 1:
            raise ValueError('n_neighbors must be at least 1')

        use_euclidean = self.distance_func == 'euclidean'
        y_pred = []
        for x in np.asarray(X, dtype=float):
            # Distance from this query to every training row in one shot.
            diff = self.X_train - x
            if use_euclidean:
                dists = np.sqrt(np.sum(diff ** 2, axis=1))
            else:
                dists = np.sum(np.abs(diff), axis=1)
            # lexsort's last key is the primary one: sort by distance, then by
            # label, which is the same order as sorting (dist, label) tuples.
            nearest = np.lexsort((self.y_train, dists))[:self.n_neighbors]
            votes = Counter(self.y_train[nearest].tolist())
            y_pred.append(votes.most_common(1)[0][0])
        return np.array(y_pred)

In [None]:
# Build a 20-neighbour Euclidean classifier and memorise the training set.
knn = KNNClassifier(n_neighbors=20, distance_func='euclidean')
knn.fit(train_pixels, train_labels)

# Label every test image, then express the hit rate as a percentage.
test_pred = knn.predict(test_pixels)
test_acc = 100 * accuracy_score(test_labels, test_pred)

print(f'Test accuracy: {test_acc:.2f}')

Test accuracy: 76.40


# PROBLEM 2 Quiz Difficulties Mixture

In [None]:
# Reading the data from the gradesheet file

gradesheet = np.loadtxt('/content/drive/MyDrive/exm_23S/exam2023S/studentgrades_pb2.txt', dtype = int)
n_students, n_questions = gradesheet.shape

# Initialize the parameters
p = np.array([0.1, 0.2, 0.7])  # fixed (not random) starting success probability per component
q = np.array([1/3, 1/3, 1/3])  # start from equal mixing weights

# K is the number of trials in the binomial model used below.
# NOTE(review): the original comment called this "number of students per
# session", and n_questions is read above but never used. If each row holds
# one 0/1 answer per question, K should presumably equal n_questions - confirm.
K = 20
n_iter = 100 # Number of iterations

# Per-row statistics do not change between iterations, so compute them once.
# Both forms are kept because the E-step used the raw row sum while the
# M-step counted entries equal to 1.
row_totals = np.sum(gradesheet, axis=1)
row_correct = np.sum(gradesheet == 1, axis=1)

# EM algorithm
for iteration in range(n_iter):

    # E-step: responsibility of each component for each row. A single
    # broadcast pmf call yields the (n_students, 3) likelihood table, which is
    # weighted by q and then normalised so that every row sums to 1.
    est = q * binom.pmf(row_totals[:, None], K, p)
    est /= np.sum(est, axis=1, keepdims=True)

    # M-step: Updating the values of p and q
    q = np.sum(est, axis=0) / n_students
    for k in range(3):
        # Responsibility-weighted successes over responsibility-weighted trials.
        p[k] = np.sum(est[:, k] * row_correct) / (np.sum(est[:, k]) * K)

# Probability of solving each problem

In [None]:
# Report the estimated success probability of each component (A, B, C).
for label, prob in zip('ABC', p):
    print(f'p_{label} =', prob)

p_A = 0.2369186684893597
p_B = 0.6100378318845451
p_C = 0.931728533231595


# Quiz selection probabilities

In [None]:
# Report the estimated mixing weight of each component (A, B, C).
for label, weight in zip('ABC', q):
    print(f'q_{label} =', weight)

q_A = 0.30681398248878805
q_B = 0.5146283397819822
q_C = 0.17855767772923004
