Minji Kim 20153029

# MNIST binary classifier for each digit

Build a binary classifier for each digit against all the other digits at MNIST dataset.

Let $x = (x_1, x_2, ... , x_m)$ be a vector representing an image in the dataset.

The prediction function $f_d(x; w)$ is defined by the linear combination of data $(1, x)$ and the model parameter $w$ for each digit $d$:
$$f_d(x; w) = w_0 \times 1 + w_1 \times x_1 + w_2 \times x_2 + ... + w_m \times x_m$$ 
$$where\ w = (w_0, w_1, ... , w_m)$$

The prediction function $f_d(x; w)$ should have the following values:
$$f_d(x; w) = +1 \qquad if\ label(x) = d$$
$$f_d(x; w) = -1 \qquad if\ label(x)\ is\ not\ d$$

The optimal model parameter w is obtained by minimizing the following objective function for each digit $d$:
$$\sum_i ( f_d(x^{(i)}; w) - y^{(i)} )^2$$

and the label of input x is given by:
$$argmax_d f_d(x; w)$$


## Compute an optimal model parameter
Compute an optimal model parameter using the training dataset for each classifier $f_d(x, w)$


In [1]:
import matplotlib.pyplot as plt
import numpy as np
from numpy.linalg import *
import pandas as pd

file_data		= "mnist_train.csv"
handle_file	= open(file_data, "r")
data        		= handle_file.readlines()
handle_file.close()

size_row	= 28    # height of the image
size_col  	= 28    # width of the image

num_image	= len(data)
count       	= 0     # count for the number of images

#
# Normalize the values of the input data to be [0, 1]
#
def normalize(data):

    data_normalized = (data - min(data)) / (max(data) - min(data))

    return(data_normalized)


#
# Make a matrix each column of which represents an images in a vector form 
#
list_image  = np.ones((num_image,(size_row * size_col)+1), dtype=float)
list_label  = np.empty(num_image, dtype=int)

for line in data:

    line_data   = line.split(',')
    label       = line_data[0]
    im_vector   = np.asfarray(line_data[1:])
    im_vector   = normalize(im_vector)

    list_label[count]       = label
    list_image[count, 1:]    = im_vector 

    count += 1


#
# Approximating polynomial function obtained by solving a least square problem
#
Y  = np.empty((num_image, 10), dtype=int)
for i in range(num_image):
    for j in range(10):
        if(list_label[i]==j):
            Y[i][j] = 1
        else:
            Y[i][j] = -1

theta = pinv(list_image.T@list_image)@list_image.T@Y

PF = list_image@theta

Estimaite_label = np.empty(num_image, dtype=int)
for i in range(num_image):
    Estimaite_label[i] = np.argmax(PF[i,:])

## Compute true positive rate, error rate using training dataset


In [10]:
#
# Compute the TP rate and Error rate using the train dataset
#

count = np.zeros((10, 10), dtype=int)

for i in range(num_image):
    count[list_label[i],Estimaite_label[i]] += 1


Result1 = pd.DataFrame(data=count, columns=['E0','E1', 'E2','E3','E4','E5','E6', 'E7', 'E8','E9'])


Result1

Unnamed: 0,E0,E1,E2,E3,E4,E5,E6,E7,E8,E9
0,5682,7,18,14,24,43,64,4,61,6
1,2,6548,40,15,19,31,14,12,55,6
2,99,264,4792,149,108,11,234,91,192,18
3,42,167,176,5159,32,124,56,115,135,125
4,10,99,42,6,5211,50,39,23,59,303
5,164,95,28,433,105,3990,192,36,235,143
6,108,74,61,1,70,90,5476,0,35,3
7,55,189,37,47,170,9,2,5426,10,320
8,75,493,63,226,105,222,56,20,4411,180
9,68,60,20,117,371,12,4,491,38,4768


In [14]:
TP_rate = np.trace(count) / num_image
Error_rate = (num_image - np.trace(count)) / num_image

print("TP Rate =",TP_rate)
print("Error Rate =", Error_rate)

TP Rate = 0.8577166666666667
Error Rate = 0.14228333333333334


## Compute true positive rate, error rate using testing dataset.


In [15]:
file_data		= "mnist_test.csv"
handle_file	= open(file_data, "r")
data        		= handle_file.readlines()
handle_file.close()

num_image	= len(data)
count       	= 0     # count for the number of images


#
# Make a matrix each column of which represents an images in a vector form 
#
list_image  = np.ones((num_image,(size_row * size_col)+1), dtype=float)
list_label  = np.empty(num_image, dtype=int)

for line in data:

    line_data   = line.split(',')
    label       = line_data[0]
    im_vector   = np.asfarray(line_data[1:])
    im_vector   = normalize(im_vector)

    list_label[count]       = label
    list_image[count, 1:]    = im_vector 

    count += 1


#
# Approximating polynomial function obtained by solving a least square problem
#
Y  = np.empty((num_image, 10), dtype=int)
for i in range(num_image):
    for j in range(10):
        if(list_label[i]==j):
            Y[i][j] = 1
        else:
            Y[i][j] = -1

theta = pinv(list_image.T@list_image)@list_image.T@Y

PF = list_image@theta

Estimaite_label = np.empty(num_image, dtype=int)
for i in range(num_image):
    Estimaite_label[i] = np.argmax(PF[i,:])
    
#
# Compute the TP rate and Error rate using the testing dataset
#

count = np.zeros((10, 10), dtype=int)

for i in range(num_image):
    count[list_label[i],Estimaite_label[i]] += 1


Result1 = pd.DataFrame(data=count, columns=['E0','E1', 'E2','E3','E4','E5','E6', 'E7', 'E8','E9'])
Result1

Unnamed: 0,E0,E1,E2,E3,E4,E5,E6,E7,E8,E9
0,949,0,1,6,1,8,7,1,6,1
1,0,1115,5,1,2,1,4,1,6,0
2,11,36,884,17,13,0,23,20,23,5
3,4,16,18,893,5,14,3,17,24,16
4,0,19,5,0,898,1,6,1,7,45
5,17,23,3,55,18,692,21,16,37,10
6,15,9,4,0,9,16,896,0,9,0
7,5,34,13,3,21,1,1,908,1,41
8,13,44,8,26,13,29,9,8,801,23
9,14,13,3,14,58,3,1,53,10,840


In [16]:
TP_rate = np.trace(count) / num_image
Error_rate = (num_image - np.trace(count)) / num_image

print("TP Rate =",TP_rate)
print("Error Rate =", Error_rate)

TP Rate = 0.8876
Error Rate = 0.1124
