In [1]:
import scipy.io

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import scipy.stats as st

In [2]:
#get data
Numpyfile= scipy.io.loadmat('mnist_data.mat')

trX=Numpyfile.get('trX')
trY=Numpyfile.get('trY')

tsX=Numpyfile.get('tsX')
tsY=Numpyfile.get('tsY')

In [3]:
trX=pd.DataFrame(trX)
tsX=pd.DataFrame(tsX)

#transform to column vector
trY=pd.DataFrame(trY).T
tsY=pd.DataFrame(tsY).T

In [4]:
#combine train and test sets for feature extraction
data=pd.concat([trX,tsX],axis=0)

In [5]:
#extract features
feature1=np.mean(data,axis=1)
feature2=np.sqrt(np.var(data,axis=1))

In [6]:
features=pd.concat([feature1,feature2],axis=1)

In [7]:
#train- (12116,2)
#test- (2002,2)
train=pd.DataFrame(features[:12116])
train.columns=['mean','sd']
test=pd.DataFrame(features[12116:])
test.columns=['mean','sd']

In [8]:
#------------------------------------Naive Bayes------------------------------------------

In [9]:
#separate 7s and 8s
train8=train[trY[0]==1]
train7=train[trY[0]==0]

In [10]:
#calculate mean and SD of the 2 features
avg7=np.mean(train7,axis=0)
avg8=np.mean(train8,axis=0)

sd7=np.sqrt(np.var(train7,axis=0))
sd8=np.sqrt(np.var(train8,axis=0))

In [11]:
print(avg7,sd7,avg8,sd8)

mean    0.114528
sd      0.287557
dtype: float64 mean    0.030632
sd      0.038201
dtype: float64 mean    0.150156
sd      0.320476
dtype: float64 mean    0.038632
sd      0.039960
dtype: float64


In [12]:
#covariance matrices
cov7=[[sd7[0]**2,0],[0,sd7[1]**2]]
cov8=[[sd8[0]**2,0],[0,sd8[1]**2]]

In [13]:
#Calculate probability density of being 7 and 8 using multivariate normal distribution for each image 
prob7=pd.DataFrame(st.multivariate_normal(mean=avg7,cov=cov7).pdf(test))
prob8=pd.DataFrame(st.multivariate_normal(mean=avg8,cov=cov8).pdf(test))

In [14]:
#Get prior probabilites
prior7=len(train7.index)/len(trX.index)
prior8=len(train8.index)/len(trX.index)

In [15]:
print(prior7,prior8)

0.5170848464839881 0.4829151535160119


In [16]:
#Use the bayes theorem to get conditional probabilities for labels
prob7=prob7*prior7
prob8=prob8*prior8

In [17]:
probs=pd.concat([prob7,prob8],axis=1)
probs.columns=['p7','p8']

In [18]:
#Set 1 if p8>p7
bool=probs['p8']>probs['p7']

In [19]:
probs['label']=bool

In [20]:
#Number of correct predictions
bool=probs['label']==tsY[0]

In [21]:
#Accuracy=Number of correct predictions/total predictions
acc=(np.sum(bool)/len(test.index))*100
print(str(round(acc,2)),"%")

69.53 %


In [22]:
#Accuracy of 7s and 8s
bool7=tsY[0]==0
bool8=tsY[0]==1

n7=np.sum(bool7)
n8=np.sum(bool8)

acc7=(np.sum(probs[bool7]['label']==False)/n7)*100
print("Accuracy of 7s:",str(round(acc7,2)),"%")

acc8=(np.sum(probs[bool8]['label'])/n8)*100
print("Accuracy of 8s:",str(round(acc8,2)),"%")

Accuracy of 7s: 75.97 %
Accuracy of 8s: 62.73 %


In [23]:
#------------------------------------Logistic Regression------------------------------------------

In [24]:
#add intercept terms x0 train- (12116,3)
x0=np.ones((train.shape[0],1))
train=np.hstack((x0,train))

weights=np.zeros(train.shape[1])

In [25]:
def sigmoid_fun(prod):
    return 1/(1+np.exp(-prod))

In [26]:
def log_likelihood(data,target,weights):
    prod=np.dot(data,weights)
    ll=np.sum(target*prod-np.log(1+np.exp(prod)))
    return ll

In [27]:
#decrease the error every iteration and maximize the log likelihood function
def logistic_reg(features,target,weights,num_steps,learning_rate):
    init=log_likelihood(train,trY[0],weights)
    print("initial ",init)
    
    for step in range(num_steps):
        prod=np.dot(features,weights)
        prediction=sigmoid_fun(prod)

        # Update weights with gradient
        error=target-prediction
        gradient=np.dot(features.T,error)
        weights+=learning_rate*gradient
        
        if step%10000==0:
            print(log_likelihood(features,target,weights))

    return weights

In [28]:
weights=logistic_reg(train,trY[0],weights,50000,0.001)

initial  -8398.171239664298
-8399.407469883352
-9646.11650137537
-8558.982068003384
-7652.234937363416
-6967.4403853615


In [29]:
print(weights)

[  20.66826309  227.34668918 -162.92629572]


In [30]:
#add intercept to test set
x0 = np.ones((test.shape[0], 1))
test=np.hstack((x0,test))

#predict values
pred=np.dot(test,weights)

#classify as 8 if probability>0.5 else classify as 7
bool=pred>0.5

In [31]:
#Number of correct predictions
bool=bool==tsY[0]

In [32]:
#Accuracy=Number of correct predictions/total predictions
acc=(np.sum(bool)/len(test))*100
print(str(round(acc,2)),"%")

81.57 %


In [33]:
#Accuracy of 7s and 8s
bool7=tsY[0]==0
bool8=tsY[0]==1
bool=pred>0.5

bool7=bool[bool7]==False
bool8=bool[bool8]

acc7=(np.sum(bool7)/n7)*100
print("Accuracy of 7s:",str(round(acc7,2)),"%")

acc8=(np.sum(bool8)/n8)*100
print("Accuracy of 8s:",str(round(acc8,2)),"%")

Accuracy of 7s: 78.79 %
Accuracy of 8s: 84.5 %


In [34]:
# from sklearn.preprocessing import OneHotEncoder

# ohe=OneHotEncoder(sparse=False)
# temp=ohe.fit_transform(tsY)
# print(temp)