# Project Part 1: Density Estimation and Classification using Fashion-MNIST

In [161]:
import scipy.io
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [162]:
# Load the Fashion-MNIST .mat file and pull out the train/test arrays.
data = scipy.io.loadmat("fashion_mnist.mat")
trainX, testX = data['trX'], data['tsX']
# The label arrays are flattened to 1-D so they can be used as boolean masks.
trainY = data['trY'].reshape(-1)
testY = data['tsY'].reshape(-1)


# EXTRACTING THE FEATURES

In [163]:
def _mean_std_features(images):
    '''
    Reduce every row of `images` to two scalar features.

    Returns a tuple (mean, std), each computed along axis 1, i.e. one value
    per row of the input.
    '''
    return np.mean(images, axis=1), np.std(images, axis=1)


# Always start from the raw arrays stored in `data` instead of from
# trainX/testX: those names are overwritten below, so reading them here would
# make a second run of this cell compute features of the features.
fet1, fet2 = _mean_std_features(data['trX'])
trainX = pd.DataFrame({'trainF1': fet1, 'trainF2': fet2})
fett1, fett2 = _mean_std_features(data['tsX'])
testX = pd.DataFrame({'testF1': fett1, 'testF2': fett2})


# ESTIMATION OF PARAMETERS

In [164]:
def estimate_parameters(trainX,trainY):
    '''
    Estimate the Gaussian naive Bayes parameters for classes 0 and 1.

    Parameters
    ----------
    trainX : DataFrame or 2-D array, one row per sample, one column per feature.
    trainY : 1-D array of labels aligned with the rows of trainX
             (0 = T-shirt, 1 = trouser).

    Returns
    -------
    dict with the keys
        'y_0', 'y_1'         : class priors, estimated from the label counts
        'mu_tsh', 'std_tsh'  : per-feature mean / std of the class-0 rows
        'mu_tro', 'std_tro'  : per-feature mean / std of the class-1 rows

    The estimated values are also printed.
    '''
    tshirt=trainX[trainY==0]
    trouser=trainX[trainY==1]

    # Priors are the relative class frequencies among the rows that belong to
    # one of the two classes. With no usable rows at all there is nothing to
    # estimate from, so fall back to equal priors.
    n_tsh, n_tro = len(tshirt), len(trouser)
    n_total = n_tsh + n_tro
    if n_total:
        prior_tsh = n_tsh / n_total
        prior_tro = n_tro / n_total
    else:
        prior_tsh = prior_tro = 0.5

    paras={}
    paras['y_0']=prior_tsh
    paras['mu_tsh']=np.mean(tshirt,axis=0)
    paras['std_tsh']=np.std(tshirt,axis=0)
    paras['y_1']=prior_tro
    paras['mu_tro']=np.mean(trouser,axis=0)
    paras['std_tro']=np.std(trouser,axis=0)
    print('prior of class 0:',paras['y_0'])
    print('mean of class 0 :',paras['mu_tsh'])
    print('std of class 0  :',paras['std_tsh'])
    print('prior of class 1:',paras['y_1'])
    print('mean of class 1 :',paras['mu_tro'])
    print('std of class 1  :',paras['std_tro'])
    return paras
paras = estimate_parameters(trainX=trainX, trainY=trainY)
# Keys of the `paras` dictionary:
#   y_0     -> prior of the T-shirt class
#   mu_tsh  -> mean of the T-shirt class
#   std_tsh -> standard deviation of the T-shirt class
#   y_1     -> prior of the trouser class
#   mu_tro  -> mean of the trouser class
#   std_tro -> standard deviation of the trouser class
# Columns of the training DataFrame:
#   trainF1 -> feature 1 (per-image mean)
#   trainF2 -> feature 2 (per-image standard deviation)

prior of class 0: 0.5
mean of class 0 : trainF1    0.325608
trainF2    0.320036
dtype: float64
std of class 0  : trainF1    0.113375
trainF2    0.087983
dtype: float64
prior of class 1: 0.5
mean of class 1 : trainF1    0.222905
trainF2    0.333942
dtype: float64
std of class 1  : trainF1    0.056951
trainF2    0.057032
dtype: float64


# NAIVE BAYES CLASSIFICATION

In [165]:
# Module-level list that holds the labels (0 or 1) predicted by naive_bayes().
pred=[]
def naive_bayes(testX,paras,labels=None):
    '''
    Classify samples with a two-class Gaussian naive Bayes model and print
    the per-class and overall accuracy.

    Parameters
    ----------
    testX  : DataFrame or 2-D array, one row per sample, one column per feature.
    paras  : dict with the per-feature statistics 'mu_tsh', 'std_tsh'
             (class 0) and 'mu_tro', 'std_tro' (class 1). The priors 'y_0'
             and 'y_1' are used when present; equal priors are assumed
             otherwise.
    labels : optional 1-D array of true labels (0 or 1), one per row of testX.
             When omitted, the notebook-level `testY` is used.

    Side effects
    ------------
    Replaces the module-level list `pred` with the predicted labels of this
    call (so repeated calls no longer accumulate) and prints the accuracies.

    Raises
    ------
    ValueError if the number of labels differs from the number of samples.
    '''
    global pred

    def log_joint(samples, mu, std, prior):
        # log p(x | class) + log p(class), evaluated directly in log space so
        # that samples far from the mean do not underflow to log(0) = -inf.
        mu = np.asarray(mu, dtype=float)
        std = np.asarray(std, dtype=float)
        var = std ** 2
        log_pdf = -0.5 * np.log(2 * np.pi * var) - np.square(samples - mu) / (2 * var)
        return np.sum(log_pdf, axis=1) + np.log(prior)

    def rate(hits, total):
        # Fraction of correct predictions; NaN when there is nothing to score.
        return hits / total if total else float('nan')

    if labels is None:
        # Backward-compatible default: labels of the notebook's test set.
        labels = testY
    labels = np.asarray(labels).reshape(-1)
    samples = np.asarray(testX, dtype=float)

    p_tro = log_joint(samples, paras["mu_tro"], paras["std_tro"], paras.get("y_1", 0.5))
    p_tsh = log_joint(samples, paras["mu_tsh"], paras["std_tsh"], paras.get("y_0", 0.5))
    # Ties go to class 0.
    predictions = (p_tro > p_tsh).astype(int)
    if predictions.size != labels.size:
        raise ValueError(
            "got %d samples but %d labels" % (predictions.size, labels.size))
    pred = predictions.tolist()

    correct = predictions == labels
    is_tsh = labels == 0
    is_tro = labels == 1
    acc = rate(np.sum(correct), labels.size)
    # Per-class accuracy is measured against the size of each class, so it
    # stays meaningful when the two classes are not equally frequent.
    acc_tsh = rate(np.sum(correct & is_tsh), np.sum(is_tsh))
    acc_tro = rate(np.sum(correct & is_tro), np.sum(is_tro))
    print('NAIVE BAYES RESULTS')
    print('Class Tshirt accuracy :',"%.2f" % (acc_tsh*100),'%')
    print('Class Trouser accuracy:',"%.2f" % (acc_tro*100),'%')
    print('Overall accuracy      :',"%.2f" % (acc*100),'%')
# Evaluate the naive Bayes classifier on the test features.
naive_bayes(testX=testX, paras=paras)

NAIVE BAYES RESULTS
Class Tshirt accuracy : 78.40 %
Class Trouser accuracy: 87.90 %
Overall accuracy      : 83.15 %


# LOGISTIC REGRESSION MODEL

In [166]:
def sigmoid(z):
    '''
    Logistic function 1 / (1 + e^(-z)), applied element-wise to z.
    '''
    decay = np.exp(-z)
    return 1 / (1 + decay)

def log_likelihood(y_pred, y):
    '''
    Bernoulli log likelihood of the labels y under the predicted
    probabilities y_pred, summed over all entries.

    Probabilities are first clamped to [eps, 1 - eps] so that neither
    logarithm is evaluated at zero.
    '''
    eps = 1e-7
    floor = np.full(y_pred.shape, eps)
    ceiling = np.full(y_pred.shape, 1-eps)
    clamped = np.maximum(floor, np.minimum(ceiling, y_pred))

    positive_part = y*np.log(clamped)
    negative_part = (1-y)*np.log(1-clamped)
    return np.sum(positive_part+negative_part)

def logistic_regression_train(trainX, trainY, epoches,learning_rate):
    '''
    Fit a logistic regression model with batch gradient ascent on the
    log likelihood.

    Parameters
    ----------
    trainX        : DataFrame or 2-D array, one row per sample, one column
                    per feature.
    trainY        : array of 0/1 labels, one per sample.
    epoches       : number of gradient-ascent steps to run.
    learning_rate : step size applied to the (un-normalised) summed gradient.

    Returns
    -------
    [w, w_0] where w is the (features, 1) weight array and w_0 the bias.
    The learned values are also printed.
    '''
    # Work with features as rows / samples as columns.
    trainX= trainX.T
    trainY = trainY.T
    n = trainX.shape[0]
    w = np.zeros((n,1))
    w_0 = 0
    for _ in range(epoches):
        z = np.dot(w.T,trainX)+w_0
        prob = sigmoid(z)
        # Gradient of the log likelihood: sum over samples of (y - p) * x
        # for the weights and of (y - p) for the bias. The log likelihood
        # value itself is not needed for the update, so it is not evaluated.
        diff = trainY - prob
        dw_0 = np.sum(diff)
        dw = np.dot(trainX, diff.T)
        w = w + learning_rate * dw
        w_0 = w_0 + learning_rate * dw_0
    # Unpacking prints one entry per weight, whatever the number of features.
    print('The weights obtained from gradient ascent:',*w)
    print('The bias value:',w_0)
    return [w, w_0]

# Train for 500 gradient-ascent steps with a step size of 0.01.
params = logistic_regression_train(trainX, trainY, epoches=500, learning_rate=0.01)



The weights obtained from gradient ascent: [-177.69004367] [189.13702829]
The bias value: -14.804176432569857


In [167]:
def logistic_regression_predict( testX, testY, paras):
    '''
    Classify samples with a trained logistic regression model and print the
    per-class and overall accuracy.

    Parameters
    ----------
    testX : DataFrame or 2-D array, one row per sample, one column per feature.
    testY : array of true 0/1 labels, one per sample.
    paras : [w, w_0] as returned by logistic_regression_train, where w is the
            (features, 1) weight array and w_0 the bias.

    Raises
    ------
    TypeError  if paras is a dict (e.g. the naive Bayes parameters) instead
               of the [w, w_0] pair.
    ValueError if the number of labels differs from the number of samples.
    '''
    if isinstance(paras, dict):
        raise TypeError("paras must be the [w, w_0] pair returned by "
                        "logistic_regression_train, not a dict")

    def rate(hits, total):
        # Fraction of correct predictions; NaN when there is nothing to score.
        return hits / total if total else float('nan')

    w = paras[0]
    w_0 = paras[1]
    labels = np.asarray(testY).reshape(-1)

    # Features as rows / samples as columns, matching the training code.
    z = np.dot(w.T,testX.T) + w_0
    prob = np.asarray(sigmoid(z)).reshape(-1)
    y_pred = (prob > 0.5) * 1.0
    if y_pred.size != labels.size:
        raise ValueError(
            "got %d samples but %d labels" % (y_pred.size, labels.size))

    correct = labels == y_pred
    is_tsh = labels == 0
    is_tro = labels == 1
    accuracy = rate(np.sum(correct), labels.size)
    # Per-class accuracy is measured against the size of each class, so it
    # stays meaningful when the two classes are not equally frequent.
    acc_tsh = rate(np.sum(correct & is_tsh), np.sum(is_tsh))
    acc_tro = rate(np.sum(correct & is_tro), np.sum(is_tro))
    print('LOGISTIC REGRESSION RESULTS')
    print('Class Tshirt accuracy :',"%.2f" % (acc_tsh*100),'%')
    print('Class Trouser accuracy:',"%.2f" % (acc_tro*100),'%')
    print('Overall accuracy      :',"%.2f" % (accuracy*100),'%')
# Pass the logistic-regression parameters (`params`), not the naive Bayes
# dictionary (`paras`).
logistic_regression_predict(testX,testY,params)

LOGISTIC REGRESSION RESULTS
Class Tshirt accuracy : 92.50 %
Class Trouser accuracy: 91.70 %
Overall accuracy      : 92.10 %
