In [1]:
import time

import numpy as np
import pandas as pd

from copy import deepcopy

from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.model_selection import KFold

from matplotlib import pyplot as plt

In [2]:
def cross_entropy_loss(reference_Y, predicted_Y, eps=1e-15):
    """Mean element-wise binary cross-entropy between targets and predictions.

    For every element the term ``-(y * log(p) + (1 - y) * log(1 - p))`` is
    computed; the terms are averaged over the classes of each sample and then
    over the samples, which equals the mean over all elements.

    Parameters
    ----------
    reference_Y : array-like of shape (n_samples, n_classes)
        Ground-truth targets (for example one-hot rows).
    predicted_Y : array-like of shape (n_samples, n_classes)
        Predicted probabilities; must have the same shape as ``reference_Y``.
    eps : float, default=1e-15
        Predictions are clipped to ``[eps, 1 - eps]`` before taking the
        logarithm so that a probability of exactly 0 or 1 produces a large
        finite loss instead of ``inf``/``nan``.

    Returns
    -------
    float
        The averaged loss.

    Raises
    ------
    ValueError
        If the two inputs differ in shape or contain no elements.
    """
    reference = np.asarray(reference_Y, dtype=float)
    predicted = np.asarray(predicted_Y, dtype=float)

    if reference.shape != predicted.shape:
        raise ValueError(
            "reference_Y and predicted_Y must have the same shape, got {} and {}".format(
                reference.shape, predicted.shape
            )
        )

    if reference.size == 0:
        raise ValueError("cross_entropy_loss requires at least one sample")

    # log(0) is -inf and 0 * -inf is nan, so keep probabilities strictly inside (0, 1).
    predicted = np.clip(predicted, eps, 1 - eps)

    elementwise = reference * np.log(predicted) + (1 - reference) * np.log(1 - predicted)

    return -np.mean(elementwise)
    
def one_hot_encoding(values, classes=None):
    """Encode a sequence of labels as one-hot rows.

    Parameters
    ----------
    values : array-like
        Labels to encode; the input is flattened to one dimension.
    classes : array-like, optional
        Explicit class labels, one per output column, in column order.
        When omitted, the sorted unique labels found in ``values`` are used,
        so two separately encoded label sets only share a column layout if
        they contain the same labels. Pass ``classes`` to force a common
        layout (for example across training and validation splits).

    Returns
    -------
    numpy.ndarray of shape (n_values, n_classes)
        Float array with a single 1 per row in the column of that row's label.

    Raises
    ------
    ValueError
        If ``classes`` is given and a label in ``values`` is not part of it.
    """
    labels = np.asarray(values).reshape(-1)

    if classes is None:
        # return_inverse gives, for every label, its position among the sorted
        # unique labels, which is exactly the one-hot column index.
        class_labels, columns = np.unique(labels, return_inverse=True)
        columns = columns.reshape(-1)
        total_classes = class_labels.shape[0]
    else:
        class_list = np.asarray(classes).reshape(-1).tolist()
        total_classes = len(class_list)
        column_of = {label: column for column, label in enumerate(class_list)}
        try:
            columns = np.array([column_of[label] for label in labels.tolist()], dtype=int)
        except KeyError as error:
            raise ValueError(
                "value {!r} is not present in classes".format(error.args[0])
            ) from error

    outputs = np.zeros((labels.shape[0], total_classes))
    outputs[np.arange(labels.shape[0]), columns] = 1

    return outputs

class SoftmaxClassifier:
    """Softmax (multinomial logistic) classifier trained by per-sample gradient steps.

    Parameters
    ----------
    eta0 : float, default=0.01
        Constant learning rate applied to every update.
    fit_intercept : bool, default=True
        Whether a per-class bias is learned and added to the class scores.

    Attributes
    ----------
    coef_ : numpy.ndarray of shape (n_classes, n_features)
        Class weights; an empty array until the first ``partial_fit`` call.
    intercept_ : numpy.ndarray of shape (n_classes,)
        Class biases; an empty array until the first ``partial_fit`` call.
        Never updated (and never used) when ``fit_intercept`` is False.
    """

    def __init__(self, eta0 = 0.01, fit_intercept = True):

        self.eta0 = eta0
        self.fit_intercept = fit_intercept

        # Empty arrays mean "parameters not allocated yet".
        self.intercept_ = np.array([])
        self.coef_ = np.array([])

    def _softmax(self, S):
        """Return the softmax of ``S`` along its last axis.

        ``S`` may be a single score vector or a 2-D batch of score rows; each
        row is normalised independently.
        """
        S = np.asarray(S, dtype=float)

        # Shifting by the row maximum does not change the softmax value but
        # keeps np.exp from overflowing to inf (and the result from being nan).
        E = np.exp(S - np.max(S, axis=-1, keepdims=True))

        return E / E.sum(axis=-1, keepdims=True)

    def _scores(self, X):
        """Return the linear class scores for one sample (1-D) or a batch (2-D)."""
        scores = np.dot(X, self.coef_.T)

        if self.fit_intercept:
            scores = scores + self.intercept_

        return scores

    def partial_fit(self, X, Y):
        """Run one pass of per-sample gradient updates over ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training inputs.
        Y : array-like of shape (n_samples, n_classes)
            One-hot encoded targets.

        Notes
        -----
        Parameters are allocated (as zeros) on the first call and updated in
        place afterwards, so repeated calls continue training. Samples are
        visited in the order given.
        """
        X = np.asarray(X, dtype=float)
        Y = np.asarray(Y, dtype=float)

        n_classes = Y.shape[1]

        # Allocate only when nothing has been allocated yet; testing for an
        # all-zero array instead would confuse "untrained" with "still zero".
        if self.intercept_.size == 0:
            self.intercept_ = np.zeros(n_classes)

        if self.coef_.size == 0:
            self.coef_ = np.zeros((n_classes, X.shape[1]))

        for inputs, targets in zip(X, Y):

            # Gradient of the cross-entropy w.r.t. the scores is (softmax - target).
            step = self.eta0 * (self._softmax(self._scores(inputs)) - targets)

            if self.fit_intercept:
                self.intercept_ -= step

            # Row c of the outer product is step[c] * inputs, i.e. the update
            # for class c's weight vector.
            self.coef_ -= np.outer(step, inputs)

    def predict(self, X):
        """Return, for every row of ``X``, the index of the most probable class.

        The returned values are column indices of ``coef_`` (0 .. n_classes-1),
        not original label values. An empty ``X`` yields an empty array.
        """
        probabilities = self.predict_proba(X)

        if probabilities.size == 0:
            return np.array([])

        return np.argmax(probabilities, axis=1)

    def predict_proba(self, X):
        """Return class probabilities for every row of ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray of shape (n_samples, n_classes)
            Each row sums to 1. An empty ``X`` yields an empty array.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or the model has not been fitted yet.
        """
        X = np.asarray(X, dtype=float)

        if len(X) == 0:
            return np.array([])

        if X.ndim != 2:
            raise ValueError(
                "X must be 2-D (n_samples, n_features), got {} dimension(s)".format(X.ndim)
            )

        if self.coef_.size == 0:
            raise ValueError("SoftmaxClassifier is not fitted yet; call partial_fit first")

        return self._softmax(self._scores(X))

Read Training Data

In [3]:
# Load the training CSV from the working directory (comma-separated, the read_csv default).
training_set = pd.read_csv('fashion-mnist_train.csv')

Split Features and Labels

In [4]:
# The first column is taken as the label; every remaining column is a feature.
y = training_set.iloc[:, 0].values
X = training_set.iloc[:, 1:].values

K-Fold Cross Validation

In [5]:
# Experiment settings.
k_folds = 5
epochs = 60
learning_rate = 0.001

# A grayscale image has 255 as its maximum pixel value.
maximum_value = 255

k_fold = KFold(n_splits = k_folds, shuffle = False, random_state = None)

fold_accuracies = []

total_start = time.time()

for current_fold, (train_index, validation_index) in enumerate(k_fold.split(X, y), start = 1):

    start = time.time()

    X_training = X[train_index]
    X_validation = X[validation_index]
    y_training = y[train_index]
    y_validation = y[validation_index]

    # Rescale pixels from [0, 255] to [0.01, 1.0] so that no input is exactly zero.
    X_normalized_training = (X_training / maximum_value * 0.99) + 0.01
    X_normalized_validation = (X_validation / maximum_value * 0.99) + 0.01

    Y_encoded_training = one_hot_encoding(y_training)
    Y_encoded_validation = one_hot_encoding(y_validation)

    # A fresh classifier per fold; every epoch is one full pass over the training split.
    softmax_classifier = SoftmaxClassifier(eta0 = learning_rate, fit_intercept = True)

    for epoch in range(epochs):
        softmax_classifier.partial_fit(X_normalized_training, Y_encoded_training)

    # predict returns class indices, which are compared directly with the raw labels.
    y_validation_predicted = softmax_classifier.predict(X_normalized_validation)

    accuracy = accuracy_score(y_validation, y_validation_predicted)
    fold_accuracies.append(accuracy)

    finish = time.time()

    print("Fold: ", current_fold, " Accuracy: ", accuracy, " Time: ", finish - start, "s", sep = "")

total_finish = time.time()

accuracies = np.array(fold_accuracies)

print("=====================================================================")
print("Fold Mean: ", accuracies.mean(), " Time: ", (total_finish - total_start) / 60, " Mins", sep = "")

Fold: 1 Accuracy: 0.8561666666666666 Time: 261.74021339416504s
Fold: 2 Accuracy: 0.855 Time: 260.64692068099976s
Fold: 3 Accuracy: 0.8571666666666666 Time: 243.34243750572205s
Fold: 4 Accuracy: 0.8541666666666666 Time: 246.79336643218994s
Fold: 5 Accuracy: 0.8540833333333333 Time: 243.25916242599487s
Fold Mean: 0.8553166666666666 Time: 20.929760173956552 Mins
