In [1]:
import time

import numpy as np
import pandas as pd

from copy import deepcopy

from sklearn.metrics import accuracy_score
from sklearn.metrics import log_loss
from sklearn.model_selection import KFold

from matplotlib import pyplot as plt

In [2]:
class SGDClassifier:
    """One-vs-rest logistic regression trained by per-sample gradient descent.

    One independent binary logistic model (a weight row plus an optional
    intercept) is kept for every class. ``predict_proba`` rescales the
    per-class sigmoid outputs so that each row sums to one, and ``predict``
    returns the label of the class with the highest score.

    Parameters
    ----------
    eta0 : float, default 0.01
        Constant learning rate applied to every per-sample update.
    fit_intercept : bool, default True
        Whether an intercept is learned and added to the decision value.

    Attributes
    ----------
    classes_ : ndarray
        Class labels, fixed by the first call to ``partial_fit``.
    n_classes_ : int
        Number of entries in ``classes_``.
    intercept_ : ndarray of shape (n_classes_,)
        Per-class intercepts (stay at zero when ``fit_intercept`` is False).
    coef_ : ndarray of shape (n_classes_, n_features)
        Per-class weight vectors.
    """

    def __init__(self, eta0 = 0.01, fit_intercept = True):

        self.eta0 = eta0
        self.fit_intercept = fit_intercept

        # Empty arrays mark the model as "not initialised yet"; the real
        # shapes are only known on the first partial_fit call.
        self.classes_ = np.array([])
        self.n_classes_ = 0

        self.intercept_ = np.array([])
        self.coef_ = np.array([])

    def _sigmoid(self, z):
        """Return the logistic function 1 / (1 + exp(-z)) of a scalar or array."""

        # exp(-log(1 + exp(-z))) is the same quantity; going through
        # logaddexp avoids the overflow np.exp(-z) hits for very negative z.
        return np.exp(-np.logaddexp(0, -z))

    def partial_fit(self, X, y, classes = None):
        """Run one pass of per-sample gradient updates over ``X`` for every class.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training inputs.
        y : array-like of shape (n_samples,)
            Training labels.
        classes : array-like, optional
            All class labels. Required on the first call; on later calls the
            labels stored in ``classes_`` are used and this argument is ignored.

        Raises
        ------
        ValueError
            If ``classes`` is missing on the first call, or if ``X`` and ``y``
            do not contain the same number of samples.
        """

        X = np.asarray(X, dtype = float)
        y = np.asarray(y)

        if X.shape[0] != y.shape[0]:
            raise ValueError("X and y must contain the same number of samples")

        # Initialisation is decided by size, not by .any(): an all-zero (or
        # label-0-only) array is a perfectly valid initialised state.
        if self.classes_.size == 0:
            if classes is None:
                raise ValueError("classes must be passed on the first call to partial_fit")
            self.classes_ = np.asarray(classes)
            self.n_classes_ = len(self.classes_)

        if self.intercept_.size == 0:
            self.intercept_ = np.zeros(self.n_classes_)

        if self.coef_.size == 0:
            self.coef_ = np.zeros((self.n_classes_, X.shape[1]))

        for n_class, classe in enumerate(self.classes_):

            # Binary one-vs-rest targets: 1 for the current class, 0 otherwise.
            # A direct comparison works for any label value or dtype.
            targets = (y == classe).astype(float)

            for inputs, target in zip(X, targets):

                z = np.dot(inputs, self.coef_[n_class])

                if self.fit_intercept:
                    z = z + self.intercept_[n_class]

                # Gradient of the log-loss w.r.t. z is (h - target); both the
                # intercept and the weights are updated from the same h.
                step = self.eta0 * (self._sigmoid(z) - target)

                if self.fit_intercept:
                    self.intercept_[n_class] -= step

                self.coef_[n_class] -= step * inputs

    def predict(self, X):
        """Return, for each row of ``X``, the label of the most probable class."""

        winners = np.argmax(self.predict_proba(X), axis = 1)

        # Map column indices back to the actual labels so that label sets
        # other than 0..n-1 are reported correctly.
        return self.classes_[winners]

    def predict_proba(self, X):
        """Return per-class scores for each row of ``X``, normalised to sum to one.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)

        Returns
        -------
        ndarray of shape (n_samples, n_classes_)
            Column ``j`` corresponds to ``classes_[j]``.

        Raises
        ------
        ValueError
            If the model has not been fitted yet.
        """

        if self.n_classes_ == 0:
            raise ValueError("This SGDClassifier has not been fitted yet; call partial_fit first")

        X = np.asarray(X, dtype = float)

        # All samples and all classes in one matrix product.
        scores = np.dot(X, self.coef_.T)

        if self.fit_intercept:
            scores = scores + self.intercept_

        activations = self._sigmoid(scores)

        totals = activations.sum(axis = 1, keepdims = True)

        # If every sigmoid underflowed to 0 the row cannot be normalised;
        # fall back to a uniform distribution instead of producing NaN.
        degenerate = totals[:, 0] == 0
        totals[degenerate] = 1.0

        outputs = activations / totals
        outputs[degenerate] = 1.0 / self.n_classes_

        return outputs

Read Training Data

In [3]:
# Load the training split from a comma-separated file; the relative path
# means the file is looked up in the notebook's working directory.
training_csv_path = 'fashion-mnist_train.csv'
training_set = pd.read_csv(training_csv_path, sep = ',')

Split Features and Labels

In [4]:
# Column 0 is taken as the label vector; every remaining column is a feature.
y = training_set.iloc[:, 0].values
X = training_set.iloc[:, 1:].values

K-Fold Cross Validation

In [5]:
# Cross-validation settings.
k_folds = 5
epochs = 60
learning_rate = 0.001

# Largest pixel intensity, used as the normalisation divisor
# (assumes 8-bit grayscale input, i.e. values in [0, 255]).
maximum_value = 255

# Folds are contiguous, unshuffled slices of the data, so the split is
# deterministic and needs no random seed.
k_fold = KFold(n_splits = k_folds, random_state = None, shuffle = False)

# Per-fold accuracies are collected in a list and converted once at the end,
# instead of reallocating an array with np.append on every fold.
fold_accuracies = []
current_fold = 0

# perf_counter is monotonic, so elapsed times cannot be distorted by
# system clock adjustments during the (long) run.
total_start = time.perf_counter()

for current_fold, (train_index, validation_index) in enumerate(k_fold.split(X, y), start = 1):

    start = time.perf_counter()

    X_training, X_validation = X[train_index], X[validation_index]
    y_training, y_validation = y[train_index], y[validation_index]

    # Rescale from [0, maximum_value] to [0.01, 1.0]. The 0.01 offset keeps
    # every input non-zero: the weight update is proportional to the input,
    # so a feature that is exactly 0 would contribute nothing to learning.
    X_normalized_training = (X_training / maximum_value * 0.99) + 0.01
    X_normalized_validation = (X_validation / maximum_value * 0.99) + 0.01

    # A fresh model per fold so that no weights leak between folds.
    one_vs_all_classifier = SGDClassifier(eta0 = learning_rate, fit_intercept = True)

    classes = np.unique(y_training)

    # Each partial_fit call is one full pass over the training fold.
    for epoch in range(epochs):

        one_vs_all_classifier.partial_fit(X_normalized_training, y_training, classes)

    y_validation_predicted = one_vs_all_classifier.predict(X_normalized_validation)

    accuracy = accuracy_score(y_validation, y_validation_predicted)

    fold_accuracies.append(accuracy)

    finish = time.perf_counter()

    print("Fold: " + str(current_fold) + " Accuracy: " + str(accuracy) + " Time: " + str((finish - start)) + "s")

total_finish = time.perf_counter()

accuracies = np.array(fold_accuracies)

print("=====================================================================")
print("Fold Mean: " + str(accuracies.mean()) + " Time: " + str((total_finish - total_start) / 60) + " Mins")

Fold: 1 Accuracy: 0.8545833333333334 Time: 398.61871457099915s
Fold: 2 Accuracy: 0.8513333333333334 Time: 375.110867023468s
Fold: 3 Accuracy: 0.85475 Time: 375.87298250198364s
Fold: 4 Accuracy: 0.85075 Time: 341.56918001174927s
Fold: 5 Accuracy: 0.8486666666666667 Time: 333.03755402565s
Fold Mean: 0.8520166666666666 Time: 30.403580244382223 Mins
