In [127]:
import numpy as np
import pandas as pd
from sklearn.preprocessing import LabelBinarizer
from scipy.special import softmax

encoder = LabelBinarizer()
from sklearn.metrics import classification_report
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.metrics import multilabel_confusion_matrix as confusion_matrix

In [128]:
data = pd.read_csv('../data/train.csv')
# n = 15000  # number of train data
m = 10  # number of validation data
c = 784  # number of features
n_classes = 10  # number of classes

train_set = data.sample(frac=0.9)
n = len(train_set)
X = train_set.loc[:, train_set.columns != 'label']
Y = train_set.loc[:, train_set.columns == 'label']

validation_set = data.drop(train_set.index)
validation_set_features = validation_set.loc[:, validation_set.columns != 'label']
validation_set_labels = validation_set.loc[:, validation_set.columns == 'label']

In [129]:
eta = 0.00001
mu = 0.00001

In [130]:
# loss function
def loss(X, Y, W):
    Z = - (np.matmul(X, W))
    return 1 / n * (np.trace(X @ W @ Y.T) + np.sum(np.log(np.sum(np.exp(Z), axis=1))))

In [131]:
def predict(x, W):
    z = - (np.matmul(x, W))
    p = softmax(z)  # probabilities for the item per class
    return np.argmax(p, axis=1)  # index of class with maximum probability

In [132]:
def gradient_descent(X, Y, W):
    Z = - (np.matmul(X, W))
    P = softmax(Z)
    return 1 / n * np.matmul(X.T, (Y - P)) + 2 * mu * W

In [133]:
def gradient(X, Y, max_iter = 50000):
    W = np.zeros((c, n_classes)) # Weights matrix c x 10 (number of features) x (number of classes)
    Y_oh = encoder.fit_transform(Y) # matrix of n x n_classes, we multiply features by weights and add bias

    step = 0
    while step < max_iter:
        step += 1
        result = gradient_descent(X, Y_oh, W)
        W = -eta * result

    return W

In [134]:
import time
start_time = time.perf_counter ()
W = gradient(X, Y)
end_time = time.perf_counter ()
print(end_time - start_time, "seconds")

  Z = - (np.matmul(X, W))


5159.883490958004 seconds


In [135]:
predictions = predict(validation_set_features, W)
predictions

  z = - (np.matmul(x, W))


array([8, 7, 0, ..., 7, 2, 8])

In [136]:
validation_set_labels

Unnamed: 0,label
10,8
18,7
21,6
36,3
45,6
...,...
41971,3
41972,4
41980,7
41981,2


In [137]:
report = classification_report(validation_set_labels, predictions)
precision, recall, fscore, support = score(validation_set['label'], predictions, average='weighted')
print('Precision : {}'.format(precision))
print('Recall    : {}'.format(recall))
print('F-score   : {}'.format(fscore))

Precision : 0.6908606112468503
Recall    : 0.6695238095238095
F-score   : 0.6444916681167219


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
