In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm import tqdm

# from .utils import *

In [101]:
class MulticlassLR():
    """Building blocks for multiclass (softmax) logistic regression.

    The parameters are a 2-D array ``theta`` with one row per class and one
    column per feature, where column 0 multiplies the constant bias feature
    that ``init_data`` prepends to every sample.

    Attributes:
        num_classes: number of classes given at construction time.
        theta: current parameter matrix, or None until assigned by the caller.
        theta_history: list reserved for parameter snapshots (starts empty).
        loss_history: list reserved for loss values (starts empty).
        X: bias-augmented design matrix set by ``init_data`` (None before).
        y: labels set by ``init_data`` (None before).
    """

    def __init__(self, num_classes):
        self.num_classes = num_classes
        self.theta = None
        self.theta_history = []
        self.loss_history = []
        self.X = None
        self.y = None

    def init_data(self, X, y):
        """Store the training data, prepending a column of ones to ``X``.

        Args:
            X: 2-D array-like of shape (num_samples, num_features).
            y: labels, one per row of ``X``; stored unchanged.
        """
        X = np.asarray(X)
        bias = np.ones((X.shape[0], 1))
        self.X = np.concatenate((bias, X), axis=1)
        self.y = y

    def indicator(self, i, j):
        """Return 1 when ``i == j`` and 0 otherwise."""
        return 1 if i == j else 0

    def probab(self, theta, x_i, class_no):
        """Softmax probability of class ``class_no`` for the sample ``x_i``.

        Args:
            theta: parameter matrix, one row per class.
            x_i: bias-augmented feature vector.
            class_no: row index of the class of interest.

        Returns:
            exp(theta[class_no] . x_i) / sum_k exp(theta[k] . x_i)
        """
        scores = theta @ x_i.T
        # Subtracting the largest score leaves the ratio unchanged but keeps
        # np.exp from overflowing to inf (which would yield inf/inf = nan).
        exp_scores = np.exp(scores - np.max(scores))
        return exp_scores[class_no] / np.sum(exp_scores)

    def delta_J(self, theta, class_no):
        '''
        Gradient with respect to the parameters of class ``class_no``.

        Computes ``-(1/n) * sum_i X[i] * (1{y[i] == class_no} - p_i)`` where
        ``p_i`` is the softmax probability of ``class_no`` for sample ``i``
        under ``theta``. Only the ``theta`` argument is used for the
        parameters; ``self.theta`` is not read, so it does not need to be set.

        Args:
            theta: parameter matrix, one row per class.
            class_no: row index of the class of interest.

        Returns:
            1-D array with one entry per column of ``self.X``.
        '''
        num_samples = self.X.shape[0]
        # Scores for every sample and class at once: shape (n, num_classes).
        scores = self.X @ theta.T
        # Row-wise shift for numerical stability (see probab).
        exp_scores = np.exp(scores - scores.max(axis=1, keepdims=True))
        class_prob = exp_scores[:, class_no] / exp_scores.sum(axis=1)
        # 1.0 where the sample's label is class_no, else 0.0.
        is_class = (np.asarray(self.y) == class_no).astype(float)
        grad = -(self.X.T @ (is_class - class_prob))
        grad /= num_samples
        return grad

In [102]:
from sklearn.datasets import load_digits
from sklearn.preprocessing import MinMaxScaler

# Scaler configured to map each feature column into the range [-1, 1].
normalized = MinMaxScaler(feature_range=(-1,1))

# Load the scikit-learn digits dataset.
digits_dataset = load_digits()

# Feature matrix, rescaled column-wise by the scaler above; labels are left untouched.
data = digits_dataset['data']
data = normalized.fit_transform(data)
target = digits_dataset['target']

# Put the scaled features and the labels into one DataFrame, using the
# dataset's own feature names as column names.
df = pd.DataFrame(data, columns=digits_dataset['feature_names'])
df['target'] = target

# Split the frame back into features (X) and labels (y).
X = df.drop(['target'], axis=1)
y = df['target']

# Number of distinct target classes reported by the dataset.
num_classes = len(digits_dataset['target_names'])

In [103]:
X_arr = np.array(X)

In [104]:
y_arr = np.array(y)

In [105]:
X_arr[0]

array([-1.        , -1.        , -0.375     ,  0.625     ,  0.125     ,
       -0.875     , -1.        , -1.        , -1.        , -1.        ,
        0.625     ,  0.875     ,  0.25      ,  0.875     , -0.375     ,
       -1.        , -1.        , -0.625     ,  0.875     , -0.75      ,
       -1.        ,  0.375     ,  0.        , -1.        , -1.        ,
       -0.46666667,  0.5       , -1.        , -1.        ,  0.        ,
        0.06666667, -1.        , -1.        , -0.28571429,  0.        ,
       -1.        , -1.        ,  0.125     ,  0.14285714, -1.        ,
       -1.        , -0.5       ,  0.375     , -1.        , -0.875     ,
        0.5       , -0.125     , -1.        , -1.        , -0.75      ,
        0.75      , -0.375     ,  0.25      ,  0.5       , -1.        ,
       -1.        , -1.        , -1.        , -0.25      ,  0.625     ,
        0.25      , -1.        , -1.        , -1.        ])

In [106]:
lr = MulticlassLR(num_classes)

In [107]:
# Hand the data to the model; init_data stores X with a leading column of ones in lr.X.
lr.init_data(X_arr, y_arr)

In [108]:
# Number of feature columns in X_arr (at this point the matrix without the bias column).
num_features = X_arr.shape[1]
num_features

64

In [109]:
# Random parameter matrix for trying out the model: one row per class and
# num_features + 1 columns (the extra column matches the bias feature).
# NOTE(review): no random seed is set in the visible cells, so these values
# (and every output derived from them) change between runs; confirm whether that is intended.
theta_test = np.random.randn(num_classes, num_features+1)

In [110]:
theta_test.shape

(10, 65)

In [111]:
# First sample with a constant 1 prepended, so it has the same layout as the
# rows MulticlassLR.init_data builds (bias feature first).
x_trial = np.asarray([1] + list(X_arr[0]))

In [112]:
x_trial.shape

(65,)

In [113]:
lr.probab(theta_test, x_trial, 2)

0.5006549722436705

In [114]:
lr.theta = theta_test

In [115]:
lr.delta_J(theta_test, 2)

array([-0.02127934,  0.02127934,  0.00500134, -0.02296912, -0.01833715,
        0.00379665,  0.01699153,  0.02192766,  0.02128286,  0.02016662,
       -0.01519666, -0.01187734, -0.0264633 , -0.02016878,  0.04477518,
        0.02017109,  0.02127975,  0.0207238 ,  0.00959232,  0.05254451,
       -0.00530946, -0.06364482,  0.06443366,  0.03872745,  0.02128061,
        0.02127935,  0.05045264,  0.09800123,  0.00486506, -0.07136199,
        0.06524195,  0.07073519,  0.02127949,  0.02127934,  0.06021807,
        0.07105043, -0.06510188, -0.07891501,  0.07843899,  0.09443799,
        0.02127934,  0.02127938,  0.03561767,  0.02406466, -0.11028624,
       -0.04867673,  0.08891598,  0.07325673,  0.02072759,  0.02086206,
        0.01217125, -0.01313919, -0.07462343, -0.04076967,  0.01716248,
       -0.02158183,  0.01241862,  0.02016637,  0.0046573 , -0.02784688,
       -0.01127695, -0.00987265, -0.05082776, -0.05892349,  0.00202969])

In [116]:
X.shape

(1797, 64)

In [117]:
o = np.ones((X.shape[0], 1))
o.shape

(1797, 1)

In [118]:
np.concatenate((o, X), axis=1)

array([[ 1.   , -1.   , -1.   , ..., -1.   , -1.   , -1.   ],
       [ 1.   , -1.   , -1.   , ...,  0.25 , -1.   , -1.   ],
       [ 1.   , -1.   , -1.   , ...,  1.   ,  0.125, -1.   ],
       ...,
       [ 1.   , -1.   , -1.   , ..., -0.25 , -1.   , -1.   ],
       [ 1.   , -1.   , -1.   , ...,  0.5  , -1.   , -1.   ],
       [ 1.   , -1.   , -1.   , ...,  0.5  , -0.875, -1.   ]])

In [119]:
X.shape

(1797, 64)

In [120]:
X_arr[0]

array([-1.        , -1.        , -0.375     ,  0.625     ,  0.125     ,
       -0.875     , -1.        , -1.        , -1.        , -1.        ,
        0.625     ,  0.875     ,  0.25      ,  0.875     , -0.375     ,
       -1.        , -1.        , -0.625     ,  0.875     , -0.75      ,
       -1.        ,  0.375     ,  0.        , -1.        , -1.        ,
       -0.46666667,  0.5       , -1.        , -1.        ,  0.        ,
        0.06666667, -1.        , -1.        , -0.28571429,  0.        ,
       -1.        , -1.        ,  0.125     ,  0.14285714, -1.        ,
       -1.        , -0.5       ,  0.375     , -1.        , -0.875     ,
        0.5       , -0.125     , -1.        , -1.        , -0.75      ,
        0.75      , -0.375     ,  0.25      ,  0.5       , -1.        ,
       -1.        , -1.        , -1.        , -0.25      ,  0.625     ,
        0.25      , -1.        , -1.        , -1.        ])

In [121]:
X_arr

array([[-1.   , -1.   , -0.375, ..., -1.   , -1.   , -1.   ],
       [-1.   , -1.   , -1.   , ...,  0.25 , -1.   , -1.   ],
       [-1.   , -1.   , -1.   , ...,  1.   ,  0.125, -1.   ],
       ...,
       [-1.   , -1.   , -0.875, ..., -0.25 , -1.   , -1.   ],
       [-1.   , -1.   , -0.75 , ...,  0.5  , -1.   , -1.   ],
       [-1.   , -1.   ,  0.25 , ...,  0.5  , -0.875, -1.   ]])

In [124]:
theta_test.shape

(10, 65)

In [127]:
# Rebind X_arr to the matrix stored on the model, which carries the leading
# column of ones added by init_data.
# NOTE: from here on X_arr has one more column than before, so re-running
# earlier cells that read X_arr will not reproduce their recorded outputs.
X_arr = lr.X
X_arr.shape

(1797, 65)

In [140]:
np.dot(X_arr, theta_test.T).shape

(1797, 10)

In [142]:
(X_arr @ theta_test.T).shape

(1797, 10)

In [143]:
# Raw linear scores: one row per sample, one column per class.
mat_prod = X_arr @ theta_test.T

In [148]:
# Per-sample sum of the raw scores across classes.
# NOTE(review): these are sums of un-exponentiated scores (they can be negative),
# so dividing by them does not produce softmax probabilities.
sums = mat_prod.sum(axis=1)

In [151]:
mat_prod.shape

(1797, 10)

In [152]:
sums

array([-11.87080472,  -5.0907267 , -16.39695381, ..., -23.04242523,
       -32.28288923, -29.63008347])

In [153]:
1/sums

array([-0.08424029, -0.19643561, -0.06098694, ..., -0.04339821,
       -0.03097616, -0.03374948])

In [156]:
sums.shape

(1797,)

In [157]:
mat_prod[0]/sums[0]

array([-0.42625507,  0.49124055, -0.5953361 ,  0.65713184, -0.16732141,
        0.49002812, -0.58228576,  0.76860264,  0.26676136,  0.09743383])

In [160]:
mat_prod.max(axis=1).shape

(1797,)

In [195]:
def softmax(z):
    """Softmax over the last axis of ``z``, returned as a new float array.

    For a 2-D input each row is normalised independently so that its entries
    are positive and sum to 1. The input is never modified.

    Args:
        z: array-like of scores; integer input is converted to float.

    Returns:
        Array with the same shape as ``z`` holding the softmax values.
    """
    scores = np.asarray(z, dtype=float)
    # Subtract the per-row maximum before exponentiating: the result is
    # mathematically unchanged, but np.exp can no longer overflow.
    shifted = scores - scores.max(axis=-1, keepdims=True)
    exp_scores = np.exp(shifted)
    return exp_scores / exp_scores.sum(axis=-1, keepdims=True)
#     print(sums)
#     return z @ (1/sums)

In [196]:
# Small 3x2 float matrix used as input for the softmax check in the next cell.
tmp = np.array([
    [1.0, 2.0],
    [4.0, 0.0],
    [3.0, 0.0],
])
tmp

array([[1., 2.],
       [4., 0.],
       [3., 0.]])

In [197]:
softmax(tmp)

array([[0.26894142, 0.73105858],
       [0.98201379, 0.01798621],
       [0.95257413, 0.04742587]])