## Imports

In [1]:
import pandas as pd
import numpy as np
import scipy
from scipy import sparse
from scipy.sparse import csr_matrix, lil_matrix
import pickle
import math

## Preparing Matrices

In [33]:
# the number of features
n = 61188
#number of classes
K = 20
#training instances
M = 12000

training_labels = np.load('../res/lr_labels.npy')
weights_matrix = np.random.rand(K, n + 1)
# weights_sparse is the W matrix in the pdf
weights_sparse = sparse.csr_matrix(weights_matrix)
training_data_sparse = sparse.load_npz('../res/lr_training_data.npz')
print(weights_sparse.shape)
print(training_data_sparse.shape)

XT = training_data_sparse.transpose()
print(scipy.sparse.issparse(XT))
print(XT.shape)


delta = lil_matrix(np.zeros((K, M)), dtype = np.int16)
for i,label in enumerate(training_labels):
    delta[label - 1, i] = 1
delta = delta.tocsr()
print(delta.shape)
# you see the index 1 mapping here: we need to be careful!
#how to getb the true class of a training examples
print(delta[:,0].nonzero()[0][0] + 1)
print(training_labels[0])
print(sparse.issparse(delta))

(20, 61189)
(12000, 61189)
True
(61189, 12000)
(20, 12000)
14
14
True


In [41]:
ETHA = 0.005
LAMBDA = 0.01
for i in range(100):
    #print(weights_sparse)    
    pxy = (weights_sparse * XT).todense()
    for j in range(M):
        pxy[:,j] /= np.sum(pxy[:,j])
    pxy = np.exp(pxy)
    for j in range(M):
        pxy[:,j] /= np.sum(pxy[:,j])
    #print(pxy)
    diff_exp = csr_matrix(delta - pxy)
    new_w = weights_sparse + ETHA * (diff_exp * training_data_sparse - weights_sparse.multiply(LAMBDA))
    weights_sparse = new_w
    #print('new')
    #np.nan_to_num(weights_sparse, copy=False)



# Predictions


In [44]:
print(weights_sparse.shape)
print(weights_sparse)


(20, 61189)
  (0, 0)	-1.2703401135574384
  (0, 1)	-8.441419116835922
  (0, 2)	150.34149049550697
  (0, 3)	-1.6016214406555125
  (0, 4)	23.330566411874504
  (0, 5)	-40.74334982867327
  (0, 6)	0.6423139877735796
  (0, 7)	-2.142859887464675
  (0, 8)	-11.85714839326962
  (0, 9)	78.12838494453429
  (0, 10)	-0.2639643650052481
  (0, 11)	263.49222804783795
  (0, 12)	9.721247965672609
  (0, 13)	1.3540276150655894
  (0, 14)	14.450005500175271
  (0, 15)	-4.680598297063238
  (0, 16)	146.30306981282877
  (0, 17)	4.195894417558818
  (0, 18)	6.708242438431734
  (0, 19)	9.547424866773257
  (0, 20)	1.4423034348004347
  (0, 21)	2.145685315465034
  (0, 22)	-80.47964817857547
  (0, 23)	2.621351287779854
  (0, 24)	8.589272988063131
  :	:
  (19, 61164)	0.3188975532008873
  (19, 61165)	0.3230641040624321
  (19, 61166)	0.24570273666050188
  (19, 61167)	0.12810586278571143
  (19, 61168)	0.34169167167955483
  (19, 61169)	2.1190506471025943
  (19, 61170)	1.9354101135262642
  (19, 61171)	1.8540255950316407
  (19

In [45]:
predictions = (weights_sparse * XT).todense()
for j in range(M):
    predictions[:,j] /= np.sum(predictions[:,j])
predictions = np.exp(predictions)
for j in range(M):
    predictions[:,j] /= np.sum(predictions[:,j])
for i in range(20):
    #print(predictions[:,i])
    print(np.argmax(predictions[:,i]) , end= ', ')
    print(delta[:,i].nonzero())

5, (array([13], dtype=int32), array([0], dtype=int32))
15, (array([15], dtype=int32), array([0], dtype=int32))
18, (array([7], dtype=int32), array([0], dtype=int32))
11, (array([10], dtype=int32), array([0], dtype=int32))
11, (array([11], dtype=int32), array([0], dtype=int32))
17, (array([6], dtype=int32), array([0], dtype=int32))
16, (array([14], dtype=int32), array([0], dtype=int32))
1, (array([5], dtype=int32), array([0], dtype=int32))
17, (array([4], dtype=int32), array([0], dtype=int32))
14, (array([4], dtype=int32), array([0], dtype=int32))
15, (array([15], dtype=int32), array([0], dtype=int32))
13, (array([3], dtype=int32), array([0], dtype=int32))
5, (array([5], dtype=int32), array([0], dtype=int32))
17, (array([0], dtype=int32), array([0], dtype=int32))
15, (array([1], dtype=int32), array([0], dtype=int32))
8, (array([6], dtype=int32), array([0], dtype=int32))
0, (array([3], dtype=int32), array([0], dtype=int32))
16, (array([16], dtype=int32), array([0], dtype=int32))
1, (arra