###  Fully Connected Layer

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split 
from sklearn.metrics import classification_report, confusion_matrix ,  accuracy_score
from sklearn import datasets 
import math

<b>Toy data</b>

In [2]:
X=np.array([[1, 2], [4, 5], [12, 7], [16, 10], [20, 5], [5, 8]  ])
y=np.array([[0,1], [0,1], [1,0], [1,0],  [0,1], [1,0] ])
print(X.shape, y.shape)

(6, 2) (6, 2)


In [3]:
print(y)

[[0 1]
 [0 1]
 [1 0]
 [1 0]
 [0 1]
 [1 0]]


<b>Softmax</b>
$$Let, a=[a_0, a_1, a_2, ..]$$
$$S_i=\frac{e^{a_i}}{\sum_{k=0}^{N} {e^{a_k}}}$$

<b>First Task : Implement Softmax</b> <br>
1. Implement both ways: Using loop and Without using loop. <br>
2. What challanges did you face?
3. Check output manually.

In [4]:
z=np.array( [ [1.2, 1.6], [2.4, 3.4],  [3.8, 5.8],  [5.1, 7.8],  [4. , 6.6],   [3.4, 4.8]])
print(z)

[[1.2 1.6]
 [2.4 3.4]
 [3.8 5.8]
 [5.1 7.8]
 [4.  6.6]
 [3.4 4.8]]


In [5]:
#a=softmax(z)

# Using Loop

L=[]
def sign(a):
    d1=math.exp(a[0])
    d2=math.exp(a[1])
    d=d1+d2
    return [math.exp(a[0])/d,math.exp(a[1])/d];


for i in z:
    L.append(sign(i))

#print(L)
L=np.array(L)
print(L)

[[0.40131234 0.59868766]
 [0.26894142 0.73105858]
 [0.11920292 0.88079708]
 [0.06297336 0.93702664]
 [0.06913842 0.93086158]
 [0.19781611 0.80218389]]


In [6]:
from scipy.special import softmax
soft=softmax(z,axis=1)

#print(aa)

for i in soft:
    for j in i:
        print(j,end=' ')
    print()

0.4013123398875481 0.5986876601124522 
0.26894142136999505 0.7310585786300048 
0.11920292202211759 0.8807970779778826 
0.06297335605699647 0.9370266439430033 
0.06913842034334684 0.9308615796566532 
0.19781611144141834 0.802183888558582 


<b>Second Task : calculate feedforward output</b><br><br>
$A=softmax(X.w+b)$

Output should look like the following. <br>

[[0.401 0.599] <br>
 [0.269 0.731] <br>
 [0.119 0.881] <br>
 [0.063 0.937] <br>
 [0.069 0.931] <br>
 [0.198 0.802]]

In [7]:
b=np.array([0.5, 0.6])
w=np.array([ [0.1,0.2], [0.3,0.4] ])

In [8]:
z=np.dot(X,w)+b
print(z)

A=softmax(z,axis=1)
print(A)

[[1.2 1.6]
 [2.4 3.4]
 [3.8 5.8]
 [5.1 7.8]
 [4.  6.6]
 [3.4 4.8]]
[[0.40131234 0.59868766]
 [0.26894142 0.73105858]
 [0.11920292 0.88079708]
 [0.06297336 0.93702664]
 [0.06913842 0.93086158]
 [0.19781611 0.80218389]]


<b>Third Task : calculate log loss</b><br><br> 
Note: <b>without using any loop.</b> <br>
$$L_i = - y_i* \sum \log (A_i)$$
\begin{equation*}
L =\frac{1}{N} \sum_i L_i 
\end{equation*}

Output should look like the following. <br>
Loss: [0.513 0.313 2.127 2.765 0.072 1.62 ] <br>
average loss: 1.2351

In [9]:
Loss= np.sum(-y*np.log(A),axis=1, keepdims=True)
print("Loss\n ",Loss)
print()
print("Average Loss :", np.average(L))

Loss
  [[0.51301525]
 [0.31326169]
 [2.12692801]
 [2.76504356]
 [0.07164469]
 [1.62041741]]

Average Loss : 0.5


<b>Fourth Task : calculate gradient</b><br>
Note: <b>calculate dz, dw, db without using loop</b> </br>
$$dz=A-y$$ 
$$dw=\frac{1}{N} ( X^T.dz )$$   
$$db=\frac{1}{N} \sum dz $$ 

Dimension check: y-yh= (2xN).(NX2)=(2x2) 

dw should look like below, <br>
[[-4.452,  4.452], <br>
[-3.243,  3.243]])

db should look like below<br>
[[-0.313,  0.313]]

In [10]:
dz=A-y
print("Dz : \n",dz)

x=X.transpose()
dw=np.dot(x,dz)/len(dz)

print('Dw : \n',dw)

db=sum(dz)/len(dz)

print('Db : \n',db)

Dz : 
 [[ 0.40131234 -0.40131234]
 [ 0.26894142 -0.26894142]
 [-0.88079708  0.88079708]
 [-0.93702664  0.93702664]
 [ 0.06913842 -0.06913842]
 [-0.80218389  0.80218389]]
Dw : 
 [[-4.45217737  4.45217737]
 [-3.2433822   3.2433822 ]]
Db : 
 [-0.3134359  0.3134359]


<b>Fifth Task : Train the network using gradient descent optimization</b>

In [11]:
lr=0.01
epoch=1000

In [12]:
#init w and b randomly.
D=X.shape[1]
C=y.shape[1]
w = 0.01 * np.random.randn(D,C)
b = np.zeros((1,C))

 1. Init w and b randomly
 2. repeat the following
     3. calculate feedforward output
     4. calculate gradient
     5. update w and b (w=w-lr\*dw,  b=b-lr*db)
     6. calculate loss and keep track.

7. After training plot the loss
8. test prediction on the X data

<b>Digits Data</b>

In [20]:
datas=datasets.load_digits()
data=datas['data']
label=datas['target'] 

In [21]:
X,X_test, y, y_test=train_test_split(data, label, test_size = 0.2)
print('Training shape:', X.shape, y.shape)
print('Testing shape:',X_test.shape, y_test.shape)

Training shape: (1437, 64) (1437,)
Testing shape: (360, 64) (360,)


In [22]:
N,D=X.shape            #num_data, data_dimension
C=np.max(y)+1          #number of classes

In [23]:
C

10

In [25]:
y
y_onehot=np.zeros((len(y), C))
y_onehot[ range(len(y)), y]=1
print(y_onehot)
y=np.copy(y_onehot)

[[0. 0. 0. ... 0. 0. 1.]
 [0. 0. 0. ... 0. 0. 0.]
 [1. 0. 0. ... 0. 0. 0.]
 ...
 [0. 0. 1. ... 0. 0. 0.]
 [0. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]]


<b>Sixth Task : Train on the digits data [Note: y is not one-hot vector now]</b>
1. Train a two layer[Input, Output] network
2. Plot loss, tune learning rate, number of epochs
3. predict on test data and show accuracy.  [accuracy should be above 90%]
4. Play with the learning rate and number of epochs [try few values]