# Deep learning

In [1]:
labels = {'A':0,'B':1,'C':2,'D':3,'E':4,'F':5,'G':6,'H':7,'I':8,'W':9,'X':10,'Y':11}

In [2]:
#https://archive.ics.uci.edu/ml/datasets/Avila
data_train = []
with open('avila-tr.txt') as fs:
    for line in fs:
        data_train.append(line.strip())

In [3]:
data_test = []
with open('avila-ts.txt') as fs:
    for line in fs:
        data_test.append(line.strip())

In [4]:
data_test[0]

'-3.498799,0.250492,0.23207,1.224178,-4.922215,1.145386,0.182426,-0.165983,-0.123005,1.087144,W'

# Creating an architecture

<img src="data/neural_net.png">

<img src="data/linear_matrix.png">

In [5]:
import torch

In [6]:
import torch.nn as nn # neural network module in Pytorch

In [7]:
class Classifier(nn.Module):
    def __init__(self,input_size,hidden_size,output_size):
        super(Classifier,self).__init__()
        
        self.i2h = nn.Linear(input_size,hidden_size) # h = wx+b
        self.sigmoid = nn.Sigmoid() # activation    # h_1 = f(wx+b)
        #self.h2h = nn.Linear(hidden_size, hidden_size)
        self.h2o = nn.Linear(hidden_size,output_size) # o = f(w_1h_1+b_1)
        self.softmax = nn.Softmax(dim=1)
        
    def forward(self,input):
        x = self.i2h(input)
        x = self.sigmoid(x)
        #x = self.h2h(x)
        #x = self.sigmoid(x)
        x = self.h2o(x)
        x = self.sigmoid(x)
        output = self.softmax(x)
        return output

# Working with Tensors

In [8]:
# the only datatype the model will understand is tensor (multi dimensional matrix)
# You will have to convert the feature vector to a tensor 
# The entries in the tensor could be float, int,....
# https://pytorch.org/docs/stable/tensors.html (lists all the available types)

In [9]:
# general way of converting to tensor - torch.tensor(<list/numpy array>, dtype=<dtype>)
x = torch.tensor([1,2,3,4])
y = torch.tensor([1,2,3,4],dtype=torch.float32)
z = torch.FloatTensor([1,2,3,4])

In [10]:
y

tensor([1., 2., 3., 4.])

In [11]:
# similar to numpy there are different ways of creating tensors
x = torch.ones((1,8))
y = torch.zeros((1,2))

In [12]:
y

tensor([[0., 0.]])

In [13]:
# similar way of accessing elements as numpy
y[0][0] = 1.0

In [14]:
y

tensor([[1., 0.]])

In [15]:
# reshaping matrix
# We want to reshape the matrix of form 3x3 to 1x9
z = torch.tensor([[1,2,3],[4,5,6],[7,8,9]])
z.shape

torch.Size([3, 3])

In [16]:
w = z.reshape(1,9)

In [17]:
w.shape

torch.Size([1, 9])

In [18]:
# in many cases that we will encounter, we won't know beforehand the shape
# In the previous example lets say we do not know if the matrix is 3x3 or 4x4... 
# But we want to 'flatten' the tensor
# So the output would be 1xn.. depending on the shape of the input tensor
q = torch.ones((4,4))
t = torch.ones((8,8))

In [19]:
w = q.reshape(1,-1)

In [20]:
w.shape

torch.Size([1, 16])

In [21]:
w = t.reshape(2,-1)

In [22]:
w.shape

torch.Size([2, 32])

In [23]:
data_train[0]

'0.266074,-0.16562,0.32098,0.483299,0.17234,0.273364,0.371178,0.929823,0.251173,0.159345,A'

In [24]:
inp = list(map(float,data_train[0].split(',')[:-1]))

In [25]:
inp

[0.266074,
 -0.16562,
 0.32098,
 0.483299,
 0.17234,
 0.273364,
 0.371178,
 0.929823,
 0.251173,
 0.159345]

In [26]:
inp = torch.FloatTensor(inp).reshape(1,-1)

In [27]:
inp

tensor([[ 0.2661, -0.1656,  0.3210,  0.4833,  0.1723,  0.2734,  0.3712,  0.9298,
          0.2512,  0.1593]])

In [28]:
inp.shape

torch.Size([1, 10])

In [29]:
cls = Classifier(10,20,12) # cls = Classifier(10,24,6) 6-> number of classes

In [30]:
out = cls(inp)

In [31]:
out

tensor([[0.0743, 0.0831, 0.0884, 0.0806, 0.0808, 0.0783, 0.0820, 0.0856, 0.0794,
         0.0942, 0.0938, 0.0793]], grad_fn=<SoftmaxBackward>)

# Calculating gradients

In [32]:
x = torch.ones((1,7),requires_grad=True) # w = 

In [33]:
y = torch.sum(x*x) # + torch.sum(w*w) # z = f(y)

In [34]:
y.backward() # dy/dx, dy/dw

In [35]:
print(x.grad)

tensor([[2., 2., 2., 2., 2., 2., 2.]])


In [36]:
w = torch.ones((1,7))

In [37]:
t = w*w

In [38]:
t.backward()

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

# Training the model

In [None]:
criterion = nn.CrossEntropyLoss()

In [None]:
label = labels[data_train[0].split(',')[-1]]

In [None]:
label

In [None]:
label = torch.LongTensor([0])

In [None]:
label

In [None]:
loss = criterion(out,label)

In [None]:
loss

In [None]:
import torch.optim as optim # optimization module

In [None]:
optimizer = optim.SGD(cls.parameters(),lr=0.001) # optimizer for updating the parameters

In [None]:
optimizer.zero_grad() # flushes out any previously calculated gradient

In [None]:
loss.backward() # calculates gradient with respect to the parameters dL/dx

In [None]:
optimizer.step() # updates the parameters based on the optmization strategy x = x - lr*dL/dx

In [None]:
# putting it all together...
def train(data_train,cls):
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(cls.parameters(),lr=0.001)
    for d_ in data_train:
        inp = list(map(float,d_.split(',')[:-1]))
        inp = torch.FloatTensor(inp).view(1,-1)
        label = labels[d_.split(',')[-1]]
        label = torch.LongTensor([0])
        
        output = cls(inp)
        
        loss = criterion(output,label)
        optimizer.zero_grad() 
        loss.backward()
        optimizer.step() # updates the parameters wrt x = x - lr*dL/dx
        

In [None]:
train(data_train,cls)

# Testing the model

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
def test(data_test,cls):
    true_labels = []
    pred_labels = []
    
    cls.eval() # runs the model in evaluation mode
    
    with torch.no_grad(): # lets the model know that the gradients need not be claculated
        for d_ in data_test:
            inp = list(map(float,d_.split(',')[:-1]))
            inp = torch.FloatTensor(inp).view(1,-1)
            label = labels[d_.split(',')[-1]]
            true_labels.append(label)

            output = cls(inp)
            output = nn.functional.softmax(output,dim=1)
            p_label = output.argmax(dim=1)
            pred_labels.append(p_label.data.numpy())
            
            
    print(accuracy_score(true_labels,pred_labels))

In [None]:
test(data_test,cls)