# Deep Learning
## Practical Deep Learning Tutorial with PyTorch - Tutorial N° 3

### 2020-2021



In [1]:
from torchvision import models

# Importing necessary libraries

In [10]:
import torch
import torch.nn as nn 
from torch.autograd import grad
import torch.nn.functional as F
import matplotlib.pyplot as plt
import numpy as np
from sklearn.datasets import make_blobs

# Adaline

1. Built ADALINE model using the nn.Module class 

In [4]:
class Adaline(torch.nn.Module):
    def __init__(self, num_features):
        super(Adaline, self).__init__()
        self.linear = torch.nn.Linear(num_features , 1) # une transformation lineare de la forme X.(W.transposé)
                                                        #X matrice de donnes qui a (num_features) variables
                                                        # chaque donne sera multiplie par un vecteur de poids 
                                                        # de taille (1,num_features), ca donne un veteur de taille
                                                        # (num_features,1)
        self.linear.weight.detach().zero_()  # changer les poids aleatoire à zero (pour l'initialisation)
        self.linear.bias.detach().zero_()  #meme chose pour le bias
        
    def forward(self, x):
        activations = self.linear(x)
        return activations.view(-1)

2. Using 'iris.txt', create a binary datasets in 2-D : The last 100 instances of iris described only by the 2nd and 3rd features
    
    Split the dataset into traing and test sets (70%,30%) 

    Normalize the dataset

In [5]:
import pandas as pd
df = pd.read_csv('C:/Users/PC/Desktop/iris.txt', index_col=None, header=None)    #lire le fichier iris
df.columns = ['x1', 'x2', 'x3', 'x4', 'y'] #renommer les colonnes
df = df.iloc[50:150]   #prendre que la 100 dernière données de la base, donc que les donnees des 2 dernieres classes
df['y'] = df['y'].apply(lambda x: 0 if x == 'Iris-versicolor' else 1) #coder la 2e classe par 0 la 3e par 1


# Assign features and target

X = torch.tensor(df[['x2', 'x3']].values, dtype=torch.float) #ne considerer que la 2e et 3e variables, c'est notre tensor X
y = torch.tensor(df['y'].values, dtype=torch.int) #le tensor y correeponds aux labels y

# Shuffling & train/test split
torch.manual_seed(123)
shuffle_idx = torch.randperm(y.size(0), dtype=torch.long)  # melanger les indices 
X, y = X[shuffle_idx], y[shuffle_idx]  # melanger les donnes, ca garde la corrspondance entre une donnée et son label
percent70 = int(shuffle_idx.size(0)*0.7)
X_train, X_test = X[shuffle_idx[:percent70]], X[shuffle_idx[percent70:]]  #70 premiers points pour training 
y_train, y_test = y[shuffle_idx[:percent70]], y[shuffle_idx[percent70:]]  # 30 dernieres données pour le test

# Normalize (mean zero, unit variance)

mu, sigma = X_train.mean(dim=0), X_train.std(dim=0) #normalization, soustraire la moyenne diviser par lecarte type
X_train =  (X_train - mu) / sigma   
X_test = (X_test - mu) / sigma 

In [6]:
model = Adaline(num_features=X_train.size(1))
model

Adaline(
  (linear): Linear(in_features=2, out_features=1, bias=True)
)

3. Train the model : we will use MSELoss (mean squared error (squared L2 norm)) as loss function. The optimizer is SGD (Stochastic Gradient Descent) with learning rate 0.01.

In [7]:
def train(model, x, y, num_epochs, learning_rate, seed):
    cost = []
    torch.manual_seed(seed)
    optimizer =  torch.optim.SGD(model.parameters() , lr=learning_rate)
    losst = torch.nn.MSELoss()
    for e in range(num_epochs):
        yhat = model.forward(x)  #calcul yhat
        loss =  losst(y , yhat)  #calcul the loss function using MSE
        print(loss.item() , 'loss')
        optimizer.zero_grad()                   # set the gradients to zero
        loss.backward()                         # calculer le gradients
        optimizer.step()                        # mise a jour des poids ####

In [8]:
train(model, X_train, y_train.float(),num_epochs=142,learning_rate=0.01,seed=123)

0.48571428656578064 loss
  allow_unreachable=True)  # allow_unreachable flag
0.4699629247188568 loss
0.45489606261253357 loss
0.4404822289943695 loss
0.42669153213500977 loss
0.4134954810142517 loss
0.40086689591407776 loss
0.3887799382209778 loss
0.37721002101898193 loss
0.3661336600780487 loss
0.35552868247032166 loss
0.3453736901283264 loss
0.33564844727516174 loss
0.3263336420059204 loss
0.317410945892334 loss
0.30886268615722656 loss
0.30067235231399536 loss
0.29282382130622864 loss
0.2853020429611206 loss
0.27809247374534607 loss
0.2711813747882843 loss
0.2645554840564728 loss
0.2582024037837982 loss
0.2521100640296936 loss
0.24626705050468445 loss
0.24066250026226044 loss
0.23528602719306946 loss
0.2301277220249176 loss
0.2251780778169632 loss
0.22042809426784515 loss
0.21586920320987701 loss
0.21149314939975739 loss
0.20729205012321472 loss
0.2032584398984909 loss
0.19938518106937408 loss
0.1956653892993927 loss
0.19209259748458862 loss
0.18866051733493805 loss
0.18536326289176

In [81]:
torch.mean( torch.tensor([1.,2.,3.,4.]) , dtype=torch.float32 )

tensor(2.5000)

4. Compute the model accuracy 

In [9]:
def custom_where(cond, x_1, x_2):
    
    return (cond * x_1) + (torch.logical_not(cond) * x_2)

train_pred = model.forward(X_train)

train_acc = torch.mean(

    (custom_where(train_pred > 0.5, 1, 0).int() == y_train).float()

)
test_pred = model.forward(X_test)
test_acc = torch.mean((custom_where(test_pred > 0.5, 1, 0).int() == y_test).float())
print('Training Accuracy: %.2f' % (train_acc*100))
print('Test Accuracy: %.2f' % (test_acc*100))
print('Weights', model.linear.weight)
print('Bias', model.linear.bias)

Training Accuracy: 92.86
Test Accuracy: 93.33
Weights Parameter containing:
tensor([[-0.0195,  0.3655]], requires_grad=True)
Bias Parameter containing:
tensor([0.4581], requires_grad=True)


# Perceptron

5. Built a Perceptron model using nn.Module class

In [80]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
def custom_where(cond, x_1, x_2):
        return (cond * x_1) + (torch.logical_not(cond) * x_2)

class Perceptron():
    def __init__(self, num_features):
        self.num_features = num_features
        self.weights = torch.zeros(num_features, 1, dtype=torch.float32, device=device)
        self.bias = torch.zeros(1, dtype=torch.float32, device=device)

    def forward(self, x):
        predictions = torch.add( torch.mm(x , self.weights) , self.bias )
        predictions = custom_where(predictions > 0. , 1 , 0).float()
        return predictions
        
    def backward(self, x, y):
        predictions = self.forward(x)
        errors = y - predictions
        return errors
        
    def train(self, x, y, epochs):
        for e in range(epochs):
            for i in range(y.shape[0]):
                errors = self.backward( x[i].view(1 , self.num_features) , y[i] ).view(-1)
                self.weights += (errors * x[i]).view(self.num_features, 1)  
                self.bias += errors 
                
    def evaluate(self, x, y):
        predictions = self.forward(x).reshape(-1)
        accuracy = torch.sum(predictions == y).float() / y.shape[0]
        return accuracy

6. Load the 'perceptron_toydata' dataset

    Split the dataset into train and test sets
    
    Normalize the data

In [81]:
df = pd.read_csv('C:/Users/PC/Desktop/toy_data.txt', index_col=None, header=None , delimiter='\t')
df.columns = ['x1', 'x2', 'y']
X = torch.tensor(df[['x1', 'x2']].values, dtype=torch.float) 
y = torch.tensor(df['y'].values, dtype=torch.int) 
print('Class label counts:', torch.bincount(y))
print('X.shape:', X.shape)
print('y.shape:', y.shape)

# Shuffling & train/test split
shuffle_idx = torch.randperm(y.size(0), dtype=torch.long)
X, y = X[shuffle_idx], y[shuffle_idx]
percent70 = int(shuffle_idx.size(0)*0.7)
X_train, X_test = X[shuffle_idx[:percent70]], X[shuffle_idx[percent70:]]
y_train, y_test = y[shuffle_idx[:percent70]], y[shuffle_idx[percent70:]]
# Normalize (mean zero, unit variance)
mu, sigma = X_train.mean(axis=0), X_train.std(axis=0)
X_train = (X_train - mu) / sigma
X_test = (X_test - mu) / sigma
print(X_train.shape)
print(y_train.shape)
print(X_test.shape)
print(y_test.shape)

Class label counts: tensor([50, 50])
X.shape: torch.Size([100, 2])
y.shape: torch.Size([100])
torch.Size([70, 2])
torch.Size([70])
torch.Size([30, 2])
torch.Size([30])


7. Train the perceptron

In [64]:
model = Perceptron(num_features=2)

X_train_tensor = torch.tensor(X_train, dtype=torch.float32, device=device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32, device=device)

model.train(X_train_tensor, y_train_tensor, epochs=5)

print('Model parameters:')
print('  Weights: %s' % model.weights)
print('  Bias: %s' % model.bias)

  This is separate from the ipykernel package so we can avoid doing imports until
  after removing the cwd from sys.path.
Model parameters:
  Weights: tensor([[2.3745],
        [0.9723]])
  Bias: tensor([-1.])


8. evaluate the model (accuracy)

In [65]:
X_test_tensor = torch.tensor(X_test, dtype=torch.float32, device=device)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32, device=device)

test_acc = model.evaluate(X_test_tensor, y_test_tensor)
print('Test set accuracy: %.2f%%' % (test_acc*100))

Test set accuracy: 96.67%
  """Entry point for launching an IPython kernel.
  


# Multi Layer Perceptron

Unlike the single-layer perceptron, the Multi Layer Perceptron models have hidden layers
between the input and the output layers. After every hidden layer, an activation function 
is applied to introduce non-linearity. 

9. Built a simple Multi Layer Perceptron model withe one hidden layer. 
After the hidden layer, we will use ReLU as activation before the information is sent to the output layer.
As an output activation function, we will use Sigmoid. 

In [94]:
class MultilayerPerceptron(torch.nn.Module):
    def __init__(self, num_features,num_hidden_1):
        super(MultilayerPerceptron, self).__init__()
        self.fct = nn.Linear(num_features , num_hidden_1)
        self.relu = nn.ReLU()
        self.fct2 = nn.Linear(num_hidden_1 , 1)
        self.sgm = nn.Sigmoid()
        

    def forward(self, x):
        x = self.relu(self.fct(x))
        out = self.sgm(self.fct2(x))
        return out


10. Create a random datasets and assign binary labels {0,1}

In [95]:
def blob_label(y, label, loc): # assign labels
    target = np.copy(y)
    for l in loc:
        target[y == l] = label
    return target
x_train, y_train = make_blobs(n_samples=40, n_features=2, cluster_std=1.5, shuffle=True)
x_train = torch.FloatTensor(x_train)
y_train = torch.FloatTensor(blob_label(y_train, 0, [0]))
y_train = torch.FloatTensor(blob_label(y_train, 1, [1,2,3]))
x_test, y_test = make_blobs(n_samples=10, n_features=2, cluster_std=1.5, shuffle=True)
x_test = torch.FloatTensor(x_test)
y_test = torch.FloatTensor(blob_label(y_test, 0, [0]))
y_test = torch.FloatTensor(blob_label(y_test, 1, [1,2,3]))
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

torch.Size([40, 2])
torch.Size([40])
torch.Size([10, 2])
torch.Size([10])


11. Define the model with input dimension 2 and hidden dimension 10. 
Since the task is to classify binary labels, we can use as criterion BCELoss (Binary Cross Entropy Loss) : loss function.
The optimizer is SGD (Stochastic Gradient Descent) with learning rate 0.01.

In [96]:
model = MultilayerPerceptron(2, 10)
learning_rate = 0.01
criterion = nn.BCELoss()
optimizer = torch.optim.SGD(model.parameters() , lr=learning_rate)

12. Check the test loss before the model training and compare it with the test loss after the training.

In [97]:
model.eval()
y_pred = model(x_test)
before_train = criterion(y_pred.squeeze(), y_test)
print('Test loss before training' , before_train.item())

Test loss before training 1.057615041732788


In [98]:
model.train()
epoch = 20
for epoch in range(epoch):
    optimizer.zero_grad()
    y_pred = model(x_train)
    loss = criterion(y_pred.squeeze(), y_train)
    print('Epoch {}: train loss: {}'.format(epoch, loss.item()))
    # Backward pass
    loss.backward()
    optimizer.step()

Epoch 0: train loss: 0.6781933903694153
Epoch 1: train loss: 0.6343165636062622
Epoch 2: train loss: 0.5938801765441895
Epoch 3: train loss: 0.5567026734352112
Epoch 4: train loss: 0.5231706500053406
Epoch 5: train loss: 0.4925197660923004
Epoch 6: train loss: 0.4643898606300354
Epoch 7: train loss: 0.43858593702316284
Epoch 8: train loss: 0.4149189591407776
Epoch 9: train loss: 0.3932076394557953
Epoch 10: train loss: 0.37328004837036133
Epoch 11: train loss: 0.35497528314590454
Epoch 12: train loss: 0.338186651468277
Epoch 13: train loss: 0.3228713572025299
Epoch 14: train loss: 0.3087400496006012
Epoch 15: train loss: 0.2956806421279907
Epoch 16: train loss: 0.2835915982723236
Epoch 17: train loss: 0.2723812460899353
Epoch 18: train loss: 0.2619667947292328
Epoch 19: train loss: 0.2522739768028259


In [99]:
model.eval()
y_pred = model(x_test)
after_train = criterion(y_pred.squeeze(), y_test) 
print('Test loss after Training' , after_train.item())

Test loss after Training 1.4507834911346436
