# Install dependencies

In [1]:
!pip install --quiet pandas
!pip install --quiet numpy

In [None]:
!pip install torch==1.7.1+cpu torchvision==0.8.2+cpu torchaudio==0.7.2 -f https://download.pytorch.org/whl/torch_stable.html

# Download data

In [2]:
!mkdir data

In [None]:
!wget https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv -P data
!ls -al data

# Import deps

In [4]:
import pandas as pd
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.utils.data as td

# Set the env

In [5]:
# Pick the first CUDA device when one is available, otherwise fall back to the CPU,
# and report the choice.
# NOTE(review): in the cells visible here nothing is moved to `device` afterwards.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("Running on the GPU" if device.type == "cuda" else "Running on the CPU")
print(device)

Running on the GPU
cuda:0


In [6]:
# Seed PyTorch's default generator and every CUDA device with 0 so that random
# draws made after this point are repeatable between runs.
torch.manual_seed(0)
torch.cuda.manual_seed_all(0)
# Record the installed PyTorch version next to the outputs.
print("Libraries imported - ready to use PyTorch", torch.__version__)

Libraries imported - ready to use PyTorch 1.9.0+cu111


# Loading and preparing data

In [7]:
# Read the penguins CSV, then discard every row that has a missing value.
df = pd.read_csv('./data/penguins.csv')
df = df.dropna()
df.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,MALE
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,FEMALE
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,FEMALE
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,FEMALE
5,Adelie,Torgersen,39.3,20.6,190.0,3650.0,MALE


## Encoding the labels

In [8]:
# Distinct species names found in the data.
possible_labels = df['species'].unique()
print(possible_labels)

# Give each species a consecutive integer id, starting at 0.
label_dict = {species: idx for idx, species in enumerate(possible_labels)}

print(label_dict)

['Adelie' 'Chinstrap' 'Gentoo']
{'Adelie': 0, 'Chinstrap': 1, 'Gentoo': 2}


In [9]:
# Translate each species name into its integer id from `label_dict`.
# `Series.map` yields an integer column directly; `Series.replace` on a string
# column only does so through implicit downcasting, which recent pandas
# releases deprecate (the column would otherwise stay object-typed and could
# not be turned into a tensor later).
df['label'] = df['species'].map(label_dict)
df.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex,label
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,MALE,0
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,FEMALE,0
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,FEMALE,0
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,FEMALE,0
5,Adelie,Torgersen,39.3,20.6,190.0,3650.0,MALE,0


In [10]:
# Numeric measurement columns fed to the model as inputs.
features = ['bill_length_mm', 'bill_depth_mm', 'flipper_length_mm', 'body_mass_g']
# Name of the integer-encoded target column created from `species`.
labels = 'label'

# Train and validation split

In [11]:
# Hold out 30% of the rows for testing; the remaining 70% are used for training.
# random_state is fixed so the split is the same on every run.
x_train, x_test, y_train, y_test = train_test_split(
    df[features].values,
    df[labels].values,
    test_size=0.30,
    random_state=0,
)
print('Training Set: %d, Test Set: %d \n' % (len(x_train), len(x_test)))

Training Set: 233, Test Set: 100 



In [12]:
# Prepare the data for the PyTorch model.
# Features are converted to float32 tensors and labels to int64 tensors
# directly, without routing the integer labels through a float tensor first.

# Training data: shuffle=True reshuffles the rows every epoch so the
# mini-batches differ from epoch to epoch. The dataset is tiny and already in
# memory, so it is served from the main process (num_workers=0) instead of
# paying for a worker process.
train_x = torch.as_tensor(x_train, dtype=torch.float32)
train_y = torch.as_tensor(y_train, dtype=torch.long)
train_ds = td.TensorDataset(train_x, train_y)
train_loader = td.DataLoader(train_ds, batch_size=20, shuffle=True, num_workers=0)

# Test data: order does not matter for evaluation, so it is left unshuffled.
test_x = torch.as_tensor(x_test, dtype=torch.float32)
test_y = torch.as_tensor(y_test, dtype=torch.long)
test_ds = td.TensorDataset(test_x, test_y)
test_loader = td.DataLoader(test_ds, batch_size=20, shuffle=False, num_workers=0)

# Deep neural network model

In [14]:
class NModel(nn.Module):
    """Fully connected classifier with two ReLU hidden layers.

    ``forward`` returns raw class scores (logits), not probabilities: the
    notebook trains with ``nn.CrossEntropyLoss``, which applies log-softmax
    itself, so applying softmax here as well would squash the scores twice.
    Use ``torch.softmax(logits, dim=1)`` on the output when probabilities
    are needed; the arg-max class is the same either way.

    Args:
        n_features: number of input features. Defaults to ``len(features)``
            (the notebook-level feature list) when omitted.
        n_hidden: width of each of the two hidden layers.
        n_classes: number of output classes.
    """

    def __init__(self, n_features=None, n_hidden=10, n_classes=3):
        super().__init__()
        if n_features is None:
            # Keep the original zero-argument behaviour: size the input layer
            # from the notebook's `features` list.
            n_features = len(features)
        # first layer takes input based on the feature space
        self.fc1 = nn.Linear(n_features, n_hidden)
        # second hidden layer has the same width as the first
        self.fc2 = nn.Linear(n_hidden, n_hidden)
        # third layer is the output layer: one score per class
        self.fc3 = nn.Linear(n_hidden, n_classes)

    def forward(self, x):
        """Return the per-class logits for a batch ``x``."""
        # first two layers use the ReLU activation function
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        # output layer stays linear: the loss function handles the softmax
        return self.fc3(x)
        
# Build the network with its default layer sizes and print its layer summary.
model = NModel()
print(model)

NModel(
  (fc1): Linear(in_features=4, out_features=10, bias=True)
  (fc2): Linear(in_features=10, out_features=10, bias=True)
  (fc3): Linear(in_features=10, out_features=3, bias=True)
)


# Define the loss function and optimizer

In [15]:
# Specify the loss criteria (CrossEntropyLoss for multi-class classification).
# train() and test() read this object through the global name `loss_criteria`.
loss_criteria = nn.CrossEntropyLoss()

# We are using the Adam optimizer over all of the model's parameters.
learning_rate = 0.001 # try different learning rate
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate) # setting the Adam algorithm
# Clear any stored gradients up front; train() also does this before every batch.
optimizer.zero_grad()

# Training function

In [20]:
def train(model, data_loader, optimizer, loss_fn=None):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        model: network to optimise; it is switched to training mode.
        data_loader: iterable yielding ``(data, target)`` batches.
        optimizer: optimiser that updates ``model``'s parameters.
        loss_fn: callable invoked as ``loss_fn(output, target)``. When
            omitted, the notebook-level ``loss_criteria`` is used.

    Returns:
        float: the sum of the batch losses divided by the number of batches.

    Raises:
        ValueError: if ``data_loader`` yields no batches.
    """
    if loss_fn is None:
        loss_fn = loss_criteria

    # Set the model to training mode
    model.train()

    # Batches are moved to wherever the model's parameters live, so the
    # function keeps working if the model has been moved to another device.
    first_param = next(model.parameters(), None)
    model_device = first_param.device if first_param is not None else None

    train_loss = 0.0
    batch_count = 0

    for data, target in data_loader:
        if model_device is not None:
            data, target = data.to(model_device), target.to(model_device)

        # feedforward step
        optimizer.zero_grad()
        out = model(data)
        loss = loss_fn(out, target)
        train_loss += loss.item()

        # backpropagation to improve the model performance
        loss.backward()
        optimizer.step()
        batch_count += 1

    if batch_count == 0:
        # Without this guard the average below would fail with an obscure error.
        raise ValueError("data_loader yielded no batches; cannot compute an average loss")

    # Average loss over the batches of this epoch
    avg_loss = train_loss / batch_count
    print('Training set: Average loss: {:.6f}'.format(avg_loss))

    # return result
    return avg_loss

# Test function

In [21]:
def test(model, data_loader):
    model.eval()
    
    test_loss = 0
    correct = 0

    with torch.no_grad():
        batch_count = 0
        for batch, tensor in enumerate(data_loader):
            batch_count += 1
            data, target = tensor
            
            # Get the predictions
            out = model(data)
            
            # calculate the loss
            test_loss += loss_criteria(out, target).item()
            
            # Calculate the accuracy
            _, predicted = torch.max(out.data, 1)
            correct += torch.sum(target==predicted).item()
            
        # Calculate the average loss and total accuracy for this epoch
        avg_loss = test_loss/batch_count
        
        print('Validation set: Average loss: {:.6f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        avg_loss, correct, len(data_loader.dataset), 100. * correct / len(data_loader.dataset)))
    
        # return average loss for the epoch
        return avg_loss

# Training

In each epoch, the full set of training data is passed forward through the network in mini-batches of 20 samples (12 batches per epoch for the 233 training rows).

Training a neural network typically consists of two phases: forward propagation and backpropagation.

During forward propagation, each neuron in a layer combines its inputs using its weights and bias, then applies an activation function to produce its output.
At the output layer, the loss function measures how far the model's predictions are from the true labels.

That’s where backpropagation comes in. Backpropagation computes the gradient of the loss with respect to every weight and bias, and the optimizer uses those gradients to adjust them, which eventually helps to produce a better model.

Also, at the end of each epoch, the validation data is passed through the network, and its loss and accuracy are also calculated. 

It's important to do this because it enables us to compare the performance of the model using data on which it was not trained, helping us determine if it will generalize well for new data or if it’s overfitted to the training data.

In [None]:
# Per-epoch metrics are collected in these lists
epoch_nums, training_loss, validation_loss = [], [], []

# Train over 50 epochs
epochs = 50
for epoch in range(1, epochs + 1):
    print('Epoch: {}'.format(epoch))

    # One optimisation pass over the training data, then an evaluation pass
    # over the held-out test data
    train_loss = train(model, train_loader, optimizer)
    test_loss = test(model, test_loader)

    # Log the metrics for this epoch
    epoch_nums.append(epoch)
    training_loss.append(train_loss)
    validation_loss.append(test_loss)

# Print out model weights and biases

In [None]:
# Print every entry of the model's state dict (weights and biases) as a NumPy array.
# The state dict is built once rather than once per entry, and each tensor is
# brought to the CPU first because `.numpy()` only works on CPU tensors.
for param_tensor, param_value in model.state_dict().items():
    print(param_tensor, "\n", param_value.cpu().numpy())