# Note:
- This notebook file may contain methods or algorithms that are NOT covered by the teaching content of BT4222 and hence will not be assessed in your midterm exam.
- It serves to increase your exposure, in both depth and breadth, to the practical methods for addressing the specific project topic. We believe it will be helpful for your current project and also for your future internship endeavors.

# **Import Library**

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset, DataLoader
import torch.optim as optim
from torchvision import datasets, transforms
from torch.optim.lr_scheduler import StepLR

# **Define Network Structure**
Here, I use a 2-layer CNN for classification. The network structure is as follows:

```
conv1.weight     torch.Size([32, 1, 3])
conv1.bias       torch.Size([32])
Bn1.weight       torch.Size([32])
Bn1.bias         torch.Size([32])
Bn1.running_mean         torch.Size([32])
Bn1.running_var          torch.Size([32])
Bn1.num_batches_tracked          torch.Size([])
conv2.weight     torch.Size([32, 32, 3])
conv2.bias       torch.Size([32])
Bn2.weight       torch.Size([32])
Bn2.bias         torch.Size([32])
Bn2.running_mean         torch.Size([32])
Bn2.running_var          torch.Size([32])
Bn2.num_batches_tracked          torch.Size([])
fc1.weight       torch.Size([5, 384])
fc1.bias         torch.Size([5])
```

self.conv1 = nn.Conv1d(1, 32, 3, 1, 1, bias=True): The first layer is a 1D convolutional layer that takes an input with 1 channel and outputs 32 channels. The kernel size is 3, stride is 1 and padding is 1. If the input size to this layer is (batch_size, 1, L), the output size would be (batch_size, 32, L), given the padding is set to 1 to preserve the length of data. (L is 50 and batch_size is 640 here)

self.Bn1 = nn.BatchNorm1d(32): The next layer is a 1D batch normalization layer. Batch normalization helps to stabilize the learning process and reduces the number of training steps required. It achieves this by normalizing the output of the previous layer.

self.pool1 = nn.AvgPool1d(kernel_size=2, stride=2): This is an average pooling layer. Pooling is used to reduce the spatial dimensions of the data while preserving the most important features. It applies a 1D sliding window of size 2 (the kernel size) over the input, taking strides of 2, and returns the average value in each window.

self.fc1 = nn.Linear(32\*12, 5, bias=True): This is a fully connected (linear) layer that takes a 1D tensor with a length of 32*12 (the number of channels from the previous layer multiplied by the length of the data after two pooling operations) and outputs a tensor of length 5.

x = torch.flatten(x, 1): Before the output is passed to the fully connected layer, it needs to be flattened from a 3D tensor to a 2D tensor. This is because a fully connected layer expects inputs to be of shape (batch_size, num_features). Here, the flatten operation keeps the first (batch) dimension and merges all remaining dimensions, so each sample's (32, 12) feature map becomes a single vector of 32*12 = 384 values.



In [None]:
class Net(nn.Module):
    """Two-block 1D CNN that maps a single-channel sequence to 5 class scores.

    Each block is Conv1d -> BatchNorm1d -> ReLU -> AvgPool1d(2). The linear
    head expects 32 * 12 flattened features, so the sequence must be 12 steps
    long after the two pooling stages (e.g. an input of length 50:
    50 -> 25 -> 12).
    """

    def __init__(self):
        super().__init__()

        # Block 1: 1 -> 32 channels. Kernel 3 with stride 1 and padding 1
        # keeps the sequence length; the pooling layer then halves it.
        self.conv1 = nn.Conv1d(
            in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1, bias=True
        )
        self.Bn1 = nn.BatchNorm1d(num_features=32)
        self.pool1 = nn.AvgPool1d(kernel_size=2, stride=2)

        # Block 2: same layout as block 1, but 32 -> 32 channels.
        self.conv2 = nn.Conv1d(
            in_channels=32, out_channels=32, kernel_size=3, stride=1, padding=1, bias=True
        )
        self.Bn2 = nn.BatchNorm1d(num_features=32)
        self.pool2 = nn.AvgPool1d(kernel_size=2, stride=2)

        # Classification head: 32 channels * 12 remaining steps -> 5 outputs.
        self.fc1 = nn.Linear(in_features=32 * 12, out_features=5, bias=True)

    def forward(self, x):
        """Return the raw (un-normalised) class scores, shape (batch_size, 5).

        ``x`` is fed straight into ``conv1``, so it must be laid out as
        (batch_size, 1, length).
        """
        # Convolution -> batch norm -> ReLU -> average pooling, twice.
        features = self.pool1(F.relu(self.Bn1(self.conv1(x))))
        features = self.pool2(F.relu(self.Bn2(self.conv2(features))))

        # Merge the channel and length axes; the linear layer needs one
        # feature vector per sample, i.e. a (batch_size, num_features) input.
        flat = torch.flatten(features, start_dim=1)
        return self.fc1(flat)

# **Training and Testing**

In [None]:
def train(args, model, device, train_loader, optimizer, epoch):
    """Run one optimisation pass over ``train_loader`` and log progress.

    Args:
        args: Settings object; only ``args.log_interval`` (number of batches
            between two log lines) is read here.
        model: Network to optimise; it is switched to training mode.
        device: Device every batch is moved to before the forward pass.
        train_loader: Yields ``(data, target)`` batches. One is subtracted
            from the targets before the loss is computed, so labels are
            expected to start at 1.
        optimizer: Optimiser that updates the parameters of ``model``.
        epoch: Epoch number; used only in the log line.
    """
    model.train()

    for batch_idx, (inputs, labels) in enumerate(train_loader):
        inputs = inputs.to(device)
        # Shift labels from 1..5 to 0..4 and cast to long, the integer
        # class-index format that cross_entropy requires.
        labels = (labels.to(device) - 1).long()

        # Standard step: reset gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        loss = F.cross_entropy(model(inputs), labels)
        loss.backward()
        optimizer.step()

        if batch_idx % args.log_interval != 0:
            continue

        # Progress is estimated from the batch index and the current batch size.
        seen = batch_idx * len(inputs)
        total = len(train_loader.dataset)
        percent = 100. * batch_idx / len(train_loader)
        print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(epoch, seen, total, percent, loss.item()))



def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print loss and accuracy.

    Args:
        model: Network to evaluate; it is switched to evaluation mode.
        device: Device every batch is moved to before the forward pass.
        test_loader: Yields ``(data, target)`` batches and exposes the
            underlying dataset as ``test_loader.dataset``. One is subtracted
            from the targets, so labels are expected to start at 1.

    Returns:
        The number of correctly classified samples.
    """
    model.eval()  # Set the model to evaluation mode
    test_loss = 0.0
    correct = 0

    with torch.no_grad():  # No gradients are needed for evaluation
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            # Same label handling as in train(): shift 1..5 to 0..4 and cast
            # to long, which cross_entropy requires for class indices.
            target = (target - 1).long()
            output = model(data)

            # Accumulate the *summed* batch loss. Dividing the grand total by
            # the dataset size below then gives the true per-sample average,
            # even when the last batch is smaller than the others.
            test_loss += F.cross_entropy(output, target, reduction='sum').item()

            pred = output.argmax(dim=1, keepdim=True)  # Index of the highest score
            correct += pred.eq(target.view_as(pred)).sum().item()  # Count correct predictions

    total = len(test_loader.dataset)
    # Report zeros for an empty dataset instead of raising ZeroDivisionError.
    test_loss = test_loss / total if total else 0.0
    accuracy = 100. * correct / total if total else 0.0

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(test_loss, correct, total, accuracy))
    return correct  # Return the number of correctly classified samples


# The name matches pytest's default ``test*`` pattern; flag the function so
# that test collectors importing this code do not try to run it as a test.
test.__test__ = False


# **Hyperparameter**

1. epochs: The number of times the entire dataset is passed forward and backward through the neural network.

2. lr: Learning rate, which determines the step size at each iteration while moving towards a minimum in the loss function.

3. use_cuda: A boolean flag indicating whether to use CUDA (NVIDIA's parallel computing platform and API) for computations. This would be set to True if you want to utilize GPU acceleration.

4. gamma: Typically used in learning rate scheduling. It's a factor by which the learning rate is reduced at certain intervals or when certain conditions are met.

5. log_interval: The number of training batches between two consecutive progress log messages.

6. seed: A seed value for random number generators to ensure reproducibility of results.

For simple networks and small datasets, we typically set the learning rate to 1, the number of epochs to 10 and gamma to 0.7 for model training.

In [None]:
class Args:
    """Training settings, kept as plain class attributes."""

    # Reproducibility and hardware
    seed = 1            # value handed to the random number generator
    use_cuda = False    # True selects the "cuda" device, False the CPU

    # Optimisation schedule
    epochs = 10         # number of passes over the training set
    lr = 1.0            # initial learning rate given to the optimiser
    gamma = 0.7         # learning-rate decay factor used by the scheduler

    # Logging
    log_interval = 10   # batches between two training log lines


args = Args()

# **Load Data**

In [None]:
from google.colab import drive

import gdown

# Google Drive file id of every tensor file, keyed by the local filename it is
# saved under. Entries are downloaded in the order listed.
drive_file_ids = {
    'train_vectors.pt': '1CCIfElCaURQbuYvHZiL445UQIRzmmuM7',
    'train_labels.pt': '1bwkg7XdmH6Mkp_tkAakCbxJMWNAXJU43',
    'test_vectors.pt': '1fprUkqC9Qb-y1eDRZt0gA4-4gS941TUo',
    'test_labels.pt': '1VwOqpW7DZPhqAGDrreVwhtzCB2lUc_LD',
}

for output, file_id in drive_file_ids.items():
    url = f'https://drive.google.com/uc?id={file_id}'
    gdown.download(url, output, quiet=False)

# Disabled shell-based alternative for fetching the same four files:
#!wget --no-check-certificate 'https://drive.google.com/file/d/1CCIfElCaURQbuYvHZiL445UQIRzmmuM7/view?usp=share_link' -O train_vectors.pt
#!wget --no-check-certificate 'https://drive.google.com/file/d/1bwkg7XdmH6Mkp_tkAakCbxJMWNAXJU43/view?usp=share_link' -O train_labels.pt
#!wget --no-check-certificate 'https://drive.google.com/file/d/1fprUkqC9Qb-y1eDRZt0gA4-4gS941TUo/view?usp=sharing' -O test_vectors.pt
#!wget --no-check-certificate 'https://drive.google.com/file/d/1VwOqpW7DZPhqAGDrreVwhtzCB2lUc_LD/view?usp=sharing' -O test_labels.pt
#!ls

# Read the downloaded files back into memory.
# NOTE(review): torch.load unpickles the file contents, so these files must
# come from a trusted source.
train_vectors = torch.load('train_vectors.pt')
train_labels = torch.load('train_labels.pt')
test_vectors = torch.load('test_vectors.pt')
test_labels = torch.load('test_labels.pt')





Downloading...
From: https://drive.google.com/uc?id=1CCIfElCaURQbuYvHZiL445UQIRzmmuM7
To: /content/train_vectors.pt
100%|██████████| 80.0M/80.0M [00:00<00:00, 129MB/s]
Downloading...
From: https://drive.google.com/uc?id=1bwkg7XdmH6Mkp_tkAakCbxJMWNAXJU43
To: /content/train_labels.pt
100%|██████████| 3.20M/3.20M [00:00<00:00, 60.9MB/s]
Downloading...
From: https://drive.google.com/uc?id=1fprUkqC9Qb-y1eDRZt0gA4-4gS941TUo
To: /content/test_vectors.pt
100%|██████████| 20.0M/20.0M [00:00<00:00, 160MB/s]
Downloading...
From: https://drive.google.com/uc?id=1VwOqpW7DZPhqAGDrreVwhtzCB2lUc_LD
To: /content/test_labels.pt
100%|██████████| 801k/801k [00:00<00:00, 23.8MB/s]


# **Start training and testing**

In [None]:
# Seed the random number generator so that runs are repeatable.
torch.manual_seed(args.seed)

device = torch.device("cuda" if args.use_cuda else "cpu")
model = Net().to(device)

# Show the name and shape of every entry in the model's state dict.
for param_tensor, param_value in model.state_dict().items():
    print(param_tensor, "\t", param_value.size())

# Form the training and testing datasets and their loaders. Only the training
# data is shuffled.
train_dataset = TensorDataset(train_vectors, train_labels)
test_dataset = TensorDataset(test_vectors, test_labels)

train_loader = DataLoader(train_dataset, batch_size=640, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=640, shuffle=False)

# Adadelta optimiser; the scheduler multiplies its learning rate by
# args.gamma after every epoch (step_size=1).
optimizer = optim.Adadelta(model.parameters(), lr=args.lr)
scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)

# Model training. ACC tracks the best number of correctly classified test
# samples seen so far; a checkpoint is written whenever an epoch matches or
# beats it.
ACC = 0
for epoch in range(1, args.epochs + 1):
    train(args, model, device, train_loader, optimizer, epoch)
    ACC_ = test(model, device, test_loader)
    if ACC_ >= ACC:
        ACC = ACC_
        torch.save(model.state_dict(), "Baseline_CNN.pt")

    scheduler.step()

print(ACC)


conv1.weight 	 torch.Size([32, 1, 3])
conv1.bias 	 torch.Size([32])
Bn1.weight 	 torch.Size([32])
Bn1.bias 	 torch.Size([32])
Bn1.running_mean 	 torch.Size([32])
Bn1.running_var 	 torch.Size([32])
Bn1.num_batches_tracked 	 torch.Size([])
conv2.weight 	 torch.Size([32, 32, 3])
conv2.bias 	 torch.Size([32])
Bn2.weight 	 torch.Size([32])
Bn2.bias 	 torch.Size([32])
Bn2.running_mean 	 torch.Size([32])
Bn2.running_var 	 torch.Size([32])
Bn2.num_batches_tracked 	 torch.Size([])
fc1.weight 	 torch.Size([5, 384])
fc1.bias 	 torch.Size([5])

Test set: Average loss: 0.0000, Accuracy: 53967/100000 (54%)


Test set: Average loss: 0.0000, Accuracy: 54738/100000 (55%)


Test set: Average loss: 0.0000, Accuracy: 55216/100000 (55%)


Test set: Average loss: 0.0000, Accuracy: 55579/100000 (56%)


Test set: Average loss: 0.0000, Accuracy: 55638/100000 (56%)


Test set: Average loss: 0.0000, Accuracy: 55798/100000 (56%)


Test set: Average loss: 0.0000, Accuracy: 55789/100000 (56%)


Test set: Average lo