# Neural & Behavioral Modeling - Week 12 (Exercises)
by 李彥廷 (b08207008@ntu.edu.tw)

In [1]:
%config IPCompleter.greedy=True 
%matplotlib inline
from matplotlib.pyplot import *
from IPython.display import *
import numpy as np
import torch as t
from torch import nn
from torch import optim

In [2]:
# Report the PyTorch build and choose the device used by the rest of the notebook.
print('PyTorch version:',t.__version__)
use_cuda=t.cuda.is_available()
if not use_cuda:
    # No CUDA device is visible: fall back to the CPU.
    device = t.device("cpu")
    print('No GPU')
else:
    # List every visible CUDA device, then report which one is active.
    for i in range(t.cuda.device_count()):
        print('Device ',i,':',t.cuda.get_device_name(i))
    print('Current: Device ',t.cuda.current_device())
    # Turn on cuDNN's benchmark mode before any model is built.
    t.backends.cudnn.benchmark = True
    device = t.device("cuda")

PyTorch version: 1.13.0
No GPU


## 1. RNN on images (4 points)
Note that the RNN-based MNIST classifier in 2.1.1 of model_12_examples.ipynb did not apply a softmax as the response function of its last layer.

Does adding a softmax make any difference in model prediction performance? Why or why not?

Refs:

[1] https://pytorch.org/docs/stable/nn.html 

[2] https://pytorch.org/docs/stable/nn.functional.html 

[3] https://pytorch.org/docs/stable/_modules/torch/nn/modules/loss.html

In [3]:
# Please run your computational experiments and write your observations accordingly.
from torchvision import datasets, transforms

# Preprocessing shared by both splits: convert each image to a tensor, then
# normalize it with mean 0.5 and std 0.5.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.5], std=[0.5]),
])

# Training split; downloaded into ./data when it is not already there.
train_set = datasets.MNIST(root="./data", train=True, download=True, transform=transform)
train_data = t.utils.data.DataLoader(train_set, shuffle=True, batch_size=64)

# Held-out split; read from the same ./data directory (no download requested here).
test_set = datasets.MNIST(root="./data", train=False, transform=transform)
test_data = t.utils.data.DataLoader(test_set, shuffle=True, batch_size=64)

In [46]:
# Supporting functions:

from datetime import datetime

def get_acc(output, label):
    """Return the fraction of rows in ``output`` whose top-scoring column equals ``label``.

    Args:
        output: 2-D tensor of per-class scores, one row per sample.
        label: 1-D tensor of target class indices, one per row of ``output``.

    Returns:
        A Python float: (number of correct predictions) / (number of rows).
    """
    # ``max(1)`` yields (values, indices); only the winning column index is needed.
    predicted = output.max(1)[1]
    hits = int((predicted == label).sum())
    return hits / output.shape[0]


def _run_epoch(device, net, data, criterion, optimizer=None):
    """Make one pass over ``data`` and return ``(mean_loss, accuracy)``.

    When ``optimizer`` is given, each batch is followed by a backward pass and
    an optimizer step. When it is ``None`` the pass is evaluation-only and runs
    with autograd disabled, so no graph is built.

    Both metrics are weighted by the number of samples in each batch, so a
    smaller final batch does not count as much as a full one. The loss
    weighting assumes ``criterion`` returns the mean over the batch (the
    default reduction of ``nn.CrossEntropyLoss``).

    Returns ``(nan, nan)`` when ``data`` yields no samples.
    """
    is_training = optimizer is not None
    loss_sum = 0.0
    num_correct = 0
    num_seen = 0
    with t.set_grad_enabled(is_training):
        for im, label in data:
            # Fold the channel axis into the batch axis: (bs, c, h, w) -> (bs * c, h, w).
            im = im.view(-1, im.shape[2], im.shape[3]).to(device)
            label = label.to(device)

            output = net(im)
            loss = criterion(output, label)

            if is_training:
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

            batch_size = output.shape[0]
            _, pred_label = output.max(1)
            loss_sum += loss.item() * batch_size
            num_correct += (pred_label == label).sum().item()
            num_seen += batch_size

    if num_seen == 0:
        return float("nan"), float("nan")
    return loss_sum / num_seen, num_correct / num_seen


def train(device, net, train_data, valid_data, num_epochs, optimizer, criterion):
    """Train ``net`` for ``num_epochs`` epochs and print one summary line per epoch.

    Args:
        device: ``torch.device`` that the model and every batch are moved to.
        net: module called on batches reshaped to ``(batch, height, width)``.
        train_data: iterable of ``(image, label)`` batches; images are 4-D.
        valid_data: iterable of ``(image, label)`` batches evaluated after each
            training epoch, or ``None`` to skip validation.
        num_epochs: number of passes over ``train_data``.
        optimizer: optimizer already bound to ``net``'s parameters.
        criterion: callable ``criterion(output, label)`` returning a scalar loss.

    Returns:
        None. Per-epoch loss/accuracy and the elapsed time are printed.
    """
    # Honor the caller's device choice instead of probing CUDA again; this keeps
    # the model and the batches on the same device.
    net = net.to(device)
    prev_time = datetime.now()
    for epoch in range(num_epochs):
        net.train()
        train_loss, train_acc = _run_epoch(device, net, train_data, criterion, optimizer)

        # The timestamp is taken before validation, so validation time is
        # reported as part of the following epoch.
        cur_time = datetime.now()
        h, remainder = divmod(int((cur_time - prev_time).total_seconds()), 3600)
        m, s = divmod(remainder, 60)
        time_str = "Time %02d:%02d:%02d" % (h, m, s)

        if valid_data is not None:
            net.eval()
            valid_loss, valid_acc = _run_epoch(device, net, valid_data, criterion)
            epoch_str = (
                "Epoch %d. Train Loss: %f, Train Acc: %f, Valid Loss: %f, Valid Acc: %f, "
                % (epoch, train_loss, train_acc, valid_loss, valid_acc))
        else:
            epoch_str = ("Epoch %d. Train Loss: %f, Train Acc: %f, " %
                         (epoch, train_loss, train_acc))
        prev_time = cur_time
        print(epoch_str + time_str)

In [47]:
# Variant of the model in 2.1.1 of model_12_examples.ipynb WITH a softmax on the output layer:

class RNN_1(nn.Module):
    """Single-layer RNN classifier whose output is passed through a softmax.

    The recurrent layer reads sequences of 28 features per step
    (``batch_first=True``, so inputs are ``(batch, seq_len, 28)``) into a
    128-unit hidden state; a linear layer maps the last time step to 10 scores,
    which are then normalized by a softmax over the class axis.

    Note: ``nn.CrossEntropyLoss`` applies log-softmax internally, so training
    this model with that loss applies softmax twice.
    """

    def __init__(self):
        super().__init__()
        self.rnn = nn.RNN(
            input_size=28,
            hidden_size=128,
            num_layers=1,
            batch_first=True,
        )
        self.classifier = nn.Linear(128, 10)
        self.Softmax = nn.Softmax(dim=1)

    def forward(self, input):
        """Return per-class probabilities of shape ``(batch, 10)``."""
        # The initial hidden state is left at its default.
        hidden_seq, _ = self.rnn(input)
        # Classify from the hidden state at the final time step only.
        last_step = hidden_seq[:, -1, :]
        return self.Softmax(self.classifier(last_step))

In [48]:
# The model in 2.1.1 of model_12_examples.ipynb (no softmax on the output layer):

class RNN_2(nn.Module):
    """Single-layer RNN classifier that returns raw class scores (logits).

    The recurrent layer reads sequences of 28 features per step
    (``batch_first=True``, so inputs are ``(batch, seq_len, 28)``) into a
    128-unit hidden state; a linear layer maps the last time step to 10 scores.
    """

    def __init__(self):
        super().__init__()
        self.rnn = nn.RNN(
            input_size=28,
            hidden_size=128,
            num_layers=1,
            batch_first=True,
        )
        self.classifier = nn.Linear(128, 10)

    def forward(self, input):
        """Return unnormalized class scores of shape ``(batch, 10)``."""
        # The initial hidden state is left at its default.
        hidden_seq, _ = self.rnn(input)
        # Classify from the hidden state at the final time step only.
        last_step = hidden_seq[:, -1, :]
        return self.classifier(last_step)

In [49]:
# Experiment A: the classifier with a softmax on its output, trained for 10 epochs
# with Adam (default settings) and cross-entropy loss; the test split is used for validation.
srn = RNN_1()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(srn.parameters())
print('Result of RNN with Softmax')
train(
    device=device,
    net=srn,
    train_data=train_data,
    valid_data=test_data,
    num_epochs=10,
    optimizer=optimizer,
    criterion=criterion,
)

Result of RNN with Softmax
Epoch 0. Train Loss: 1.804475, Train Acc: 0.672058, Valid Loss: 1.670232, Valid Acc: 0.796477, Time 00:00:04
Epoch 1. Train Loss: 1.675504, Train Acc: 0.788380, Valid Loss: 1.630095, Valid Acc: 0.833101, Time 00:00:04
Epoch 2. Train Loss: 1.650786, Train Acc: 0.811650, Valid Loss: 1.628164, Valid Acc: 0.833798, Time 00:00:04
Epoch 3. Train Loss: 1.631496, Train Acc: 0.831573, Valid Loss: 1.586569, Valid Acc: 0.877289, Time 00:00:04
Epoch 4. Train Loss: 1.581084, Train Acc: 0.882363, Valid Loss: 1.593996, Valid Acc: 0.870123, Time 00:00:04
Epoch 5. Train Loss: 1.567873, Train Acc: 0.895289, Valid Loss: 1.559551, Valid Acc: 0.902468, Time 00:00:04
Epoch 6. Train Loss: 1.562428, Train Acc: 0.899820, Valid Loss: 1.547130, Valid Acc: 0.915207, Time 00:00:04
Epoch 7. Train Loss: 1.554359, Train Acc: 0.907316, Valid Loss: 1.576294, Valid Acc: 0.886545, Time 00:00:04
Epoch 8. Train Loss: 1.547915, Train Acc: 0.914246, Valid Loss: 1.561148, Valid Acc: 0.901672, Time 0

In [50]:
# Experiment B: the classifier that returns raw scores (no softmax), trained for 10 epochs
# with Adam (default settings) and cross-entropy loss; the test split is used for validation.
srn = RNN_2()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(srn.parameters())
print('Result of RNN without Softmax')
train(
    device=device,
    net=srn,
    train_data=train_data,
    valid_data=test_data,
    num_epochs=10,
    optimizer=optimizer,
    criterion=criterion,
)

Result of RNN without Softmax
Epoch 0. Train Loss: 0.805994, Train Acc: 0.729028, Valid Loss: 0.436356, Valid Acc: 0.871218, Time 00:00:04
Epoch 1. Train Loss: 0.353045, Train Acc: 0.896938, Valid Loss: 0.290964, Valid Acc: 0.916700, Time 00:00:04
Epoch 2. Train Loss: 0.242571, Train Acc: 0.931187, Valid Loss: 0.200770, Valid Acc: 0.943969, Time 00:00:04
Epoch 3. Train Loss: 0.202838, Train Acc: 0.941831, Valid Loss: 0.155826, Valid Acc: 0.957305, Time 00:00:04
Epoch 4. Train Loss: 0.178245, Train Acc: 0.949910, Valid Loss: 0.179783, Valid Acc: 0.952030, Time 00:00:04
Epoch 5. Train Loss: 0.165665, Train Acc: 0.952775, Valid Loss: 0.148497, Valid Acc: 0.958997, Time 00:00:04
Epoch 6. Train Loss: 0.153037, Train Acc: 0.956773, Valid Loss: 0.149998, Valid Acc: 0.959992, Time 00:00:04
Epoch 7. Train Loss: 0.140425, Train Acc: 0.960854, Valid Loss: 0.158948, Valid Acc: 0.956608, Time 00:00:04
Epoch 8. Train Loss: 0.130926, Train Acc: 0.963386, Valid Loss: 0.127636, Valid Acc: 0.964670, Tim

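從以上的結果可以看到，加上 softmax 之後正確率略為下降（第 8 個 epoch 的驗證正確率約 0.90 對 0.96），且 Loss 在沒加 softmax 的 RNN 上明顯較小（約 0.13 對 1.55）。原因是 `nn.CrossEntropyLoss` 內部已經會對輸入做 log-softmax，所以在模型最後一層再加 softmax 等於連續做了兩次 softmax。第一次 softmax 之後的數值都會變小並被限制在 $[0, 1]$ 之間，各類別之間的差距最多只有 1；因此第二次 softmax 之後，正確類別的機率最高只能到 $e/(e+9)\approx 0.232$，Loss 的下限約為 $-\ln 0.232\approx 1.461$，無法趨近於 0。同時梯度也因此被壓縮，使得訓練收斂較慢、過程較不穩定，導致表現較差。以下展示了在一個假設的資料下，有經過和沒經過 softmax 轉換後的 loss 結果。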

In [77]:
# Toy example: four class scores for one sample whose true class is index 1.
im = t.tensor([2., 3., -1., 1.])
label = t.tensor(1)
cel = nn.CrossEntropyLoss()

# Case 1: scores are squashed by a softmax before being handed to the loss.
sm = nn.Softmax(dim=0)
output1 = sm(im)
# Case 2: raw scores are handed to the loss unchanged.
output2 = im

print('output with softmax', output1)
print('loss with softmax' ,cel(output1, label))
print()
print('output without softmax',im)
print('loss without softmax' ,cel(output2, label))

output with softmax tensor([0.2418, 0.6572, 0.0120, 0.0889])
loss with softmax tensor(1.0120)

output without softmax tensor([ 2.,  3., -1.,  1.])
loss without softmax tensor(0.4197)


## 2. RNN v.s. CNN (4 points)
Compare the results of 2.2.1 (RNN) & 2.3.1 (CNN) in model_12_examples.ipynb.

The 1D-CNN seems to predict the time series better than the RNN. Why?

You may verify your hypotheses by computational experiments.

In [9]:
# 2.2.1 (RNN):

In [10]:
# 2.3.1 (CNN):

In [11]:
# Please run your computational experiments and write your observations accordingly.