# CIS6930 Week 2: Mini-batch sampling follow-up (for in-class demo)

In this notebook, you will learn how PyTorch components/functions handle "batched" samples.

---

Preparation: Go to `Runtime > Change runtime type` and choose `GPU` for the hardware accelerator.



## A magic command to check your assigned GPU

In [None]:
gpu_info = !nvidia-smi -L
gpu_info = "\n".join(gpu_info)
if gpu_info.find("failed") >= 0:
  print("Not connected to a GPU")
else:
  print(gpu_info)

GPU 0: Tesla P100-PCIE-16GB (UUID: GPU-c9328b96-4cf0-0d22-d40b-9a021ea2fd42)


## Libraries

In [None]:
import copy
import random
from time import time
from typing import Any, Dict

import numpy as np
import pandas as pd
import seaborn as sns
from sklearn.datasets import load_digits
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import Dataset, TensorDataset, DataLoader

In [None]:
# Scratch example, intentionally left disabled. As written it would not run:
# the loss class used in this notebook is `nn.CrossEntropyLoss`, and the loss
# is called as loss(prediction, target) -- see the training script below.
#pred = torch.FloatTensor([0.4, 0.6])
#true = torch.LongTensor([1])

#loss_func = nn.CrossEntropy()
#loss_func(true, pred)

## Code from the examples of the last class

### Logistic Regression

In [None]:
class LogisticRegression(nn.Module):
    """Logistic regression model implemented as a single linear layer.

    `forward` returns the raw output of the linear layer (one score per
    output unit); no softmax/sigmoid is applied inside the module.

    Args:
        num_input: Number of input features per sample.
        num_output: Number of output scores per sample.
    """

    def __init__(self, num_input, num_output):
        super().__init__()
        self.linear = nn.Linear(num_input, num_output)

    def forward(self, X):
        """Apply the linear layer to `X` and return the result."""
        return self.linear(X)

### Training script

In [None]:
# Train a logistic-regression classifier on the scikit-learn digits dataset
# with mini-batch SGD, track loss/accuracy per epoch on the train and
# validation splits, then report test accuracy and plot the learning curves.

## Configurations ======
n_epochs = 10
batch_size = 16

lr = 0.01
momentum = 0.

num_input = 64
num_output = 10

# Random Seeds
torch.manual_seed(0)
random.seed(0)
np.random.seed(0)
## ======================


# GPU configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load the handwritten digit dataset
# https://scikit-learn.org/stable/modules/generated/sklearn.datasets.load_digits.html
data = load_digits()
# Named differently from the per-batch `X, y` used in the loops below so the
# full dataset is not silently overwritten by a batch.
X_all, y_all = data.data, data.target

# Split into 60% train, 20% valid, 20% test
X_train, X_test, y_train, y_test = train_test_split(
    X_all, y_all, test_size=0.2, random_state=1)

X_train, X_valid, y_train, y_valid = train_test_split(
    X_train, y_train, test_size=0.25, random_state=1)  # 0.25 x 0.8 = 0.2

# NumPy array -> Torch tensor -> Dataset -> DataLoader
# for the train, validation, test datasets
dataset_train = TensorDataset(torch.Tensor(X_train),
                              torch.LongTensor(y_train))
dl_train = DataLoader(dataset_train,
                      batch_size=batch_size,
                      shuffle=True)

# Evaluation loaders are batched too (no shuffling needed); the loss and
# prediction bookkeeping below works for any batch size.
dataset_valid = TensorDataset(torch.Tensor(X_valid),
                              torch.LongTensor(y_valid))
dl_valid = DataLoader(dataset_valid, batch_size=batch_size)

dataset_test = TensorDataset(torch.Tensor(X_test),
                             torch.LongTensor(y_test))
dl_test = DataLoader(dataset_test, batch_size=batch_size)

# Model, Optimizer, Loss function
model = LogisticRegression(num_input=num_input,
                           num_output=num_output).to(device)
optimizer = optim.SGD(model.parameters(),
                      lr=lr, momentum=momentum)

loss_fn = nn.CrossEntropyLoss()

# For each epoch
eval_list = []
for n in range(n_epochs):
    print("Epoch {}".format(n))
    # Training
    train_loss = 0.
    train_pred_list = []
    train_true_list = []
    model.train()  # Switch to the training mode

    # For each batch
    for batch in dl_train:
        optimizer.zero_grad()              # Initialize gradient information
        X, y = batch
        out = model(X.to(device))          # Call `forward()` function of the model
        loss = loss_fn(out, y.to(device))  # Calculate loss
        loss.backward()                    # Backpropagate the loss value
        optimizer.step()                   # Update the parameters

        # NOTE: no debugger breakpoint here -- it would stop "Run All" on the
        # first batch. To inspect a batch interactively, pull one with
        # `next(iter(dl_train))` in a separate cell.

        # `loss` is the mean over this batch, so weight it by the number of
        # samples actually in the batch (the last batch can be smaller).
        train_loss += loss.item() * X.size(0)
        train_pred_list += out.argmax(1).detach().cpu().tolist()
        train_true_list += y.detach().cpu().tolist()

    # Sum of per-sample losses / number of samples = mean loss per sample
    train_loss /= len(dataset_train)
    train_acc = accuracy_score(train_true_list, train_pred_list)
    print("    Training loss: {:.4f}\t  Training acc: {:.4f}".format(train_loss, train_acc))

    # Validation
    valid_loss = 0.
    valid_pred_list = []
    valid_true_list = []

    model.eval()  # Switch to the evaluation mode
    with torch.no_grad():  # No gradients are needed for evaluation
        for batch in dl_valid:
            X, y = batch
            out = model(X.to(device))
            loss = loss_fn(out, y.to(device))
            valid_loss += loss.item() * X.size(0)
            valid_pred_list += out.argmax(1).cpu().tolist()
            valid_true_list += y.cpu().tolist()

    valid_loss /= len(dataset_valid)
    valid_acc = accuracy_score(valid_true_list, valid_pred_list)
    print("  Validation loss: {:.4f}\tValidation acc: {:.4f}".format(valid_loss, valid_acc))
    # Store train/validation loss, accuracy values
    eval_list.append([n, train_loss, train_acc, valid_loss, valid_acc])

eval_df = pd.DataFrame(eval_list, columns=["epoch", "train_loss", "train_acc",
                                           "valid_loss", "valid_acc"])

# Test
model.eval()
pred_list = []
true_list = []
with torch.no_grad():
    for batch in dl_test:
        X, y = batch
        out = model(X.to(device))
        # argmax over the class dimension gives one prediction per sample
        pred_list += out.argmax(1).cpu().tolist()
        true_list += y.cpu().tolist()
y_pred = np.array(pred_list)
y_true = np.array(true_list)

test_accuracy = accuracy_score(y_true, y_pred)
print("\nTest accuracy: {:.4f}".format(test_accuracy))

eval_df[["train_loss", "valid_loss"]].plot()
eval_df[["train_acc", "valid_acc"]].plot()


sys.settrace() should not be used when the debugger is being used.
This may cause the debugger to stop working correctly.
If this is needed, please check: 
http://pydev.blogspot.com/2007/06/why-cant-pydev-debugger-work-with.html
to see how to restore the debug tracing back correctly.
Call Location:
  File "/usr/lib/python3.7/bdb.py", line 332, in set_trace
    sys.settrace(self.trace_dispatch)



Epoch 0
> <ipython-input-20-f1cad334684c>(79)<module>()
-> train_loss += loss.data.item() * batch_size
*** AttributeError: 'list' object has no attribute 'shape'
torch.Size([16, 64])
*** NameError: name 'pred' is not defined
torch.Size([16, 10])


  return F.mse_loss(input, target, reduction=self.reduction)


*** RuntimeError: The size of tensor a (10) must match the size of tensor b (16) at non-singleton dimension 1


## Checking variables

In [None]:
## Input data
# Draw one mini-batch explicitly so this section does not depend on whatever
# `X`, `y` and `out` happened to be left behind by the training cell (or by an
# interrupted debugger session).
X, y = next(iter(dl_train))
# Model output for that same batch; the cells below inspect `out` as well.
out = model(X.to(device))
X


In [None]:
# Size of the input batch tensor
X.shape

In [None]:
# Forward pass on the whole batch, after moving it to the same device as the model
model(X.to(device))

In [None]:
# Slice with 0:1 (rather than index 0) so the first sample keeps its leading batch dimension
X[0:1]

In [None]:
# Same single-sample slice as the previous cell, shown here with its output
X[0:1]

tensor([[ 0.,  0.,  0., 14., 14.,  1.,  0.,  0.,  0.,  0.,  6., 16., 12.,  0.,
          0.,  0.,  0.,  0., 12., 16.,  2.,  0.,  0.,  0.,  0.,  0., 16., 16.,
         16.,  9.,  0.,  0.,  0.,  1., 16., 15.,  8., 14.,  9.,  0.,  0.,  0.,
         14., 12.,  0., 12., 13.,  0.,  0.,  0.,  6., 14.,  7., 16., 10.,  0.,
          0.,  0.,  1., 13., 16., 13.,  1.,  0.]])

In [None]:
# Forward pass on a "batch" that contains only the first sample
model(X[0:1].to(device))

tensor([[ -4.5490,   4.0253,  -1.5301,  -5.7145,   3.7009, -12.5724,   6.5383,
           3.3752,   2.5099,   1.3851]], device='cuda:0',
       grad_fn=<AddmmBackward>)

In [None]:
## True labels
# The label tensor that was unpacked from the batch together with `X`
y

tensor([6, 8, 4, 0, 1, 6, 1, 1, 3, 1, 5, 4, 2, 4, 9, 7])

In [None]:
# Size of the label tensor
y.shape

In [None]:
# Size of `out`, the model output kept from the most recent forward pass
out.shape

In [None]:
## Loss func
# Loss over the whole batch: a single scalar. With `loss_fn` created with its
# default settings, this is the mean of the per-sample losses printed further down.
loss_fn(out, y.to(device))

tensor(8.0736, device='cuda:0', grad_fn=<NllLossBackward>)

In [None]:
# Loss for the first sample only (0:1 slices keep the batch dimension on both arguments)
loss_fn(out[0:1], y[0:1].to(device))

tensor(7.3873, device='cuda:0', grad_fn=<NllLossBackward>)

In [None]:
# Print the loss of every sample separately by feeding the loss function
# one single-sample slice of the outputs and labels at a time.
i = 0
while i < len(y):
  print(loss_fn(out[i:i+1], y[i:i+1].to(device)))
  i += 1
else:
  # Leave `i` at the last processed index, as a `for ... in range` loop would.
  i = i - 1 if len(y) > 0 else i

tensor(7.3873, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(8.7995, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(12.9916, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(10.6342, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(7.0666, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(7.0021, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(11.7180, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(6.3022, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(14.9491, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(9.1619, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(0.5710, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(7.8638, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(1.6965, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(10.4794, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(5.5548, device='cuda:0', grad_fn=<NllLossBackward>)
tensor(6.9994, device='cuda:0', grad_fn=<NllLossBackward>)
