## Assignment 07

#### Submitted By:
1. Dhruvan Ganesh
2. Sheikh Mastura Farzana

In [1]:
%load_ext tensorboard

### Environment Information
---

In [2]:
from pytorch_lightning import seed_everything
from torch import device, cuda

device = device("cuda:0" if cuda.is_available() else "cpu")
print("Device:", device)
if cuda.is_available():
    print("Device Name:", cuda.get_device_name(0))

seed = 42
seed_everything(seed)
print("Set Random Seed:", seed)

data_dir = "~/.datasets"
print("Data Dir:", data_dir)

Device: cuda:0
Device Name: GeForce GTX 1660 Ti
Set Random Seed: 42
Data Dir: ~/.datasets


# Assignment7 (21 Aug 2020)
---

- Rewrite the MNIST LSTM code using your own LSTM layer (Don't use nn.LSTM or nn.LSTMCell!)
- Extra point to write MNIST GRU code using your own GRU layer (Don't use nn.GRU or nn.GRUCell!)


### MINST Loader
---

In [3]:
from pytorch_lightning import LightningModule
from torchvision import transforms, datasets
from torch.utils.data import DataLoader


class MINSTData(LightningModule):
    def __init__(self, batch_size=1024):
        super().__init__()
        self.batch_size = batch_size
        self.preprocess = transforms.ToTensor()

    def train_dataloader(self):
        dataset = datasets.MNIST(
            data_dir, train=True, transform=self.preprocess, download=True
        )
        loader = DataLoader(
            dataset,
            batch_size=self.batch_size,
            num_workers=8,
            pin_memory=True,
            shuffle=True,
            drop_last=True,
        )
        return loader

    def val_dataloader(self):
        dataset = datasets.MNIST(
            data_dir, train=False, transform=self.preprocess, download=True
        )
        loader = DataLoader(
            dataset,
            batch_size=self.batch_size,
            num_workers=8,
            pin_memory=True,
            drop_last=True,
        )
        return loader

In [4]:
import torch
import torch.nn as nn


class _CellParameters(nn.Module):
    def __init__(self, n_in: int, n_hidden: int):
        super().__init__()
        self.w_i = nn.Parameter(torch.Tensor(n_in, n_hidden))
        self.w_h = nn.Parameter(torch.Tensor(n_hidden, n_hidden))
        self.b_i = nn.Parameter(torch.Tensor(n_hidden))
        self.b_h = nn.Parameter(torch.Tensor(n_hidden))

## <span style="font-variant:small-caps">Task 1: LSTM Implementation</span>
---

In [5]:
class OwnLSTMCell(nn.Module):
    def __init__(self, n_in, n_hidden):
        super().__init__()
        self.input = _CellParameters(n_in, n_hidden)
        self.forget = _CellParameters(n_in, n_hidden)
        self.cell = _CellParameters(n_in, n_hidden)
        self.output = _CellParameters(n_in, n_hidden)
        self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, _CellParameters):
                nn.init.orthogonal_(m.w_i)
                nn.init.orthogonal_(m.w_h)
                nn.init.zeros_(m.b_i)
                nn.init.zeros_(m.b_h)

    def forward(self, x, h, c):
        _input = torch.sigmoid(
            x @ self.input.w_i + self.input.b_i + h @ self.input.w_h + self.input.b_h
        )
        _forget = torch.sigmoid(
            x @ self.forget.w_i
            + self.forget.b_i
            + h @ self.forget.w_h
            + self.forget.b_h
        )
        _cell_gate = torch.tanh(
            x @ self.cell.w_i + self.cell.b_i + h @ self.cell.w_h + self.cell.b_h
        )
        _output = torch.sigmoid(
            x @ self.output.w_i
            + self.output.b_i
            + h @ self.output.w_h
            + self.output.b_h
        )
        _cell_state = _forget * c + _input * _cell_gate
        _hidden_state = _output * torch.tanh(_cell_state)
        return _hidden_state, _cell_state

## <span style="font-variant:small-caps">Task 2 (Bonus): GRU Implementation</span>
---

In [6]:
class OwnGRUCell(nn.Module):
    def __init__(self, n_in, n_hidden):
        super().__init__()
        self.reset = _CellParameters(n_in, n_hidden)
        self.update = _CellParameters(n_in, n_hidden)
        self.new = _CellParameters(n_in, n_hidden)
        self._initialize_weights()

    def _initialize_weights(self):
        for m in self.modules():
            if isinstance(m, _CellParameters):
                nn.init.orthogonal_(m.w_i)
                nn.init.orthogonal_(m.w_h)
                nn.init.zeros_(m.b_i)
                nn.init.zeros_(m.b_h)

    def forward(self, x, h):
        _reset = torch.sigmoid(
            x @ self.reset.w_i + self.reset.b_i + h @ self.reset.w_h + self.reset.b_h
        )
        _update = torch.sigmoid(
            x @ self.update.w_i
            + self.update.b_i
            + h @ self.update.w_h
            + self.update.b_h
        )
        _new = torch.tanh(
            x @ self.new.w_i + self.new.b_i + _reset * (h @ self.new.w_h + self.new.b_h)
        )
        _hidden_state = (1 - _update) * _new + _update * h
        return _hidden_state

### Generic RNN Model
---

In [7]:
class OwnRnn(MINSTData):
    def __init__(self, n_in, n_hidden, n_out, n_layer, rnn_impl=OwnLSTMCell):
        super().__init__()
        self.n_in = n_in
        self.n_hidden = n_hidden
        self.n_out = n_out
        self.n_layer = n_layer
        self.rnn_impl = rnn_impl
        self.rnn_layers = nn.ModuleList(
            [rnn_impl(n_in, n_hidden) for _ in range(n_layer)]
        )
        self.classifier = nn.Linear(n_hidden, n_out)
        self.loss = nn.CrossEntropyLoss()

    def configure_optimizers(self):
        optimizer = torch.optim.Adam(self.parameters(), lr=0.001, weight_decay=1e-5)
        return optimizer

    def forward(self, x):
        x = torch.squeeze(x)
        batch_size, n, _ = x.shape
        h = [
            torch.zeros(batch_size, self.n_hidden, device=device)
            for _ in range(self.n_layer)
        ]

        if self.rnn_impl == OwnLSTMCell:
            c = [
                torch.zeros(batch_size, self.n_hidden, device=device)
                for _ in range(self.n_layer)
            ]

        for x_i in range(n):
            _x = x[:, x_i, :]

            for l_i, rnn_layer in enumerate(self.rnn_layers):
                if self.rnn_impl == OwnLSTMCell:
                    h[l_i], c[l_i] = rnn_layer.forward(_x, h[l_i], c[l_i])

                elif self.rnn_impl == OwnGRUCell:
                    h[l_i] = rnn_layer.forward(_x, h[l_i])

        out = h[-1]
        out = self.classifier(out)
        return out

    def training_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)
        loss = self.loss(y_hat, y)
        labels_hat = torch.argmax(y_hat, dim=1)
        train_acc = torch.sum(torch.eq(y, labels_hat)).item() / (len(y) * 1.0)
        tensorboard_logs = {"train_loss": loss.item(), "train_acc": train_acc}
        return {"loss": loss, "log": tensorboard_logs}

    def validation_step(self, batch, batch_idx):
        x, y = batch
        y_hat = self(x)

        labels_hat = torch.argmax(y_hat, dim=1)
        val_acc = torch.sum(torch.eq(y, labels_hat)).item() / (len(y) * 1.0)

        return {"val_loss": self.loss(y_hat, y), "val_acc": torch.tensor(val_acc)}

    def validation_epoch_end(self, outputs):
        avg_loss = torch.stack([x["val_loss"] for x in outputs]).mean()
        avg_acc = torch.stack([x["val_acc"] for x in outputs]).mean()
        tensorboard_logs = {"val_loss": avg_loss, "val_acc": avg_acc}
        return {"val_loss": avg_loss, "val_acc": avg_acc, "log": tensorboard_logs}

    @torch.no_grad()
    def get_all_preds(self):
        all_preds = torch.tensor([]).to(device)
        labels = torch.tensor([], dtype=torch.int64).to(device)
        for x, y in self.val_dataloader():
            x, y = x.to(device), y.to(device)
            y_hat = self.forward(x)
            all_preds = torch.cat((all_preds, y_hat), dim=0)
            labels = torch.cat((labels, y), dim=0)
        pred_labels = all_preds.argmax(dim=1)
        return pred_labels, labels

### Experiment Runner
---

In [8]:
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import TensorBoardLogger


def run_experiment(model, tensorboard_graph_name=None, max_epochs=20):
    if tensorboard_graph_name:
        logger = TensorBoardLogger("lightning_logs", name=tensorboard_graph_name)
    else:
        logger = False

    trainer = Trainer(
        gpus=1,
        num_nodes=1,
        deterministic=True,
        max_epochs=max_epochs,
        logger=logger,  # set False to disable tensorboard logs
        progress_bar_refresh_rate=0,  # disable progress bar
    )
    trainer.fit(model)

    return model

### Train LSTM-MINST
---

In [9]:
%tensorboard --logdir lightning_logs/minst_lstm

Launching TensorBoard...

In [10]:
lstm_model = run_experiment(
    OwnRnn(n_in=28, n_hidden=100, n_out=10, n_layer=3, rnn_impl=OwnLSTMCell),
    tensorboard_graph_name="minst_lstm",
)

GPU available: True, used: True
No environment variable for node rank defined. Set as 0.
CUDA_VISIBLE_DEVICES: [0]

   | Name                | Type             | Params
-----------------------------------------------------
0  | rnn_layers          | ModuleList       | 156 K 
1  | rnn_layers.0        | OwnLSTMCell      | 52 K  
2  | rnn_layers.0.input  | _CellParameters  | 13 K  
3  | rnn_layers.0.forget | _CellParameters  | 13 K  
4  | rnn_layers.0.cell   | _CellParameters  | 13 K  
5  | rnn_layers.0.output | _CellParameters  | 13 K  
6  | rnn_layers.1        | OwnLSTMCell      | 52 K  
7  | rnn_layers.1.input  | _CellParameters  | 13 K  
8  | rnn_layers.1.forget | _CellParameters  | 13 K  
9  | rnn_layers.1.cell   | _CellParameters  | 13 K  
10 | rnn_layers.1.output | _CellParameters  | 13 K  
11 | rnn_layers.2        | OwnLSTMCell      | 52 K  
12 | rnn_layers.2.input  | _CellParameters  | 13 K  
13 | rnn_layers.2.forget | _CellParameters  | 13 K  
14 | rnn_layers.2.cell   | _CellPar

In [11]:
from sklearn.metrics import classification_report

y_pred, y_true = lstm_model.get_all_preds()
y_pred, y_true = y_pred.cpu().numpy(), y_true.cpu().numpy()


print("\n==================\nValidation Report:\n==================")
print(classification_report(y_true, y_pred))


Validation Report:
              precision    recall  f1-score   support

           0       0.98      0.99      0.98       897
           1       0.98      0.99      0.99      1041
           2       0.97      0.96      0.96       958
           3       0.96      0.97      0.96       929
           4       0.97      0.96      0.97       910
           5       0.98      0.96      0.97       823
           6       0.98      0.98      0.98       881
           7       0.98      0.97      0.97       939
           8       0.97      0.96      0.96       900
           9       0.95      0.97      0.96       938

    accuracy                           0.97      9216
   macro avg       0.97      0.97      0.97      9216
weighted avg       0.97      0.97      0.97      9216



### Train GRU-MINST
---

In [12]:
%tensorboard --logdir lightning_logs/minst_gru

Launching TensorBoard...

In [13]:
gru_model = run_experiment(
    OwnRnn(n_in=28, n_hidden=100, n_out=10, n_layer=3, rnn_impl=OwnGRUCell),
    tensorboard_graph_name="minst_gru",
)

GPU available: True, used: True
No environment variable for node rank defined. Set as 0.
CUDA_VISIBLE_DEVICES: [0]

   | Name                | Type             | Params
-----------------------------------------------------
0  | rnn_layers          | ModuleList       | 117 K 
1  | rnn_layers.0        | OwnGRUCell       | 39 K  
2  | rnn_layers.0.reset  | _CellParameters  | 13 K  
3  | rnn_layers.0.update | _CellParameters  | 13 K  
4  | rnn_layers.0.new    | _CellParameters  | 13 K  
5  | rnn_layers.1        | OwnGRUCell       | 39 K  
6  | rnn_layers.1.reset  | _CellParameters  | 13 K  
7  | rnn_layers.1.update | _CellParameters  | 13 K  
8  | rnn_layers.1.new    | _CellParameters  | 13 K  
9  | rnn_layers.2        | OwnGRUCell       | 39 K  
10 | rnn_layers.2.reset  | _CellParameters  | 13 K  
11 | rnn_layers.2.update | _CellParameters  | 13 K  
12 | rnn_layers.2.new    | _CellParameters  | 13 K  
13 | classifier          | Linear           | 1 K   
14 | loss                | CrossEnt

In [14]:
from sklearn.metrics import classification_report

y_pred, y_true = gru_model.get_all_preds()
y_pred, y_true = y_pred.cpu().numpy(), y_true.cpu().numpy()


print("\n==================\nValidation Report:\n==================")
print(classification_report(y_true, y_pred))


Validation Report:
              precision    recall  f1-score   support

           0       0.98      0.98      0.98       897
           1       0.99      0.98      0.99      1041
           2       0.96      0.97      0.97       958
           3       0.97      0.97      0.97       929
           4       0.97      0.95      0.96       910
           5       0.96      0.96      0.96       823
           6       0.98      0.97      0.97       881
           7       0.98      0.96      0.97       939
           8       0.94      0.97      0.95       900
           9       0.95      0.96      0.95       938

    accuracy                           0.97      9216
   macro avg       0.97      0.97      0.97      9216
weighted avg       0.97      0.97      0.97      9216

