In [1]:
ls

[0m[01;32m__init__.py[0m*  [34;42mdata[0m/             [01;32mtokenizator.ipynb[0m*  [01;32mtransformer.ipynb[0m*
[34;42m__pycache__[0m/  [01;32mlightning.ipynb[0m*  [34;42mtokenizers[0m/
[34;42mconstants[0m/    [34;42mmodels[0m/           [01;32mtraining.ipynb[0m*


In [2]:
import math

import numpy as np

import torch
import torch.nn as nn
import torch.nn.functional as F

import pandas as pd

import tokenizers

import seaborn as sns
sns.set(rc={'figure.figsize':(15, 10)})

from einops import rearrange

from constants import paths as p
from constants import tokens as t
from constants import hyperparameters as hp

import pytorch_lightning as pl
import pytorch_lightning.callbacks as cb
from models.lightning import IMDB_Reviews, TransformerLightning

https://pytorch.org/tutorials/intermediate/tensorboard_tutorial.html  
https://github.com/abrazinskas/FewSum/tree/master/fewsum/modelling/models  
https://pytorch.org/tutorials/intermediate/seq2seq_translation_tutorial.html  
https://colab.research.google.com/drive/1Uq5vIheRUuRbCplQe29aeSAi_fe-76v_#scrollTo=5Ed0vgI0PupK  

In [2]:
data = IMDB_Reviews(p.TRAIN_TENSOR_DATASET_PATH, p.VALIDATION_TENSOR_DATASET_PATH, hp.BATCH_SIZE)

In [30]:
model = TransformerLightning()

In [10]:
next(model.parameters()).shape

torch.Size([20000, 400])

In [15]:
n = next(model.parameters())

In [24]:
nn.init.normal_(n, mean=5, std=0.5)

Parameter containing:
tensor([[5.4169, 5.5481, 5.3064,  ..., 5.4332, 5.1129, 4.6798],
        [5.0069, 5.1849, 4.1993,  ..., 5.0839, 5.9526, 4.8688],
        [4.3287, 4.9560, 4.9175,  ..., 5.2828, 4.3712, 4.7301],
        ...,
        [5.6381, 5.9078, 4.1373,  ..., 4.6712, 4.7053, 5.0613],
        [5.0040, 4.1237, 4.7372,  ..., 4.5835, 5.9708, 5.7477],
        [4.5199, 6.0032, 5.2160,  ..., 4.3543, 5.4183, 5.2074]],
       requires_grad=True)

In [25]:
n

Parameter containing:
tensor([[5.4169, 5.5481, 5.3064,  ..., 5.4332, 5.1129, 4.6798],
        [5.0069, 5.1849, 4.1993,  ..., 5.0839, 5.9526, 4.8688],
        [4.3287, 4.9560, 4.9175,  ..., 5.2828, 4.3712, 4.7301],
        ...,
        [5.6381, 5.9078, 4.1373,  ..., 4.6712, 4.7053, 5.0613],
        [5.0040, 4.1237, 4.7372,  ..., 4.5835, 5.9708, 5.7477],
        [4.5199, 6.0032, 5.2160,  ..., 4.3543, 5.4183, 5.2074]],
       requires_grad=True)

In [32]:
next(model.parameters())

Parameter containing:
tensor([[-7.9550e-01,  1.4021e-01,  1.6886e+00,  ..., -1.2174e+00,
          6.3651e-01, -9.7681e-01],
        [ 1.1089e+00,  1.2250e-01,  1.0834e-01,  ...,  8.7073e-01,
          2.4968e-01, -7.4792e-03],
        [ 1.6273e+00,  1.1973e-01, -5.5119e-01,  ..., -1.3389e+00,
          1.9048e+00, -1.9424e+00],
        ...,
        [-1.7167e+00,  3.9627e-01,  2.4464e-01,  ..., -1.1313e+00,
         -2.1135e-01,  3.9232e-01],
        [-1.0144e+00,  2.8147e-01,  1.0556e+00,  ...,  1.0291e+00,
          1.8795e-01,  1.5316e+00],
        [ 1.0786e+00,  7.4860e-02, -7.7705e-01,  ..., -8.8402e-01,
          1.8434e-03,  2.9670e-02]], requires_grad=True)

In [45]:
relu_recommended_gain = nn.init.calculate_gain('relu')
for parameter in model.parameters():
    if parameter.dim() > 1:
        nn.init.xavier_uniform_(parameter, gain=relu_recommended_gain)
    else:
        nn.init.normal_(parameter, std=0.1)

In [46]:
next(model.parameters())

Parameter containing:
tensor([[-0.0221,  0.0080,  0.0132,  ...,  0.0049, -0.0175,  0.0228],
        [-0.0040,  0.0204,  0.0008,  ..., -0.0239,  0.0174,  0.0168],
        [ 0.0066,  0.0184,  0.0142,  ..., -0.0201, -0.0126, -0.0189],
        ...,
        [-0.0138,  0.0154,  0.0207,  ...,  0.0094,  0.0232,  0.0052],
        [ 0.0221,  0.0213,  0.0176,  ...,  0.0008,  0.0178,  0.0074],
        [ 0.0084,  0.0181, -0.0090,  ..., -0.0064,  0.0017, -0.0118]],
       requires_grad=True)

In [51]:
model.transformer.embedder

Embedding(20000, 400, padding_idx=3)

In [4]:
model

TransformerLightning(
  (transformer): Transformer(
    (embedder): Embedding(20000, 400, padding_idx=3)
    (embedder_drouput): Dropout(p=0.1, inplace=False)
    (embedding_layer): Sequential(
      (0): Embedding(20000, 400, padding_idx=3)
      (1): Dropout(p=0.1, inplace=False)
    )
    (combine_embeddings_and_properties_layer): Linear(in_features=405, out_features=400, bias=True)
    (positional_encoding_layer): PositionalEncoding(
      (dropout_layer): Dropout(p=0.1, inplace=False)
    )
    (transformer): Transformer(
      (encoder): TransformerEncoder(
        (layers): ModuleList(
          (0): TransformerEncoderLayer(
            (self_attn): MultiheadAttention(
              (out_proj): _LinearWithBias(in_features=400, out_features=400, bias=True)
            )
            (linear1): Linear(in_features=400, out_features=1024, bias=True)
            (dropout): Dropout(p=0.1, inplace=False)
            (linear2): Linear(in_features=1024, out_features=400, bias=True)
      

In [6]:
trainer = pl.Trainer(progress_bar_refresh_rate=50, max_epochs=5, gpus=1, num_sanity_val_steps=1, callbacks=[cb.EarlyStopping("validation_loss"), cb.GPUStatsMonitor(), cb.ModelCheckpoint(dirpath="./models", filename='{epoch}_{validation_loss:.3f}', mode="min", save_top_k=3, monitor="validation_loss")])

MisconfigurationException: Cannot use GPUStatsMonitor callback because NVIDIA driver is not installed.

In [None]:
trainer.fit(model, data)

In [None]:
%load_ext tensorboard
%tensorboard --logdir lightning_logs/

In [None]:
! ls -l models