In [1]:
import torch
from ISLP.torch.imdb import load_tensor
from ISLP.torch import (SimpleDataModule,
                        SimpleModule,
                        ErrorTracker,
                        rec_num_workers)
from torch import nn
from torch.optim import RMSprop
from torchinfo import summary
from pytorch_lightning import Trainer
from pytorch_lightning.loggers import CSVLogger

In [2]:
# Reproducibility: fix the random seeds up front so that restarting the kernel
# and running every cell again reproduces the same results.
from pytorch_lightning import seed_everything

SEED = 0  # single place to change the seed used by this notebook

# workers=True additionally requests seeding of DataLoader worker processes.
seed_everything(SEED, workers=True)
# Prefer deterministic kernels; with warn_only=True an operation that lacks a
# deterministic implementation produces a warning instead of an error.
torch.use_deterministic_algorithms(True, warn_only=True)

Seed set to 0


In [3]:
# Worker count later passed as `num_workers` to the data module. Presumably
# rec_num_workers() returns a machine-dependent recommendation — confirm
# against the ISLP documentation.
max_num_workers = rec_num_workers()

In [4]:
# Load the IMDB train/test datasets from the local data directory.
imdb_train, imdb_test = load_tensor(root='../../data/IMDB')

# Wrap both datasets in a data module that serves mini-batches of 512.
# NOTE(review): validation=2000 presumably holds out 2000 training observations
# as a validation set — confirm against the ISLP SimpleDataModule docs.
imdb_dm = SimpleDataModule(
    imdb_train,
    imdb_test,
    validation=2000,
    batch_size=512,
    num_workers=max_num_workers,
)

In [5]:
class IMDBModel(nn.Module):
    """Feed-forward network with two equally wide hidden layers.

    Each hidden layer is a linear map followed by ReLU and dropout; a final
    linear layer reduces the representation to a single value per input row.
    No sigmoid is applied, so the outputs are raw scores.

    Parameters
    ----------
    input_size : int
        Number of input features per observation.
    n_units : int, default 32
        Width of each of the two hidden layers.
    dropout_p : float, default 0
        Dropout probability applied after each hidden activation.
    """

    def __init__(self, input_size, n_units=32, dropout_p=0):
        super().__init__()
        # Layers are created in the same order as they are first used, which
        # also fixes the order in which their weights are initialised.
        self.dense1 = nn.Linear(input_size, n_units)
        # The ReLU and dropout modules hold no weights and are shared by both
        # hidden layers.
        self.activation = nn.ReLU()
        self.dropout = nn.Dropout(dropout_p)
        self.dense2 = nn.Linear(n_units, n_units)
        self.output = nn.Linear(n_units, 1)

    def forward(self, x):
        """Return a 1-D tensor holding one raw score per row of ``x``."""
        hidden = self.dropout(self.activation(self.dense1(x)))
        hidden = self.dropout(self.activation(self.dense2(hidden)))
        scores = self.output(hidden)
        # Collapse the trailing singleton dimension: (n, 1) -> (n,).
        return torch.flatten(scores)

In [6]:
# Hyperparameter grid: one model is trained for every
# (hidden-layer width, dropout probability) combination.
n_hidden_units_list = [32, 64]
dropout_prob_list = [0, 0.3]

# Filled in grid order (outer loop: width, inner loop: dropout), one entry per
# trained configuration; the two lists stay index-aligned.
imdb_modules = []
imdb_trainers = []

# Loop-invariant: size of the first tensor of the test set. Its entry at
# index 1 is used as the model's input width.
imdb_input_shape = imdb_test.tensors[0].size()

for n_hidden_units in n_hidden_units_list:
    for dropout_prob in dropout_prob_list:

        imdb_model = IMDBModel(imdb_input_shape[1], n_hidden_units,
                               dropout_prob)
        # Inside a loop the value returned by summary() is never displayed by
        # the notebook, and torchinfo defaults to quiet mode under Jupyter, so
        # request printing explicitly. input_size is left unchanged so the
        # call itself behaves exactly as before.
        summary(imdb_model, input_size=imdb_input_shape,
                col_names=['input_size', 'output_size', 'num_params'],
                verbose=1)

        imdb_optimizer = RMSprop(imdb_model.parameters(), lr=0.001)
        imdb_module = SimpleModule.binary_classification(
            imdb_model, optimizer=imdb_optimizer)
        imdb_modules.append(imdb_module)

        # Each configuration logs to its own sub-directory of ./log so runs
        # with different hyperparameters do not overwrite each other.
        imdb_logger = CSVLogger('log',
                                name=f'IMDB_{n_hidden_units}_{dropout_prob}')
        imdb_trainer = Trainer(deterministic=True, max_epochs=30,
                               logger=imdb_logger, callbacks=[ErrorTracker()])
        imdb_trainer.fit(imdb_module, datamodule=imdb_dm)
        imdb_trainers.append(imdb_trainer)

GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type              | Params
--------------------------------------------
0 | model | IMDBModel         | 321 K 
1 | loss  | BCEWithLogitsLoss | 0     
--------------------------------------------
321 K     Trainable params
0         Non-trainable params
321 K     Total params
1.285     Total estimated model params size (MB)


                                                                           

/home/niccolo/.local/lib/python3.10/site-packages/pytorch_lightning/loops/fit_loop.py:298: The number of training batches (45) is smaller than the logging interval Trainer(log_every_n_steps=50). Set a lower value for log_every_n_steps if you want to see logs for the training epoch.


Epoch 29: 100%|██████████| 45/45 [00:09<00:00,  4.87it/s, v_num=4]

`Trainer.fit` stopped: `max_epochs=30` reached.


Epoch 29: 100%|██████████| 45/45 [00:09<00:00,  4.83it/s, v_num=4]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type              | Params
--------------------------------------------
0 | model | IMDBModel         | 321 K 
1 | loss  | BCEWithLogitsLoss | 0     
--------------------------------------------
321 K     Trainable params
0         Non-trainable params
321 K     Total params
1.285     Total estimated model params size (MB)


Epoch 29: 100%|██████████| 45/45 [00:06<00:00,  7.20it/s, v_num=2]         

`Trainer.fit` stopped: `max_epochs=30` reached.


Epoch 29: 100%|██████████| 45/45 [00:06<00:00,  7.14it/s, v_num=2]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type              | Params
--------------------------------------------
0 | model | IMDBModel         | 644 K 
1 | loss  | BCEWithLogitsLoss | 0     
--------------------------------------------
644 K     Trainable params
0         Non-trainable params
644 K     Total params
2.578     Total estimated model params size (MB)


Epoch 29: 100%|██████████| 45/45 [00:07<00:00,  6.20it/s, v_num=2]         

`Trainer.fit` stopped: `max_epochs=30` reached.


Epoch 29: 100%|██████████| 45/45 [00:07<00:00,  6.15it/s, v_num=2]


GPU available: False, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs

  | Name  | Type              | Params
--------------------------------------------
0 | model | IMDBModel         | 644 K 
1 | loss  | BCEWithLogitsLoss | 0     
--------------------------------------------
644 K     Trainable params
0         Non-trainable params
644 K     Total params
2.578     Total estimated model params size (MB)


Epoch 29: 100%|██████████| 45/45 [00:07<00:00,  6.13it/s, v_num=2]         

`Trainer.fit` stopped: `max_epochs=30` reached.


Epoch 29: 100%|██████████| 45/45 [00:07<00:00,  6.07it/s, v_num=2]


In [7]:
# Evaluate every fitted model on the test set, in the order the models were
# trained. imdb_trainers and imdb_modules are index-aligned, so pair them
# directly instead of re-walking the hyperparameter grid with a manual counter.
# Each entry is whatever Trainer.test() returns for that model.
test_results_list = [
    trainer.test(module, datamodule=imdb_dm)
    for trainer, module in zip(imdb_trainers, imdb_modules)
]

Testing DataLoader 0: 100%|██████████| 49/49 [00:04<00:00, 12.23it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_accuracy         0.8539999723434448
        test_loss           1.2648829221725464
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
Testing DataLoader 0: 100%|██████████| 49/49 [00:03<00:00, 14.45it/s]
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
      test_accuracy         0.8621600270271301
        test_loss         

In [8]:
# Report test loss and accuracy for every configuration. The grid is walked in
# the same order used for training, so model_n indexes the matching result.
model_n = 0

for n_hidden_units in n_hidden_units_list:
    for dropout_prob in dropout_prob_list:

        # Element 0 of each result is the metrics dict for test DataLoader 0.
        metrics = test_results_list[model_n][0]
        test_loss = round(metrics['test_loss'], 3)
        test_accuracy = round(metrics['test_accuracy'], 3)

        print(f"Model #{model_n+1}: # hidden units per layer: {n_hidden_units}, "
              f"dropout probability: {dropout_prob}")
        print("\ttest loss: "
              f"{test_loss}, "
              "test accuracy: "
              f"{test_accuracy}")

        model_n += 1

Model #1: # hidden units per layer: 32, dropout probability: 0
	test loss: 1.265, test accuracy: 0.854
Model #2: # hidden units per layer: 32, dropout probability: 0.3
	test loss: 1.173, test accuracy: 0.862
Model #3: # hidden units per layer: 64, dropout probability: 0
	test loss: 1.446, test accuracy: 0.861
Model #4: # hidden units per layer: 64, dropout probability: 0.3
	test loss: 1.079, test accuracy: 0.868
