In [8]:
# standard modules
import importlib

# PyTorch modules
import torch
import pytorch_lightning as pl

# import data + model modules
from ukbb_package import ukbb_data
from ukbb_package import ukbb_ica_models
# reload so that edits to the package source are picked up without
# restarting the kernel
importlib.reload(ukbb_data)
importlib.reload(ukbb_ica_models)

# import custom functions
from ukbb_package import utils
# reload for the same reason as the data + model modules
importlib.reload(utils)

# check: is GPU available?
# (last expression of the cell, so its value is shown as the cell output)
torch.cuda.is_available()

True

In [2]:
# prepare data paths
# directory handed to ukbb_data.UKBBDataModule as the data location
ukbb_dir = '/ritter/share/data/UKBB/ukb_data/'

# define logging path
# base directory for this experiment; per-run sub-directories ('gpu/', 'cpu/',
# 'mio/') are appended by plain string concatenation, so keep the trailing '/'
path = '../tracking/ICA25/TimeTest/'

In [3]:
# called once, before any model or DataModule is created
# NOTE(review): presumably seeds the random number generators; the exact
# behaviour is defined in utils.make_reproducible — confirm there
utils.make_reproducible()

In [4]:
# shell escape: print the current status of the GPUs on this machine
! gpustat

[1m[37mcuda01                       [m  Sat Jul  8 13:29:07 2023  [1m[30m470.199.02[m
[36m[0][m [34mNVIDIA GeForce GTX 1080 Ti[m |[1m[31m 51'C[m, [1m[32m100 %[m | [36m[1m[33m  863[m / [33m11178[m MB | [1m[30mjihoon[m([33m851M[m) [1m[30mgdm[m([33m4M[m)
[36m[1][m [34mNVIDIA GeForce GTX 1080 Ti[m |[1m[31m 50'C[m, [32m  0 %[m | [36m[1m[33m  867[m / [33m11178[m MB | [1m[30mjihoon[m([33m855M[m) [1m[30mgdm[m([33m4M[m)
[36m[2][m [34mNVIDIA GeForce GTX 1080 Ti[m |[1m[31m 57'C[m, [1m[32m100 %[m | [36m[1m[33m  867[m / [33m11178[m MB | [1m[30mjihoon[m([33m855M[m) [1m[30mgdm[m([33m4M[m)
[36m[3][m [34mNVIDIA GeForce GTX 1080 Ti[m |[1m[31m 59'C[m, [1m[32m100 %[m | [36m[1m[33m  847[m / [33m11178[m MB | [1m[30mjihoon[m([33m835M[m) [1m[30mgdm[m([33m4M[m)
[36m[4][m [34mNVIDIA GeForce GTX 1080 Ti[m |[1m[31m 53'C[m, [32m  0 %[m | [36m[1m[33m  863[m / [33m11178[m MB | [1m[30mjihoon[

# How much time does the GPU save?

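Compare the wall-clock time needed to train the baseline model on a GPU vs. on a CPU. Note that the CPU run is restricted to a single thread (`torch.set_num_threads(1)`), so the comparison is GPU vs. single-threaded CPU.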

In [5]:
def train_model(log_path, data_path, accelerator, device=None):
    """
    Train and test the baseline 1D-CNN with the given accelerator.

    Uses the same model, training, and testing set-up on every call so that
    the time needed to train with different accelerators can be compared.

    Input:
        log_path: path to where logs, checkpoints and data info should be
            saved; a sub-directory named after `accelerator` is appended by
            plain string concatenation, so `log_path` should end with '/'
        data_path: path to location where data is saved
            (passed on to ukbb_data.UKBBDataModule)
        accelerator: whether to use GPU or CPU, as str ('gpu' or 'cpu')
        device: which GPU to run on (only used if accelerator == 'gpu')
    Output:
        trainer: the trainer that was used to fit and test the model
        datamodule: PyTorch Lightning UKBB DataModule
    Raises:
        ValueError: if `accelerator` is neither 'gpu' nor 'cpu'
    Note:
        With accelerator == 'cpu' the number of PyTorch threads is set to 1
        via torch.set_num_threads and is not reset afterwards.
    """
    # validate first: an unsupported value would otherwise only surface later
    # as an unbound `trainer` variable, after model and logger were created
    if accelerator not in ('gpu', 'cpu'):
        raise ValueError(
            f"accelerator must be 'gpu' or 'cpu', got {accelerator!r}")

    # everything that belongs to this run is stored below log_path
    run_dir = log_path + accelerator + '/'

    # initialise model
    simple_CNN = ukbb_ica_models.simple1DCNN()

    # initialise logger
    logger = utils.logger_init(save_dir=run_dir)

    # set callbacks
    early_stopping = utils.earlystopping_init()
    checkpoint = utils.checkpoint_init(save_dir=run_dir)
    callbacks = [early_stopping, checkpoint]

    # threads test
    print('Num threads:', torch.get_num_threads())

    # initialise trainer
    if accelerator == 'gpu':
        trainer = utils.trainer_init(device=device,
                                     logger=logger,
                                     callbacks=callbacks)
    else:
        # accelerator == 'cpu': restrict PyTorch to a single thread
        torch.set_num_threads(1)
        # threads test
        print('Num threads after setting 1:', torch.get_num_threads())

        trainer = pl.Trainer(accelerator=accelerator,
                             logger=logger,
                             log_every_n_steps=10,
                             max_epochs=175,
                             callbacks=callbacks,
                             deterministic=True)

    # initialise DataModule
    datamodule = ukbb_data.UKBBDataModule(data_path)

    # train model
    trainer.fit(simple_CNN, datamodule=datamodule)
    print('Training complete.')

    # save info on which data was used + what the train/val/test split was
    # (written to the run directory derived from log_path, not to a
    # notebook-level global)
    utils.save_data_info(path=run_dir, datamodule=datamodule)

    print(f'\nTesting model with {accelerator} accelerator...')

    # test model with the best checkpoint of this run
    trainer.test(ckpt_path='best', datamodule=datamodule)

    return trainer, datamodule


## Trained with GPU

In [6]:
%%time
gpu_model, gpu_model_data = train_model(path, ukbb_dir, 'gpu', 1)

  rank_zero_warn(
  rank_zero_warn(


Num threads: 16


GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]

  | Name    | Type       | Params
---------------------------------------
0 | act     | ReLU       | 0     
1 | loss    | MSELoss    | 0     
2 | conv1   | Conv1d     | 4.0 K 
3 | conv2   | Conv1d     | 10.3 K
4 | conv3   | Conv1d     | 41.1 K
5 | conv4   | Conv1d     | 164 K 
6 | maxpool | MaxPool1d  | 0     
7 | model   | Sequential | 225 K 
---------------------------------------
225 K     Trainable params
0         Non-trainable params
225 K     Total params
0.903     Total estimated model params size (MB)
  rank_zero_warn(


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Training complete.

Testing model with gpu accelerator...


Restoring states from the checkpoint path at /home/laurar/IM/tracking/ICA25/TimeTest/gpu/Checkpoint/models-epoch=43-val_loss=41.06.ckpt
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1,2,3,4,5,6,7]
Loaded model weights from the checkpoint at /home/laurar/IM/tracking/ICA25/TimeTest/gpu/Checkpoint/models-epoch=43-val_loss=41.06.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss            34.3827018737793
        test_mae             4.693734169006348
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
CPU times: user 36min 6s, sys: 1min 2s, total: 37min 9s
Wall time: 51min 14s


## Trained with CPU

In [7]:
%%time
cpu_model, cpu_model_data = train_model(path, ukbb_dir, 'cpu')

  rank_zero_warn(
  rank_zero_warn(
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


Num threads: 16
Num threads after setting 1: 1


  rank_zero_warn(

  | Name    | Type       | Params
---------------------------------------
0 | act     | ReLU       | 0     
1 | loss    | MSELoss    | 0     
2 | conv1   | Conv1d     | 4.0 K 
3 | conv2   | Conv1d     | 10.3 K
4 | conv3   | Conv1d     | 41.1 K
5 | conv4   | Conv1d     | 164 K 
6 | maxpool | MaxPool1d  | 0     
7 | model   | Sequential | 225 K 
---------------------------------------
225 K     Trainable params
0         Non-trainable params
225 K     Total params
0.903     Total estimated model params size (MB)
  rank_zero_warn(


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Training complete.

Testing model with cpu accelerator...


Restoring states from the checkpoint path at /home/laurar/IM/tracking/ICA25/TimeTest/cpu/Checkpoint/models-epoch=43-val_loss=41.19.ckpt
Loaded model weights from the checkpoint at /home/laurar/IM/tracking/ICA25/TimeTest/cpu/Checkpoint/models-epoch=43-val_loss=41.19.ckpt
  rank_zero_warn(


Testing: 0it [00:00, ?it/s]

────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       Test metric             DataLoader 0
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
        test_loss           34.376319885253906
        test_mae             4.698326110839844
────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
CPU times: user 48min 7s, sys: 1min 15s, total: 49min 22s
Wall time: 1h 3min 15s


# Test how long it takes to train a model with >1 million parameters on the CPU

In [9]:
%%time
# restrict PyTorch to a single thread (same setting as the CPU branch of
# train_model)
torch.set_num_threads(1)

# initialise model
# num encoder params: 3456000
variable_CNN = ukbb_ica_models.variable1DCNN(in_channels=21,
                                            kernel_size=5,
                                            lr=1e-3,
                                            depth=4,
                                            start_out=128,
                                            stride=2,
                                            conv_dropout=0,
                                            final_dropout=0,
                                            weight_decay=0.001,
                                            double_conv=False,
                                            batch_norm=False)

# initialise logger (logs go to the 'mio/' sub-directory of the logging path)
logger = utils.logger_init(save_dir=path+'mio/')

# set callbacks
early_stopping = utils.earlystopping_init()

# initialise trainer: CPU only, no checkpoints are written for this run
trainer = pl.Trainer(accelerator='cpu',
                     logger=logger,
                     log_every_n_steps=10,
                     max_epochs=175,
                     callbacks=[early_stopping],
                     enable_checkpointing=False,
                     deterministic=True)

# initialise DataModule
# NOTE(review): good_components=True presumably selects a subset of the ICA
# components (the model is built with in_channels=21) — confirm in ukbb_data
datamodule = ukbb_data.UKBBDataModule(ukbb_dir, good_components=True)

# train model (only training is timed here; no test step is run)
trainer.fit(variable_CNN, datamodule=datamodule)
print('Training complete.')

  rank_zero_warn(
  rank_zero_warn(
GPU available: True (cuda), used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
  rank_zero_warn(

  | Name    | Type       | Params
---------------------------------------
0 | act     | ReLU       | 0     
1 | loss    | MSELoss    | 0     
2 | maxpool | MaxPool1d  | 0     
3 | encoder | Sequential | 3.5 M 
4 | decoder | Sequential | 24.6 K
---------------------------------------
3.5 M     Trainable params
0         Non-trainable params
3.5 M     Total params
13.922    Total estimated model params size (MB)
  rank_zero_warn(


Sanity Checking: 0it [00:00, ?it/s]

  rank_zero_warn(
  rank_zero_warn(


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Training complete.
CPU times: user 2h 17min 33s, sys: 5min 31s, total: 2h 23min 5s
Wall time: 2h 32min 35s
