In [1]:
import torch
import pandas as pd
import numpy as np

# Fixed seed so that NumPy draws and PyTorch's default generator (used for
# weight initialisation and DataLoader shuffling) start from the same state
# after every "Restart Kernel -> Run All".
# NOTE(review): GPU kernels may still be non-deterministic; this only pins the
# random number generators.
SEED = 42
_ = torch.manual_seed(SEED)  # assigned to `_` so the cell shows no Generator repr
rng = np.random.default_rng(SEED)

# Testing CNN Transformer Model Class

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from pathlib import Path
import os
import glob
import time
from datetime import datetime
import matplotlib.pyplot as plt
import math
import random

import model.hyperparameters as hp
from model.Transformer import TransformerModel
from utils.train import Transformer_train_fn
from utils.dataset import (
    SmartwatchDataset, 
    SmartwatchAugmentTransformer, 
    get_file_lists
)
from utils.utils import test_Transformer

# Restrict this kernel to the GPU with index 1.
# NOTE(review): this magic runs after `torch` and the project modules have
# already been imported. CUDA_VISIBLE_DEVICES presumably only takes effect if
# CUDA has not been initialised yet -- TODO confirm that nothing imported above
# (e.g. `model.hyperparameters`) touches CUDA at import time, or move this line
# ahead of the imports.
%env CUDA_VISIBLE_DEVICES=1

env: CUDA_VISIBLE_DEVICES=1


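Try to train the model: set up the output paths and the `TRAIN` switch (train a new model vs. load previously saved weights).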

In [3]:

# Paths
# Each run writes into its own folder named after the current time
# (day-month-year_HHMMSS).
SAVE_PATH = Path(f'outputs/{datetime.now().strftime("%d-%m-%Y_%H%M%S")}')

# Switch: True  -> the training cell runs and logs to TensorBoard;
#         False -> the user is asked for a folder holding existing weights.
TRAIN = True

if TRAIN:
    # Imported here so TensorBoard is only loaded when a run is actually logged.
    from torch.utils.tensorboard import SummaryWriter
    writer = SummaryWriter(log_dir=str(SAVE_PATH / 'tensorboard'))
    TEST_PATH = SAVE_PATH
else:
    # Keep `writer` defined in both branches so later cells can reference it
    # without a NameError.
    writer = None
    TEST_PATH = Path(input('Enter path to folder containing weights: '))


2023-04-16 06:25:38.211297: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64
2023-04-16 06:25:38.211406: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory; LD_LIBRARY_PATH: /usr/local/nvidia/lib:/usr/local/nvidia/lib64


In [4]:
# Get .csv files
# NOTE(review): the ID lists mix a string ('05') with ints -- presumably
# get_file_lists normalises them; confirm against its implementation.
train_files, val_files, test_files = get_file_lists(
    val_sub_list=['05', 10, 15, 20, 25, 30],
    test_sub_list=[35],
)


def _build_loader(dataset, shuffle):
    """Wrap ``dataset`` in a DataLoader with the shared transformer settings.

    Every split uses the batch size from ``hp``, its own
    ``SmartwatchAugmentTransformer`` collate function and ``drop_last=True``;
    only ``shuffle`` differs between the splits.
    """
    return torch.utils.data.DataLoader(
        dataset,
        batch_size=hp.TRANSFORMER_BATCH_SIZE,
        collate_fn=SmartwatchAugmentTransformer(num_heads=hp.NUM_HEADS),
        drop_last=True,
        shuffle=shuffle,
    )


# Get dataloaders
# NOTE(review): only the training dataset passes sample_period explicitly; the
# validation/test datasets rely on the SmartwatchDataset default -- confirm the
# default matches 0.04.
train_dataset = SmartwatchDataset(train_files, sample_period=0.04)
train_loader = _build_loader(train_dataset, shuffle=True)

# NOTE(review): the validation loader is shuffled and drops the last partial
# batch, so the evaluated subset can change between passes -- confirm intended.
val_dataset = SmartwatchDataset(val_files)
val_loader = _build_loader(val_dataset, shuffle=True)

test_dataset = SmartwatchDataset(test_files)
test_loader = _build_loader(test_dataset, shuffle=False)



In [5]:
# Initialize transformer
# Architecture settings are gathered in one mapping so they can be inspected or
# logged alongside the run before the model is built.
model_kwargs = {
    'input_size': 9,
    'stride': 2,
    'kernel_size': 32,
    'seq_len': 512,
    'channels': [32],
    'dropout': 0.1,
    'n_heads': hp.NUM_HEADS,
    'num_encoder_layers': 1,
    'num_decoder_layers': 1,
    'downsample': False,
}
transformer_model = TransformerModel(**model_kwargs)
transformer_model = transformer_model.to(hp.DEVICE)

# Initialize loss functions
# Mean-squared error and mean-absolute error; both are handed to the
# training and test helpers in later cells.
loss_fn = nn.MSELoss()
metric_loss_fn = nn.L1Loss()

# Initialize optimizers
transformer_optimizer = optim.Adam(
    transformer_model.parameters(),
    lr=hp.TRANSFORMER_LEARNING_RATE,
)



In [6]:
# Run training only when the TRAIN switch from the configuration cell is on.
# NOTE(review): the recorded output of this cell ends in a CUDA out-of-memory
# error during epoch 0 -- investigate memory growth inside the training helper
# (or reduce the batch size / sequence length) before relying on this run.
if TRAIN:
    Transformer_train_fn(
        train_loader,
        val_loader,
        transformer_model,
        transformer_optimizer,
        loss_fn,
        metric_loss_fn,
        hp.NUM_EPOCH,
        hp.DEVICE,
        SAVE_PATH,
        writer,
        hp.TEACHER_FORCE_RATIO,
        checkpoint=None,  # no checkpoint is supplied for this run
        batch_size=hp.TRANSFORMER_BATCH_SIZE
    )



===== Epoch: 0 =====
torch.Size([4, 512, 9])
torch.Size([4, 512, 7])
torch.Size([4, 512, 7])
For i is 1, torch.Size([4, 512, 7])
For i is 2, torch.Size([4, 512, 7])
For i is 3, torch.Size([4, 512, 7])
For i is 4, torch.Size([4, 512, 7])
For i is 5, torch.Size([4, 512, 7])
For i is 6, torch.Size([4, 512, 7])
For i is 7, torch.Size([4, 512, 7])
For i is 8, torch.Size([4, 512, 7])
For i is 9, torch.Size([4, 512, 7])
For i is 10, torch.Size([4, 512, 7])
For i is 11, torch.Size([4, 512, 7])
For i is 12, torch.Size([4, 512, 7])
For i is 13, torch.Size([4, 512, 7])
For i is 14, torch.Size([4, 512, 7])
For i is 15, torch.Size([4, 512, 7])
For i is 16, torch.Size([4, 512, 7])
For i is 17, torch.Size([4, 512, 7])
For i is 18, torch.Size([4, 512, 7])
For i is 19, torch.Size([4, 512, 7])
For i is 20, torch.Size([4, 512, 7])
For i is 21, torch.Size([4, 512, 7])
For i is 22, torch.Size([4, 512, 7])
For i is 23, torch.Size([4, 512, 7])
For i is 24, torch.Size([4, 512, 7])
For i is 25, torch.Size([4, 

OutOfMemoryError: CUDA out of memory. Tried to allocate 20.00 MiB (GPU 0; 11.91 GiB total capacity; 8.80 GiB already allocated; 22.06 MiB free; 8.81 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [None]:
# Evaluate the model on the test loader.
# TEST_PATH is passed instead of SAVE_PATH: the configuration cell sets
# TEST_PATH to SAVE_PATH when training, and to the user-supplied weights folder
# when TRAIN is False. SAVE_PATH alone would point at a freshly timestamped
# folder in the latter case.
# NOTE(review): assumes test_Transformer uses this argument to locate the saved
# weights -- confirm against utils.utils.
test_Transformer(
    test_loader,
    transformer_model,
    loss_fn,
    metric_loss_fn,
    TEST_PATH,
    hp.DEVICE,
)


In [None]:
# Count model parameters: first all of them, then only those that receive
# gradients. Each parameter tensor is visited once and its element count is
# recorded together with its requires_grad flag.
param_sizes = [
    (param.numel(), param.requires_grad)
    for param in transformer_model.parameters()
]
pytorch_total_params = sum(count for count, _ in param_sizes)
pytorch_trainable_params = sum(
    count for count, trainable in param_sizes if trainable
)
print(pytorch_total_params)
print(pytorch_trainable_params)

280231
280231
