In [2]:
# add src to the sys path of this notebook
import sys
import pathlib
sys.path.append(str(pathlib.Path().absolute().parent / "src"))
from models.gru import GRUClassifier, CNN_GRUClassifier
import data.paths as paths
from data.load_data import list_logs
from utils.config_loader import load_config
from utils.seed import set_seed
from data.sensor_dataset import SensorDataset
import torch
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import numpy as np
import os
from datetime import datetime
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

%load_ext autoreload
%autoreload 2

In [3]:
# --- Configuration ---------------------------------------------------------
# One YAML file per concern (data / fit / model), parsed together by load_config.
data_cfg_path = paths.CONFIG_FOLDER / 'data' / 'base_forward.yml'
fit_cfg_path = paths.CONFIG_FOLDER / 'fit' / 'base_fit.yml'
model_cfg_path = paths.CONFIG_FOLDER / 'model' / 'base_model_dropout.yml'
data_cfg, fit_cfg, model_cfg = load_config(data_cfg_path, fit_cfg_path, model_cfg_path)

print(
    f"Window size: {data_cfg['window_size']} s., "
    f"downsampling freq.: {data_cfg['downsampling_freq']} Hz"
)

# --- Run bookkeeping, seeding and device selection -------------------------
RUN_ID = datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
torch_generator = set_seed(data_cfg["seed"])
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# --- Select the logs to work with ------------------------------------------
log_names = list_logs(paths.PAPER_EXPERIMENT_DATA_FOLDER)
if data_cfg["direction"] != "Both":
    # Keep only the rows whose "direction" column matches the configured one.
    log_names = (
        log_names[log_names["direction"] == data_cfg["direction"]]
        .reset_index(drop=True)
    )
print(f"Total logs for {data_cfg['direction']} direction: {len(log_names)}")

# --- Train / test split at log level ---------------------------------------
# Both halves get a fresh 0..n-1 index after the split.
train_log_names, test_log_names = (
    split.reset_index(drop=True)
    for split in train_test_split(
        log_names,
        test_size=fit_cfg["test_size_ratio"],
        random_state=data_cfg["seed"],
    )
)
print(f"Training logs: {len(train_log_names)}, Testing logs: {len(test_log_names)}")

# --- Datasets --------------------------------------------------------------
train_dataset = SensorDataset(
    train_log_names,
    window_size=data_cfg["window_size"],
    mode='train',
    downsampling_freq=data_cfg["downsampling_freq"],
)
# The eval dataset receives mean_force / std_force taken from the training
# dataset (presumably so both are normalised with the same statistics --
# confirm in SensorDataset).
test_dataset = SensorDataset(
    test_log_names,
    window_size=data_cfg["window_size"],
    mode='eval',
    downsampling_freq=data_cfg["downsampling_freq"],
    mean_force=train_dataset.mean_force,
    std_force=train_dataset.std_force,
)

# --- Loaders ---------------------------------------------------------------
train_loader = DataLoader(
    train_dataset,
    batch_size=fit_cfg["batch_size"],
    shuffle=True,
    generator=torch_generator,
)
# Test items are served one at a time and in order.
test_loader = DataLoader(
    test_dataset,
    batch_size=1,
    shuffle=False,
    generator=torch_generator,
)

print(
    f"Final training shape: {train_dataset[0][0].shape} "
    "(window size s. * downsampling freq., number of sensor channels), "
    f"Final testing shape: {test_dataset[0][0].shape}"
)

Window size: 2 s., downsampling freq.: 10 Hz
Total logs for Forward direction: 98
Training logs: 78, Testing logs: 20
Final training shape: torch.Size([20, 1]) (window size s. * downsampling freq., number of sensor channels), Final testing shape: torch.Size([356, 1])


In [3]:
# Build the classifier selected by the model config.
num_channels = train_dataset[0][0].shape[1]  # number of sensor channels
if not model_cfg.get("cnn_channels"):
    # No (or empty) "cnn_channels" entry in the config: plain GRU classifier.
    gru_model = GRUClassifier(
        input_size=num_channels,
        hidden_size=model_cfg["hidden_size"],
        num_layers=model_cfg["num_layers"],
        output_size=1,
        dropout=model_cfg["dropout"],
    )
else:
    # "cnn_channels" is configured: use the CNN + GRU variant.
    gru_model = CNN_GRUClassifier(
        input_size=num_channels,
        hidden_size=model_cfg["hidden_size"],
        num_layers=model_cfg["num_layers"],
        output_size=1,
        cnn_channels=model_cfg["cnn_channels"],
        dropout=model_cfg["dropout"],
    )
# Move whichever model was built onto the selected device.
gru_model = gru_model.to(device)

In [4]:
# Pull a single mini-batch from the training loader and move it to `device`.
data_iter = iter(train_loader)
inputs, targets = (tensor.to(device) for tensor in next(data_iter))
print(
    f"Input shape: {inputs.shape}, Target shape: {targets.shape}, "
    f"(Batch size: {fit_cfg['batch_size']}, "
    f"Window size (samples): {data_cfg['window_size'] * data_cfg['downsampling_freq']}, "
    f"Num channels: {num_channels})"
)

Input shape: torch.Size([32, 20, 1]), Target shape: torch.Size([32, 1]), (Batch size: 32, Window size (samples): 20, Num channels: 1)


In [6]:
# Run the training batch through the model with debug=True forwarded to
# forward(). The module itself is called (not `.forward`) so that
# nn.Module.__call__ runs any registered forward hooks as well.
outputs = gru_model(inputs, debug=True)

Input shape: torch.Size([32, 20, 1]), (Bacth size: 32, Sequence length: 20, Input size: 1)
hidden state shape: torch.Size([2, 32, 16]), (Number of layers: 2, Batch size: 32, Hidden size: 16)
GRU output shape: torch.Size([32, 20, 16]), (Batch size: 32, Sequence length: 20, Hidden size: 16)
Fully connected output shape: torch.Size([32, 1]), (Batch size: 32, Output size: 1)


In [7]:
# Fetch the first test item (the test loader uses batch_size=1, no shuffling)
# and move it to `device`.
test_inputs, test_targets = next(iter(test_loader))
test_inputs, test_targets = test_inputs.to(device), test_targets.to(device)
# Report the actual sequence length of the test tensor: it is not the
# training window length, so the legend is read from the tensor itself and the
# training window is shown next to it for comparison.
print(
    f"Test Input shape: {test_inputs.shape}, Test Target shape: {test_targets.shape}, "
    f"(Batch size: {test_inputs.shape[0]}, "
    f"Sequence length (samples): {test_inputs.shape[1]}, "
    f"Training window size (samples): {data_cfg['window_size'] * data_cfg['downsampling_freq']}, "
    f"Num channels: {num_channels})"
)
# Call the module rather than `.forward` so nn.Module.__call__ (and any
# registered hooks) is used; debug=True is forwarded to forward().
test_outputs = gru_model(test_inputs, debug=True)

Test Input shape: torch.Size([1, 356, 1]), Test Target shape: torch.Size([1, 356, 1]), (Batch size: 1, Window size (samples): 20, Num channels: 1)
Input shape: torch.Size([1, 356, 1]), (Bacth size: 1, Sequence length: 356, Input size: 1)
hidden state shape: torch.Size([2, 1, 16]), (Number of layers: 2, Batch size: 1, Hidden size: 16)
GRU output shape: torch.Size([1, 356, 16]), (Batch size: 1, Sequence length: 356, Hidden size: 16)
Fully connected output shape: torch.Size([1, 1]), (Batch size: 1, Output size: 1)


In [15]:
# Layer-by-layer summary for one training window: (samples per window, channels).
from torchsummary import summary
summary(
    gru_model,
    input_size=(
        data_cfg["window_size"] * data_cfg["downsampling_freq"],
        num_channels,
    ),
)
# NOTE: the recorded torchsummary table reports 0 parameters for the GRU layer
# (17 in total), whereas summing numel() over the parameters gives 2561 --
# rely on the explicit listing below for the real parameter shapes.
for name, param in gru_model.named_parameters():
    if not param.requires_grad:
        continue  # frozen parameters are not listed
    print(f"Parameter: {name}, Shape: {param.shape}")

----------------------------------------------------------------
        Layer (type)               Output Shape         Param #
               GRU-1  [[-1, 50, 16], [-1, 2, 16]]               0
            Linear-2                [-1, 50, 1]              17
Total params: 17
Trainable params: 17
Non-trainable params: 0
----------------------------------------------------------------
Input size (MB): 0.00
Forward/backward pass size (MB): 0.19
Params size (MB): 0.00
Estimated Total Size (MB): 0.20
----------------------------------------------------------------
Parameter: gru.weight_ih_l0, Shape: torch.Size([48, 1])
Parameter: gru.weight_hh_l0, Shape: torch.Size([48, 16])
Parameter: gru.bias_ih_l0, Shape: torch.Size([48])
Parameter: gru.bias_hh_l0, Shape: torch.Size([48])
Parameter: gru.weight_ih_l1, Shape: torch.Size([48, 16])
Parameter: gru.weight_hh_l1, Shape: torch.Size([48, 16])
Parameter: gru.bias_ih_l1, Shape: torch.Size([48])
Parameter: gru.bias_hh_l1, Shape: torch.Size([48])
Par

In [16]:
# Explain the leading dimension of the GRU weight/bias tensors. Every number in
# the sentence is derived from the configured hidden size, so the message stays
# self-consistent if the config changes (for hidden_size=16 it prints 48).
print(
    f"{3 * model_cfg['hidden_size']} because GRU has hidden size {model_cfg['hidden_size']} "
    f"and 3 gates (reset, update, new hidden), "
    f"so {model_cfg['hidden_size']} * 3 = {3 * model_cfg['hidden_size']}"
)

48 because GRU has hidden size 16 and 3 gates (reset, update, new hidden), so 16 * 3 = 48


In [27]:
# Count every trainable scalar in the model.
total_params = sum(p.numel() for p in gru_model.parameters() if p.requires_grad)

# Expected count derived from the config instead of hard-coded arithmetic.
# Per GRU layer: weight_ih (3H x n_in) + weight_hh (3H x H) + bias_ih (3H) + bias_hh (3H),
# where n_in is num_channels for the first layer and H for the following ones.
# The linear head maps H -> 1 (the model is built with output_size=1).
# NOTE(review): this matches the parameter shapes listed for the plain
# GRUClassifier; it does not account for the extra layers of CNN_GRUClassifier.
gru_hidden = model_cfg["hidden_size"]
gru_gate_rows = 3 * gru_hidden
gru_layer_inputs = [num_channels] + [gru_hidden] * (model_cfg["num_layers"] - 1)
expected_params = sum(
    gru_gate_rows * (n_in + gru_hidden) + 2 * gru_gate_rows
    for n_in in gru_layer_inputs
) + (gru_hidden * 1 + 1)
print(f"Total trainable scalar parameters: {total_params}, which is consistent with {expected_params}")

Total trainable scalar parameters: 2561, which is consistent with 2561
