In [36]:
import logging

logging.basicConfig(format='%(asctime)s | %(levelname)s : %(message)s', level=logging.INFO)
logger = logging.getLogger(__name__)
logger.info("Loading packages ...")

import os
import sys
import time
import pickle
import json
from argparse import Namespace

# 3rd party packages
from tqdm import tqdm
import torch
from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter

# Project modules
from src.options import Options
from src.running import setup, pipeline_factory, validate, check_progress, NEG_METRICS
from src.utils import utils
from src.datasets.data import data_factory, Normalizer
from src.datasets.datasplit import split_dataset
from src.models.ts_transformer import model_factory
from src.models.loss import get_loss_module
from src.optimizers import get_optimizer

import neptune.new as neptune

2022-09-16 10:19:10,237 | INFO : Loading packages ...


In [37]:
run = neptune.init(
    project="FYP-Group22/Transformer-SSC",
    api_token="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiJkNWJjMDdhNC05NWY5LTQwNWQtYTQyNi0zNjNmYmYwZDg3M2YifQ==",
)  # your credentials

https://app.neptune.ai/FYP-Group22/Transformer-SSC/e/TRAN-19
Remember to stop your run once you’ve finished logging your metadata (https://docs.neptune.ai/api-reference/run#.stop). It will be stopped automatically only when the notebook kernel/interactive console is terminated.


In [46]:
config = {}
config["output_dir"] = './tmp'
config["seed"] = 123
config["gpu"] = "0" # activate gpu > o, on cpu > -1
config["n_proc"] = 1

# loading pre-trained model
config["load_model"] = None
config["test_only"] = None
config["config_filepath"] = None

# experiment config
config["task"] = "imputation"
config["experiment_name"] = "first_test"
config["no_timestamp"] = True

# dataset settings
config["limit_size"] = 300 
config["data_class"] = 'kuhar'
config["data_dir"] = '../../Data/KU-HAR_time_domain_subsamples_20750x300.csv'
config["val_ratio"] = 0.1 
config["test_ratio"] = 0.1 
config["norm_from"] = False 
config['normalization'] = 'standardization'
config["record_file"] = "./tmp/Imputation_records.xls"
config["records_file"] = "./tmp/records.xls"
config["num_workers"] = 0
config["console"] = True
config["save_all"] = False
config["comment"] = "mvts_transformer | KU-HAR dataset Dynamic Actions | nipdep 1DConv processing layer | first test"

# test, val from seperate files 
config["test_pattern"] = False 
config["val_pattern"] = False 
config["test_from"] = False

# freeze model weight for fine-tunning
config["freeze"] = False

# if task is a imputation 
config["masking_ratio"] = 0.15
config["mean_mask_length"] = 5
config["mask_mode"] = "seperate"
config["mask_distribution"] = "geometric"
config["exclude_feats"] = None 
config["mask_feats"] = '0, 1'
config["start_hint"] = 0.0
config["end_hint"] = 0.0 
config["harden"] = True

# model parameters
config['model'] = 'transformer'
config["pos_encoding"] = "learnable"
config["d_model"] = 128 
config["dim_feedforward"] = 256
config["num_heads"] = 8
config["num_layers"] = 3
config["dropout"] = 0.1
config["activation"] = 'relu'
config["normalization_layer"] = "BatchNorm"
config["data_window_len"] = None 
config["max_seq_len"] = 300

# model training parameters
config["epochs"] = 10
config["lr"] = 0.0005
config["val_interval"] = 1
config["lr_step"] = '1000'
config["lr_factor"] = '0.1'
config["l2_reg"] = 0
config["global_reg"] = True
config["key_metric"] = "loss"
config["optimizer"] = "Adam"
config["batch_size"] = 32
config["print_interval"] = 1

setup(config)

2022-09-16 10:22:05,057 | INFO : Stored configuration file in './tmp\first_test'


{'output_dir': './tmp\\first_test',
 'seed': 123,
 'gpu': '0',
 'n_proc': 1,
 'load_model': None,
 'test_only': None,
 'config_filepath': None,
 'task': 'imputation',
 'experiment_name': 'first_test',
 'no_timestamp': True,
 'limit_size': 300,
 'data_class': 'kuhar',
 'data_dir': '../../Data/KU-HAR_time_domain_subsamples_20750x300.csv',
 'val_ratio': 0.1,
 'test_ratio': 0.1,
 'norm_from': False,
 'normalization': 'standardization',
 'record_file': './tmp/Imputation_records.xls',
 'records_file': './tmp/records.xls',
 'num_workers': 0,
 'console': True,
 'save_all': False,
 'comment': 'mvts_transformer | KU-HAR dataset Dynamic Actions | nipdep 1DConv processing layer | first test',
 'test_pattern': False,
 'val_pattern': False,
 'test_from': False,
 'freeze': False,
 'masking_ratio': 0.15,
 'mean_mask_length': 5,
 'mask_mode': 'seperate',
 'mask_distribution': 'geometric',
 'exclude_feats': None,
 'mask_feats': '0, 1',
 'start_hint': 0.0,
 'end_hint': 0.0,
 'harden': True,
 'model': 'tr

In [39]:
run["parameters"] = config
# config['class_names'] = ['Talk-sit', 'Talk-stand', 'Stand-sit', 'Lay-stand', 'Pick', 'Jump', 'Push-up', 'Sit-up', 'Walk', 'Walk-backward', 'Walk-circle', 'Run', 'Stair-up', 'Stair-down', 'Table-tennis']
config['class_names'] = ['Stand', 'Sit', 'Talk-sit', 'Talk-stand', 'Stand-sit', 'Lay', 'Lay-stand', 'Pick', 'Jump', 'Push-up', 'Sit-up', 'Walk', 'Walk-backward', 'Walk-circle', 'Run', 'Stair-up', 'Stair-down', 'Table-tennis']

In [41]:
total_epoch_time = 0
total_eval_time = 0

total_start_time = time.time()

# Add file logging besides stdout
file_handler = logging.FileHandler(os.path.join(config['output_dir'], 'output.log'))
logger.addHandler(file_handler)

logger.info('Running:\n{}\n'.format(' '.join(sys.argv)))  # command used to run

if config['seed'] is not None:
    torch.manual_seed(config['seed'])

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
logger.info("Using device: {}".format(device))
if device == 'cuda':
    logger.info("Device index: {}".format(torch.cuda.current_device()))

2022-09-16 10:19:19,585 | INFO : Running:
c:\Users\deela\anaconda3\envs\mvts_trans\lib\site-packages\ipykernel_launcher.py --ip=127.0.0.1 --stdin=9013 --control=9011 --hb=9010 --Session.signature_scheme="hmac-sha256" --Session.key=b"4334a332-653a-4c63-bb0e-b94496ffd1cf" --shell=9012 --transport="tcp" --iopub=9014 --f=c:\Users\deela\AppData\Roaming\jupyter\runtime\kernel-v2-5100Er5FAaxyLBFx.json

2022-09-16 10:19:19,589 | INFO : Using device: cuda


In [42]:
# Build data
logger.info("Loading and preprocessing data ...")
data_class = data_factory[config['data_class']]
my_data = data_class(config['data_dir'], n_proc=config['n_proc'], limit_size=config['limit_size'], config=config, filter_classes=[])
feat_dim = my_data.feature_df.shape[1]  # dimensionality of data features
if config['task'] == 'classification':
    validation_method = 'StratifiedShuffleSplit'
    labels = my_data.labels_df.label.values
    print(labels)
else:
    validation_method = 'ShuffleSplit'
    labels = None

2022-09-16 10:19:20,931 | INFO : Loading and preprocessing data ...


In [43]:
# Split dataset
test_data = my_data
test_indices = None  # will be converted to empty list in `split_dataset`, if also test_set_ratio == 0
val_data = my_data
val_indices = []
if config['test_pattern']:  # used if test data come from different files / file patterns
    test_data = data_class(config['data_dir'], n_proc=-1, config=config)
    test_indices = test_data.all_IDs
if config['test_from']:  # load test IDs directly from file, if available, otherwise use `test_set_ratio`. Can work together with `test_pattern`
    test_indices = list(set([line.rstrip() for line in open(config['test_from']).readlines()]))
    try:
        test_indices = [int(ind) for ind in test_indices]  # integer indices
    except ValueError:
        pass  # in case indices are non-integers
    logger.info("Loaded {} test IDs from file: '{}'".format(len(test_indices), config['test_from']))
if config['val_pattern']:  # used if val data come from different files / file patterns
    val_data = data_class(config['data_dir'], n_proc=-1, config=config)
    val_indices = val_data.all_IDs

# Note: currently a validation set must exist, either with `val_pattern` or `val_ratio`
# Using a `val_pattern` means that `val_ratio` == 0 and `test_ratio` == 0
if config['val_ratio'] > 0:
    train_indices, val_indices, test_indices = split_dataset(data_indices=my_data.all_IDs,
                                                                validation_method=validation_method,
                                                                n_splits=1,
                                                                validation_ratio=config['val_ratio'],
                                                                test_set_ratio=config['test_ratio'],  # used only if test_indices not explicitly specified
                                                                test_indices=test_indices,
                                                                random_seed=1337,
                                                                labels=labels)
    train_indices = train_indices[0]  # `split_dataset` returns a list of indices *per fold/split*
    val_indices = val_indices[0]  # `split_dataset` returns a list of indices *per fold/split*
else:
    train_indices = my_data.all_IDs
    if test_indices is None:
        test_indices = []

logger.info("{} samples may be used for training".format(len(train_indices)))
logger.info("{} samples will be used for validation".format(len(val_indices)))
logger.info("{} samples will be used for testing".format(len(test_indices)))

with open(os.path.join(config['output_dir'], 'data_indices.json'), 'w') as f:
    try:
        json.dump({'train_indices': list(map(int, train_indices)),
                    'val_indices': list(map(int, val_indices)),
                    'test_indices': list(map(int, test_indices))}, f, indent=4)
    except ValueError:  # in case indices are non-integers
        json.dump({'train_indices': list(train_indices),
                    'val_indices': list(val_indices),
                    'test_indices': list(test_indices)}, f, indent=4)

2022-09-16 10:21:08,505 | INFO : 16806 samples may be used for training
2022-09-16 10:21:08,506 | INFO : 1868 samples will be used for validation
2022-09-16 10:21:08,507 | INFO : 2075 samples will be used for testing


In [47]:
# Pre-process features
normalizer = None
if config['norm_from']:
    with open(config['norm_from'], 'rb') as f:
        norm_dict = pickle.load(f)
    normalizer = Normalizer(**norm_dict)
elif config['normalization'] is not None:
    normalizer = Normalizer(config['normalization'])
    my_data.feature_df.loc[train_indices] = normalizer.normalize(my_data.feature_df.loc[train_indices])
    if not config['normalization'].startswith('per_sample'):
        # get normalizing values from training set and store for future use
        norm_dict = normalizer.__dict__
        with open(os.path.join(config['output_dir'], 'normalization.pickle'), 'wb') as f:
            pickle.dump(norm_dict, f, pickle.HIGHEST_PROTOCOL)
if normalizer is not None:
    if len(val_indices):
        print("Validation dataset normalization")
        val_data.feature_df.loc[val_indices] = normalizer.normalize(val_data.feature_df.loc[val_indices])
    if len(test_indices):
        print("Test dataset normalization")
        test_data.feature_df.loc[test_indices] = normalizer.normalize(test_data.feature_df.loc[test_indices])

Validation dataset normalization
Test dataset normalization


In [52]:
val_data.feature_df.accelX.min()

-20.35786451784537

In [53]:
# Create model
logger.info("Creating model ...")
model = model_factory(config, my_data)

if config['freeze']:
    for name, param in model.named_parameters():
        if name.startswith('output_layer'):
            param.requires_grad = True
        else:
            param.requires_grad = False

logger.info("Model:\n{}".format(model))
logger.info("Total number of parameters: {}".format(utils.count_parameters(model)))
logger.info("Trainable parameters: {}".format(utils.count_parameters(model, trainable=True)))


2022-09-16 10:23:19,671 | INFO : Creating model ...
2022-09-16 10:23:19,686 | INFO : Model:
TSTransformerEncoder(
  (project_inp): Linear(in_features=6, out_features=128, bias=True)
  (pos_enc): LearnablePositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerBatchNormEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )
        (linear1): Linear(in_features=128, out_features=256, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=256, out_features=128, bias=True)
        (norm1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (norm2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): D

In [54]:
# Initialize optimizer

if config['global_reg']:
    weight_decay = config['l2_reg']
    output_reg = None
else:
    weight_decay = 0
    output_reg = config['l2_reg']

optim_class = get_optimizer(config['optimizer'])
optimizer = optim_class(model.parameters(), lr=config['lr'], weight_decay=weight_decay)

start_epoch = 0
lr_step = 0  # current step index of `lr_step`
lr = config['lr']  # current learning step
# Load model and optimizer state
if config["load_model"]:
    model, optimizer, start_epoch = utils.load_model(model, config['load_model'], optimizer, config['resume'],
                                                        config['change_output'],
                                                        config['lr'],
                                                        config['lr_step'],
                                                        config['lr_factor'])
model.to(device)

loss_module = get_loss_module(config)

if config['test_only'] == 'testset':  # Only evaluate and skip training
    dataset_class, collate_fn, runner_class = pipeline_factory(config)
    test_dataset = dataset_class(test_data, test_indices)

    test_loader = DataLoader(dataset=test_dataset,
                                batch_size=config['batch_size'],
                                shuffle=False,
                                num_workers=config['num_workers'],
                                pin_memory=True,
                                collate_fn=lambda x: collate_fn(x, max_len=model.max_len))
    test_evaluator = runner_class(model, test_loader, device, loss_module,
                                        print_interval=config['print_interval'], console=config['console'])
    aggr_metrics_test, per_batch_test = test_evaluator.evaluate(keep_all=True)
    print_str = 'Test Summary: '
    for k, v in aggr_metrics_test.items():
        print_str += '{}: {:8f} | '.format(k, v)
    logger.info(print_str)
    # return

In [55]:
# Initialize data generators
dataset_class, collate_fn, runner_class = pipeline_factory(config)
val_dataset = dataset_class(val_data, val_indices)

val_loader = DataLoader(dataset=val_dataset,
                        batch_size=config['batch_size'],
                        shuffle=False,
                        num_workers=config['num_workers'],
                        pin_memory=True,
                        collate_fn=lambda x: collate_fn(x, max_len=model.max_len))

train_dataset = dataset_class(my_data, train_indices)

train_loader = DataLoader(dataset=train_dataset,
                            batch_size=config['batch_size'],
                            shuffle=True,
                            num_workers=config['num_workers'],
                            pin_memory=True,
                            collate_fn=lambda x: collate_fn(x, max_len=model.max_len))

trainer = runner_class(model, train_loader, device, loss_module, optimizer, l2_reg=output_reg,
                                print_interval=config['print_interval'], console=config['console'])
val_evaluator = runner_class(model, val_loader, device, loss_module,
                                    print_interval=config['print_interval'], console=config['console'])

tensorboard_writer = SummaryWriter(config['tensorboard_dir'])

best_value = 1e16 if config['key_metric'] in NEG_METRICS else -1e16  # initialize with +inf or -inf depending on key metric
metrics = []  # (for validation) list of lists: for each epoch, stores metrics like loss, ...
best_metrics = {}

In [56]:
for i in train_loader:
    # print(i, len(i))
    X, Xm, m, p, I = i 
    print(X.shape, X.max())
    break

torch.Size([32, 300, 6]) tensor(0.0509)


In [57]:
# Evaluate on validation before training
aggr_metrics_val, best_metrics, best_value = validate(val_evaluator, tensorboard_writer, config, best_metrics,
                                                        best_value, epoch=0)
metrics_names, metrics_values = zip(*aggr_metrics_val.items())
metrics.append(list(metrics_values))

logger.info('Starting training...')
for epoch in tqdm(range(start_epoch + 1, config["epochs"] + 1), desc='Training Epoch', leave=False):
    mark = epoch if config['save_all'] else 'last'
    epoch_start_time = time.time()
    aggr_metrics_train = trainer.train_epoch(epoch)  # dictionary of aggregate epoch metrics
    # print(f"train metrics >> {aggr_metrics_train}")
    run['train/loss'].log(aggr_metrics_train['loss'])
    epoch_runtime = time.time() - epoch_start_time
    print_str = 'Epoch {} Training Summary: '.format(epoch)
    for k, v in aggr_metrics_train.items():
        tensorboard_writer.add_scalar('{}/train'.format(k), v, epoch)
        print_str += '{}: {:8f} | '.format(k, v)
    logger.info(print_str)
    logger.info("Epoch runtime: {} hours, {} minutes, {} seconds\n".format(*utils.readable_time(epoch_runtime)))
    total_epoch_time += epoch_runtime
    avg_epoch_time = total_epoch_time / (epoch - start_epoch)
    avg_batch_time = avg_epoch_time / len(train_loader)
    avg_sample_time = avg_epoch_time / len(train_dataset)
    logger.info("Avg epoch train. time: {} hours, {} minutes, {} seconds".format(*utils.readable_time(avg_epoch_time)))
    logger.info("Avg batch train. time: {} seconds".format(avg_batch_time))
    logger.info("Avg sample train. time: {} seconds".format(avg_sample_time))

    # evaluate if first or last epoch or at specified interval
    if (epoch == config["epochs"]) or (epoch == start_epoch + 1) or (epoch % config['val_interval'] == 0):
        aggr_metrics_val, best_metrics, best_value = validate(val_evaluator, tensorboard_writer, config,
                                                                best_metrics, best_value, epoch)
        # print(f"eval metrics >> {aggr_metrics_val}")
        run['eval/loss'].log(aggr_metrics_val['loss'])
        # run['eval/accuracy'].log(aggr_metrics_val['accuracy'])
        # run['eval/precision'].log(aggr_metrics_val['precision'])
        metrics_names, metrics_values = zip(*aggr_metrics_val.items())
        metrics.append(list(metrics_values))

    # utils.save_model(os.path.join(config['save_dir'], 'model_{}.pth'.format(mark)), epoch, model, optimizer)

    # Learning rate scheduling
    if epoch == config['lr_step'][lr_step]:
        utils.save_model(os.path.join(config['save_dir'], 'model_{}.pth'.format(epoch)), epoch, model, optimizer)
        lr = lr * config['lr_factor'][lr_step]
        if lr_step < len(config['lr_step']) - 1:  # so that this index does not get out of bounds
            lr_step += 1
        logger.info('Learning rate updated to: ', lr)
        for param_group in optimizer.param_groups:
            param_group['lr'] = lr

    # Difficulty scheduling
    if config['harden'] and check_progress(epoch):
        train_loader.dataset.update()
        val_loader.dataset.update()

# Export evolution of metrics over epochs
# header = metrics_names #@nipdep
# metrics_filepath = os.path.join(config["output_dir"], "metrics_" + config["experiment_name"] + ".xls")
# book = utils.export_performance_metrics(metrics_filepath, metrics, header, sheet_name="metrics")

# # Export record metrics to a file accumulating records from all experiments
# utils.register_record(config["records_file"], config["initial_timestamp"], config["experiment_name"],
#                         best_metrics, aggr_metrics_val, comment=config['comment'])

logger.info('Best {} was {}. Other metrics: {}'.format(config['key_metric'], best_value, best_metrics))
logger.info('All Done!')

total_runtime = time.time() - total_start_time
logger.info("Total runtime: {} hours, {} minutes, {} seconds\n".format(*utils.readable_time(total_runtime)))

run.stop()

2022-09-16 10:23:28,835 | INFO : Evaluating on validation set ...


[KEvaluating Epoch 0  98.3% | batch:        58 of        59	|	loss: 0.245817

2022-09-16 10:23:36,139 | INFO : Validation runtime: 0.0 hours, 0.0 minutes, 7.303072690963745 seconds

2022-09-16 10:23:36,141 | INFO : Avg val. time: 0.0 hours, 0.0 minutes, 7.673379063606262 seconds
2022-09-16 10:23:36,143 | INFO : Avg batch val. time: 0.1300572722645129 seconds
2022-09-16 10:23:36,144 | INFO : Avg sample val. time: 0.004107804637904851 seconds
2022-09-16 10:23:36,148 | INFO : Epoch 0 Validation Summary: epoch: 0.000000 | loss: 1.856899 | 





  val = np.asanyarray(val)
2022-09-16 10:23:36,252 | INFO : Starting training...
Training Epoch:   0%|          | 0/10 [00:00<?, ?it/s]

[KTraining Epoch 1  99.8% | batch:       525 of       526	|	loss: 0.001213912

2022-09-16 10:27:17,812 | INFO : Epoch 1 Training Summary: epoch: 1.000000 | loss: 0.545071 | 
2022-09-16 10:27:17,813 | INFO : Epoch runtime: 0.0 hours, 3.0 minutes, 41.55656814575195 seconds

2022-09-16 10:27:17,816 | INFO : Avg epoch train. time: 0.0 hours, 3.0 minutes, 41.55656814575195 seconds
2022-09-16 10:27:17,817 | INFO : Avg batch train. time: 0.4212102056002889 seconds
2022-09-16 10:27:17,819 | INFO : Avg sample train. time: 0.013183182681527546 seconds
2022-09-16 10:27:17,820 | INFO : Evaluating on validation set ...


[KEvaluating Epoch 1  98.3% | batch:        58 of        59	|	loss: 0.000435387

2022-09-16 10:27:25,081 | INFO : Validation runtime: 0.0 hours, 0.0 minutes, 7.259398698806763 seconds

2022-09-16 10:27:25,082 | INFO : Avg val. time: 0.0 hours, 0.0 minutes, 7.590582990646363 seconds
2022-09-16 10:27:25,083 | INFO : Avg batch val. time: 0.12865394899400615 seconds
2022-09-16 10:27:25,085 | INFO : Avg sample val. time: 0.0040634812583759975 seconds





2022-09-16 10:27:25,090 | INFO : Epoch 1 Validation Summary: epoch: 1.000000 | loss: 0.713198 | 
Training Epoch:  10%|█         | 1/10 [03:48<34:20, 228.93s/it]

[KTraining Epoch 2  99.8% | batch:       525 of       526	|	loss: 0.000281339

2022-09-16 10:31:09,278 | INFO : Epoch 2 Training Summary: epoch: 2.000000 | loss: 0.396065 | 
2022-09-16 10:31:09,279 | INFO : Epoch runtime: 0.0 hours, 3.0 minutes, 44.092564821243286 seconds

2022-09-16 10:31:09,280 | INFO : Avg epoch train. time: 0.0 hours, 3.0 minutes, 42.82456648349762 seconds
2022-09-16 10:31:09,282 | INFO : Avg batch train. time: 0.42362084882794226 seconds
2022-09-16 10:31:09,283 | INFO : Avg sample train. time: 0.013258631826936667 seconds
2022-09-16 10:31:09,283 | INFO : Evaluating on validation set ...


[KEvaluating Epoch 2  98.3% | batch:        58 of        59	|	loss: 0.000215018

2022-09-16 10:31:16,905 | INFO : Validation runtime: 0.0 hours, 0.0 minutes, 7.620870590209961 seconds

2022-09-16 10:31:16,907 | INFO : Avg val. time: 0.0 hours, 0.0 minutes, 7.595630923906962 seconds
2022-09-16 10:31:16,908 | INFO : Avg batch val. time: 0.12873950718486377 seconds
2022-09-16 10:31:16,910 | INFO : Avg sample val. time: 0.004066183578108652 seconds
2022-09-16 10:31:16,912 | INFO : Epoch 2 Validation Summary: epoch: 2.000000 | loss: 0.668020 | 





Training Epoch:  20%|██        | 2/10 [07:40<30:45, 230.64s/it]

[KTraining Epoch 3  99.8% | batch:       525 of       526	|	loss: 0.000294342

2022-09-16 10:35:03,168 | INFO : Epoch 3 Training Summary: epoch: 3.000000 | loss: 0.401068 | 
2022-09-16 10:35:03,169 | INFO : Epoch runtime: 0.0 hours, 3.0 minutes, 46.140748262405396 seconds

2022-09-16 10:35:03,170 | INFO : Avg epoch train. time: 0.0 hours, 3.0 minutes, 43.9299604098002 seconds
2022-09-16 10:35:03,171 | INFO : Avg batch train. time: 0.4257223581935365 seconds
2022-09-16 10:35:03,171 | INFO : Avg sample train. time: 0.013324405593823646 seconds
2022-09-16 10:35:03,172 | INFO : Evaluating on validation set ...


[KEvaluating Epoch 3  98.3% | batch:        58 of        59	|	loss: 0.000124011

2022-09-16 10:35:10,780 | INFO : Validation runtime: 0.0 hours, 0.0 minutes, 7.6065993309021 seconds

2022-09-16 10:35:10,781 | INFO : Avg val. time: 0.0 hours, 0.0 minutes, 7.597197839191982 seconds
2022-09-16 10:35:10,783 | INFO : Avg batch val. time: 0.12876606507105054 seconds
2022-09-16 10:35:10,784 | INFO : Avg sample val. time: 0.0040670223978543805 seconds
2022-09-16 10:35:10,787 | INFO : Epoch 3 Validation Summary: epoch: 3.000000 | loss: 0.936572 | 
Training Epoch:  30%|███       | 3/10 [11:34<27:04, 232.07s/it]


[KTraining Epoch 4  99.8% | batch:       525 of       526	|	loss: 0.001736742

2022-09-16 10:38:58,830 | INFO : Epoch 4 Training Summary: epoch: 4.000000 | loss: 0.339200 | 
2022-09-16 10:38:58,831 | INFO : Epoch runtime: 0.0 hours, 3.0 minutes, 48.03800296783447 seconds

2022-09-16 10:38:58,832 | INFO : Avg epoch train. time: 0.0 hours, 3.0 minutes, 44.95697104930878 seconds
2022-09-16 10:38:58,833 | INFO : Avg batch train. time: 0.42767484990362886 seconds
2022-09-16 10:38:58,835 | INFO : Avg sample train. time: 0.01338551535459412 seconds
2022-09-16 10:38:58,836 | INFO : Evaluating on validation set ...


[KEvaluating Epoch 4  98.3% | batch:        58 of        59	|	loss: 0.000506249

2022-09-16 10:39:06,382 | INFO : Validation runtime: 0.0 hours, 0.0 minutes, 7.543957233428955 seconds

2022-09-16 10:39:06,383 | INFO : Avg val. time: 0.0 hours, 0.0 minutes, 7.590542763471603 seconds
2022-09-16 10:39:06,385 | INFO : Avg batch val. time: 0.1286532671774848 seconds
2022-09-16 10:39:06,386 | INFO : Avg sample val. time: 0.0040634597234858695 seconds
2022-09-16 10:39:06,393 | INFO : Epoch 4 Validation Summary: epoch: 4.000000 | loss: 0.424020 | 
Training Epoch:  40%|████      | 4/10 [15:30<23:21, 233.50s/it]


[KTraining Epoch 5  99.8% | batch:       525 of       526	|	loss: 0.004644439

2022-09-16 10:42:54,888 | INFO : Epoch 5 Training Summary: epoch: 5.000000 | loss: 0.290691 | 
2022-09-16 10:42:54,890 | INFO : Epoch runtime: 0.0 hours, 3.0 minutes, 48.390196323394775 seconds

2022-09-16 10:42:54,892 | INFO : Avg epoch train. time: 0.0 hours, 3.0 minutes, 45.64361610412598 seconds
2022-09-16 10:42:54,894 | INFO : Avg batch train. time: 0.4289802587530912 seconds
2022-09-16 10:42:54,896 | INFO : Avg sample train. time: 0.013426372492212662 seconds
2022-09-16 10:42:54,897 | INFO : Evaluating on validation set ...


[KEvaluating Epoch 5  98.3% | batch:        58 of        59	|	loss: 0.005510826

2022-09-16 10:43:03,263 | INFO : Validation runtime: 0.0 hours, 0.0 minutes, 8.365394592285156 seconds

2022-09-16 10:43:03,265 | INFO : Avg val. time: 0.0 hours, 0.0 minutes, 7.676637411117554 seconds
2022-09-16 10:43:03,267 | INFO : Avg batch val. time: 0.13011249849351786 seconds
2022-09-16 10:43:03,270 | INFO : Avg sample val. time: 0.004109548935287769 seconds
2022-09-16 10:43:03,274 | INFO : Epoch 5 Validation Summary: epoch: 5.000000 | loss: 0.725827 | 
Training Epoch:  50%|█████     | 5/10 [19:27<19:33, 234.69s/it]


[KTraining Epoch 6  99.8% | batch:       525 of       526	|	loss: 0.000169816

2022-09-16 10:47:11,752 | INFO : Epoch 6 Training Summary: epoch: 6.000000 | loss: 0.186672 | 
2022-09-16 10:47:11,753 | INFO : Epoch runtime: 0.0 hours, 4.0 minutes, 8.472110986709595 seconds

2022-09-16 10:47:11,753 | INFO : Avg epoch train. time: 0.0 hours, 3.0 minutes, 49.44836525122324 seconds
2022-09-16 10:47:11,755 | INFO : Avg batch train. time: 0.43621362215061454 seconds
2022-09-16 10:47:11,756 | INFO : Avg sample train. time: 0.01365276480133424 seconds
2022-09-16 10:47:11,758 | INFO : Evaluating on validation set ...


[KEvaluating Epoch 6  98.3% | batch:        58 of        59	|	loss: 4.22998e-05

2022-09-16 10:47:19,598 | INFO : Validation runtime: 0.0 hours, 0.0 minutes, 7.838864803314209 seconds

2022-09-16 10:47:19,599 | INFO : Avg val. time: 0.0 hours, 0.0 minutes, 7.69286015033722 seconds
2022-09-16 10:47:19,600 | INFO : Avg batch val. time: 0.1303874601752071 seconds
2022-09-16 10:47:19,602 | INFO : Avg sample val. time: 0.004118233485191231 seconds





2022-09-16 10:47:19,605 | INFO : Epoch 6 Validation Summary: epoch: 6.000000 | loss: 0.326069 | 
Training Epoch:  60%|██████    | 6/10 [23:43<16:08, 242.08s/it]

[KTraining Epoch 7   0.0% | batch:         0 of       526	|	loss: 7.45211e-05

                                                               

KeyboardInterrupt: 

In [None]:
utils.save_model(os.path.join(config['save_dir'], 'prep_model_{}.pth'.format(mark)), epoch, model, optimizer)

In [None]:
model.train()
for i in val_loader:
    x, y, m, ids = i
    x = x.to(device)
    m = m.to(device)
    pred = model(x, m)
    break

In [None]:
pred.shape

torch.Size([32, 18])

In [None]:
probs = torch.softmax(pred, dim=-1)
pred_label = torch.argmax(probs, dim=-1)
pred_label.shape

torch.Size([32])

In [None]:
pred_label

tensor([ 3,  2, 14,  4,  4,  1,  4,  3, 15, 17,  4, 11, 17,  1,  1, 15,  1, 10,
         1,  1,  3,  6,  1, 15,  6,  6, 15,  6,  1, 15,  1,  1],
       device='cuda:0')

In [None]:
y

tensor([13,  2, 14,  4,  4,  1,  4,  3, 15, 17,  4, 11, 17,  1,  5, 11,  5,  4,
         0,  2,  3,  6,  5, 12,  4,  9, 15, 10,  1, 15,  2,  0])

In [None]:
model.transformer_encoder.layers[0].linear1.weight.grad

tensor([[ 1.9982e-03,  1.8686e-03, -7.6950e-03,  ...,  3.2540e-05,
          2.5046e-03, -1.7847e-03],
        [ 1.0441e-02, -2.4760e-02, -1.4667e-02,  ..., -5.1662e-03,
          6.1747e-04, -1.9824e-02],
        [-4.3647e-03,  1.7393e-02,  1.9783e-03,  ...,  3.0929e-03,
          2.6371e-03, -6.4250e-03],
        ...,
        [ 1.8496e-02, -1.9215e-02, -2.6746e-02,  ...,  2.0559e-02,
         -1.6608e-02,  1.2978e-02],
        [ 1.7822e-02,  5.6438e-02,  3.2866e-02,  ...,  1.7167e-02,
         -6.8521e-03,  2.6117e-02],
        [-1.5895e-02, -5.7300e-03, -1.0521e-02,  ..., -1.7554e-02,
          2.3200e-02,  2.8165e-03]], device='cuda:0')

In [None]:
model

TSTransformerEncoderClassiregressor(
  (project_inp): Linear(in_features=6, out_features=128, bias=True)
  (pos_enc): FixedPositionalEncoding(
    (dropout): Dropout(p=0.1, inplace=False)
  )
  (transformer_encoder): TransformerEncoder(
    (layers): ModuleList(
      (0): TransformerBatchNormEncoderLayer(
        (self_attn): MultiheadAttention(
          (out_proj): NonDynamicallyQuantizableLinear(in_features=128, out_features=128, bias=True)
        )
        (linear1): Linear(in_features=128, out_features=256, bias=True)
        (dropout): Dropout(p=0.1, inplace=False)
        (linear2): Linear(in_features=256, out_features=128, bias=True)
        (norm1): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (norm2): BatchNorm1d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
        (dropout1): Dropout(p=0.1, inplace=False)
        (dropout2): Dropout(p=0.1, inplace=False)
      )
      (1): TransformerBatchNormEncoderLayer(

In [None]:
# test 1D-Conv layer
from torch import nn


In [None]:
conv_layer = nn.Conv1d(300, 1, 1)
sample_inp = torch.rand((32, 300, 128))
sample_out = conv_layer(sample_inp)
sample_out.shape

torch.Size([32, 1, 128])