In [1]:
# Step up one directory so the relative paths used further down
# (e.g. "./datasets/HARTH/") resolve from the parent folder.
# NOTE: the target is relative, so running this cell a second time in the same
# kernel moves the working directory up another level.
import os
os.chdir("../")

In [2]:
import numpy as np
import torch

In [3]:
from storage.har_datasets import HARTHDataset, UCI_HARDataset, sts_medoids
from s3ts.api.dms.har_datasets import LDFDataset, DFDataset
from storage.label_mappings import *
from s3ts.api.nets.methods import create_model_from_DM, train_model, test_model

In [4]:
from torchvision.transforms import Normalize

In [5]:
# Display the raw HARTH label codes together with their activity names.
HARTH_LABELS

{1: 'walking',
 2: 'running',
 3: 'shuffling',
 4: 'stairs_up',
 5: 'stairs_down',
 6: 'standing',
 7: 'sitting',
 8: 'lying',
 13: 'cycking_sit',
 14: 'cycling_stand',
 130: 'cycling_sit_idle',
 140: 'cycling_stand_idle'}

In [6]:
# Lookup table from raw HARTH label code -> contiguous class index (0..11).
# The table is indexed by the raw code, so it needs 141 entries (largest code is 140).
# Any index that is not one of the listed codes keeps the initial value 0,
# i.e. the same index that raw code 1 is mapped to.
harth_codes = [1, 2, 3, 4, 5, 6, 7, 8, 13, 14, 130, 140]
label_mapping = np.zeros(141)
label_mapping[harth_codes] = np.arange(len(harth_codes))

# Build the HARTH dataset from ./datasets/HARTH/, passing the code -> class-index
# table so labels are remapped by the dataset.
# (wsize is presumably the window length in samples — TODO confirm in HARTHDataset.)
ds = HARTHDataset(
    "./datasets/HARTH/",
    wsize=48,
    normalize=True,
    label_mapping=label_mapping,
)

In [7]:
# Number of items the dataset exposes.
len(ds)

6225494

In [8]:
# Reuse the medoids stored on disk when the file is present; otherwise compute
# them from the dataset and store them for the next run.
# NOTE: np.save writes a single array in .npy format, so despite its ".npz"
# suffix this file is not a zipped multi-array archive.
meds_path = "./datasets/HARTH/meds.npz"
if os.path.exists(meds_path):
    meds = np.load(meds_path)
else:
    meds = sts_medoids(ds, n=500)
    with open(meds_path, "wb") as f:
        np.save(f, meds)

In [9]:
# Wrap the dataset so that items are built against the medoid patterns.
# No transform is attached yet: dm_transform is assigned later, once the
# normalization statistics have been estimated from untransformed samples.
dfds = DFDataset(
    ds,
    patterns=meds,
    w=0.1,
    dm_transform=None,
    ram=True,
)

Loading cached dissimilarity frames if available...


In [10]:
# Estimate normalization statistics from 500 randomly chosen items
# (np.random.choice samples with replacement by default; the seed fixes the draw).
np.random.seed(42)
sample_indices = np.random.choice(np.arange(len(dfds)), 500)

# Only the first element of each item is needed; stack them along a new dim 0.
DM = torch.stack([dfds[idx][0] for idx in sample_indices])

# Reducing over dims 0, 2 and 3 leaves one mean / std value per entry of dim 1.
reduce_dims = [0, 2, 3]
dm_transform = Normalize(mean=DM.mean(dim=reduce_dims), std=DM.std(dim=reduce_dims))

In [11]:
# Attach the normalization to the dataset, which was created with dm_transform=None.
# NOTE: this mutates dfds in place, so items fetched from here on differ from
# the untransformed ones that were used to estimate the statistics.
dfds.dm_transform = dm_transform

In [12]:
# Split predicates over the value x handed in by the consumer of this dict
# (presumably the sample index — TODO confirm in LDFDataset):
#   train: x < 6,000,000
#   val:   6,000,000 <= x < 6,050,000
#   test:  x >= 6,050,000
# The three ranges are disjoint and together cover every value.
data_split = {
    "train": lambda x: x < 6_000_000,
    "val": lambda x: (x >= 6_000_000) & (x < 6_050_000),
    "test": lambda x: x >= 6_050_000,
}

# Build the train/val/test splits and their loading configuration
# from the split predicates defined above.
dm = LDFDataset(
    dfds,
    data_split=data_split,
    batch_size=32,
    random_seed=42,
    num_workers=16,
)

In [13]:
# Sanity check: total number of items across the three splits
# (compare with len(ds) to confirm nothing was dropped or duplicated).
sum(len(split) for split in (dm.ds_train, dm.ds_val, dm.ds_test))

6225494

In [14]:
# Instantiate the model from the data module: "img" data source,
# "cnn" architecture, "cls" task; no explicit name is given.
model = create_model_from_DM(
    dm,
    name=None,
    dsrc="img",
    arch="cnn",
    task="cls",
)

Input shape:  torch.Size([1, 12, 48, 48])
Latent shape:  torch.Size([1, 40, 3, 48])


In [15]:
# Train for at most two epochs. train_model returns a pair: the model and a second
# value that is printed below (the saved output shows a dict of validation metrics).
# NOTE(review): `model` is rebound here, so re-running this cell passes the returned
# model back into train_model instead of a fresh one — confirm that is intended.
# NOTE(review): the saved output reports val_auroc ~ 0.001 next to val_acc ~ 0.94;
# that combination looks inconsistent — verify how the AUROC metric is computed.
model, data = train_model(dm, model, max_epochs=2)
print(data)

Global seed set to 42
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
You are using a CUDA device ('NVIDIA GeForce RTX 3060') that has Tensor Cores. To properly utilize them, you should set `torch.set_float32_matmul_precision('medium' | 'high')` which will trade-off precision for performance. For more details, read https://pytorch.org/docs/stable/generated/torch.set_float32_matmul_precision.html#torch.set_float32_matmul_precision
Missing logger folder: training/img_cnn_cls_wl48_ws1_ss0_np12_lp48_n12
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

   | Name       | Type               | Params
---------------------------------------------------
0  | encoder    | CNN_IMG            | 24.7 K
1  | decoder    | LinearDecoder      | 377 K 
2  | flatten    | Flatten            | 0     
3  | softmax    | Softmax            | 0     
4  | train_acc  | MulticlassAccuracy | 0     
5  | train_f1   | Mu

Epoch 1: 100%|██████████| 186561/186561 [30:17<00:00, 102.67it/s, v_num=0, train_loss_step=2.120, val_loss=1.680, val_acc=0.942, val_auroc=0.00102, train_loss_epoch=1.770, train_acc=0.852]

`Trainer.fit` stopped: `max_epochs=2` reached.


Epoch 1: 100%|██████████| 186561/186561 [30:17<00:00, 102.67it/s, v_num=0, train_loss_step=2.120, val_loss=1.680, val_acc=0.942, val_auroc=0.00102, train_loss_epoch=1.770, train_acc=0.852]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]


Input shape:  torch.Size([1, 12, 48, 48])
Latent shape:  torch.Size([1, 40, 3, 48])
Validation DataLoader 0: 100%|██████████| 1555/1555 [00:24<00:00, 63.44it/s]


{'val_acc': 0.9420619010925293, 'val_f1': 0.9420619010925293, 'val_auroc': 0.0010182209080085158}
