# Model Benchmark Notebook

This notebook shows model benchmarks with various architectures for both the 'global' and 'raw' view datasets.

### 1. Installs and Imports

In [None]:
!pip install tsai

In [2]:
from tsai.all import *
import pandas as pd
import matplotlib.pyplot as plt
from scipy import signal
from IPython.display import clear_output

## SECTION 1: GLOBAL VIEW DATA

### 1.1 Processing the data

In [3]:
# Read the global-view CSV, discard the "Unnamed: 0" column (presumably a
# saved row index -- confirm), and keep only rows with no missing values.
global_view = (
    pd.read_csv('Full Global View Data.csv')
    .drop(columns=["Unnamed: 0"])
    .dropna()
)
global_view

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,991,992,993,994,995,996,997,998,999,target
0,0.805089,1.014019,0.879615,0.740667,1.078750,1.079406,1.205584,1.016628,0.998133,0.807200,...,1.057726,1.022003,1.160232,0.824003,0.982633,0.935857,0.936493,1.172365,1.051194,1
1,1.024147,1.022181,1.019334,1.014465,1.005740,0.994486,0.992549,1.009373,1.027504,1.036873,...,1.023391,1.028558,1.033152,1.037896,1.028911,1.015798,1.004923,1.009685,1.017351,0
2,1.177299,1.189832,1.327257,1.206747,1.099294,1.008407,0.956516,0.962503,0.983430,0.962339,...,1.106324,1.242909,1.114947,0.954955,0.958157,0.953832,0.913456,0.877882,1.117244,1
3,1.016976,0.996287,0.942826,0.993312,0.988428,1.002230,0.846147,1.037384,1.117179,1.217264,...,1.049267,0.858152,1.005960,0.875383,1.010819,0.973698,1.185194,1.011624,1.109986,1
4,0.899222,1.008320,0.955359,0.988199,0.794748,0.659896,0.742937,0.921260,0.843183,0.795844,...,1.130554,1.112823,1.151438,0.985616,1.108118,1.091524,1.215078,1.051907,0.997601,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
450,0.994224,0.997571,0.993460,0.990148,0.992702,0.995424,0.994811,0.991220,0.992966,1.001081,...,0.994400,0.989497,0.987133,0.994495,0.999678,1.001759,1.001121,0.993380,0.988747,1
451,0.961073,0.984563,0.925543,0.941521,0.949315,0.990593,1.069938,1.023916,1.008956,0.957805,...,1.002177,1.025846,1.001176,0.949222,0.952527,0.962821,0.932300,0.924307,0.998634,1
452,1.062949,1.150878,1.106013,1.103070,1.073420,1.063598,1.037858,1.025627,1.001151,0.992435,...,0.954795,0.957703,0.954115,0.960708,0.954982,0.964284,0.954554,0.970666,0.946162,1
453,0.970285,0.915315,1.031886,1.051039,0.970926,1.094397,0.993410,1.037287,1.034228,1.005614,...,1.007761,0.923017,0.999288,0.967330,0.962120,1.032110,0.987394,0.925149,0.996282,0


In [4]:
# Bind the generic name `df` to the global-view frame; the benchmarking cell
# that follows reads its data from `df`.
df = global_view

### 1.2 Benchmarking the model architectures

In [None]:
# Separate the frame into inputs X and labels y, using the 'target' column as the label.
X, y = df2xy(df, target_col='target')
# Stratified, shuffled split that holds out 20% for validation; the seed is fixed.
splits = get_splits(y, valid_size=.2, stratify=True, random_state=23, shuffle=True)
splits



bs = 64
print(X.shape)
# No transform on the inputs; labels go through Categorize().
tfms  = [None, [Categorize()]]
dsets = TSDatasets(X, y, tfms=tfms, splits=splits)
# The validation loader uses twice the training batch size.
dls   = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[bs, bs*2])

# (architecture, constructor keyword arguments) pairs to benchmark.
archs = [(FCN, {}), (ResNet, {}), (xresnet1d34, {}), (ResCNN, {}), 
         (LSTM, {'n_layers':1, 'bidirectional': False}), (LSTM, {'n_layers':2, 'bidirectional': False}), (LSTM, {'n_layers':3, 'bidirectional': False}), 
         (LSTM, {'n_layers':1, 'bidirectional': True}), (LSTM, {'n_layers':2, 'bidirectional': True}), (LSTM, {'n_layers':3, 'bidirectional': True}),
         (LSTM_FCN, {}), (LSTM_FCN, {'shuffle': False}), (InceptionTime, {}), (XceptionTime, {}), (OmniScaleCNN, {}), (mWDN, {'levels': 4})]

# One row per trained architecture, kept sorted by accuracy (best first).
results = pd.DataFrame(columns=['arch', 'hyperparams',  'train loss', 'valid loss', 'accuracy', 'time'])
for i, (arch, k) in enumerate(archs):
    model = create_model(arch, dls=dls, **k)
    print(model.__class__.__name__)
    learn = Learner(dls, model,  metrics=accuracy)
    # Wall-clock time of the 100-epoch one-cycle fit, stored in whole seconds.
    start = time.time()
    learn.fit_one_cycle(100, 1e-3)
    elapsed = time.time() - start
    # Values recorded for the final epoch; stored below as train loss,
    # valid loss and accuracy, in that order.
    vals = learn.recorder.values[-1]
    # The sort below renumbers existing rows 0..i-1, so label `i` is always
    # a new row and this assignment appends.
    results.loc[i] = [arch.__name__, k, vals[0], vals[1], vals[2], int(elapsed)]
    results = results.sort_values(by='accuracy', ascending=False, ignore_index=True)
    # clear_output must be *called*; a bare `clear_output` only references the
    # function and leaves every previous training log and table on screen.
    clear_output()
    display(results)

## SECTION 2: RAW VIEW DATA

### 2.1 Processing the data

In [6]:
# Read the raw-view CSV, discard the "Unnamed: 0" column (presumably a
# saved row index -- confirm), and keep only rows with no missing values.
raw_view = (
    pd.read_csv('Full Raw View Data.csv')
    .drop(columns=["Unnamed: 0"])
    .dropna()
)
raw_view

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,2914,2915,2916,2917,2918,2919,2920,2921,2922,target
0,0.999498,0.999498,0.999498,1.000843,0.999239,0.998717,0.999009,1.000242,1.000556,1.004655,...,1.002233,1.001194,1.000569,1.000624,1.000572,1.000410,1.000286,0.999472,0.999181,1
1,1.000011,1.000011,1.000011,1.000011,1.000011,1.000011,1.000011,1.000011,1.000011,1.000011,...,0.999043,0.999329,0.999545,0.999879,0.999941,1.000078,1.000191,1.000228,1.000790,0
2,0.999988,0.999988,0.999988,0.998625,0.998938,0.999561,0.999891,1.000230,1.000960,1.001721,...,0.999728,1.000094,1.000466,1.000359,1.000101,1.000118,1.000004,0.999557,0.999436,0
3,0.985750,0.999616,0.999616,0.974079,0.975111,0.977444,0.980781,0.984821,0.988925,0.992545,...,0.999368,0.999984,1.000376,1.000485,1.000632,1.000815,1.001115,1.001891,1.002667,1
4,0.999999,0.999999,0.999999,1.000008,1.000098,1.000089,0.999967,0.999921,0.999992,0.999881,...,1.000167,1.000161,0.999991,0.999776,0.999548,0.999394,0.999586,0.999746,0.999799,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
460,0.999978,0.999978,0.999978,1.001599,0.999195,0.998883,1.000443,1.001680,1.000570,0.999235,...,1.000452,0.999919,1.000165,0.999701,0.999691,0.999538,0.999220,1.000342,1.002340,0
461,0.999865,0.999865,0.999865,0.999865,0.999865,0.999865,0.999865,0.999865,0.999865,0.999865,...,0.997865,0.999971,1.001114,1.001922,1.002028,1.001742,1.001066,1.000098,0.999621,1
462,1.000011,1.000011,1.000011,1.000011,1.000011,1.000011,1.000011,1.000011,1.000011,1.000011,...,1.000213,0.999940,0.999969,0.999876,0.999704,0.999671,0.999576,0.999719,0.999390,0
463,0.999969,0.999997,0.999997,0.999622,0.999755,0.999820,0.999919,0.999984,1.000049,1.000053,...,1.000460,1.000319,1.000161,1.000065,0.999974,0.999956,0.999874,0.999806,0.999668,0


In [7]:
# Rebind the generic name `df` to the raw-view frame; the benchmarking cell
# that follows reads its data from `df`.
df = raw_view

### 2.2 Benchmarking the model architectures

In [None]:
# Separate the frame into inputs X and labels y, using the 'target' column as the label.
X, y = df2xy(df, target_col='target')
# Stratified, shuffled split that holds out 20% for validation; the seed is fixed.
splits = get_splits(y, valid_size=.2, stratify=True, random_state=23, shuffle=True)
splits



bs = 64
print(X.shape)
# No transform on the inputs; labels go through Categorize().
tfms  = [None, [Categorize()]]
dsets = TSDatasets(X, y, tfms=tfms, splits=splits)
# The validation loader uses twice the training batch size.
dls   = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=[bs, bs*2])

# (architecture, constructor keyword arguments) pairs to benchmark.
archs = [(FCN, {}), (ResNet, {}), (xresnet1d34, {}), (ResCNN, {}), 
         (LSTM, {'n_layers':1, 'bidirectional': False}), (LSTM, {'n_layers':2, 'bidirectional': False}), (LSTM, {'n_layers':3, 'bidirectional': False}), 
         (LSTM, {'n_layers':1, 'bidirectional': True}), (LSTM, {'n_layers':2, 'bidirectional': True}), (LSTM, {'n_layers':3, 'bidirectional': True}),
         (LSTM_FCN, {}), (LSTM_FCN, {'shuffle': False}), (InceptionTime, {}), (XceptionTime, {}), (OmniScaleCNN, {}), (mWDN, {'levels': 4})]

# One row per trained architecture, kept sorted by accuracy (best first).
results = pd.DataFrame(columns=['arch', 'hyperparams',  'train loss', 'valid loss', 'accuracy', 'time'])
for i, (arch, k) in enumerate(archs):
    model = create_model(arch, dls=dls, **k)
    print(model.__class__.__name__)
    learn = Learner(dls, model,  metrics=accuracy)
    # Wall-clock time of the 100-epoch one-cycle fit, stored in whole seconds.
    start = time.time()
    learn.fit_one_cycle(100, 1e-3)
    elapsed = time.time() - start
    # Values recorded for the final epoch; stored below as train loss,
    # valid loss and accuracy, in that order.
    vals = learn.recorder.values[-1]
    # The sort below renumbers existing rows 0..i-1, so label `i` is always
    # a new row and this assignment appends.
    results.loc[i] = [arch.__name__, k, vals[0], vals[1], vals[2], int(elapsed)]
    results = results.sort_values(by='accuracy', ascending=False, ignore_index=True)
    # clear_output must be *called*; a bare `clear_output` only references the
    # function and leaves every previous training log and table on screen.
    clear_output()
    display(results)