In [1]:
import time 
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tsai.all import TSDatasets, TSDataLoaders, TSStandardize, Categorize
from tsai.all import FCN, LSTM, InceptionTime, XceptionTime, XCM, LSTM_FCN
import sklearn.metrics as skm
from IPython.display import clear_output

In [2]:
#optinal download if using any of the functions
from fastai.callback.training import *
from fastai.callback.tracker import *
from fastai.test_utils import *
from fastai.callback.schedule import *

In [3]:
from fastai.learner import *
from fastai.metrics import accuracy

In [2]:
from geoxts.benchmark_models import *
#from geoxts.benchmark_data import get_UCR_data 
from tsai.all import get_UCR_data #use when the dataset in not part of the folder (need to download)

In [5]:
#from tsai.all import get_UCR_data

In [6]:
def count_parameters(model, trainable=True):
    if trainable: return sum(p.numel() for p in model.parameters() if p.requires_grad)
    else: return sum(p.numel() for p in model.parameters())

In [7]:
def dataset_result(dsid, bs, archs):
    
    X, y, splits = get_UCR_data(dsid, return_split=False)
    #splits = get_splits(y, valid_size=.3, stratify=True, random_state=23, shuffle=False)
    print(X.shape, y.shape)
    tfms  = [None, [Categorize()]]
    dsets = TSDatasets(X, y, tfms=tfms, splits=splits)
    dls   = TSDataLoaders.from_dsets(dsets.train, dsets.valid, bs=bs, batch_tfms=[TSStandardize(by_var=True)], num_workers=0)
    results = pd.DataFrame(columns=['arch', 'hyperparams', 'total params', 'valid loss', 'accuracy', 'time'])
    for i, (arch, k) in enumerate(archs):
        print(arch)
        print(dsid)
        dlmodel = build_model(arch, dls=dls, **k, verbose=True)
        print(dlmodel.__class__.__name__)
        learn = Learner(dls, dlmodel,  metrics=accuracy)
        start = time.time()
        try: 
            lr_valley = learn.lr_find(show_plot = False)
        except Exception:
            lr_valley =1e-3
        learn.fit_one_cycle(20, lr_valley, cbs=[SaveModelCallback(monitor='accuracy'), EarlyStoppingCallback(monitor='valid_loss', min_delta=0.01, patience=5)])
        elapsed = time.time() - start
        vals = learn.validate()
        results.loc[i] = [arch.__name__, k, count_parameters(dlmodel), vals[0], vals[1], int(elapsed)]
        #results.sort_values(by='accuracy', ascending=False, kind='stable', ignore_index=True, inplace=True)
        clear_output()
        display(results)
    return results

In [4]:
Motion = [ 'ArticularyWordRecognition', 'PenDigits']
AS = ['Heartbeat', 'JapaneseVowels', 'Libras', 'Phoneme', 'SpokenArabicDigits']
ECG = ['AtrialFibrillation', 'StandWalkJump' ]
EEG = [ 'FaceDetection', 'FingerMovements', 'HandMovementDirection',  'SelfRegulationSCP1', 'SelfRegulationSCP2']
HAR = ['BasicMotions', 'Cricket', 'Epilepsy', 'Ering', 'Handwriting', 'NATOPS', 'RacketSports', 'UWaveGestureLibrary']
Other= ['EthanolConcentration', 'LSST', 'PEMS-SF']
Typelist = [Motion, AS, ECG, EEG, HAR, Other]

In [6]:
dataset_fea = pd.DataFrame(columns=['Dataset', 'No. of Samples', 'No. of features', 'Sequence length'])
i = 0
donelist = []
for type in Typelist:
    for dsid in type:
        if dsid not in donelist:
            #print(dsid)
            X, y, splits = get_UCR_data(dsid, return_split=False)
            fea = X.shape
            dataset_fea.loc[i] = [dsid, fea[0], fea[1], fea[2] ]
            i = i+1

display(dataset_fea)

Unnamed: 0,Dataset,No. of Samples,No. of features,Sequence length
0,ArticularyWordRecognition,575,9,144
1,PenDigits,10992,2,8
2,Heartbeat,409,61,405
3,JapaneseVowels,640,12,29
4,Libras,360,2,45
5,Phoneme,2110,1,1024
6,SpokenArabicDigits,8798,13,93
7,AtrialFibrillation,30,2,640
8,StandWalkJump,27,4,2500
9,FaceDetection,9414,144,62


In [7]:
dataset_fea.to_csv("UCR_dataset.csv")

In [9]:

archs = [(XCM_LSTM, {}), (LSTM_XCM, {}), (LSTM_FCN_2dCNN, {}), (XCM, {}), (LSTM_FCN, {}), (LSTM_2dCNN, {}), (FCN, {}), (InceptionTime, {}), (XceptionTime, {}), 
         (LSTM, {'n_layers':3, 'bidirectional': False}),(LSTM, {'n_layers':3, 'bidirectional': True})]

benchmark_accuracy = pd.DataFrame(columns = ['dataset', 'XCM_LSTM', 'LSTM_XCM','LSTM_FCN_2dCNN','XCM','LSTM_FCN','LSTM_2dCNN', 'FCN', 'InceptionTime', 'XceptionTime', "LSTM", 'LSTM_bidirectional'])

benchmark_time = pd.DataFrame(columns = ['dataset', 'XCM_LSTM', 'LSTM_XCM','LSTM_FCN_2dCNN','XCM','LSTM_FCN','LSTM_2dCNN', 'FCN', 'InceptionTime', 'XceptionTime', "LSTM", 'LSTM_bidirectional'])

In [10]:
i = 0
donelist = []
for type in Typelist:
    for dsid in type:
        if dsid not in donelist:
            print(dsid)
            #dsid = type[i]
            bs = 32
            results = dataset_result(dsid, bs, archs)
            benchmark_accuracy.loc[i, 'dataset'] = dsid
            benchmark_accuracy.iloc[i, 1:] =  list(results['accuracy'])
            benchmark_time.loc[i, 'dataset'] = dsid
            benchmark_time.iloc[i, 1:] =  list(results['time'])
            i = i+1
            donelist.append(dsid)
            clear_output()
            
            display(benchmark_accuracy)

Unnamed: 0,dataset,XCM_LSTM,LSTM_XCM,LSTM_FCN_2dCNN,XCM,LSTM_FCN,LSTM_2dCNN,FCN,InceptionTime,XceptionTime,LSTM,LSTM_bidirectional
0,ArticularyWordRecognition,0.916667,0.956667,0.87,0.94,0.983333,0.666667,0.97,0.986667,0.98,0.78,0.893333
1,PenDigits,0.966552,0.957976,0.990566,0.981132,0.989708,0.963122,0.987421,0.989137,0.993425,0.979703,0.971412
2,Heartbeat,0.770732,0.736585,0.785366,0.770732,0.780488,0.741463,0.795122,0.765854,0.770732,0.731707,0.741463
3,JapaneseVowels,0.083784,0.083784,0.083784,0.083784,0.083784,0.083784,0.083784,0.083784,0.083784,0.083784,0.083784
4,Libras,0.627778,0.788889,0.733333,0.794444,0.772222,0.544444,0.805556,0.877778,0.883333,0.333333,0.238889
5,Phoneme,0.172468,0.117616,0.209388,0.152426,0.202004,0.101266,0.265823,0.184599,0.30538,0.120253,0.124473
6,SpokenArabicDigits,0.100045,0.100045,0.100045,0.100045,0.100045,0.100045,0.100045,0.100045,0.100045,0.100045,0.100045
7,AtrialFibrillation,0.4,0.4,0.4,0.4,0.333333,0.333333,0.333333,0.333333,0.4,0.333333,0.266667
8,StandWalkJump,0.4,0.6,0.533333,0.4,0.4,0.533333,0.333333,0.4,0.4,0.333333,0.4
9,FaceDetection,0.635925,0.69126,0.549376,0.645289,0.570091,0.569523,0.568388,0.665437,0.558173,0.635641,0.61748


In [12]:
benchmark_accuracy.to_csv("UCR_benchmark_accuracy.csv")  
benchmark_time.to_csv("UCR_benchmark_time.csv")

In [13]:
benchmark_time

Unnamed: 0,dataset,XCM_LSTM,LSTM_XCM,LSTM_FCN_2dCNN,XCM,LSTM_FCN,LSTM_2dCNN,FCN,InceptionTime,XceptionTime,LSTM,LSTM_bidirectional
0,ArticularyWordRecognition,22,15,19,21,11,12,9,40,43,41,83
1,PenDigits,162,61,148,107,46,55,35,307,343,54,151
2,Heartbeat,491,319,164,496,17,172,10,62,45,82,152
3,JapaneseVowels,3,3,4,2,3,2,2,8,11,4,9
4,Libras,6,5,6,5,4,5,4,16,19,12,23
5,Phoneme,137,34,116,56,83,39,81,172,335,706,1117
6,SpokenArabicDigits,62,46,56,52,31,36,24,112,113,98,207
7,AtrialFibrillation,9,5,8,10,5,5,5,21,21,113,238
8,StandWalkJump,165,133,79,154,15,41,15,56,55,377,723
9,FaceDetection,527,443,286,348,82,256,28,147,117,93,199
