In [1]:
import pandas as pd
import numpy as np

In [2]:
# Names of the UCR datasets to analyse. Each name is used both as the
# directory name and as the file prefix when the data is loaded from
# ../data/ucr/<name>/<name>_TRAIN.tsv (and <name>_TEST.tsv).
UCR_DATASETS = [
    'Adiac',
    'ArrowHead',
    'Beef',
    'BeetleFly',
    'BirdChicken',
    'Car',
    'CBF',
    'ChlorineConcentration',
    'CinCECGTorso',
    'Coffee',
    'Computers',
    'CricketX',
    'CricketY',
    'CricketZ',
    'DiatomSizeReduction',
    'DistalPhalanxOutlineAgeGroup',
    'DistalPhalanxOutlineCorrect',
    'DistalPhalanxTW',
    'Earthquakes',
    'ECG200',
    'ECG5000',
    'ECGFiveDays',
    'ElectricDevices',
    'FaceAll',
    'FaceFour',
    'FacesUCR',
    'FiftyWords',
    'Fish',
    'FordA',
    'FordB',
    'GunPoint',
    'Ham',
    'HandOutlines',
    'Haptics',
    'Herring',
    'InlineSkate',
    'InsectWingbeatSound',
    'ItalyPowerDemand',
    'LargeKitchenAppliances',
    'Lightning2',
    'Lightning7',
    'Mallat',
    'Meat',
    'MedicalImages',
    'MiddlePhalanxOutlineAgeGroup',
    'MiddlePhalanxOutlineCorrect',
    'MiddlePhalanxTW',
    'MoteStrain',
    'NonInvasiveFetalECGThorax1',
    'NonInvasiveFetalECGThorax2',
    'OliveOil',
    'OSULeaf',
    'PhalangesOutlinesCorrect',
    'Phoneme',
    'Plane',
    'ProximalPhalanxOutlineAgeGroup',
    'ProximalPhalanxOutlineCorrect',
    'ProximalPhalanxTW',
    'RefrigerationDevices',
    'ScreenType',
    'ShapeletSim',
    'ShapesAll',
    'SmallKitchenAppliances',
    'SonyAIBORobotSurface1',
    'SonyAIBORobotSurface2',
    'StarLightCurves',
    'Strawberry',
    'SwedishLeaf',
    'Symbols',
    'SyntheticControl',
    'ToeSegmentation1',
    'ToeSegmentation2',
    'Trace',
    'TwoLeadECG',
    'TwoPatterns',
    'UWaveGestureLibraryAll',
    'UWaveGestureLibraryX',
    'UWaveGestureLibraryY',
    'UWaveGestureLibraryZ',
    'Wafer',
    'Wine',
    'WordSynonyms',
    'Worms',
    'WormsTwoClass',
    'Yoga',
    # NOTE(review): the alphabetical order restarts here -- presumably a
    # second batch of datasets appended later; confirm before re-sorting.
    'ACSF1',
    'BME',
    'Chinatown',
    'Crop',
    'EOGHorizontalSignal',
    'EOGVerticalSignal',
    'EthanolLevel',
    'FreezerRegularTrain',
    'FreezerSmallTrain',
    'Fungi',
    'GunPointAgeSpan',
    'GunPointMaleVersusFemale',
    'GunPointOldVersusYoung',
    'HouseTwenty',
    'InsectEPGRegularTrain',
    'InsectEPGSmallTrain',
    'MixedShapesRegularTrain',
    'MixedShapesSmallTrain',
    'PigAirwayPressure',
    'PigArtPressure',
    'PigCVP',
    'PowerCons',
    'Rock',
    'SemgHandGenderCh2',
    'SemgHandMovementCh2',
    'SemgHandSubjectCh2',
    'SmoothSubspace',
    'UMD'
]

In [4]:
def _class_ratios(y):
    """Return the distinct labels in ``y`` and the share of samples with each.

    Parameters
    ----------
    y : array-like
        One class label per sample.

    Returns
    -------
    labels : np.ndarray
        The sorted unique labels found in ``y``.
    ratios : np.ndarray
        ``ratios[i]`` is the fraction of entries of ``y`` equal to ``labels[i]``.
    """
    labels, counts = np.unique(y, return_counts=True)
    return labels, counts / len(y)


# Column-wise accumulator: one (dataset, label, ratio) entry per class of
# every dataset; turned into a DataFrame once, after the loop.
data = {
    'dataset': [],
    'label': [],
    'ratio': [],
}

for dataset in UCR_DATASETS:
    print(f'{dataset}')
    # The class ratios depend only on the training labels, which are stored in
    # the first column of the .tsv file, so only that column is parsed. The
    # series values and the TEST split do not influence the result and are not
    # loaded.
    y_train = np.atleast_1d(  # keeps a single-row file 1-D instead of 0-D
        np.genfromtxt(f'../data/ucr/{dataset}/{dataset}_TRAIN.tsv', usecols=(0,))
    )

    labels, ratios = _class_ratios(y_train)
    data['dataset'].extend([dataset] * len(labels))
    data['label'].extend(labels)
    data['ratio'].extend(ratios)

df = pd.DataFrame(data)
df.head()

Adiac
ArrowHead
Beef
BeetleFly
BirdChicken
Car
CBF
ChlorineConcentration
CinCECGTorso
Coffee
Computers
CricketX
CricketY
CricketZ
DiatomSizeReduction
DistalPhalanxOutlineAgeGroup
DistalPhalanxOutlineCorrect
DistalPhalanxTW
Earthquakes
ECG200
ECG5000
ECGFiveDays
ElectricDevices
FaceAll
FaceFour
FacesUCR
FiftyWords
Fish
FordA
FordB
GunPoint
Ham
HandOutlines
Haptics
Herring
InlineSkate
InsectWingbeatSound
ItalyPowerDemand
LargeKitchenAppliances
Lightning2
Lightning7
Mallat
Meat
MedicalImages
MiddlePhalanxOutlineAgeGroup
MiddlePhalanxOutlineCorrect
MiddlePhalanxTW
MoteStrain
NonInvasiveFetalECGThorax1
NonInvasiveFetalECGThorax2
OliveOil
OSULeaf
PhalangesOutlinesCorrect
Phoneme
Plane
ProximalPhalanxOutlineAgeGroup
ProximalPhalanxOutlineCorrect
ProximalPhalanxTW
RefrigerationDevices
ScreenType
ShapeletSim
ShapesAll
SmallKitchenAppliances
SonyAIBORobotSurface1
SonyAIBORobotSurface2
StarLightCurves
Strawberry
SwedishLeaf
Symbols
SyntheticControl
ToeSegmentation1
ToeSegmentation2
Trace
TwoLeadE

Unnamed: 0,dataset,label,ratio
0,Adiac,1.0,0.033333
1,Adiac,2.0,0.025641
2,Adiac,3.0,0.012821
3,Adiac,4.0,0.035897
4,Adiac,5.0,0.010256


In [11]:
# Rank every (dataset, label) pair from the largest class share to the smallest.
df.sort_values('ratio', ascending=False)

Unnamed: 0,dataset,label,ratio
615,Wafer,1.0,0.903000
118,Earthquakes,0.0,0.819876
533,SonyAIBORobotSurface1,2.0,0.700000
121,ECG200,1.0,0.690000
454,ProximalPhalanxOutlineCorrect,1.0,0.676667
...,...,...,...
216,FiftyWords,49.0,0.004444
209,FiftyWords,42.0,0.004444
192,FiftyWords,25.0,0.004444
126,ECG5000,5.0,0.004000


In [18]:
# Load the per-classifier scores, then list the rows recorded for the
# 'Wafer-1.0' task ordered by f1, best first.
all_results = pd.read_csv('./example.csv')
all_results.loc[all_results['dataset_name'].eq('Wafer-1.0')].sort_values('f1', ascending=False)

Unnamed: 0,classifier_name,dataset_name,f1,accuracy
4399,isolation-forest,Wafer-1.0,0.787832,0.687703
3453,ocsvm,Wafer-1.0,0.619556,0.508274
615,chrono_gam,Wafer-1.0,0.341916,0.746431
1561,dagmm,Wafer-1.0,0.272295,0.762167
2507,deepsvdd,Wafer-1.0,0.135904,0.125081


In [19]:
# Rows recorded for the 'Earthquakes-0.0' task, ordered by f1 (best first).
all_results.loc[all_results['dataset_name'].eq('Earthquakes-0.0')].sort_values('f1', ascending=False)

Unnamed: 0,classifier_name,dataset_name,f1,accuracy
3902,isolation-forest,Earthquakes-0.0,0.855967,0.748201
2010,deepsvdd,Earthquakes-0.0,0.445362,0.438849
1064,dagmm,Earthquakes-0.0,0.308667,0.719424
118,chrono_gam,Earthquakes-0.0,0.0,0.683453
2956,ocsvm,Earthquakes-0.0,0.0,0.251799


In [22]:
# Rows recorded for the 'SonyAIBORobotSurface1-2.0' task, ordered by f1 (best first).
all_results.loc[all_results['dataset_name'].eq('SonyAIBORobotSurface1-2.0')].sort_values('f1', ascending=False)

Unnamed: 0,classifier_name,dataset_name,f1,accuracy
533,chrono_gam,SonyAIBORobotSurface1-2.0,0.712119,0.562396
4317,isolation-forest,SonyAIBORobotSurface1-2.0,0.664975,0.780366
3371,ocsvm,SonyAIBORobotSurface1-2.0,0.534091,0.727121
1479,dagmm,SonyAIBORobotSurface1-2.0,0.460074,0.494176
2425,deepsvdd,SonyAIBORobotSurface1-2.0,0.087554,0.216306


In [32]:
# Dataset names ordered from the smallest class share upwards (ascending is
# the default sort direction); a dataset appears once per class it contains.
df.sort_values('ratio')['dataset'].values

array(['FiftyWords', 'ECG5000', 'FiftyWords', 'FiftyWords', 'FiftyWords',
       'FiftyWords', 'Phoneme', 'Phoneme', 'Phoneme', 'Phoneme',
       'Phoneme', 'Phoneme', 'Phoneme', 'Phoneme', 'FiftyWords',
       'FiftyWords', 'WordSynonyms', 'FiftyWords', 'FiftyWords',
       'FiftyWords', 'FiftyWords', 'FiftyWords', 'FiftyWords',
       'FiftyWords', 'FiftyWords', 'FiftyWords', 'FiftyWords', 'Phoneme',
       'Phoneme', 'Phoneme', 'Phoneme', 'Phoneme', 'Phoneme', 'Adiac',
       'FiftyWords', 'FiftyWords', 'FiftyWords', 'FiftyWords',
       'WordSynonyms', 'Adiac', 'FiftyWords', 'FiftyWords', 'FiftyWords',
       'FiftyWords', 'FiftyWords', 'FiftyWords', 'FiftyWords',
       'FiftyWords', 'Phoneme', 'Phoneme', 'Phoneme', 'Phoneme',
       'Phoneme', 'Phoneme', 'WordSynonyms', 'WordSynonyms',
       'WordSynonyms', 'WordSynonyms', 'Adiac', 'FiftyWords',
       'FiftyWords', 'FiftyWords', 'FiftyWords', 'FiftyWords',
       'MedicalImages', 'ShapesAll', 'ShapesAll', 'ShapesAll',
       'S

In [24]:
# Rows recorded for the 'FiftyWords-41.0' task, ordered by f1 (best first).
all_results.loc[all_results['dataset_name'].eq('FiftyWords-41.0')].sort_values('f1', ascending=False)

Unnamed: 0,classifier_name,dataset_name,f1,accuracy
3992,isolation-forest,FiftyWords-41.0,0.02603,0.013187
2100,deepsvdd,FiftyWords-41.0,0.024822,0.013187
208,chrono_gam,FiftyWords-41.0,0.0,0.013187
1154,dagmm,FiftyWords-41.0,0.0,0.013187
3046,ocsvm,FiftyWords-41.0,0.0,0.986813


In [25]:
# Rows recorded for the 'ECG5000-5.0' task, ordered by f1 (best first).
all_results.loc[all_results['dataset_name'].eq('ECG5000-5.0')].sort_values('f1', ascending=False)

Unnamed: 0,classifier_name,dataset_name,f1,accuracy
126,chrono_gam,ECG5000-5.0,0.997422,0.995111
3910,isolation-forest,ECG5000-5.0,0.00973,0.004889
2018,deepsvdd,ECG5000-5.0,0.007834,0.154
1072,dagmm,ECG5000-5.0,0.0,0.004889
2964,ocsvm,ECG5000-5.0,0.0,0.995111


In [26]:
# Rows recorded for the 'FiftyWords-50.0' task, ordered by f1 (best first).
all_results.loc[all_results['dataset_name'].eq('FiftyWords-50.0')].sort_values('f1', ascending=False)

Unnamed: 0,classifier_name,dataset_name,f1,accuracy
4001,isolation-forest,FiftyWords-50.0,0.017429,0.008791
2109,deepsvdd,FiftyWords-50.0,0.01267,0.167033
217,chrono_gam,FiftyWords-50.0,0.0,0.008791
1163,dagmm,FiftyWords-50.0,0.0,0.008791
3055,ocsvm,FiftyWords-50.0,0.0,0.991209


In [30]:
# Mean score per classifier over every row whose dataset_name contains
# 'FiftyWords'. numeric_only=True is required because the filtered frame still
# carries the string column `dataset_name`: averaging it only produced a
# deprecation warning on older pandas and raises TypeError on pandas >= 2.0.
all_results[all_results.dataset_name.str.contains('FiftyWords')].groupby('classifier_name').mean(numeric_only=True)

  all_results[all_results.dataset_name.str.contains('FiftyWords')].groupby('classifier_name').mean()


Unnamed: 0_level_0,f1,accuracy
classifier_name,Unnamed: 1_level_1,Unnamed: 2_level_1
chrono_gam,0.645105,0.58967
dagmm,0.416163,0.361407
deepsvdd,0.015954,0.416923
isolation-forest,0.212292,0.572176
ocsvm,0.216962,0.983692


In [33]:
# Mean score per classifier over every row whose dataset_name contains
# 'ECG200'. numeric_only=True is required because the filtered frame still
# carries the string column `dataset_name`: averaging it only produced a
# deprecation warning on older pandas and raises TypeError on pandas >= 2.0.
all_results[all_results.dataset_name.str.contains('ECG200')].groupby('classifier_name').mean(numeric_only=True)

  all_results[all_results.dataset_name.str.contains('ECG200')].groupby('classifier_name').mean()


Unnamed: 0_level_0,f1,accuracy
classifier_name,Unnamed: 1_level_1,Unnamed: 2_level_1
chrono_gam,0.298403,0.405
dagmm,0.033524,0.46
deepsvdd,0.223853,0.515
isolation-forest,0.646136,0.675
ocsvm,0.483835,0.635


In [34]:
# Mean score per classifier over every row whose dataset_name contains
# 'Phoneme'. numeric_only=True is required because the filtered frame still
# carries the string column `dataset_name`: averaging it only produced a
# deprecation warning on older pandas and raises TypeError on pandas >= 2.0.
all_results[all_results.dataset_name.str.contains('Phoneme')].groupby('classifier_name').mean(numeric_only=True)

  all_results[all_results.dataset_name.str.contains('Phoneme')].groupby('classifier_name').mean()


Unnamed: 0_level_0,f1,accuracy
classifier_name,Unnamed: 1_level_1,Unnamed: 2_level_1
chrono_gam,0.232988,0.226022
dagmm,0.082019,0.081562
deepsvdd,0.010018,0.443565
isolation-forest,0.049223,0.091488
ocsvm,0.0,0.974359
