In [35]:
import numpy as np
import pandas as pd
from sklearn.ensemble import IsolationForest
from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score

In [36]:
# Names of the UCR datasets to evaluate. Each name is used both as the
# directory name and as the file-name prefix when the data is loaded.
# The order below is the order in which the experiments run.
UCR_DATASETS = [
    # First alphabetical run: Adiac .. Yoga
    'Adiac', 'ArrowHead',
    'Beef', 'BeetleFly', 'BirdChicken',
    'Car', 'CBF', 'ChlorineConcentration', 'CinCECGTorso', 'Coffee',
    'Computers', 'CricketX', 'CricketY', 'CricketZ',
    'DiatomSizeReduction', 'DistalPhalanxOutlineAgeGroup',
    'DistalPhalanxOutlineCorrect', 'DistalPhalanxTW',
    'Earthquakes', 'ECG200', 'ECG5000', 'ECGFiveDays', 'ElectricDevices',
    'FaceAll', 'FaceFour', 'FacesUCR', 'FiftyWords', 'Fish', 'FordA', 'FordB',
    'GunPoint',
    'Ham', 'HandOutlines', 'Haptics', 'Herring',
    'InlineSkate', 'InsectWingbeatSound', 'ItalyPowerDemand',
    'LargeKitchenAppliances', 'Lightning2', 'Lightning7',
    'Mallat', 'Meat', 'MedicalImages', 'MiddlePhalanxOutlineAgeGroup',
    'MiddlePhalanxOutlineCorrect', 'MiddlePhalanxTW', 'MoteStrain',
    'NonInvasiveFetalECGThorax1', 'NonInvasiveFetalECGThorax2',
    'OliveOil', 'OSULeaf',
    'PhalangesOutlinesCorrect', 'Phoneme', 'Plane',
    'ProximalPhalanxOutlineAgeGroup', 'ProximalPhalanxOutlineCorrect',
    'ProximalPhalanxTW',
    'RefrigerationDevices',
    'ScreenType', 'ShapeletSim', 'ShapesAll', 'SmallKitchenAppliances',
    'SonyAIBORobotSurface1', 'SonyAIBORobotSurface2', 'StarLightCurves',
    'Strawberry', 'SwedishLeaf', 'Symbols', 'SyntheticControl',
    'ToeSegmentation1', 'ToeSegmentation2', 'Trace', 'TwoLeadECG',
    'TwoPatterns',
    'UWaveGestureLibraryAll', 'UWaveGestureLibraryX', 'UWaveGestureLibraryY',
    'UWaveGestureLibraryZ',
    'Wafer', 'Wine', 'WordSynonyms', 'Worms', 'WormsTwoClass',
    'Yoga',
    # Second alphabetical run: ACSF1 .. UMD
    'ACSF1',
    'BME',
    'Chinatown', 'Crop',
    'EOGHorizontalSignal', 'EOGVerticalSignal', 'EthanolLevel',
    'FreezerRegularTrain', 'FreezerSmallTrain', 'Fungi',
    'GunPointAgeSpan', 'GunPointMaleVersusFemale', 'GunPointOldVersusYoung',
    'HouseTwenty',
    'InsectEPGRegularTrain', 'InsectEPGSmallTrain',
    'MixedShapesRegularTrain', 'MixedShapesSmallTrain',
    'PigAirwayPressure', 'PigArtPressure', 'PigCVP', 'PowerCons',
    'Rock',
    'SemgHandGenderCh2', 'SemgHandMovementCh2', 'SemgHandSubjectCh2',
    'SmoothSubspace',
    'UMD',
]

In [37]:
# Column-oriented accumulator for the experiment results: every key maps to
# its own (initially empty) list, and the lists are grown in lock-step so
# that position i across all columns describes a single evaluation.
results = {
    column: []
    for column in (
        'dataset', 'model', 'label',
        'accuracy', 'f1', 'recall', 'precision',
    )
}

In [38]:
def _z_normalize(series):
    """Z-normalise every row of a 2-D array (zero mean, unit standard deviation).

    Rows whose standard deviation is exactly 0 are divided by 1 instead, so a
    constant series becomes all zeros rather than NaN/inf. A new array is
    returned; the input array is not modified.
    """
    std = series.std(axis=1, keepdims=True)
    std[std == 0] = 1.0
    return (series - series.mean(axis=1, keepdims=True)) / std


# One-class evaluation: for every dataset and every class label seen in the
# training split, fit an Isolation Forest on the training series of that
# label only, then score it on the whole test split, where the chosen label
# is the positive class (+1) and every other label is the negative class (-1).
for dataset in UCR_DATASETS:
    print(f'Starting experiments with {dataset} dataset...')
    # Load the data from .tsv files: column 0 is read as the class label,
    # the remaining columns as the series values.
    train_data = np.genfromtxt(f'../data/ucr/{dataset}/{dataset}_TRAIN.tsv')
    x_train, y_train = train_data[:, 1:], train_data[:, 0]

    test_data = np.genfromtxt(f'../data/ucr/{dataset}/{dataset}_TEST.tsv')
    x_test, y_test = test_data[:, 1:], test_data[:, 0]

    # The normalised test set does not depend on the label, so compute it
    # once per dataset and keep the raw `x_test` untouched instead of
    # re-normalising (and overwriting) it on every label iteration.
    x_test_norm = _z_normalize(x_test)

    for label in np.unique(y_train):
        print(f'\tClassifying the label {label}...')
        # Train only on the samples of the positive label
        x_train_pos = _z_normalize(x_train[y_train == label])

        # Encode the ground truth the same way IsolationForest.predict
        # encodes its output: +1 for the target label, -1 for everything else.
        y_test_bin = np.where(y_test == label, 1, -1)

        # Fit an Isolation Forest algorithm to the positive-label samples
        isolation_forest = IsolationForest(random_state=np.random.RandomState(42), contamination='auto')
        isolation_forest.fit(x_train_pos)

        preds = isolation_forest.predict(x_test_norm)

        results['dataset'].append(dataset)
        results['model'].append('isolation-forest')
        results['label'].append(label)
        # scikit-learn metrics take (y_true, y_pred) in that order. Passing
        # them the other way round leaves accuracy and F1 unchanged but
        # swaps precision and recall, so the order matters here.
        results['accuracy'].append(accuracy_score(y_test_bin, preds))
        results['f1'].append(f1_score(y_test_bin, preds))
        results['recall'].append(recall_score(y_test_bin, preds))
        results['precision'].append(precision_score(y_test_bin, preds))



Starting experiments with Adiac dataset...
	Classifying the label 1.0...
	Classifying the label 2.0...
	Classifying the label 3.0...
	Classifying the label 4.0...
	Classifying the label 5.0...
	Classifying the label 6.0...
	Classifying the label 7.0...
	Classifying the label 8.0...
	Classifying the label 9.0...
	Classifying the label 10.0...
	Classifying the label 11.0...
	Classifying the label 12.0...
	Classifying the label 13.0...
	Classifying the label 14.0...
	Classifying the label 15.0...
	Classifying the label 16.0...
	Classifying the label 17.0...
	Classifying the label 18.0...
	Classifying the label 19.0...
	Classifying the label 20.0...
	Classifying the label 21.0...
	Classifying the label 22.0...
	Classifying the label 23.0...
	Classifying the label 24.0...
	Classifying the label 25.0...
	Classifying the label 26.0...
	Classifying the label 27.0...
	Classifying the label 28.0...
	Classifying the label 29.0...
	Classifying the label 30.0...
	Classifying the label 31.0...
	Clas

  _warn_prf(average, modifier, msg_start, len(result))


	Classifying the label 18.0...
	Classifying the label 19.0...
	Classifying the label 20.0...
	Classifying the label 21.0...
	Classifying the label 22.0...
	Classifying the label 23.0...
	Classifying the label 24.0...
	Classifying the label 25.0...
	Classifying the label 26.0...
	Classifying the label 27.0...
	Classifying the label 28.0...
	Classifying the label 29.0...
	Classifying the label 30.0...
	Classifying the label 31.0...
	Classifying the label 32.0...
	Classifying the label 33.0...
	Classifying the label 34.0...
	Classifying the label 35.0...
	Classifying the label 36.0...
	Classifying the label 37.0...
	Classifying the label 38.0...
	Classifying the label 39.0...
	Classifying the label 40.0...
	Classifying the label 41.0...
	Classifying the label 42.0...
	Classifying the label 43.0...
	Classifying the label 44.0...
	Classifying the label 45.0...
	Classifying the label 46.0...
	Classifying the label 47.0...
	Classifying the label 48.0...
	Classifying the label 49.0...
	Classif

In [40]:
# Turn the column-wise result lists into a table and persist it as CSV
# (without the positional index column) next to the notebook.
results_df = pd.DataFrame(data=results)
results_df.to_csv(
    path_or_buf='./ucr_isolation_forest.csv',
    index=False,
)