In [1]:
# Nestor Cabello, Elham Naghizade, Jianzhong Qi, Lars Kulik
# Fast, accurate and explainable time series classification through randomization.
# Data Min Know Disc (2023)


import os

from rSTSF_functions import *

In [14]:
## 112 benchmark datasets from the UCR repository (http://www.timeseriesclassification.com)
# dset_names =   ["ACSF1","Adiac","ArrowHead","Beef","BeetleFly","BirdChicken","BME","Car","CBF","Chinatown",
#                 "ChlorineConcentration","CinCECGTorso","Coffee","Computers","CricketX","CricketY","CricketZ","Crop","DiatomSizeReduction",
#                 "DistalPhalanxOutlineAgeGroup","DistalPhalanxOutlineCorrect","DistalPhalanxTW",
#                 "Earthquakes","ECG200","ECG5000","ECGFiveDays","ElectricDevices","EOGHorizontalSignal","EOGVerticalSignal",
#                 "EthanolLevel","FaceAll","FaceFour","FacesUCR","FiftyWords","Fish",
#                 "FordA","FordB","FreezerRegularTrain","FreezerSmallTrain",
#                 "GunPoint","GunPointAgeSpan","GunPointMaleVersusFemale","GunPointOldVersusYoung","Ham","HandOutlines",
#                 "Haptics","Herring","HouseTwenty","InlineSkate","InsectEPGRegularTrain","InsectEPGSmallTrain",
#                 "InsectWingbeatSound","ItalyPowerDemand","LargeKitchenAppliances",
#                 "Lightning2","Lightning7","Mallat","Meat","MedicalImages",
#                 "MiddlePhalanxOutlineAgeGroup","MiddlePhalanxOutlineCorrect","MiddlePhalanxTW",
#                 "MixedShapesRegularTrain","MixedShapesSmallTrain","MoteStrain","NonInvasiveFetalECGThorax1", "NonInvasiveFetalECGThorax2",
#                 "OliveOil","OSULeaf","PhalangesOutlinesCorrect","Phoneme",
#                 "PigAirwayPressure","PigArtPressure","PigCVP","Plane","PowerCons","ProximalPhalanxOutlineAgeGroup",
#                 "ProximalPhalanxOutlineCorrect","ProximalPhalanxTW","RefrigerationDevices","Rock","ScreenType",
#                 "SemgHandGenderCh2","SemgHandMovementCh2","SemgHandSubjectCh2",
#                 "ShapeletSim","ShapesAll","SmallKitchenAppliances","SmoothSubspace","SonyAIBORobotSurface1","SonyAIBORobotSurface2",
#                 "StarLightCurves","Strawberry","SwedishLeaf","Symbols","SyntheticControl",
#                 "ToeSegmentation1","ToeSegmentation2","Trace","TwoLeadECG","TwoPatterns",
#                 "UMD","UWaveGestureLibraryAll","UWaveGestureLibraryX","UWaveGestureLibraryY",
#                 "UWaveGestureLibraryZ","Wafer","Wine","WordSynonyms","Worms","WormsTwoClass","Yoga"]

## 16 additional datasets from the UCR repository (http://www.timeseriesclassification.com)
# dset_names = ["AllGestureWiimoteX","AllGestureWiimoteY","AllGestureWiimoteZ",
#               "DodgerLoopDay","DodgerLoopGame","DodgerLoopWeekend",
#               "Fungi", "GestureMidAirD1", "GestureMidAirD2", "GestureMidAirD3", "GesturePebbleZ1", "GesturePebbleZ2",
#               "MelbournePedestrian","PLAID","PickupGestureWiimoteZ",
#               "ShakeGestureWiimoteZ"]

In [5]:
## Parameters that r-STSF uses, followed by a one-off warm-up fit.

# Statistics (aggregation functions) computed over each candidate interval.
# Note: np.percentile and np.quantile are not called as such; they only act as
# identifiers for the count-of-mean-crossings (cmc) and count-of-values-above-mean
# (cam) statistics. See function getIntervalFeature(...).
# NOTE(review): np.polyfit presumably identifies the slope statistic — confirm in getIntervalFeature(...).
agg_fns = [np.mean, np.std, np.polyfit, np.median, np.min, np.max, iqr, np.percentile, np.quantile]
repr_types = [1,2,3,4] # 1: Raw series, 2: Periodogram, 3: First-order Difference, 4: Autoregressive
d = 50 # Number of sets of candidate discriminatory interval features to compute
r = 500 # Number of trees

# The aggregation functions are optimized using Numba for Just-In-Time (JIT) compilation.
# The first time the code runs, Numba compiles these functions into machine code,
# incurring an initial overhead. However, the compiled code is cached for future use,
# making subsequent runs significantly faster.
# Therefore, run this block once to "warm up" the Numba cache and compile the necessary functions
# before timing anything.
# The warm-up deliberately fits a tiny model: only the first representation type
# (repr_types[:1]), a single candidate set (d=1) and two trees (r=2), so the values of
# d and r defined above are NOT used by this call.
X_train, y_train, X_test, y_test = getTrainTestSets("SonyAIBORobotSurface2")
clf = rstsf(agg_fns=agg_fns, repr_types=repr_types[:1], d=1, r=2)
clf.fit(X_train,y_train)

In [13]:
# Experiment driver: for every dataset, fit and evaluate r-STSF `nruns` times,
# record per-run accuracy plus mean train/test wall-clock time, and export
# everything to a timestamped CSV under output/.

# Datasets to evaluate (use the longer list below for a multi-dataset run).
dset_names = ["ItalyPowerDemand"]
# dset_names = ["ItalyPowerDemand","LargeKitchenAppliances","SonyAIBORobotSurface2","ECG200"]

# Number of independent fit/predict repetitions per dataset.
# The result table below adapts to this value automatically.
nruns = 10

# accuracies[i, j] holds the test accuracy of run j on dataset i.
accuracies = np.zeros((len(dset_names), nruns))
training_times = []  # mean fit time (seconds) per dataset
testing_times = []   # mean predict time (seconds) per dataset

for cont_dsets, dset_name in enumerate(dset_names):
    print("Dataset: ", dset_name)

    X_train, y_train, X_test, y_test = getTrainTestSets(dset_name)

    inner_training_time = []
    inner_testing_time = []

    for nrun in range(nruns):
        print('run ',str(nrun+1))

        # --- training ---
        timeA = time.perf_counter()
        # NOTE(review): rstsf() is built with its default hyper-parameters here;
        # the agg_fns / repr_types / d / r values configured in the parameters
        # cell are not passed. Confirm the defaults are what the experiment
        # is meant to use.
        clf = rstsf()
        clf.fit(X_train, y_train)
        current_training_time = time.perf_counter()-timeA
        inner_training_time.append(current_training_time)
        print(f"training time: {current_training_time}")

        # --- testing ---
        timeA = time.perf_counter()
        y_pred = clf.predict(X_test)
        current_testing_time = time.perf_counter()-timeA
        inner_testing_time.append(current_testing_time)
        print(f"testing time: {current_testing_time}")

        # Fraction of test series whose predicted label matches the true label.
        accu = np.sum(y_pred==y_test)/len(y_test)
        print('accuracy: ', accu)
        accuracies[cont_dsets,nrun] = accu

    avg_accuracy_this_dataset = np.mean(accuracies[cont_dsets,:])
    print('avg accuracy for ' + str(nruns) + ' runs: ' , avg_accuracy_this_dataset)

    training_times.append(np.mean(inner_training_time))
    testing_times.append(np.mean(inner_testing_time))

    print("\n")


# Build the result table. The per-run columns ('run1' .. 'run<nruns>') are
# derived from nruns, so changing nruns no longer requires editing this code
# (a hard-coded run1..run10 list raises IndexError for nruns < 10 and silently
# drops runs for nruns > 10).
run_columns = ['run' + str(k + 1) for k in range(nruns)]

columns = {'Dataset': dset_names}
for k, run_column in enumerate(run_columns):
    columns[run_column] = accuracies[:, k]
columns['avgAccu'] = np.mean(accuracies, axis=1)
columns['avgTrainTime'] = np.array(training_times)
columns['avgTestTime'] = np.array(testing_times)

dfResults = pd.DataFrame(columns)
# Make the column order explicit rather than relying on dict insertion order.
dfResults = dfResults[['Dataset'] + run_columns + ['avgAccu', 'avgTrainTime', 'avgTestTime']]

# Create the output directory if needed so the export cannot fail after the
# (potentially long) experiment has already finished.
os.makedirs("output", exist_ok=True)
dfResults.to_csv("output/r-STSF_experiment_run_at_" + time.strftime("%d-%m-%Y %H%M%S") + ".csv",encoding='utf-8' ,index=False) 

Dataset:  ItalyPowerDemand
run  1
training time: 0.41277583300001197
testing time: 0.1489364170000158
accuracy:  0.9727891156462585
run  2
training time: 0.3997947080000017
testing time: 0.14405658300000823
accuracy:  0.9727891156462585
run  3
training time: 0.48313983300002405
testing time: 0.14480625000001623
accuracy:  0.9727891156462585
run  4
training time: 0.3987554169999896
testing time: 0.14310166699999627
accuracy:  0.9737609329446064
run  5
training time: 0.4067470840000169
testing time: 0.15134674999998765
accuracy:  0.9727891156462585
run  6
training time: 0.4297733340000036
testing time: 0.1492786250000222
accuracy:  0.9727891156462585
run  7
training time: 0.39606604200000106
testing time: 0.14329187499998852
accuracy:  0.9737609329446064
run  8
training time: 0.4017372920000071
testing time: 0.15447708400000693
accuracy:  0.9727891156462585
run  9
training time: 0.3934539999999913
testing time: 0.1455380419999983
accuracy:  0.9727891156462585
run  10
training time: 0.402