In [118]:
import sys
import pandas as pd
import os 
import glob
import matplotlib.pyplot as plt
import numpy as np
from scipy.stats import wilcoxon as _wilcoxon 
from scipy.stats import binom_test as _binom_test
from itertools import combinations
from IPython.display import display
from itertools import product
import seaborn as sns
from sktime.benchmarking.evaluation import Evaluator
from sktime.benchmarking.results import HDDResults
from sktime.benchmarking.metrics import PairwiseMetric
from sklearn.metrics import accuracy_score
from sktime.series_as_features.model_selection import PresplitFilesCV
from joblib import load
%matplotlib inline

In [143]:
def wilcoxon_test(x, y):
    """Return the p-value of the Wilcoxon signed-rank test for paired samples.

    Parameters
    ----------
    x, y
        The two paired samples; both are handed unchanged to
        ``scipy.stats.wilcoxon``.

    Returns
    -------
    The ``pvalue`` attribute of the SciPy test result.
    """
    outcome = _wilcoxon(x, y)
    return outcome.pvalue

In [144]:
def binom_test(x, y):
    """Sign (binomial) test on paired scores, splitting draws evenly.

    Parameters
    ----------
    x, y : array-like
        Paired scores of the two estimators. Both are converted with
        ``np.asarray`` so that lists and pandas Series are compared
        element-wise by position (a plain ``list > list`` comparison would be
        lexicographic and yield a single boolean).

    Returns
    -------
    The p-value returned by ``scipy.stats.binom_test`` (called with its default
    arguments) for the number of pairs credited to ``x`` out of the number of
    pairs counted.
    """
    x = np.asarray(x)
    y = np.asarray(y)

    n = len(x)
    x_wins = np.sum(x > y)

    # Draws support the null hypothesis, so we do not discount them but split
    # them evenly between the two estimators; if there is an odd number of
    # them, one draw is ignored (it is removed from the number of trials).
    draws = np.sum(x == y)
    if draws % 2 != 0:
        n -= 1
    x_wins += draws // 2

    return _binom_test(x_wins, n=n)

In [152]:
# NOTE(review): `a` and `b` are not defined in any visible cell — presumably paired
# score arrays left over from an earlier kernel session; confirm and define them
# before this call so the notebook survives Restart & Run All.
wilcoxon_test(a, b)

0.560862202981874

In [153]:
# NOTE(review): `a` and `b` are not defined in any visible cell — presumably the same
# paired score arrays used for the Wilcoxon check; confirm and define them first.
binom_test(a, b)

0.44767342529727316

In [1]:
def compare_results(x, y):
    """Compare two paired score Series with tests, a summary table and a scatter plot.

    Parameters
    ----------
    x, y : pandas.Series
        Paired scores. ``x.values`` / ``y.values`` feed the tests and the
        scatter plot, ``x - y`` feeds the descriptive statistics, and
        ``x.name`` / ``y.name`` label the plot axes.

    Returns
    -------
    None
        A one-row summary table is shown with ``display`` and a scatter plot of
        ``x`` against ``y`` is drawn with a red diagonal for reference.

    Notes
    -----
    Both p-values come from the notebook's own ``wilcoxon_test`` and
    ``binom_test`` helpers, which take the two paired arrays directly.
    """
    a = x.values
    b = y.values

    # Wilcoxon signed-rank test on the paired values.
    pwil = wilcoxon_test(a, b)

    # Share of pairs won by each side and share of ties (fractions, not counts).
    x_wins = np.mean(a > b)
    y_wins = np.mean(b > a)
    draw = np.mean(a == b)

    # Binomial sign test; the helper counts wins and handles draws itself, so it
    # is given the paired arrays rather than a pre-computed (fractional) count.
    pbin = binom_test(a, b)

    diff = x - y

    # Combine the test results with descriptive statistics of the differences.
    results = pd.Series({'wilcoxon_pval': pwil,
                         'x_wins': x_wins,
                         'y_wins': y_wins,
                         'draw': draw,
                         'binomial_pval': pbin})
    results = pd.concat([results, diff.describe()], axis=0)

    # Show everything as a single row; 'count' from describe() is dropped.
    display(pd.DataFrame(results).T.drop(columns='count').round(3))

    # Scatter plot with a diagonal from the origin to the largest observed value.
    fig, ax = plt.subplots(1)
    ax.scatter(a, b)
    upper = np.max([np.max(x), np.max(y)])
    ax.plot([0, upper], [0, upper], 'red', linewidth=1)
    ax.set(xlabel=x.name, ylabel=y.name)


/home/ucfamml/.conda/envs/sktime/bin/python


In [29]:
# Root of the local sktime-benchmarking checkout. Defaults to the original
# location under the user's home directory; set the SKTIME_BENCHMARKING_REPO
# environment variable to point the notebook at a checkout elsewhere without
# editing this cell.
HOME = os.path.expanduser("~")
REPO = os.environ.get(
    "SKTIME_BENCHMARKING_REPO",
    os.path.join(HOME, "Documents/Research/software/sktime/sktime-benchmarking/"),
)
# Directory of the tsfresh experiment inside the repository.
EXPERIMENT = os.path.join(REPO, "experiments/tsfresh")

In [30]:
# experiments/tsfresh/results/results.pickle
# NOTE(review): joblib's load unpickles the file, and unpickling can execute
# arbitrary code — only load result files that come from a trusted source.
results = load(os.path.join(EXPERIMENT, "results", "results.pickle"))
# Attach a fresh PresplitFilesCV to the loaded results object (presumably so the
# evaluator uses the pre-defined train/test splits — TODO confirm).
results.cv = PresplitFilesCV()

# Evaluate the loaded results with an accuracy metric and write the
# per-strategy, per-dataset metrics to accuracy.csv in the working directory.
evaluator = Evaluator(results=results)
metric = PairwiseMetric(func=accuracy_score, name="accuracy")
evaluator.evaluate(metric)
evaluator.metrics_by_strategy_dataset.to_csv("accuracy.csv", header=True)

In [31]:
# Read accuracy.csv from the working directory and reshape it into a table with
# one row per dataset and one column per strategy holding the mean accuracy;
# the standard-error column is dropped first.
tsfresh = pd.read_csv("accuracy.csv", index_col=0, header=0)
tsfresh = tsfresh.drop(columns="accuracy_stderr").pivot(index="dataset", columns="strategy", values="accuracy_mean")

# NOTE(review): this rebinds `results`; if an earlier cell bound that name to a
# different object (e.g. a loaded results file), that object is no longer
# reachable under this name — consider a distinct name.
results = pd.read_csv(os.path.join(REPO, "results", "Resamples.csv"), index_col=0)  # published results

In [32]:
# Join the published results with the tsfresh accuracies on their indexes
# (pd.merge defaults to an inner join, so only index labels present in both
# frames are kept), then transpose so each row is one strategy — assuming both
# frames are indexed by dataset name; TODO confirm for Resamples.csv.
r = pd.merge(results, tsfresh, left_index=True, right_index=True).T

# Rank the rows within each column (rank 1 = highest value), average each row's
# ranks across columns, and list the rows from best to worst mean rank.
r.rank(ascending=False).mean(axis=1).sort_values()

HIVE-COTE                    2.83750
Flat-COTE                    4.65625
ST                           8.98125
BOSS                         9.58125
EE                          11.16875
DTW_F                       13.72500
TSF                         14.64375
TSBF                        14.85000
LS                          15.48125
tsfresh-rf-efficient-200    15.60625
RotF                        16.79375
MSM_1NN                     16.88750
LPS                         16.96250
DD_DTW                      18.76250
DTD_C                       19.10000
LCSS_1NN                    19.20000
TWE_1NN                     19.25625
CID_DTW                     19.32500
WDTW_1NN                    19.62500
DTW_Rn_1NN                  20.43125
ERP_1NN                     20.46250
RandF                       20.68125
WDDTW_1NN                   21.62500
SVMQ                        22.16250
DDTW_Rn_1NN                 22.37500
PS                          22.41875
DTW_R1_1NN                  23.18750
M