In [None]:
from collections import defaultdict

import numpy as np
import pandas as pd
import scipy
import scipy.stats

In [None]:
# Lookup tables: abbreviation used in the result file names -> label used in
# the printed reports.

# Classifiers under comparison.
classifiers = {
    'decision_tree': 'decision tree',
    'random_forest': 'random forest',
    'knn': 'k-nearest-neighbour',
    'naive_bayes': 'naive bayes',
    'mlp': 'multi-layer perceptron',
    'svm': 'support vector machine',
}

# Space decomposition variants.
space_decomp = {
    'bl': 'baseline',
    'oaa': 'one-against-all',
    'rr': 'round-robin',
}

# Time decomposition variants.
time_decomp = {
    'full': 'without time decomposition',
    'combined': 'with time decomposition',
}

In [None]:
def chi2_mcnemar(b, c):
    """Return the chi-squared statistic of a McNemar-style test.

    Parameters
    ----------
    b, c : number
        The two discordant counts of the paired 2x2 table.

    Returns
    -------
    number
        * ``0`` when ``b + c < 8`` (no statistic is computed for so few
          discordant pairs),
        * ``(|b - c| - 1)**2 / (b + c + 1)`` when ``8 <= b + c < 30``,
        * ``(b - c)**2 / (b + c + 1)`` when ``b + c >= 30``.

    NOTE(review): the denominator is ``b + c + 1`` rather than the more
    commonly cited ``b + c`` -- confirm against the reference this was taken
    from.
    """
    n_discordant = b + c

    # Too few discordant pairs: report "no statistic" as 0.
    if n_discordant < 8:
        return 0

    denominator = n_discordant + 1

    # Small samples get the continuity-corrected numerator.
    if n_discordant < 30:
        return (abs(b - c) - 1) ** 2 / denominator

    # n_discordant >= 30: uncorrected numerator.
    return (b - c) ** 2 / denominator

In [None]:
def calculate_chi2(file1, file2):
    """Compare two prediction files sample by sample with ``chi2_mcnemar``.

    Both CSV files must provide the columns ``file`` (sample identifier, used
    as index), ``genre`` and ``predicted``. A sample counts as correctly
    classified when ``genre == predicted``. The files are matched on the
    ``file`` column, so the row order does not have to agree.

    Parameters
    ----------
    file1, file2 : str or path-like
        Paths of the result files of the first and the second classifier.

    Returns
    -------
    tuple
        ``(performs_better, b, c, chi2, sf)`` where

        * ``b`` -- number of samples only the second classifier got right,
        * ``c`` -- number of samples only the first classifier got right,
        * ``performs_better`` -- ``True`` if the second classifier has more
          correct predictions than the first one (i.e. ``b > c``),
        * ``chi2`` -- result of ``chi2_mcnemar(b, c)``,
        * ``sf`` -- survival function of the chi-squared distribution with
          one degree of freedom evaluated at ``chi2``.

    Raises
    ------
    FileNotFoundError
        If one of the files does not exist (raised by ``pd.read_csv``).
    ValueError
        If a sample identifier occurs more than once among the compared rows.
    KeyError
        If a sample of ``file1`` is missing in ``file2``.
    """
    df1 = pd.read_csv(file1, index_col='file')
    df2 = pd.read_csv(file2, index_col='file')

    if not df1.index.is_unique:
        raise ValueError(f'{file1}: duplicate entries in the "file" column')

    # Only the samples listed in file1 are compared; additional samples in
    # file2 are ignored.
    df2 = df2[df2.index.isin(df1.index)]
    if not df2.index.is_unique:
        raise ValueError(f'{file2}: duplicate entries in the "file" column')

    missing = df1.index[~df1.index.isin(df2.index)]
    if len(missing) > 0:
        raise KeyError(f'{len(missing)} sample(s) of {file1} are missing in {file2}, '
                       f'e.g. {missing[0]!r}')

    # Boolean vectors in the row order of file1.
    first_ok = (df1['genre'] == df1['predicted']).to_numpy()
    second_ok = (df2['genre'] == df2['predicted']).loc[df1.index].to_numpy()

    # Discordant pairs; the concordant cells are not needed for the statistic.
    b = int((~first_ok & second_ok).sum())
    c = int((first_ok & ~second_ok).sum())

    # (a+b) > (a+c) reduces to b > c.
    performs_better = b > c
    chi2 = chi2_mcnemar(b, c)
    return performs_better, b, c, chi2, scipy.stats.chi2.sf(chi2, 1)

### does time decomposition significantly improve the results?

In [None]:
# For every classifier and space decomposition, compare the results without
# time decomposition (file1, "full") to those with it (file2, "combined").
for cl, cl_verbose in classifiers.items():
    for sd, sd_verbose in space_decomp.items():
        file1 = f'./statistics/{cl}_{sd}_all_full.csv'
        file2 = f'./statistics/{cl}_{sd}_all_combined.csv'
        try:
            performs_better, b, c, chi2, sf = calculate_chi2(file1, file2)
        except FileNotFoundError:
            # No result files for this combination: leave it out of the report.
            continue

        if chi2 == 0:
            # A statistic of exactly 0 gets the short form with the counts only.
            print(f'{cl_verbose:22}, {sd_verbose:15}: b={b:3}, c={c:3}')
            continue

        print(f'{cl_verbose:22}, {sd_verbose:15}: b={b:3}, c={c:3}, chi^2={chi2:>8.5f}, sf={sf:.5f}', end='')
        print('' if performs_better else ' - 2nd classifier does not perform better')

### does one-against-all perform better than baseline?

In [None]:
# For every classifier and time decomposition, compare the baseline (file1)
# to the one-against-all decomposition (file2).
for cl, cl_verbose in classifiers.items():
    for td, td_verbose in time_decomp.items():
        file1 = f'./statistics/{cl}_bl_all_{td}.csv'
        file2 = f'./statistics/{cl}_oaa_all_{td}.csv'
        try:
            performs_better, b, c, chi2, sf = calculate_chi2(file1, file2)
        except FileNotFoundError:
            # Skip combinations without result files.
            continue
        else:
            if chi2 != 0:
                print(f'{cl_verbose:22}, {td_verbose:26}: b={b:3}, c={c:3}, chi^2={chi2:>8.5f}, sf={sf:.5f}', end='')
                # The second print terminates the line, with or without the remark.
                print('' if performs_better else ' - 2nd classifier does not perform better')
            else:
                # Statistic of exactly 0: print the counts only.
                print(f'{cl_verbose:22}, {td_verbose:26}: b={b:3}, c={c:3}')

### does round-robin perform better than baseline?

In [None]:
# For every classifier and time decomposition, compare the baseline (file1)
# to the round-robin decomposition (file2).
for cl, cl_verbose in classifiers.items():
    for td, td_verbose in time_decomp.items():
        file1 = f'./statistics/{cl}_bl_all_{td}.csv'
        file2 = f'./statistics/{cl}_rr_all_{td}.csv'
        try:
            performs_better, b, c, chi2, sf = calculate_chi2(file1, file2)
        except FileNotFoundError:
            # Result files for this combination do not exist: nothing to report.
            continue

        if chi2 == 0:
            # Short form: only the discordant counts.
            print(f'{cl_verbose:22}, {td_verbose:26}: b={b:3}, c={c:3}')
            continue

        print(f'{cl_verbose:22}, {td_verbose:26}: b={b:3}, c={c:3}, chi^2={chi2:>8.5f}, sf={sf:.5f}', end='')
        print('' if performs_better else ' - 2nd classifier does not perform better')

### does round-robin perform better than one-against-all?

In [None]:
# For every classifier and time decomposition, compare the one-against-all
# decomposition (file1) to the round-robin decomposition (file2).
for cl, cl_verbose in classifiers.items():
    for td, td_verbose in time_decomp.items():
        file1 = f'./statistics/{cl}_oaa_all_{td}.csv'
        file2 = f'./statistics/{cl}_rr_all_{td}.csv'
        try:
            performs_better, b, c, chi2, sf = calculate_chi2(file1, file2)
        except FileNotFoundError:
            # Skip combinations without result files.
            continue
        else:
            if chi2 != 0:
                print(f'{cl_verbose:22}, {td_verbose:26}: b={b:3}, c={c:3}, chi^2={chi2:>8.5f}, sf={sf:.5f}', end='')
                # The second print terminates the line, with or without the remark.
                print('' if performs_better else ' - 2nd classifier does not perform better')
            else:
                # Statistic of exactly 0: print the counts only.
                print(f'{cl_verbose:22}, {td_verbose:26}: b={b:3}, c={c:3}')

### do SVMs perform better than the other classifiers?

In [None]:
# For every space/time decomposition, compare each non-SVM classifier (file1)
# to the support vector machine (file2).
for cl, cl_verbose in classifiers.items():
    if cl == 'svm':
        # Exclude the SVM by key instead of relying on it being the last
        # entry of `classifiers`; comparing it with itself is meaningless.
        continue
    for sd, sd_verbose in space_decomp.items():
        for td, td_verbose in time_decomp.items():
            file1 = f'./statistics/{cl}_{sd}_all_{td}.csv'
            file2 = f'./statistics/svm_{sd}_all_{td}.csv'
            try:
                performs_better, b, c, chi2, sf = calculate_chi2(file1, file2)
            except FileNotFoundError:
                # Skip combinations without result files.
                continue

            if chi2 == 0:
                # Statistic of exactly 0: print the counts only.
                print(f'{cl_verbose:22}, {sd_verbose:15}, {td_verbose:26}: b={b:3}, c={c:3}')
            else:
                print(f'{cl_verbose:22}, {sd_verbose:15}, {td_verbose:26}: b={b:3}, c={c:3}, chi^2={chi2:>8.5f}, sf={sf:.5f}', end='')
                # The second print terminates the line, with or without the remark.
                print(' - 2nd classifier does not perform better' if not performs_better else '')