In [1]:
from scipy.stats import wilcoxon
from scipy.stats import shapiro
import pandas as pd
import shutil
import numpy as np

Aqui realizamos tanto o teste de Shapiro-Wilk, para verificar a normalidade dos resultados, quanto o teste não paramétrico de Wilcoxon (signed-rank), que compara, métrica a métrica, os resultados pareados do DeepSMOTE e do DeepADASYN. Como alguns dos resultados não são gaussianos, optamos por utilizar o teste não paramétrico em todos os casos.

In [2]:
metric_name = ['acs','gm','f1']
def teste_nao_parametrico(result_df):
    for i in range(0,3):
        columns = result_df.columns

        data1 = result_df[columns[i]]
        data2 = result_df[columns[i+3]]

        stat, p = wilcoxon(data1, data2)
        print('stat=%.3f, p=%.3f' % (stat, p))
        if p > 0.05:
          print('H0 not rejected for {} metric: Probably the same distribution'.format(metric_name[i]))
        else:
          print('H0 rejected for {} metric: Probably different distributions'.format(metric_name[i]))

In [3]:
def shapiro_wilk(result_df, alpha=0.05):
    """Run the Shapiro-Wilk normality test on every column of ``result_df``.

    For each column the test statistic and p-value are printed, followed by
    a verdict line naming the column.

    Parameters
    ----------
    result_df : pandas.DataFrame
        One column per sample to test; any number of columns is accepted.
    alpha : float, optional
        Significance level; H0 (normality) is reported as rejected when
        ``p <= alpha``. Defaults to 0.05.

    Returns
    -------
    None
        Results are only printed.
    """
    # Iterate the columns directly: no fixed-size buffer, so the number of
    # columns is not limited to six.
    for column in result_df.columns:
        stat, p = shapiro(result_df[column])
        print('stat=%.3f, p=%.3f' % (stat, p))
        if p > alpha:
            print('H0 not rejected for {} metric: Probably Gaussian'.format(column))
        else:
            print('H0 rejected for {} metric: Probably not Gaussian'.format(column))

## MNIST

In [4]:
# Read the MNIST result table and relabel its six columns as
# "<metric> <oversampler>" (DeepSMOTE columns first, then DeepADASYN).
result_df = pd.read_csv('data_projeto/data_MNIST.csv', header=[0])
result_df.columns = [
    'ACSA DeepSMOTE', 'GM DeepSMOTE', 'F1 DeepSMOTE',
    'ACSA DeepADASYN', 'GM DeepADASYN', 'F1 DeepADASYN',
]
result_df

Unnamed: 0,ACSA DeepSMOTE,GM DeepSMOTE,F1 DeepSMOTE,ACSA DeepADASYN,GM DeepADASYN,F1 DeepADASYN
0,0.98725,0.964158,0.935715,0.986807,0.962892,0.932881
1,0.988303,0.967127,0.940348,0.987753,0.965562,0.93793
2,0.98772,0.96548,0.937734,0.98575,0.959866,0.927968
3,0.98941,0.970283,0.946563,0.989243,0.969822,0.945629
4,0.98783,0.9658,0.938557,0.9834,0.953073,0.915691
5,0.98866,0.968152,0.942565,0.986673,0.962506,0.932012
6,0.987553,0.964979,0.935765,0.986757,0.96274,0.933383
7,0.988443,0.967544,0.941876,0.987573,0.965067,0.936907
8,0.987437,0.964691,0.935984,0.988047,0.966423,0.939639
9,0.985647,0.959593,0.926815,0.984947,0.95758,0.923357


In [5]:
# Summary statistics per column; show only the mean and standard deviation
# rows, selected by label rather than by their position in describe().
df_mean = result_df.describe()
df_mean.loc[['mean', 'std']]

Unnamed: 0,ACSA DeepSMOTE,GM DeepSMOTE,F1 DeepSMOTE,ACSA DeepADASYN,GM DeepADASYN,F1 DeepADASYN
mean,0.987897,0.965978,0.938458,0.986389,0.961639,0.930433
std,0.001093,0.003118,0.00582,0.002231,0.006443,0.012115


In [6]:
# Shapiro-Wilk normality check on every column of the MNIST results loaded above.
shapiro_wilk(result_df)

stat=0.924, p=0.035
H0 rejected for ACSA DeepSMOTE metric: Probably not Gaussian
stat=0.923, p=0.032
H0 rejected for GM DeepSMOTE metric: Probably not Gaussian
stat=0.918, p=0.024
H0 rejected for F1 DeepSMOTE metric: Probably not Gaussian
stat=0.910, p=0.015
H0 rejected for ACSA DeepADASYN metric: Probably not Gaussian
stat=0.906, p=0.012
H0 rejected for GM DeepADASYN metric: Probably not Gaussian
stat=0.903, p=0.010
H0 rejected for F1 DeepADASYN metric: Probably not Gaussian


In [7]:
# Wilcoxon signed-rank test pairing each DeepSMOTE metric column with its
# DeepADASYN counterpart (MNIST results loaded above).
teste_nao_parametrico(result_df)

stat=78.000, p=0.001
H0 rejected for acs metric: Probably different distributions
stat=80.000, p=0.002
H0 rejected for gm metric: Probably different distributions
stat=85.000, p=0.002
H0 rejected for f1 metric: Probably different distributions


## Fashion-MNIST

In [8]:
# Read the Fashion-MNIST result table and relabel its six columns as
# "<metric> <oversampler>" (DeepSMOTE columns first, then DeepADASYN).
result_df = pd.read_csv('data_projeto/data_FMNIST.csv', header=[0])
result_df.columns = [
    'ACSA DeepSMOTE', 'GM DeepSMOTE', 'F1 DeepSMOTE',
    'ACSA DeepADASYN', 'GM DeepADASYN', 'F1 DeepADASYN',
]
result_df

Unnamed: 0,ACSA DeepSMOTE,GM DeepSMOTE,F1 DeepSMOTE,ACSA DeepADASYN,GM DeepADASYN,F1 DeepADASYN
0,0.959987,0.884368,0.791665,0.956137,0.872597,0.771474
1,0.960223,0.885085,0.791408,0.957657,0.877264,0.777952
2,0.95959,0.88317,0.791802,0.959243,0.882117,0.784199
3,0.958063,0.878545,0.782429,0.963203,0.894049,0.810287
4,0.96062,0.886275,0.791167,0.96012,0.884771,0.792928
5,0.95612,0.872596,0.771396,0.956943,0.875109,0.776819
6,0.956847,0.874804,0.775281,0.958347,0.879331,0.782652
7,0.96077,0.886726,0.795618,0.95577,0.871423,0.763648
8,0.959997,0.884392,0.791225,0.958677,0.880375,0.78352
9,0.96125,0.888176,0.799533,0.95996,0.884298,0.787765


In [9]:
# Summary statistics per column; show only the mean and standard deviation
# rows, selected by label rather than by their position in describe().
df_mean = result_df.describe()
df_mean.loc[['mean', 'std']]

Unnamed: 0,ACSA DeepSMOTE,GM DeepSMOTE,F1 DeepSMOTE,ACSA DeepADASYN,GM DeepADASYN,F1 DeepADASYN
mean,0.959722,0.883552,0.790314,0.959111,0.881692,0.785435
std,0.001632,0.004944,0.008482,0.001584,0.00481,0.008615


In [10]:
# Shapiro-Wilk normality check on every column of the Fashion-MNIST results loaded above.
shapiro_wilk(result_df)

stat=0.975, p=0.329
H0 not rejected for ACSA DeepSMOTE metric: Probably Gaussian
stat=0.974, p=0.310
H0 not rejected for GM DeepSMOTE metric: Probably Gaussian
stat=0.969, p=0.182
H0 not rejected for F1 DeepSMOTE metric: Probably Gaussian
stat=0.976, p=0.348
H0 not rejected for ACSA DeepADASYN metric: Probably Gaussian
stat=0.976, p=0.350
H0 not rejected for GM DeepADASYN metric: Probably Gaussian
stat=0.985, p=0.724
H0 not rejected for F1 DeepADASYN metric: Probably Gaussian


In [11]:
# Wilcoxon signed-rank test pairing each DeepSMOTE metric column with its
# DeepADASYN counterpart (Fashion-MNIST results loaded above).
teste_nao_parametrico(result_df)

stat=488.000, p=0.044
H0 rejected for acs metric: Probably different distributions
stat=489.000, p=0.045
H0 rejected for gm metric: Probably different distributions
stat=384.000, p=0.003
H0 rejected for f1 metric: Probably different distributions


## CIFAR-10

In [12]:
# Read the CIFAR-10 result table and relabel its six columns as
# "<metric> <oversampler>" (DeepSMOTE columns first, then DeepADASYN).
result_df = pd.read_csv('data_projeto/data_CIFAR10.csv', header=[0])
result_df.columns = [
    'ACSA DeepSMOTE', 'GM DeepSMOTE', 'F1 DeepSMOTE',
    'ACSA DeepADASYN', 'GM DeepADASYN', 'F1 DeepADASYN',
]
result_df

Unnamed: 0,ACSA DeepSMOTE,GM DeepSMOTE,F1 DeepSMOTE,ACSA DeepADASYN,GM DeepADASYN,F1 DeepADASYN
0,0.881103,0.615221,0.372066,0.880893,0.614391,0.368918
1,0.88034,0.612301,0.367378,0.88308,0.622835,0.37911
2,0.878217,0.603789,0.352476,0.88044,0.612457,0.365554
3,0.880347,0.612279,0.370521,0.882057,0.619108,0.371781
4,0.878873,0.6063,0.359763,0.87848,0.604884,0.34736
5,0.879633,0.60945,0.366008,0.882293,0.619968,0.374847
6,0.880547,0.612936,0.378041,0.882283,0.620034,0.379531
7,0.882737,0.621838,0.385219,0.88195,0.618346,0.374958
8,0.880097,0.611326,0.367711,0.883483,0.624747,0.381552
9,0.880917,0.614406,0.375215,0.883117,0.623172,0.383924


In [13]:
# Summary statistics per column; show only the mean and standard deviation
# rows, selected by label rather than by their position in describe().
df_mean = result_df.describe()
df_mean.loc[['mean', 'std']]

Unnamed: 0,ACSA DeepSMOTE,GM DeepSMOTE,F1 DeepSMOTE,ACSA DeepADASYN,GM DeepADASYN,F1 DeepADASYN
mean,0.880452,0.612621,0.370336,0.881746,0.617724,0.372868
std,0.001574,0.006325,0.01024,0.001477,0.005854,0.009904


In [14]:
# Shapiro-Wilk normality check on every column of the CIFAR-10 results loaded above.
shapiro_wilk(result_df)

stat=0.939, p=0.485
H0 not rejected for ACSA DeepSMOTE metric: Probably Gaussian
stat=0.936, p=0.454
H0 not rejected for GM DeepSMOTE metric: Probably Gaussian
stat=0.981, p=0.987
H0 not rejected for F1 DeepSMOTE metric: Probably Gaussian
stat=0.910, p=0.214
H0 not rejected for ACSA DeepADASYN metric: Probably Gaussian
stat=0.914, p=0.243
H0 not rejected for GM DeepADASYN metric: Probably Gaussian
stat=0.860, p=0.049
H0 rejected for F1 DeepADASYN metric: Probably not Gaussian


In [15]:
# Wilcoxon signed-rank test pairing each DeepSMOTE metric column with its
# DeepADASYN counterpart (CIFAR-10 results loaded above).
teste_nao_parametrico(result_df)

stat=10.000, p=0.021
H0 rejected for acs metric: Probably different distributions
stat=10.000, p=0.021
H0 rejected for gm metric: Probably different distributions
stat=26.000, p=0.339
H0 not rejected for f1 metric: Probably the same distribution


## SVHN

In [16]:
# Read the SVHN result table and relabel its six columns as
# "<metric> <oversampler>" (DeepSMOTE columns first, then DeepADASYN).
result_df = pd.read_csv('data_projeto/data_SVHN.csv', header=[0])
result_df.columns = [
    'ACSA DeepSMOTE', 'GM DeepSMOTE', 'F1 DeepSMOTE',
    'ACSA DeepADASYN', 'GM DeepADASYN', 'F1 DeepADASYN',
]
result_df

Unnamed: 0,ACSA DeepSMOTE,GM DeepSMOTE,F1 DeepSMOTE,ACSA DeepADASYN,GM DeepADASYN,F1 DeepADASYN
0,0.948109,0.817048,0.698489,0.949961,0.822857,0.701213
1,0.948761,0.818411,0.691138,0.950618,0.824561,0.702154
2,0.948348,0.816542,0.68941,0.913376,0.700027,0.531874
3,0.948592,0.817352,0.691351,0.946108,0.809846,0.677704
4,0.94932,0.820192,0.697615,0.946533,0.8106,0.679588
5,0.949931,0.821126,0.696827,0.94241,0.79843,0.665312
6,0.94948,0.820588,0.69975,0.94295,0.79969,0.664328
7,0.948448,0.817712,0.691647,0.948481,0.817439,0.690323
8,0.950575,0.824041,0.698916,0.948487,0.816413,0.687604
9,0.950677,0.823578,0.697456,0.947135,0.81364,0.685537


In [17]:
# Summary statistics per column; show only the mean and standard deviation
# rows, selected by label rather than by their position in describe().
df_mean = result_df.describe()
df_mean.loc[['mean', 'std']]

Unnamed: 0,ACSA DeepSMOTE,GM DeepSMOTE,F1 DeepSMOTE,ACSA DeepADASYN,GM DeepADASYN,F1 DeepADASYN
mean,0.949224,0.819659,0.69526,0.943606,0.80135,0.668564
std,0.000927,0.002689,0.003892,0.010955,0.036626,0.049706


In [18]:
# Shapiro-Wilk normality check on every column of the SVHN results loaded above.
shapiro_wilk(result_df)

stat=0.911, p=0.289
H0 not rejected for ACSA DeepSMOTE metric: Probably Gaussian
stat=0.907, p=0.264
H0 not rejected for GM DeepSMOTE metric: Probably Gaussian
stat=0.852, p=0.062
H0 not rejected for F1 DeepSMOTE metric: Probably Gaussian
stat=0.595, p=0.000
H0 rejected for ACSA DeepADASYN metric: Probably not Gaussian
stat=0.587, p=0.000
H0 rejected for GM DeepADASYN metric: Probably not Gaussian
stat=0.612, p=0.000
H0 rejected for F1 DeepADASYN metric: Probably not Gaussian


In [19]:
# Wilcoxon signed-rank test pairing each DeepSMOTE metric column with its
# DeepADASYN counterpart (SVHN results loaded above).
teste_nao_parametrico(result_df)

stat=6.000, p=0.027
H0 rejected for acs metric: Probably different distributions
stat=5.000, p=0.020
H0 rejected for gm metric: Probably different distributions
stat=5.000, p=0.020
H0 rejected for f1 metric: Probably different distributions
