In [None]:
import os
import time
from scipy.stats import (
    anderson,
    chi2_contingency,
    f_oneway,
    friedmanchisquare,
    kendalltau,
    kruskal,
    mannwhitneyu,
    normaltest,
    pearsonr,
    shapiro,
    spearmanr,
    ttest_ind,
    ttest_rel,
    wilcoxon,
)

import pandas as pd
import numpy as np
# from mpl_toolkits import mplot3d
# from mpl_toolkits.mplot3d import Axes3D
# import matplotlib as mpl
# mpl.use('Qt5Agg')
import matplotlib.pyplot as plt
import seaborn as sns

In [336]:
"""Load data as data frame"""
# Root folder of the exported tables. The original machine-specific location stays the
# default; set the MYOCARDITIS_CSV_DIR environment variable to run the notebook elsewhere.
DATA_ROOT = os.environ.get('MYOCARDITIS_CSV_DIR', '/home/melandur/Data/Myocarditis/csv').rstrip('/')
src_train = f'{DATA_ROOT}/train/7_merged/'
src_test = f'{DATA_ROOT}/test/7_merged/'


def load_data(path):
    """Load the per-segment AHA Excel tables found in ``path``.

    A file is loaded when its name contains 'aha' and contains neither 'sample'
    nor 'global'. The dictionary key is built from the 2nd and 3rd
    underscore-separated tokens of the file name plus the last token with the
    '.xlsx' suffix cut off (e.g. 'longit_velocity_12').

    Args:
        path: Directory that holds the Excel files.

    Returns:
        dict mapping key -> DataFrame with the first row and the first column
        of the sheet removed. Empty when no file matches.
    """
    df_store = {}
    # sorted(): os.listdir() returns entries in arbitrary order; sorting makes the key order reproducible
    for file in sorted(os.listdir(path)):
        # Filter on the name *before* reading so that excluded workbooks are never parsed
        if 'aha' not in file or 'sample' in file or 'global' in file:
            continue
        tokens = file.split('_')
        name = f"{'_'.join(tokens[1:3])}_{tokens[-1]}".split('.xlsx')[0]
        df = pd.read_excel(os.path.join(path, file))
        df_store[name] = df.iloc[1:, 1:]  # drop first column and row
    return df_store

# Read both cohorts with the same loader: train first, then test
df_train, df_test = (load_data(src_dir) for src_dir in (src_train, src_test))

In [335]:
# Quick sanity check: list the table names that load_data() produced for the test cohort
print(df_test.keys())

dict_keys(['longit_velocity_12', 'radial_strain_14', 'circumf_strain_5', 'radial_velocity_1', 'circumf_velocity_7', 'longit_strain_13', 'radial_velocity_4', 'longit_velocity_5', 'circumf_strain_13', 'longit_velocity_16', 'longit_velocity_3', 'radial_velocity_2', 'radial_strain_2', 'longit_velocity_1', 'radial_velocity_12', 'circumf_strain_8', 'circumf_velocity_2', 'circumf_velocity_1', 'longit_velocity_14', 'longit_strain_7', 'circumf_strain_12', 'radial_strain_5', 'circumf_velocity_4', 'radial_strain_6', 'circumf_velocity_3', 'longit_strain_8', 'radial_velocity_3', 'radial_velocity_13', 'circumf_velocity_10', 'longit_strain_9', 'radial_strain_1', 'circumf_strain_14', 'circumf_strain_10', 'circumf_velocity_11', 'circumf_velocity_15', 'radial_strain_4', 'longit_velocity_9', 'circumf_strain_4', 'radial_strain_10', 'radial_strain_7', 'radial_strain_12', 'circumf_velocity_9', 'longit_strain_10', 'circumf_velocity_5', 'longit_strain_14', 'longit_strain_11', 'longit_velocity_15', 'radial_vel

In [337]:
def euclider(store, name, min=0, max=100, n_segments=16):
    """Combine the three directional tables of every segment into one magnitude table.

    For each segment index ``1..n_segments`` the tables ``longit_<name>_<idx>``,
    ``circumf_<name>_<idx>`` and ``radial_<name>_<idx>`` are taken from ``store``,
    restricted to the positional column slice ``min:max`` and combined
    element-wise as ``sqrt(longit**2 + circumf**2 + radial**2)``.

    Args:
        store: dict of DataFrames keyed by '<direction>_<name>_<idx>'.
        name: Measure part of the key, e.g. 'strain' or 'velocity'.
        min: First column position to keep (shadows the builtin; kept for
            backward compatibility with existing callers).
        max: Column position to stop at, exclusive (shadows the builtin as well).
        n_segments: Number of segments to process. The default of 16 reproduces
            the previously hard-coded range.

    Returns:
        dict mapping ``str(idx)`` to a DataFrame that carries the index and the
        column labels of the longitudinal table.

    Raises:
        KeyError: If one of the expected tables is missing from ``store``.
        ValueError: If the three tables of a segment differ in shape.
    """
    euclid_store = {}
    for idx in range(1, n_segments + 1):
        components = [
            store[f'{direction}_{name}_{idx}'].iloc[:, min:max]
            for direction in ('longit', 'circumf', 'radial')
        ]

        # The sum below is positional (.values), so a table with a single row or a single
        # column would be broadcast silently instead of failing. Reject that up front.
        shapes = [component.shape for component in components]
        if len(set(shapes)) != 1:
            raise ValueError(
                f"Shape mismatch for '{name}' segment {idx}: "
                f'longit={shapes[0]}, circumf={shapes[1]}, radial={shapes[2]}'
            )

        # euclid distance
        df_longit, df_circumf, df_radial = (abs(component) ** 2 for component in components)
        # .values: add by position, ignoring the index/column labels of circumf and radial
        summed = df_longit + df_circumf.values + df_radial.values
        euclid_store[f'{idx}'] = summed ** (1 / 2)
    return euclid_store


# Per-segment magnitudes for both cohorts: strain (_s) first, then velocity (_v)
df_e_train_s, df_e_test_s = (euclider(cohort, 'strain') for cohort in (df_train, df_test))
df_e_train_v, df_e_test_v = (euclider(cohort, 'velocity') for cohort in (df_train, df_test))

# sns.boxplot(data=test, ax=axes[1])

# print('######')
# print(f'{train.mean()} - {test.mean()}')
# print(f'{train.median()} - {test.median()}')
# print(f'{train.std()} - {test.std()}')



{'1':     case_231   case_48  case_305  case_190  case_239  case_236  case_280  \
1   0.822979  0.917724  0.715354  0.466906  0.853609  0.421243  0.648325   
2   2.404662  1.102370  0.860131  2.138897  1.271233  0.861114  2.994515   
3   2.863164  2.267067  2.596051  2.495361  2.433331  1.851411  2.419579   
4   2.891855  5.065679  2.347916  3.062410  1.786158  1.973781  2.020318   
5   3.061485  1.633228  3.150447  2.479052  1.349470  2.040641  2.721990   
6   3.132179  2.413771  2.544199  1.569450  1.102752  2.502360  3.604461   
7   2.882876  3.401042  2.072473  0.220737  0.912310  2.000456  2.665402   
8   2.015891  3.036384  1.812961  1.115283  0.057645  1.478919  2.066913   
9   1.013052  2.737751  1.201383  1.222134  1.470223  0.885009  1.004875   
10  2.586038  0.592377  1.819590  3.212659  0.950514  0.438780  2.122731   
11  3.057861  0.384544  0.154068  3.000989  1.209438  0.384579  1.915308   
12  3.240849  1.267705  0.908558  1.047669  1.281970  1.212461  2.897058   
13  1.

In [None]:
# Fourier amplitude spectrum of the Euclidean magnitudes, per segment (test vs. train)
from scipy.fft import fft, fftfreq, irfft, rfft

# NOTE(review): this cell used to read `df_e_train` / `df_e_test`, which no visible cell
# defines, so it failed with a NameError on a fresh kernel. The strain magnitudes are
# assumed here; use df_e_train_v / df_e_test_v for velocity -- TODO confirm the intent.
euclid_train, euclid_test = df_e_train_s, df_e_test_s

N_MIRROR_PAIRS = 25  # appended (reversed, original) pairs -> 51 copies per signal, as before
T = 1 / 50  # sample spacing handed to fftfreq


def _mirror_extend(signal, n_pairs=N_MIRROR_PAIRS):
    """Return `signal` followed by `n_pairs` repetitions of (reversed signal, signal)."""
    # One concatenate call instead of growing the array inside a loop
    return np.concatenate([signal] + [np.flip(signal), signal] * n_pairs)


def _half_spectrum(signal, spacing=T):
    """Return the non-negative frequency bins and the 2/N-scaled FFT magnitudes."""
    n = len(signal)
    return fftfreq(n, spacing)[:n // 2], 2.0 / n * np.abs(fft(signal)[:n // 2])


for segment in range(1, 17):
    print(f'{segment}')
    # melt() stacks all columns of the segment table into a single 'value' column
    test_e = _mirror_extend(euclid_test[f'{segment}'].melt()['value'].to_numpy())
    train_e = _mirror_extend(euclid_train[f'{segment}'].melt()['value'].to_numpy())

    fig, (ax_test, ax_train) = plt.subplots(2, 1)
    ax_test.plot(*_half_spectrum(test_e), 'cyan')
    ax_test.set(title=f'Segment {segment} - test', ylabel='Amplitude')
    ax_train.plot(*_half_spectrum(train_e), 'black')
    ax_train.set(title=f'Segment {segment} - train', xlabel='Frequency', ylabel='Amplitude')
    for ax in (ax_test, ax_train):
        ax.grid()  # previously only the lower panel received a grid

    fig.tight_layout()
    plt.show()
    plt.close(fig)  # 16 figures are created in this loop; release each one after display

In [None]:
# NOTE(review): this cell used to read `df_e_train` / `df_e_test`, which no visible cell
# defines, so it failed with a NameError on a fresh kernel. The strain magnitudes are
# assumed here; use df_e_train_v / df_e_test_v for velocity -- TODO confirm the intent.
euclid_train, euclid_test = df_e_train_s, df_e_test_s

# Normality checks, run on each cohort separately
NORMALITY_TESTS = (
    ('Shapiro test', shapiro),
    ('Normal test', normaltest),
)

# Train-vs-test comparisons
TWO_SAMPLE_TESTS = (
    ('Anova test', f_oneway),
    # alternative is spelled out because its default differs between SciPy releases
    ('Mann-Whitney U test', lambda a, b: mannwhitneyu(a, b, alternative='two-sided')),
    ('t_test', ttest_ind),
)

for x in range(1, 17):
    print(f'{x}')
    # melt() stacks all columns of the segment table into a single 'value' column
    train = euclid_train[f'{x}'].melt()['value']
    test = euclid_test[f'{x}'].melt()['value']

    for label, normality_test in NORMALITY_TESTS:
        for cohort, values in (('train-set', train), ('test-set', test)):
            stat, p = normality_test(values)
            # printed unrounded so that very small p-values stay visible
            print(f'{label} -> {cohort}', stat, p)

    for label, two_sample_test in TWO_SAMPLE_TESTS:
        stat, p = two_sample_test(train, test)
        print(f'{label}: {stat:.4f}, {p:.4f}')

In [318]:
for i in range(1, 17):
    print(f'## {i} ##')
    x = df_e_train[f'{i}'].melt()
    x = x.drop(x.columns[x.columns.str.contains('variable', case=False)], axis=1)
    x = x['value']
    y = df_e_test[f'{i}'].melt()
    y = y.drop(y.columns[y.columns.str.contains('variable', case=False)], axis=1)
    y = y['value']
    print(f'{round(float(x.mean()), 2)} -- {round(float(y.mean()), 2)}')
    print(f'{round(float(x.median()), 2)} -- {round(float(y.median()), 2)}')
    print(f'{round(float(x.std()), 2)} -- {round(float(y.std()), 2)}')



## 1 ##
1.16 -- 1.46
0.83 -- 1.26
1.05 -- 1.11
## 2 ##
1.0 -- 1.1
0.77 -- 0.97
0.79 -- 0.78
## 3 ##
0.84 -- 1.08
0.68 -- 0.9
0.64 -- 0.8
## 4 ##
0.9 -- 1.18
0.68 -- 0.95
0.77 -- 0.96
## 5 ##
1.13 -- 1.69
0.82 -- 1.32
1.05 -- 1.55
## 6 ##
1.26 -- 1.87
0.89 -- 1.51
1.18 -- 1.63
## 7 ##
1.04 -- 1.37
0.77 -- 1.17
1.05 -- 1.21
## 8 ##
0.92 -- 1.11
0.73 -- 1.01
0.72 -- 0.79
## 9 ##
0.77 -- 0.96
0.61 -- 0.83
0.58 -- 0.76
## 10 ##
0.86 -- 1.07
0.63 -- 0.81
0.89 -- 1.01
## 11 ##
1.15 -- 1.43
0.78 -- 1.05
1.82 -- 1.33
## 12 ##
1.13 -- 1.54
0.83 -- 1.25
1.11 -- 1.41
## 13 ##
1.23 -- 1.83
0.9 -- 1.4
1.51 -- 2.55
## 14 ##
1.09 -- 1.45
0.81 -- 1.17
1.06 -- 1.55
## 15 ##
1.19 -- 1.56
0.85 -- 1.26
1.43 -- 1.93
## 16 ##
1.3 -- 2.0
0.95 -- 1.56
1.45 -- 3.02
