In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

import gudhi as gd
import gudhi.representations
import gudhi.representations.vector_methods

from scipy.stats import ks_2samp
from scipy.stats import chi2_contingency

In [None]:
def load_data(link):
    """Load a NumPy array saved at ``link`` and return it as float32.

    Parameters
    ----------
    link : path or file-like object
        Anything accepted by ``np.load``.

    Returns
    -------
    The loaded array converted with ``astype(np.float32)``.
    """
    raw = np.load(link)
    return raw.astype(np.float32)

# Genetic data

In [None]:
# Top-level folders of the genetic datasets; each is used as the root of the
# file paths built in the loops below.
datasets = [
    'GenesExtended',
    'Genes',
]

# File-name prefixes selecting which method's result files are loaded.
# The empty prefix presumably refers to the untransformed data - TODO confirm.
methods = [
    '',
    'Basic AutoEncoder_',
    'Topological AutoEncoder_',
    # 'PCA_', 'tSNE_',
    'UMAP_',
]

# Statistical tests to run; the loops below recognise 'KS' and 'Chi2'.
tests = ['KS']

### Persistence diagrams

In [None]:
def _two_sample_test(test, sample_a, sample_b, n_bins=10):
    """Compare two 1-D samples and return ``(statistic, p_value)``.

    Parameters
    ----------
    test : str
        ``'KS'`` for the two-sample Kolmogorov-Smirnov test or ``'Chi2'`` for
        a chi-square test of homogeneity.
    sample_a, sample_b : array-like
        The two samples to compare.
    n_bins : int
        Number of shared bins used to tabulate the samples for ``'Chi2'``.

    Raises
    ------
    ValueError
        If ``test`` is not recognised, or (for ``'Chi2'``) if SciPy rejects
        the contingency table, e.g. because one sample has no finite values.
    """
    if test == 'KS':
        statistic, p_value = ks_2samp(sample_a, sample_b)
        return statistic, p_value
    if test == 'Chi2':
        # chi2_contingency takes ONE table of counts (its second positional
        # argument is the `correction` flag), so both samples are binned on
        # shared edges and stacked into a 2 x k table.
        sample_a = np.asarray(sample_a)
        sample_b = np.asarray(sample_b)
        finite_a = sample_a[np.isfinite(sample_a)]
        finite_b = sample_b[np.isfinite(sample_b)]
        edges = np.histogram(np.concatenate([finite_a, finite_b]), bins=n_bins)[1]
        observed = np.vstack([np.histogram(finite_a, bins=edges)[0],
                              np.histogram(finite_b, bins=edges)[0]])
        # Bins that are empty in both samples would give zero expected
        # frequencies, which chi2_contingency rejects.
        observed = observed[:, observed.sum(axis=0) > 0]
        statistic, p_value, _, _ = chi2_contingency(observed)
        return statistic, p_value
    raise ValueError(f"Unknown test {test!r}; expected 'KS' or 'Chi2'")


# Built once: every fit_transform call below refits it on the diagram passed in.
landscape = gd.representations.Landscape(resolution=200)

# table[i] holds the rows for tests[i]; each row is
# [test, 'Dim 0' | 'Dim 1', dataset, [[method, p_value], ...]].
table = []
for test in tests:
    # Fail fast instead of silently reusing values from an earlier iteration.
    if test not in ('KS', 'Chi2'):
        raise ValueError(f"Unknown test {test!r}; expected 'KS' or 'Chi2'")
    table_test = []
    for dataset in datasets:
        table_dataset_0 = []
        table_dataset_1 = []
        for method in methods:
            # loading data
            AD_dim0 = load_data(f'{dataset}/diagrams/{method}AD_diagram_0.npy')
            control_dim0 = load_data(f'{dataset}/diagrams/{method}control_diagram_0.npy')
            AD_dim1 = load_data(f'{dataset}/diagrams/{method}AD_diagram_1.npy')
            control_dim1 = load_data(f'{dataset}/diagrams/{method}control_diagram_1.npy')

            # 0 dim test: compare the second column of the two diagrams
            # (presumably the death values - TODO confirm).
            statistic, p_value = _two_sample_test(test, control_dim0[:, 1], AD_dim0[:, 1])
            print(f"{dataset}, {method}, dim 0, Test Statistic:", statistic)
            print(f"{dataset}, {method}, dim 0,  P-value:", p_value)
            table_dataset_0.append([method, p_value])

            # 1 dim test: compare the landscape vectors of the two diagrams.
            # NOTE(review): each diagram is fitted separately, so the sampling
            # interval may differ between the two groups - confirm intended.
            control_landscape = landscape.fit_transform([control_dim1])[0]
            AD_landscape = landscape.fit_transform([AD_dim1])[0]
            if test == 'Chi2':
                try:
                    statistic, p_value = _two_sample_test(test, control_landscape, AD_landscape)
                except ValueError as err:
                    # Best effort: keep the -1 sentinel but say why it was used.
                    print(f"{dataset}, {method}, dim 1, Chi2 not computable:", err)
                    statistic, p_value = -1, -1
            else:
                statistic, p_value = _two_sample_test(test, control_landscape, AD_landscape)
            print(f"{dataset}, {method}, dim 1, Test Statistic:", statistic)
            print(f"{dataset}, {method}, dim 1,  P-value:", p_value)
            table_dataset_1.append([method, p_value])
        table_test.append([test, 'Dim 0', dataset, table_dataset_0])
        table_test.append([test, 'Dim 1', dataset, table_dataset_1])
    table.append(table_test)

In [None]:
# Display the results gathered by the loop in the previous cell: one list per
# test, whose rows are [test, 'Dim 0' | 'Dim 1', dataset, [[method, p_value], ...]].
table

### Weights distribution

In [None]:
# Top-level folders of the genetic datasets; each is used as the root of the
# histogram file paths built in the loop below.
datasets = [
    'GenesExtended',
    'Genes',
]

# File-name prefixes selecting which method's histogram files are loaded.
# The empty prefix presumably refers to the untransformed data - TODO confirm.
methods = [
    '',
    'Basic AutoEncoder_',
    'Topological AutoEncoder_',
    # 'PCA_', 'tSNE_',
    'UMAP_',
]

# Statistical tests to run; the loop below recognises 'KS' and 'Chi2'.
tests = ['KS']

In [None]:
# table[i] holds the rows for tests[i]; each row is
# [dataset, [[method, dataset, p_value], ...]].
table = []
for test in tests:
    # Fail fast instead of silently reusing values from an earlier iteration.
    if test not in ('KS', 'Chi2'):
        raise ValueError(f"Unknown test {test!r}; expected 'KS' or 'Chi2'")
    table_test = []
    for dataset in datasets:
        table_dataset = []
        for method in methods:
            # Only the '*_0' histogram files are needed here; the matching
            # '*_1' files are not used by either test.
            hist_AD = load_data(f'{dataset}/diagrams/{method}hist_AD_0.npy')
            hist_control = load_data(f'{dataset}/diagrams/{method}hist_control_0.npy')

            if test == 'KS':
                # Kolmogorov-Smirnov test
                statistic, p_value = ks_2samp(hist_AD, hist_control)
            else:
                # Chi2: chi2_contingency takes ONE table of counts (its second
                # positional argument is the `correction` flag), so the two
                # histograms are stacked into a 2 x k table.
                # Assumes both histograms were built on the same bins - TODO confirm.
                observed = np.vstack([hist_AD, hist_control])
                # Bins that are empty in both groups would give zero expected
                # frequencies, which chi2_contingency rejects.
                observed = observed[:, observed.sum(axis=0) > 0]
                statistic, p_value, _, _ = chi2_contingency(observed)
            # The method is part of the label so the lines printed for the
            # different methods can be told apart.
            print(f"{dataset}, {method}, {test}, Test Statistic:", statistic)
            print(f"{dataset}, {method}, {test}, P-value:", p_value)
            table_dataset.append([method, dataset, p_value])
        table_test.append([dataset, table_dataset])
    table.append(table_test)

In [None]:
# Display the results gathered by the loop in the previous cell: one list per
# test, whose rows are [dataset, [[method, dataset, p_value], ...]].
table

# fMRI data

In [None]:
# fMRI dataset names; each is used as a sub-folder of 'fMRI_results/' below.
# Note: this rebinds the global `datasets` that the genetic cells iterate over,
# so re-run the genetic configuration cell before re-running those loops.
datasets = ['ADNI', 'CNI', 'OASIS'] 

### Persistence diagrams

In [None]:
# Two rows per dataset: [dataset, 'Dim 0' | 'Dim 1', [[p_value]]].
table = []
for dataset in datasets:
    # Diagram files of both groups for the two diagram dimensions.
    AD_dim0 = load_data(f'fMRI_results/{dataset}/AD_diagram_0.npy')
    control_dim0 = load_data(f'fMRI_results/{dataset}/control_diagram_0.npy')
    AD_dim1 = load_data(f'fMRI_results/{dataset}/AD_diagram_1.npy')
    control_dim1 = load_data(f'fMRI_results/{dataset}/control_diagram_1.npy')

    # Dimension 0: Kolmogorov-Smirnov test on the second column of each
    # diagram (presumably the death values - TODO confirm).
    ks_dim0 = ks_2samp(control_dim0[:, 1], AD_dim0[:, 1])
    print(f"{dataset}, dim 0, Test Statistic:", ks_dim0[0])
    print(f"{dataset}, dim 0,  P-value:", ks_dim0[1])

    # Dimension 1: Kolmogorov-Smirnov test on the landscape vectors, each
    # diagram being fitted and transformed on its own.
    landscape = gd.representations.Landscape(resolution=200)
    control_landscape = landscape.fit_transform([control_dim1])[0]
    AD_landscape = landscape.fit_transform([AD_dim1])[0]
    ks_dim1 = ks_2samp(control_landscape, AD_landscape)
    print(f"{dataset}, dim 1, Test Statistic:", ks_dim1[0])
    print(f"{dataset}, dim 1,  P-value:", ks_dim1[1])

    # Each p-value stays wrapped as [[p_value]], as in the genetic tables.
    table.append([dataset, 'Dim 0', [[ks_dim0[1]]]])
    table.append([dataset, 'Dim 1', [[ks_dim1[1]]]])

In [None]:
# Display the fMRI results from the previous cell: two rows per dataset,
# [dataset, 'Dim 0' | 'Dim 1', [[p_value]]].
table

### Weights distribution

In [None]:
# One entry per dataset, each of the form [[dataset, p_value]].
table = []
for dataset in datasets:
    # Only the '*_0' histogram files are needed for the test; the matching
    # '*_1' files are not used in this cell.
    hist_AD = load_data(f'fMRI_results/{dataset}/hist_AD_0.npy')
    hist_control = load_data(f'fMRI_results/{dataset}/hist_control_0.npy')

    # Kolmogorov-Smirnov test
    statistic, p_value = ks_2samp(hist_AD, hist_control)
    print(f"{dataset}, Test Statistic:", statistic)
    print(f"{dataset}, P-value:", p_value)
    table.append([[dataset, p_value]])

In [None]:
# Display the fMRI weight-distribution results from the previous cell:
# one [[dataset, p_value]] entry per dataset.
table

In [None]:
# Dataset whose histograms are plotted in the next cell.
dataset = 'ADNI'

# AD group: the '_0' file is used as the bar heights and the '_1' file as the
# bar positions by the plotting cell.
hist_AD, hist_AD_range = (
    load_data(f'fMRI_results/{dataset}/hist_AD_0.npy'),
    load_data(f'fMRI_results/{dataset}/hist_AD_1.npy'),
)

# Control group, same file layout.
hist_control, hist_control_range = (
    load_data(f'fMRI_results/{dataset}/hist_control_0.npy'),
    load_data(f'fMRI_results/{dataset}/hist_control_1.npy'),
)

In [None]:
# Overlay the patient and control histograms for `dataset`.
# Assumes hist_*_range hold evenly spaced bin edges (one more entry than
# hist_*), as np.histogram would return - TODO confirm.
fig, ax = plt.subplots()

# align='edge' anchors each bar at its left bin edge; the default 'center'
# would shift the whole histogram half a bin to the left.
ax.bar(hist_AD_range[:-1], hist_AD, width=(hist_AD_range[1] - hist_AD_range[0]),
       align='edge', label='patients', alpha=0.5)
ax.bar(hist_control_range[:-1], hist_control,
       width=(hist_control_range[1] - hist_control_range[0]),
       align='edge', color='r', label='controls', alpha=0.5)

ax.set_title(f'Weights distribution. {dataset} dataset.')
ax.set_xlabel('weight')
ax.legend()
plt.show()