In [8]:
%matplotlib inline
%config IPCompleter.greedy=True

import copy
import os
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import math as math
import scipy.stats as st
import auxiliary_functions as aux
from scipy.optimize import curve_fit


# Keep DataFrame previews short when they are displayed in the notebook.
pd.set_option("display.max_rows", 10)

# Load the series and their segmentation. According to the original note,
# the loader also discards every segment whose stage is indeterminate or
# whose max frequency is below 85%.
from load_data import load_data
data, seg_res = load_data()

# Agglutinate (pool) the per-segment statistics of all subjects, keeping
# them separated only by group.
from aglutinates_statistics_6stages import aglutinates
(means, medians, variances, lengths,
 t_i, t_f, delta_t) = aglutinates(data, seg_res)

#### Compute the means, standard deviations and variances.

In [9]:
# Summary statistics (mean, variance, standard deviation, standard error) of
# every segment measure, per sleep stage. One CSV is written for each
# (group, series) pair under `path`.
path = "data/Measures_avg_std_var/"

series_titles = ["SBP", "DBP", "BBI-P", "BBI-EKG"]
values_titles = ["Mean", "Median", "Var", "Length", "T_i", "T_f", "D_t"]
groups_titles = ["Hypertensive", "Normotensive", "Proband"]
measures = [means, medians, variances, lengths, t_i, t_f, delta_t]

# Column labels are "<stage>: <statistic>". `vals` groups them by stage and
# `col` is the same list flattened, in stage order.
stage_labels = ["0", "1", "2", "3", "4", "REM"]
stat_labels = ["Mean", "Var", "SD", "SE"]
vals = [[stage + ": " + stat for stat in stat_labels] for stage in stage_labels]
col = [label for stage_cols in vals for label in stage_cols]

for group in range(len(groups_titles)):
    for series in range(len(series_titles)):
        values = pd.DataFrame(columns=col, index=values_titles)

        for x in range(len(stage_labels)):
            mean_col, var_col, sd_col, se_col = vals[x]
            for y in range(len(measures)):
                row = values_titles[y]
                sample = measures[y][group][series][x]

                # Write through .loc: chained indexing (values[c][r] = ...)
                # assigns into a temporary and is not guaranteed to update
                # `values`.
                # The variance goes in the "Var" column and the standard
                # deviation in the "SD" column (these two were swapped before).
                # NOTE(review): .var()/.std() use the container's default ddof
                # (0 for numpy arrays, 1 for pandas objects) while st.sem
                # defaults to ddof=1 -- confirm which estimator is intended.
                values.loc[row, mean_col] = sample.mean()
                values.loc[row, var_col] = sample.var()
                values.loc[row, sd_col] = sample.std()
                values.loc[row, se_col] = st.sem(sample)

        values.to_csv(path + groups_titles[group] + "/6 Stages/" + series_titles[series] + ".csv")
        
        


##### Kruskal–Wallis one-way analysis of variance

In [10]:
# Kruskal-Wallis H-test across the six sleep stages, run separately for each
# group, series and measure. Each group gets one table of p-values
# (rows: measures, columns: series), written as a CSV with "," as the
# decimal separator.
path = "data/tests/kruskal-wallis/6 Stages/"

idx = ["Mean", "Median", "Variance", "Length", "T_i", "T_f", "D_t"]
measures = [means, medians, variances, lengths, t_i, t_f, delta_t]
series_titles = ["SBP", "DBP", "BBI-P", "BBI-EKG"]
groups_titles = ["Hypertensive", "Normotensive", "Proband"]

hypertensive_df = pd.DataFrame(columns=series_titles, index=idx)
normotensive_df = pd.DataFrame(columns=series_titles, index=idx)
proband_df      = pd.DataFrame(columns=series_titles, index=idx)

groups = [hypertensive_df, normotensive_df, proband_df]

for g in range(len(groups)):
    for s in range(len(series_titles)):
        for m in range(len(measures)):
            temp = measures[m][g][s]
            # Element [1] of the kruskal result is the p-value.
            p_value = st.kruskal(temp[0], temp[1], temp[2], temp[3], temp[4], temp[5])[1]
            # Assign through .loc: chained indexing (df[col][row] = ...) is
            # not guaranteed to write into the DataFrame itself.
            groups[g].loc[idx[m], series_titles[s]] = p_value

for i in range(len(groups)):
    # `path` already ends with a separator; join avoids the doubled "/".
    groups[i].to_csv(os.path.join(path, groups_titles[i] + ".csv"), decimal=",")

##### Student's t-test

In [11]:
# Pairwise independent two-sample t-tests between sleep stages. One
# stage-by-stage table of p-values is written per (group, series, measure)
# as a CSV with "," as the decimal separator.
path = "data/tests/t-test/6 Stages/"

stages  = ["Vigil", "N1", "N2", "N3", "N4", "REM"]
groups_titles   = ["Hypertensive", "Normotensive", "Proband"]
series_titles   = ["SBP", "DBP", "BBI-P", "BBI-EKG"]
# Only the first len(measures) titles are used below; "alpha" has no
# matching entry in `measures` in this cell.
measures_titles = ["mean", "median", "variance", "length", "T_i", "T_f", "D_t", "alpha"]
measures = [means, medians, variances, lengths, t_i, t_f, delta_t]


# One table per group, series and measure
for g in range(len(groups_titles)):
    for s in range(len(series_titles)):
        for m in range(len(measures)):
            table = pd.DataFrame(columns=stages, index=stages)
            val = measures[m][g][s]

            for x in range(len(stages)):
                for y in range(len(stages)):
                    # Element [1] of the ttest_ind result is the p-value.
                    # Written through .loc (row = stages[y], column = stages[x])
                    # because chained indexing is not guaranteed to update
                    # `table` itself.
                    table.loc[stages[y], stages[x]] = st.ttest_ind(val[x], val[y])[1]

            table.to_csv(path + groups_titles[g] + "/" + series_titles[s] + "/" + measures_titles[m] + ".csv", decimal=',')

##### Kolmogorov–Smirnov (KS) two-sample test

In [12]:
# Pairwise two-sample Kolmogorov-Smirnov tests between sleep stages. One
# stage-by-stage table of p-values is written per (group, series, measure)
# as a CSV with "," as the decimal separator.
path = "data/tests/ks-test/6 Stages/"

stages  = ["Vigil", "N1", "N2", "N3", "N4", "REM"]
groups_titles   = ["Hypertensive", "Normotensive", "Proband"]
series_titles   = ["SBP", "DBP", "BBI-P", "BBI-EKG"]
# Only the first len(measures) titles are used below; "alpha" has no
# matching entry in `measures` in this cell.
measures_titles = ["mean", "median", "variance", "length", "T_i", "T_f", "D_t", "alpha"]
measures = [means, medians, variances, lengths, t_i, t_f, delta_t]


# One table per group, series and measure
for g in range(len(groups_titles)):
    for s in range(len(series_titles)):
        for m in range(len(measures)):
            table = pd.DataFrame(columns=stages, index=stages)
            val = measures[m][g][s]

            for x in range(len(stages)):
                for y in range(len(stages)):
                    # Element [1] of the ks_2samp result is the p-value.
                    # Written through .loc (row = stages[y], column = stages[x])
                    # because chained indexing is not guaranteed to update
                    # `table` itself.
                    table.loc[stages[y], stages[x]] = st.ks_2samp(val[x], val[y])[1]

            table.to_csv(path + groups_titles[g] + "/" + series_titles[s] + "/" + measures_titles[m] + ".csv", decimal=',')