The objective is to check whether any pair of parameters is correlated. The code is adapted to the 4-stage classification.

In [3]:
%matplotlib inline
%config IPCompleter.greedy=True

import copy
import os
import numpy as np
import pandas as pd
import matplotlib as mpl
import matplotlib.pyplot as plt
import seaborn as sns
import math as math
import scipy.stats as st
import auxiliary_functions as aux
from scipy.optimize import curve_fit


# Load the series and their segmentation. According to the original note, the
# loader also discards segments whose stage is indeterminate or whose most
# frequent stage covers less than 85% of the segment.
from load_data import load_data, redo_classification

# Keep DataFrame previews short in the notebook output.
pd.set_option("display.max_rows", 10)

data, seg_res = load_data()

# Reclassify the segments using the 4-stage system.
seg_res = redo_classification(seg_res, data)

In [4]:
# Build, for each of the 3 groups (indexed 0..2 in seg_res) and each of the 4
# series, a single DataFrame with every segment of every subject, keeping only
# the parameters analysed below.
#
# Resulting names (same as before):
#   seg_H, seg_N, seg_P -- lists with 4 DataFrames each (one per series)
#   temp                -- [seg_H, seg_N, seg_P]; it holds the very same lists
#   cols                -- final, ordered list of column names

# Extract the titles of all parameters (only needed to build an empty frame
# when a group/series has no segments at all).
cols = list(seg_res[0][0][0].columns)

# Original column name -> short name used in the analysis.
renamed_columns = {"size": "L", "mean": "Mean", "variance": "Var", "SleepStage": "Stage"}
# Parameters that are replaced by their natural logarithm.
log_columns = ["Mean", "Median", "Var", "L", "Dt"]

# Concatenate all dataframes of the segmentation data.
# The frames are collected first and concatenated once per (group, series):
# concatenating inside the loop copies the accumulated data at every step, and
# seeding the accumulation with an empty object-dtype frame relies on pandas
# ignoring empty frames when inferring dtypes, which is deprecated.
temp = []
for i in range(3):
    group_frames = []
    for k in range(4):
        pieces = [subject[k] for subject in seg_res[i] if not subject[k].empty]
        if pieces:
            group_frames.append(pd.concat(pieces))
        else:
            # No segments for this group/series: keep an empty frame with the
            # expected columns so the steps below still work.
            group_frames.append(pd.DataFrame(columns=cols))
    temp.append(group_frames)
seg_H, seg_N, seg_P = temp

# Rename and reorder the parameters in a convenient way
cols = ["L", "Dt", "T_i", "T_f", "Mean", "Median", "Var", "Stage"]
for group_frames in temp:
    for k, frame in enumerate(group_frames):
        frame = frame.rename(columns=renamed_columns)

        # Keep only the analysed parameters (this discards "start", "finish"
        # and the per-stage percentage columns), remove the segments labelled
        # with stage 66 and, as before, any row with a missing value.
        frame = frame.loc[frame["Stage"] != 66.0, cols].dropna()

        # Correct the index: the concatenated frames carry repeated indices,
        # so renumber the rows 1..n.
        frame.index = pd.RangeIndex(1, len(frame) + 1)

        # Substitute the values of some parameters by their logarithm
        for name in log_columns:
            frame[name] = np.log(frame[name].to_numpy(dtype=float))

        group_frames[k] = frame

In [7]:
# Scatter plots of every pair of parameters, one image per
# (group, series, pair), with the points coloured by stage.
# A single figure/axes pair is reused: the axes are cleared after each save.
fig, ax = plt.subplots(ncols = 1, nrows = 1)

colors = ["gold", "steelblue", "purple", "black"]  # colour of stage 0, 1, 2, 3
g_t = ["Hypertensive", "Normotensive", "Proband"]
serie_t = ["SBP", "DBP", "BBI-P", "BBI-EKG"]
cols = ["L", "Dt", "T_i", "T_f", "Mean", "Median", "Var"] 
path = "Plots/Measures/4 Groups/Pairs of Parameters/"


# Compute all subsets with size 2 from the set of parameters.
par_powerset = [x for x in aux.powerset(cols) if len(x) == 2]

# Plot and save a scatter plot of all pairs of parameters
for g in range(3):
    df = temp[g]
    for serie in range(4):
        # Make sure the output folder exists; savefig does not create it and
        # would otherwise fail with FileNotFoundError.
        out_dir = path+g_t[g]+"/"+serie_t[serie]+"/"
        os.makedirs(out_dir, exist_ok=True)

        # The rows of each stage do not depend on the pair being plotted, so
        # select them once per series.
        rows_by_stage = [df[serie][df[serie]["Stage"] == s] for s in range(4)]

        for par in par_powerset:
            for s, rows in enumerate(rows_by_stage):
                ax.scatter(rows[par[0]], rows[par[1]], color = colors[s], marker = '.')

            ax.set_xlabel(par[0])
            ax.set_ylabel(par[1])
            fig.savefig(out_dir+par[0]+"_vs_"+par[1]+".png")
            ax.cla()

# Close this specific figure (not whichever figure happens to be current).
plt.close(fig)