In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib widget
import matplotlib.pyplot as plt
import numpy as np
import scipy
from scipy import signal, interpolate
import copy
import os 
import pandas as pd
from tqdm import tqdm

from pyCompare._plotBlandAltman import blandAltman  
from sklearn.metrics import cohen_kappa_score
from bishop_peaks_onsets import *
os.chdir('/home/kkotzen/research/PPG_sleepstaging_orion3/')
from pathlib import Path
from src.parsing.MESAParser import MESAParser
from src.parsing.CFSParser import CFSParser
from src.parsing.UHVParser import UHVParser
from src.parsing.SHHSParser import SHHSParser


from src.datasets.mesa_dataset import MESA_Dataset
import pickle

In [4]:
# Instantiate the MESA parser and keep a handle on its metadata table, which
# the per-patient loop further down filters by `id`.
dl = MESAParser()
metadata = dl.metadata
# patients = dl.database_all_patient_IDs
# sleep = {patient:dl.load_sleep(patient) for patient in patients}

In [5]:
# Search the metadata column names for the substring 'smok'. The smoking
# status read later comes from `smkstat5`, which this substring does not match.
[r for r in metadata.columns if 'smok' in r]

[]

In [6]:
# Load the pickled model outputs and index predictions/labels by patient ID.
# NOTE(review): pickle.load can execute arbitrary code; only load result files
# written by a trusted run.
probs_file = "/home/kkotzen/tmp/results/f9ec5834998d490aae460ef8ffbe20a8.pkl"
# Use a context manager so the file handle is closed as soon as loading ends.
with open(probs_file, "rb") as results_fh:
    predictions = pickle.load(results_fh)
# predictions['patients'], ["predictions"] and ["labels"] are indexed in parallel.
sleep_predictions = {patient:predictions["predictions"][i] for i,patient in enumerate(predictions['patients'])}
sleep_labels = {patient:predictions["labels"][i] for i,patient in enumerate(predictions['patients'])}
sleep_encoding =  {0: 0, 1: 1, 2: 1, 3: 2, 4: 3, 9:0}

In [7]:
# Build one row per patient: label/prediction agreement (Cohen's kappa),
# covariates read from the metadata table, and per-stage epoch counts for the
# labels and for the predictions.
# Rows are collected in a list and converted once at the end: DataFrame.append
# was removed in pandas 2.0 and re-copies the whole frame on every call.
records = []
for patient in tqdm.tqdm(sleep_predictions.keys()):

    ss = sleep_labels[patient]
    ss_pred = sleep_predictions[patient]
    kappa = cohen_kappa_score(ss, ss_pred)

    # An "arousal" is counted whenever the stage code drops from one epoch to
    # the next. Comparing shifted slices avoids a Python-level loop and does
    # not rely on subtraction (safe for unsigned dtypes).
    arousals = int(np.count_nonzero(ss[1:] < ss[:-1]))
    arousals_pred = int(np.count_nonzero(ss_pred[1:] < ss_pred[:-1]))

#     waso = np.sum(ss[np.where(ss > 0)[0]:np.where(ss > 0)[-1]] > 0
#     waso_pred = 

    # Filter the metadata once per patient instead of once per field.
    meta = metadata[metadata.id==patient]

    sex = meta.sex.values[0]
    age = meta.age.values[0]

    # A patient counts as on beta blockers if either of the two variables is set.
    beta = max(meta.beta5c.values[0], meta.betad5c.values[0])
    anti_arr = meta.anara5c.values[0]

    ahi = meta.ahi.values[0]
    hypertension = meta.htn5c.values[0]
    diabetic_cat = meta.dm035c.values[0]
    bmi_cat = meta.bmicat5c.values[0]
    insomnia = meta.insmnia5.values[0]
    apnea = meta.slpapnea5.values[0]
    race = meta.race1c_x.values[0]
    smoker = meta.smkstat5.values[0]

    records.append({'ID':patient, 'Kappa': kappa,'Gender':sex,"Age":age, 'Race':race, "Smoker": smoker,
                    "Beta Blockers":beta, "Anti Arrythmia": anti_arr,
                    'AHI':ahi, "Hypertension":hypertension,
                    "Diabetes Category": diabetic_cat, "BMI Category": bmi_cat,
                    "Insomnia": insomnia, 'Apnea':apnea,
                    "Wake": len(ss[ss==0]),"Light": len(ss[ss==1]),"Deep":len(ss[ss==2]),"REM": len(ss[ss==3]),
                    "Wake_Pred": len(ss_pred[ss_pred==0]),"Light_Pred": len(ss_pred[ss_pred==1]),"Deep_Pred":len(ss_pred[ss_pred==2]),"REM_Pred": len(ss_pred[ss_pred==3]),
                    "Arousals": arousals, "Arousals_Pred": arousals_pred})

# Column order follows the key order of the dicts above.
df = pd.DataFrame(records)


def smoker_simplify(x):
    """Collapse a smoking-status code to a binary flag.

    Code 0 maps to 0; codes 1, 2 and 3 all map to 1. Any other value
    (including NaN) matches nothing and yields None.
    """
    if x == 0:
        return 0
    if x == 1 or x == 2 or x == 3:
        return 1
    return None
    
def age_categories(x):
    """Bin an age into four ordinal groups.

    Returns 0 for x <= 60, 1 for 60 < x <= 65, 2 for 65 < x <= 70 and
    3 for x > 70. A value that fails every comparison (NaN) yields None.
    """
    # Upper bounds are inclusive and tested in ascending order, so the first
    # bound that x does not exceed identifies its bin.
    for code, upper in enumerate((60, 65, 70)):
        if x <= upper:
            return code
    if x > 70:
        return 3
    return None
    
def ahi_categories(x):
    """Bin an AHI value into four ordinal groups.

    Returns 0 for x <= 5, 1 for 5 < x <= 15, 2 for 15 < x <= 30 and
    3 for x > 30. A value that fails every comparison (NaN) yields None.
    """
    # Inclusive upper bounds, tested in ascending order.
    for code, upper in enumerate((5, 15, 30)):
        if x <= upper:
            return code
    if x > 30:
        return 3
    return None

def diab_categories(x):
    """Reduce the diabetes category code to binary: 0 when x < 2, otherwise 1.

    NaN compares false against 2 and therefore maps to 1.
    """
    return 0 if x < 2 else 1
    
def gender_swap(x):
    """Recode gender: the value 1 becomes 0, every other value becomes 1."""
    return 0 if x == 1 else 1
    
# Recode covariates into the groupings used by the plots. Gender is overwritten
# in place, so re-running only these lines on an already-recoded frame would
# flip it back.
df["Age Group"] = df["Age"].apply(age_categories)
df["Gender"] = df["Gender"].apply(gender_swap)
df["Diabetes"] = df["Diabetes Category"].apply(diab_categories)
df["Smoker"] = df["Smoker"].apply(smoker_simplify)

df["Apnea Severity"] = df["AHI"].apply(ahi_categories)

# Derive the same summary metrics for the labels (no suffix) and for the
# predictions ("_Pred"). Percentages are relative to total sleep epochs;
# efficiency is sleep epochs over all epochs.
for suffix in ("", "_Pred"):
    asleep = df[f"REM{suffix}"] + df[f"Light{suffix}"] + df[f"Deep{suffix}"]
    df[f"Total Sleep{suffix}"] = asleep
    for stage in ("REM", "Light", "Deep"):
        df[f"{stage}%{suffix}"] = 100 * df[f"{stage}{suffix}"] / asleep
    if suffix:
        # Kappa only exists for the prediction; mirror it under the "_Pred"
        # name so it can be addressed like the other predicted metrics.
        df["Kappa_Pred"] = df["Kappa"]
    df[f"Sleep Efficiency{suffix}"] = 100 * asleep / (asleep + df[f"Wake{suffix}"])

# Convert epoch counts to hours (120 epochs per hour).
for total_col in ("Total Sleep", "Total Sleep_Pred"):
    df[total_col] = df[total_col] / 120


# Final display names: add units, then turn "X%" into "X (%)".
display_names = {"Total Sleep": "Total Sleep (hours)", "Total Sleep_Pred": "Total Sleep (hours)_Pred",
                 "Arousals": "Total Arousals", "Arousals_Pred": "Total Arousals_Pred"}
df = df.rename(columns=display_names)
percent_names = {c: c.replace("%", " (%)") for c in df.columns if "%" in c}
df = df.rename(columns=percent_names)

100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 204/204 [00:02<00:00, 82.82it/s]


In [8]:
# Inspect the final column names (raw counts, recoded covariates, derived metrics).
df.columns

Index(['ID', 'Kappa', 'Gender', 'Age', 'Race', 'Smoker', 'Beta Blockers',
       'Anti Arrythmia', 'AHI', 'Hypertension', 'Diabetes Category',
       'BMI Category', 'Insomnia', 'Apnea', 'Wake', 'Light', 'Deep', 'REM',
       'Wake_Pred', 'Light_Pred', 'Deep_Pred', 'REM_Pred', 'Total Arousals',
       'Total Arousals_Pred', 'Age Group', 'Diabetes', 'Apnea Severity',
       'Total Sleep (hours)', 'REM (%)', 'Light (%)', 'Deep (%)',
       'Sleep Efficiency', 'Total Sleep (hours)_Pred', 'REM (%)_Pred',
       'Light (%)_Pred', 'Deep (%)_Pred', 'Kappa_Pred',
       'Sleep Efficiency_Pred'],
      dtype='object')

# Predicted vs. actual sleep metrics

In [42]:
import matplotlib.transforms as mtransforms

# Scatter every ground-truth sleep metric against its prediction (one panel per
# metric) with a first-order least-squares line and identical x/y ranges.
plt.close("all")
Xs = ["Total Sleep (hours)", "Light (%)","Deep (%)","REM (%)", "Sleep Efficiency", 'Total Arousals']

fig, axs = plt.subplots(3, 2, figsize=(4,6))
# The panel-letter offset is identical for every axes, so build it once
# instead of once per loop iteration.
trans = mtransforms.ScaledTranslation(-20/72, 7/72, fig.dpi_scale_trans)
for i, G in enumerate(Xs):
    ax = axs.flat[i]  # row-major order, same panel as axs[i // 2][i % 2]

    # Panel letter "(a)", "(b)", ... anchored at the top-left corner of the axes.
    ax.text(0.0, 1.0, f"({chr(i+97)})", transform=ax.transAxes + trans,
            fontsize='medium', va='bottom', fontfamily='serif')

    # Linear fit of the prediction on the ground truth.
    m, b = np.polyfit(df[G], df[f"{G}_Pred"], 1)
    ax.plot(df[G], m*df[G] + b, color='orange', alpha=1 )

    ax.scatter(df[G], df[f"{G}_Pred"], s = 5, alpha=0.5, c='#6495ED')
    ax.set_title(f'{G}', size=10)
    ax.set_xlabel("Ground Truth", size=9)
    ax.set_ylabel("Prediction", size=9)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    # Use one common range on both axes so the identity line sits on the diagonal.
    mins = min(ax.get_xlim()[0], ax.get_ylim()[0])
    maxs = max(ax.get_xlim()[1], ax.get_ylim()[1])
    ax.set_xlim([mins, maxs])
    ax.set_ylim([mins, maxs])
    ax.tick_params(axis='both', which='major', labelsize=8)

# Align the y-labels across ALL panels. The previous call passed `ax`, i.e.
# only the last panel, which aligned nothing.
fig.align_ylabels(axs)
plt.tight_layout()
# NOTE(review): the file name says "bland-altman" although this is a scatter plot.
plt.savefig('/home/kkotzen/images/sleep_paper/fig-bland-altman-metrics.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [26]:
# Plot the prediction error (prediction - ground truth) of every sleep metric
# against its ground-truth value, with a first-order trend line per panel.
Xs = ["Total Sleep (hours)", "Light (%)","Deep (%)","REM (%)", "Sleep Efficiency", 'Total Arousals']

plt.close('all')
# Signed error; positive means the model over-estimates the metric.
for X in Xs:
    df[f"{X}_Error"] =  df[f"{X}_Pred"] - df[X]

err_cols = [c for c in df.columns if "Error" in c]
err_ticks = [c.replace("_Error", "") for c in err_cols]

# Error-only frame with the "_Error" suffix stripped from the column names.
df_bp = df[err_cols].rename(columns={err_col:err_tick for err_col, err_tick in zip(err_cols, err_ticks)})
fig, ax = plt.subplots(3, 2, figsize=(5,5))
# Walk the panels in row-major order alongside the metrics; this replaces the
# manual counter and the repeated ax[int(i/2)][i%2] lookups. The unused
# per-metric sub-frame that was built here has been dropped.
for panel, X in zip(ax.flat, Xs):
    m, b = np.polyfit(df[X], df[f"{X}_Error"], 1)
    panel.plot(df[X], m*df[X] + b, color='orange', alpha=1 )
    panel.scatter(df[X], df[f"{X}_Error"], s = 5, alpha=0.5, c='#6495ED')
    panel.set_title(X, size=8)
    panel.set_xlabel("Ground Truth", size=8)
    panel.set_ylabel("Prediction Error", size=8)
    panel.spines['right'].set_visible(False)
    panel.spines['top'].set_visible(False)
fig.align_ylabels(ax)
plt.tight_layout()
plt.savefig('/home/kkotzen/images/sleep_paper/fig-band-altman.pdf')

Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [16]:
from pyCompare._plotBlandAltman import blandAltman

# One Bland-Altman panel per sleep metric, comparing prediction with ground truth.
Xs = ["Total Sleep (hours)", "Light (%)","Deep (%)","REM (%)", "Sleep Efficiency", 'Total Arousals']

fig, ax = plt.subplots(3, 2, figsize=(8,8))
# ax.flat walks the 3x2 grid in row-major order, matching the metric order.
for panel, X in zip(ax.flat, Xs):
    blandAltman(df[f"{X}_Pred"], df[X], title=X, ax=panel)
plt.tight_layout()
plt.savefig('/home/kkotzen/fig-band-altman.pdf')



Canvas(toolbar=Toolbar(toolitems=[('Home', 'Reset original view', 'home', 'home'), ('Back', 'Back to previous …

In [None]:
# Box plots (outliers hidden) of the signed prediction error of each sleep metric.
Xs = ["Total Sleep (hours)", "Light (%)","Deep (%)","REM (%)", "Sleep Efficiency", 'Total Arousals']

plt.close('all')
for metric in Xs:
    df[f"{metric}_Error"] = df[f"{metric}_Pred"] - df[metric]

# Every column carrying "Error" in its name, and the same names without the suffix.
err_cols = [c for c in df.columns if "Error" in c]
err_ticks = [c.replace("_Error", "") for c in err_cols]


df_bp = df[err_cols].rename(columns=dict(zip(err_cols, err_ticks)))
fig, ax = plt.subplots(3, 2, figsize=(3,4))
# Panels are filled in row-major order, one per error column.
for panel, err_col in zip(ax.flat, df_bp.columns):
    df_bp[err_col].plot.box(ax=panel, showfliers=False)
    panel.get_xaxis().set_ticks([])
    panel.set_title(err_col, size=8)
plt.tight_layout()
plt.savefig('/home/kkotzen/fig-sleep-metrics-error-box.pdf')

In [None]:
# Distinct values of the recoded Smoker column and how often each occurs.
np.unique(df.Smoker.values, return_counts=True)

# Sridhar et al. images

In [None]:
# Group comparison figure: rows are grouping variables, columns are sleep
# metrics. Each panel shows the group mean +/- std of the ground truth (blue *)
# and of the prediction (red o).
Gs={}
Gs["Gender"]={0:"Male", 1:"Female"}
# Labels describe the bins age_categories actually produces
# (<=60, 60-65, 65-70, >70); the previous "<55 / 55-60 / 61-70" text did not.
Gs["Age Group"]={0:"<=60", 1:">60-65",2:">65-70",3:">70"}
Gs['Apnea Severity']={0:"Healthy", 1:"Mild", 2:"Moderate", 3:"Severe"}
Gs["Hypertension"]={0:"Absence", 1:"Presence"}
Xs = ["Light (%)","Deep (%)","REM (%)",  "Sleep Efficiency"]

# Prefix every label with its group size. Labels are matched to groups by the
# group's code (not by position), so a missing or extra category cannot shift
# the labels onto the wrong group.
tick_labels = {}
for G in Gs.keys():
    group_sizes = df.groupby(G)["ID"].count()
    for code, n in group_sizes.items():
        Gs[G][code] = f"(n={n})\n{Gs[G].get(code, code)}"
    # One label per group that is present in the data, in groupby (plot) order.
    tick_labels[G] = [Gs[G][code] for code in group_sizes.index]

print(Gs)

plt.close("all")
fig, ax = plt.subplots(len(Gs), len(Xs), figsize=(12,8))
plot_cols = Xs + [f"{X}_Pred" for X in Xs]
for i, G in enumerate(Gs.keys()):
    # Aggregate once per grouping variable and only over the plotted columns;
    # aggregating the whole frame is slow and fails on non-numeric columns in
    # recent pandas versions.
    grouped = df.groupby(G)[plot_cols]
    means, stds = grouped.mean(), grouped.std()
    x = range(len(means))
    for j, X in enumerate(Xs):
        panel = ax[i][j]
        panel.errorbar(x = x, y = means[X], yerr=stds[X], marker="*", color="b")
        panel.errorbar(x = x, y = means[f"{X}_Pred"], yerr=stds[f"{X}_Pred"], marker="o", color="r")
        panel.set_title(f'{X} vs {G}', size=8)
        panel.set_ylabel(f'{X}', size=8)
        panel.set_xlabel(f'{G}', size=8)
        panel.set_xticks(list(x))
        panel.set_xticklabels(tick_labels[G], size=8)
        # Half a unit of padding on both sides of the first/last group.
        panel.set_xlim([-0.5, len(means) - 0.5])

plt.tight_layout()

plt.savefig('/home/kkotzen/fig-clinical-correlations.pdf')

In [None]:
# Same group comparison as the clinical-correlations figure, for the remaining
# grouping variables: group mean +/- std of the ground truth (blue *) and of
# the prediction (red o).
Gs = {}
Gs["Race"] = {1: "White", 2: "Chinese", 3: "Black", 4: "Hispanic"}
Gs["Smoker"] = {0: "No", 1: "Yes"}
Gs["Beta Blockers"]= {0: "No", 1: "Yes"}
Gs["Diabetes"]= {0:"Absence", 1:"Presence"}
Gs["Insomnia"]= {0:"Absence", 1:"Presence"}
# Gs["Apnea"]= {0:"Absence", 1:"Presence"}

# The percentage columns were renamed from "X%" to "X (%)" when the table was
# built; the old names no longer exist and raised KeyError here.
Xs = ["Light (%)","Deep (%)","REM (%)",  "Sleep Efficiency"]

# Prefix every label with its group size, matching labels to groups by code
# rather than by position.
tick_labels = {}
for G in Gs.keys():
    group_sizes = df.groupby(G)["ID"].count()
    for code, n in group_sizes.items():
        Gs[G][code] = f"(n={n})\n{Gs[G].get(code, code)}"
    # One label per group that is present in the data, in groupby (plot) order.
    tick_labels[G] = [Gs[G][code] for code in group_sizes.index]

plt.close("all")
fig, ax = plt.subplots(len(Gs), len(Xs), figsize=(12,12))
plot_cols = Xs + [f"{X}_Pred" for X in Xs]
for i, G in enumerate(Gs.keys()):
    # Aggregate once per grouping variable, restricted to the plotted columns.
    grouped = df.groupby(G)[plot_cols]
    means, stds = grouped.mean(), grouped.std()
    x = range(len(means))
    for j, X in enumerate(Xs):
        panel = ax[i][j]
        panel.errorbar(x = x, y = means[X], yerr=stds[X], marker="*", color="b")
        panel.errorbar(x = x, y = means[f"{X}_Pred"], yerr=stds[f"{X}_Pred"], marker="o", color="r")
        panel.set_title(f'{X} vs {G}', size=8)
        panel.set_ylabel(f'{X}', size=8)
        panel.set_xlabel(f'{G}', size=8)
        panel.set_xticks(list(x))
        panel.set_xticklabels(tick_labels[G], size=8)
        panel.set_xlim([-0.5, len(means) - 0.5])

plt.tight_layout()


In [None]:
# Cohen's kappa (mean +/- std) per group, one panel per grouping variable,
# laid out on a 3x4 grid.
Gs = {}
Gs["Gender"]={0:"Male", 1:"Female"}
# Labels describe the bins age_categories actually produces
# (<=60, 60-65, 65-70, >70); the previous "<55 / 55-60 / 61-70" text did not.
Gs["Age Group"]={0:"<=60", 1:">60-65",2:">65-70",3:">70"}
Gs["Race"] = {1: "White", 2: "Chinese", 3: "Black", 4: "Hispanic"}
Gs["Smoker"] = {0: "No", 1: "Yes"}
Gs['Apnea Severity']={0:"Healthy", 1:"Mild", 2:"Moderate", 3:"Severe"}
Gs["Hypertension"]={0:"Absence", 1:"Presence"}
Gs["Beta Blockers"]= {0: "No", 1: "Yes"}
Gs["Diabetes"]= {0:"Absence", 1:"Presence"}
Gs["Insomnia"]= {0:"Absence", 1:"Presence"}
Gs["Apnea"]= {0:"Absence", 1:"Presence"}

X= "Kappa"

# Prefix every label with its group size, matching labels to groups by code
# rather than by position.
tick_labels = {}
for G in Gs.keys():
    group_sizes = df.groupby(G)["ID"].count()
    for code, n in group_sizes.items():
        Gs[G][code] = f"(n={n})\n{Gs[G].get(code, code)}"
    # One label per group that is present in the data, in groupby (plot) order.
    tick_labels[G] = [Gs[G][code] for code in group_sizes.index]

print(Gs)

plt.close("all")
fig, ax = plt.subplots(3,4, figsize=(12,8))
panels = ax.flat  # row-major walk over the grid; replaces manual row/column arithmetic
for panel, G in zip(panels, Gs.keys()):
    # Aggregate only the plotted column, once per grouping variable.
    grouped = df.groupby(G)[f"{X}_Pred"]
    means, stds = grouped.mean(), grouped.std()
    x = range(len(means))
    panel.errorbar(x = x, y = means, yerr=stds, marker="o", color="r")
    panel.set_title(f'{X} vs {G}', size=10)
    panel.set_ylabel(f'{X}', size=10)
    panel.set_xlabel(f'{G}', size=10)
    panel.set_xticks(list(x))
    panel.set_xticklabels(tick_labels[G], size=10)
    panel.set_xlim([-0.5, len(means) - 0.5])

# Hide the grid cells that have no grouping variable assigned.
for unused in ax.flat[len(Gs):]:
    unused.axis('off')

plt.tight_layout()


In [None]:
# Re-create the MESA parser and load, for every patient ID it lists, whatever
# dl.load_sleep returns into a dict keyed by patient ID.
dl = MESAParser()
metadata = dl.metadata
patients = dl.database_all_patient_IDs
sleep = {patient:dl.load_sleep(patient) for patient in patients}


In [None]:
# Count how often each distinct value occurs in every patient's sleep record.
# Rows are collected in a list and converted once: DataFrame.append was removed
# in pandas 2.0 and re-copies the frame on every call.
count_rows = []
for patient in tqdm.tqdm(patients):
    unique, counts = np.unique(sleep[patient], return_counts=True)
    # Column names are the string form of each value (e.g. "9.0" for a float 9).
    z = {f"{u}":c for u,c in zip(unique, counts)}
    count_rows.append({"ID": patient, **z})
# Values a patient never has become NaN in that patient's row.
df_count = pd.DataFrame(count_rows)

In [None]:
# Rank patients by how many entries carry the value rendered as '9.0'.
# NOTE(review): this column only exists if the value 9 occurs and the records
# are float-typed (the column name is the f-string form of the value) - confirm.
df_count.sort_values(by=['9.0'], ascending=False)

In [None]:
%load_ext autoreload
%autoreload 2
%matplotlib widget
import matplotlib.pyplot as plt
import numpy as np
import scipy
from scipy import signal, interpolate
import copy
import os 
import pandas as pd
from tqdm import tqdm
from sklearn.metrics import cohen_kappa_score

os.chdir('/home/kkotzen/research/PPG_Sleep_staging_Orion2_windows/')
from pathlib import Path
from src.parsing.MESAParser import MESAParser
from src.parsing.CFSParser import CFSParser
from src.parsing.UHVParser import *


In [None]:
# MESA cohort summary: age, AHI and the per-patient share of each sleep stage.
sleep_stages = {0:"Wake", 1:"Light", 2:"Light", 3:"Deep", 4:"REM"}
dl = MESAParser()
metadata = dl.metadata
patients = dl.database_all_patient_IDs
# Collect rows in a list and build the frame once (DataFrame.append was removed
# in pandas 2.0 and is quadratic inside a loop).
mesa_rows = []
for patient in tqdm.tqdm(patients):
    # Named `hypnogram` so the `sleep` dict built in an earlier cell is not clobbered.
    hypnogram = dl.load_sleep(patient)
    hypnogram[hypnogram>4] = 0  # every code above 4 is treated as Wake
    hypnogram = np.array([sleep_stages[s] for s in hypnogram])
    stage, count = np.unique(hypnogram, return_counts=True)

    meta = metadata[metadata.id==patient]
    age = meta.age.values[0]
    ahi = meta.ahi.values[0]

    # Stage share in whole percent (truncated, not rounded).
    mesa_rows.append({"Age":age ,"AHI":ahi,**{str(s):int(c*100) for s,c in zip(stage,count/sum(count))}})
df_mesa = pd.DataFrame(mesa_rows)

display(df_mesa[["Age", "AHI", "Wake", "Light", "Deep", "REM"]].describe().astype(int))
# Overall stage distribution across the cohort, normalised to sum to 1.
display(df_mesa[['Wake', 'Light', 'Deep', 'REM']].sum()/(df_mesa[['Wake', 'Light', 'Deep', 'REM']].sum().sum()))

In [None]:
# CFS cohort summary: age, AHI and the per-patient share of each sleep stage.
sleep_stages = {0:"Wake", 1:"Light", 2:"Light", 3:"Deep", 4:"REM"}
dl = CFSParser()
metadata = dl.metadata
patients = dl.database_all_patient_IDs
# Collect rows in a list and build the frame once (DataFrame.append was removed
# in pandas 2.0 and is quadratic inside a loop).
cfs_rows = []
for patient in tqdm.tqdm(patients):
    # Named `hypnogram` so the `sleep` dict built in an earlier cell is not clobbered.
    hypnogram = dl.load_sleep(patient)
    hypnogram[hypnogram>4] = 0  # every code above 4 is treated as Wake
    hypnogram = np.array([sleep_stages[s] for s in hypnogram])
    stage, count = np.unique(hypnogram, return_counts=True)

    meta = metadata[metadata.id==patient]
    age = meta.age.values[0]
    ahi = meta.ahi.values[0]

    # Stage share in whole percent (truncated, not rounded).
    cfs_rows.append({"Age":age ,"AHI":ahi,**{str(s):int(c*100) for s,c in zip(stage,count/sum(count))}})
df_cfs = pd.DataFrame(cfs_rows)

# This table was previously a bare expression in the middle of the cell, so it
# was computed and thrown away; display() makes it visible.
display(df_cfs[["Age", "AHI", "Wake", "Light", "Deep", "REM"]].describe().astype(int))
# Overall stage distribution across the cohort, normalised to sum to 1.
display(df_cfs[['Wake', 'Light', 'Deep', 'REM']].sum()/(df_cfs[['Wake', 'Light', 'Deep', 'REM']].sum().sum()))

In [None]:
# UHV cohort summary: age, AHI and the per-patient share of each sleep stage,
# computed on at most the first 1200 entries of every record.
sleep_stages = {0:"Wake", 1:"Light", 2:"Light", 3:"Deep", 4:"REM"}
dl = UHVParser()
metadata = dl.metadata
patients = dl.database_all_patient_IDs
# Collect rows in a list and build the frame once. The previous code assigned
# each appended frame to `df_ss`, so `df_uhv` stayed empty and the summary
# below failed on missing columns.
uhv_rows = []
# NOTE(review): this cell calls `tqdm(...)` while the other dataset cells call
# `tqdm.tqdm(...)`; only one form can work for a given binding of `tqdm` - confirm.
for patient in tqdm(patients):
    # Named `hypnogram` so the `sleep` dict built in an earlier cell is not clobbered.
    hypnogram = dl.load_sleep(patient)
    L = len(hypnogram)
    hypnogram = hypnogram[0:min(1200, L)]
    hypnogram[hypnogram>4] = 0  # every code above 4 is treated as Wake
    hypnogram = np.array([sleep_stages[s] for s in hypnogram])
    stage, count = np.unique(hypnogram, return_counts=True)
    # The metadata table is keyed by the ID left-padded with zeros to 4 characters.
    patient = patient.zfill(4)
    meta = metadata[metadata.id==patient]
    age = meta.age.values[0]
    ahi = meta.ahi.values[0]

    # Stage share in whole percent (truncated, not rounded).
    uhv_rows.append({"Age":age ,"AHI":ahi,**{str(s):int(c*100) for s,c in zip(stage,count/sum(count))}})
df_uhv = pd.DataFrame(uhv_rows)

df_uhv[["Age", "AHI", "Wake", "Light", "Deep", "REM"]].describe().astype(int)

In [None]:
# SHHS cohort table: age, AHI and the per-patient share of each sleep stage,
# computed on at most the first 1200 entries of every record.
sleep_stages = {0:"Wake", 1:"Light", 2:"Light", 3:"Deep", 4:"REM"}
dl = SHHSParser()
metadata = dl.metadata
patients = dl.database_all_patient_IDs
# Collect rows in a list and build the frame once (DataFrame.append was removed
# in pandas 2.0 and is quadratic inside a loop).
shhs_rows = []
for patient in tqdm.tqdm(patients):
    # Named `hypnogram` so the `sleep` dict built in an earlier cell is not clobbered.
    hypnogram = dl.load_sleep(patient)
    hypnogram[hypnogram>4] = 0  # every code above 4 is treated as Wake
    L = len(hypnogram)
    hypnogram = hypnogram[0:min(1200, L)]
    hypnogram = np.array([sleep_stages[s] for s in hypnogram])
    stage, count = np.unique(hypnogram, return_counts=True)
    # The metadata table is keyed by the ID left-padded with zeros to 4 characters.
    patient = patient.zfill(4)
    meta = metadata[metadata.id==patient]
    age = meta.age.values[0]
    ahi = meta.ahi.values[0]

    # Stage share in whole percent (truncated, not rounded).
    shhs_rows.append({"Age":age ,"AHI":ahi,**{str(s):int(c*100) for s,c in zip(stage,count/sum(count))}})
df_shhs = pd.DataFrame(shhs_rows)
    

In [None]:
# Overall SHHS stage distribution: per-stage column sums divided by their grand
# total, so the four values sum to 1.
df_shhs[['Wake', 'Light', 'Deep', 'REM']].sum()/df_shhs[['Wake', 'Light', 'Deep', 'REM']].sum().sum()

In [None]:
# Summary statistics of the SHHS table, cast to integers for display.
df_shhs[["Age", "AHI", "Wake", "Light", "Deep", "REM"]].describe().astype(int)

In [None]:
# One row of column means per dataset. The rows are assembled in a single
# DataFrame constructor call because DataFrame.append was removed in pandas 2.0.
# NOTE(review): df_shhs is computed earlier in the notebook but not included
# here - confirm that is intended.
df_mean = pd.DataFrame([
    df_mesa.describe().loc['mean'],
    df_cfs.describe().loc['mean'],
    df_uhv.describe().loc['mean'],
]).reset_index(drop=True)  # replace the repeated 'mean' row label with 0..2
df_mean['Database'] = ["MESA", "CFS", "UHV"]

In [None]:
# Show the per-dataset means with datasets as columns, cast to integers.
df_mean.set_index("Database").T.astype(int)

In [None]:
signal = [ 0.02733224  0.04415357  0.01877674  0.01252067  0.00427673  0.00063101, -0.00340614 -0.01462032 -0.0420445  -0.05445285 -0.05848426 -0.05293171, -0.0470555  -0.04154022 -0.03886274 -0.02706452 -0.02130373 -0.01734306, -0.01543858 -0.01642765 -0.01365838 -0.01440898 -0.01305822 -0.0126274, -0.01800367 -0.00933407  0.02925392  0.04327351  0.03564565  0.02902274,  0.023693    0.01491019  0.01186096  0.01508265  0.02785736  0.04554284,  0.02130335  0.01325256  0.00495215  0.00143763 -0.00316563 -0.01156667, -0.03916301 -0.0531692  -0.05868149 -0.0537958  -0.04805329 -0.04167048, -0.03986189 -0.02861848 -0.02175688 -0.01776918 -0.01550705 -0.0164148, -0.01409336 -0.01402298 -0.01365238 -0.01207183 -0.01750164 -0.01248499,  0.02401167  0.04404541  0.0361365   0.03028069  0.02399787  0.01684959,  0.01076118  0.01536139  0.02410212  0.04636229  0.02412428  0.01386852,  0.00604072  0.00193484 -0.00288783 -0.00901356 -0.0356952  -0.05207615, -0.05855218 -0.05463582 -0.04894619 -0.04216251 -0.0...
          

# Hypnograms for paper

In [None]:
def duplicate(x):
    """Return a flat array in which every element of `x` appears twice in a row.

    Example: [a, b] -> array([a, a, b, b]).
    """
    pairs = []
    for item in x:
        pairs.append([item, item])
    return np.array(pairs).flatten()


def hold_line(t, x):
    """Build the vertices of a step (sample-and-hold) line.

    `t` holds the interval edges and `x` the value held on each interval, so
    `t` must have exactly one more entry than `x`. If both have the same
    length, a final edge is extrapolated using the last spacing of `t`.

    Returns:
        (t_, x_): every value of `x` duplicated, and every edge of `t`
        duplicated with the first and last copy removed, so both arrays have
        the same length and trace horizontal segments.

    Raises:
        ValueError: if the lengths are incompatible.
    """
    edges = list(t)
    values = list(x)

    surplus = len(edges) - len(values)
    if surplus == 0:
        # Close the last interval by repeating the final step width.
        edges.append(edges[-1] + edges[-1] - edges[-2])
        surplus = 1
    if surplus != 1:
        raise ValueError()

    return duplicate(edges)[1:-1], duplicate(values)

def label_hypnogram(ax, ticks):
    """Label a hypnogram axes.

    Sets the x-label to 'Time (Hours)' and places one y-tick per key of
    `ticks`, labelled with the corresponding value.
    """
    positions = list(ticks.keys())
    names = list(ticks.values())
    ax.set_xlabel('Time (Hours)')
    ax.set_yticks(positions)
    ax.set_yticklabels(names)
    
def plot_hypnogram(label, prediction, patient, ticks):
    """Draw the expert-scored and model-scored hypnograms of one patient.

    Creates a new two-row figure: the top panel shows `label` as a step line,
    the bottom panel shows `prediction` as a step line with a red dot at the
    expert label wherever the two disagree. `ticks` maps stage code to the
    name shown on the y-axis. Nothing is returned; the figure is left as the
    current pyplot figure.
    """
    # Index-to-hours conversion; presumably 30-second epochs - TODO confirm.
    epochs_per_hour = 120
    legend_style = dict(ncol=1, fancybox=True, shadow=True)

    f, ax = plt.subplots(2, 1, figsize=(8, 5))
    top, bottom = ax[0], ax[1]

    # Top panel: expert scoring.
    top.set_title(f"Expert Scored Hypnogram for patient {patient}.")
    edges, held = hold_line(np.arange(label.shape[0]), label)
    top.plot(edges / epochs_per_hour, held, color='black', label='Labels')
    top.legend(loc='upper left', bbox_to_anchor=(0, 1.1), **legend_style)
    label_hypnogram(top, ticks)

    # Bottom panel: model scoring plus the correct stage at every mismatch.
    bottom.set_title(f"Model Scored Hypnogram for patient {patient}.")
    edges, held = hold_line(np.arange(prediction.shape[0]), prediction)
    bottom.plot(edges / epochs_per_hour, held, color='black', label='Prediction')
    wrong_idx = [idx for idx, predicted in enumerate(prediction) if predicted != label[idx]]
    expert_at_wrong = label[wrong_idx]
    bottom.plot(np.array(wrong_idx) / epochs_per_hour, expert_at_wrong, '.',
                color='red', alpha=0.6, label='Correction')
    bottom.legend(loc='upper left', bbox_to_anchor=(0, 1.2), **legend_style)
    label_hypnogram(bottom, ticks)

    plt.tight_layout()


In [None]:
# Render and save one hypnogram figure per patient from the pickled results.
# NOTE(review): pickle.load can execute arbitrary code; only load result files
# written by a trusted run.
probs_file = "/home/kkotzen/tmp/results/f9ec5834998d490aae460ef8ffbe20a8.pkl"
# Use a context manager so the file handle is closed as soon as loading ends.
with open(probs_file, "rb") as results_fh:
    predictions = pickle.load(results_fh)
sleep_predictions = {patient:predictions["predictions"][i] for i,patient in enumerate(predictions['patients'])}
sleep_labels = {patient:predictions["labels"][i] for i,patient in enumerate(predictions['patients'])}
sleep_encoding =  {0: 0, 1: 1, 2: 1, 3: 2, 4: 3, 9:0}
sleep_decoding = {0: 'Wake', 1: 'Light', 2:'Deep', 3:'REM'}

for patient in tqdm.tqdm(sleep_labels.keys()):
    # Close the previous patient's figure so figures do not pile up in memory.
    plt.close('all')
    labels, preds = sleep_labels[patient], sleep_predictions[patient]
    kappa = cohen_kappa_score(labels, preds)
    # Share of epochs where label and prediction agree, as a truncated whole percent.
    accuracy = np.sum([1 for i,j in zip(labels, preds) if i==j])/len(labels)
    accuracy = int(accuracy*100)
    plot_hypnogram(labels, preds, patient, sleep_decoding)
    # Kappa and accuracy are part of the file name so a directory listing can be sorted by them.
    plt.savefig(f'/home/kkotzen/images/sleep_paper/hypnograms/fig-hypnogram--{kappa}-{accuracy}-{patient}.pdf')

# Print a pretty hypnogram

In [None]:
# Point the module-level parser `dl` at the UHV dataset; pretty_hypno reads
# sleep records through this global.
dl = UHVParser()
patients = dl.database_all_patient_IDs

In [None]:
def pretty_hypno(patient):
    """Plot the five-level hypnogram (Wake, N1, N2, N3, REM) of one patient.

    The record is read through the module-level parser `dl`. Every code above
    4 is drawn as Wake, and the time axis assumes one entry per 30 seconds.
    A new figure is created and shown; nothing is returned.
    """
    # Local import: the notebook imports matplotlib, but not this locator.
    from matplotlib.ticker import MaxNLocator

    sleep = dl.load_sleep(patient)
    sleep[sleep>4] = 0
    t = np.arange(0, len(sleep)*30, 30)/3600  # entry index -> hours
#     plt.close('all')
    fig, ax = plt.subplots(1,1,figsize=(8,4))
    ax.plot(t, sleep)
    # Put the x ticks on whole hours. The previous code relabelled the
    # automatic ticks with their integer part, which mislabels fractional ticks
    # (e.g. 2.5 shown as "2") and triggers matplotlib's fixed-label warning.
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    ax.tick_params(axis='x', labelsize=12)
    ax.set_yticks([0,1,2,3,4])
    ax.set_yticklabels(['Wake', "N1", "N2", "N3", "REM"],size=12)
    ax.set_xlabel("Time Sleeping (Hours)", size=12)
    ax.set_ylabel("Sleep Stage (AASM)", size=12)
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    plt.tight_layout()
    plt.show()