In [3]:
import os, sys

# Put the parent of the working directory on the import path.
# NOTE(review): presumably this is what lets the `nimhans` package import
# below resolve when the notebook runs from a sub-folder — confirm.
current_path = os.getcwd()
sys.path.append(os.path.dirname(current_path))

In [4]:
import glob
import numpy as np
import pandas as pd
import plotly.graph_objects as go

from nimhans.utils import natural_keys

In [2]:
# Directory holding the per-subject annotation workbooks (*.xlsx).
# It can be overridden with the NIMHANS_DATA_PATH environment variable so
# the notebook is not tied to a single machine; the previously hard-coded
# location remains the default, so existing runs behave as before.
data_path = os.environ.get('NIMHANS_DATA_PATH', r'C:\Users\likit\OneDrive\Desktop\edf')

# Collect every workbook in that directory, ordered with the project's
# natural_keys sort key.
# NOTE(review): presumably natural_keys yields numeric-aware ordering
# (e.g. 2 before 10) — confirm in nimhans.utils.
ann_files = sorted(glob.glob(f'{data_path}/*.xlsx'), key=natural_keys)

## Sleep Staging

In [7]:
# Epochs Length Distribution
#
# For each annotation workbook, count the rows left in the "Sleep profile"
# sheet once the first 7 rows are skipped.
# NOTE(review): the 7 skipped rows are presumably header/metadata lines —
# confirm against the workbook layout.

stages_len = {}

for i, ann_path in enumerate(ann_files):
    profile = pd.read_excel(ann_path, sheet_name="Sleep profile")
    ann = profile.iloc[7:].reset_index(drop=True)
    # Assigning two names fails loudly if the sheet is not two columns wide.
    ann.columns = ["timestamp", "stage"]
    stages_len[i] = len(ann["stage"].tolist())

# Parallel lists (subject position, number of rows) consumed by the plots.
subjects = [*stages_len]
lengths = [*stages_len.values()]

In [8]:
# Bar plot: one bar per subject position, bar height = that subject's
# number of annotation rows.

bar_plot = go.Bar(name='Sleep Staging', x=subjects, y=lengths)
bar_layout = go.Layout(
    title='Epoch Lengths for Sleep Staging',
    xaxis={'title': 'Subjects'},
    yaxis={'title': 'Length'},
)
bar_figure = go.Figure(data=[bar_plot], layout=bar_layout)
bar_figure.show()

In [9]:
# Scatter plot: the same subject/length pairs drawn as individual markers.

scatter_plot = go.Scatter(
    name='Sleep Staging',
    mode='markers',
    x=subjects,
    y=lengths,
)
scatter_layout = go.Layout(
    title='Epoch Lengths for Sleep Staging',
    xaxis={'title': 'Subjects'},
    yaxis={'title': 'Length'},
)
scatter_figure = go.Figure(data=[scatter_plot], layout=scatter_layout)
scatter_figure.show()

In [10]:
# Line plot: the same subject/length pairs joined by a line.

line_plot = go.Scatter(
    name='Sleep Staging',
    mode='lines',
    x=subjects,
    y=lengths,
)
line_layout = go.Layout(
    title='Epoch Lengths for Sleep Staging',
    xaxis={'title': 'Subjects'},
    yaxis={'title': 'Length'},
)
line_figure = go.Figure(data=[line_plot], layout=line_layout)
line_figure.show()

In [132]:
# Sleep Stage Distribution
#
# Gather each subject's sequence of stage labels and arrange them as one
# column per subject (columns numbered from 1, rows = position in the
# recording). Subjects with fewer rows are padded with missing values.

stages = {}

for i, ann_path in enumerate(ann_files):
    profile = pd.read_excel(ann_path, sheet_name="Sleep profile")
    # Skip the first 7 rows of the sheet before reading the labels.
    ann = profile.iloc[7:].reset_index(drop=True)
    ann.columns = ["timestamp", "stage"]
    stages[i] = ann["stage"].tolist()

# Building with orient='index' and transposing lets lists of unequal
# length coexist in one frame.
df = pd.DataFrame.from_dict(stages, orient='index').T
df.columns = [position + 1 for position in range(len(ann_files))]

df

Unnamed: 0,1,2,3,4,5,6,7,8,9,10,...,91,92,93,94,95,96,97,98,99,100
0,A,A,A,A,A,A,A,A,A,A,...,A,A,A,A,A,A,A,A,A,A
1,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,...,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake
2,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,...,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake
3,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,...,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake
4,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,...,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake,Wake
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1174,,,,,,,,,,,...,,,,,,,,,,
1175,,,,,,,,,,,...,,,,,,,,,,
1176,,,,,,,,,,,...,,,,,,,,,,
1177,,,,,,,,,,,...,,,,,,,,,,


In [133]:
# Per-subject count of every stage label: one row per subject column of
# `df`, one column per label.
stages_per_sub = {sub: df[sub].value_counts() for sub in df.columns}

# Fixed column order for the summary table. Labels not listed here are
# dropped by the reindex below.
stage_order = ['Wake', 'N1', 'N2', 'N3', 'REM', 'A', 'Movement', 'Artefact']

# Reindex BEFORE filling and casting: a label from stage_order that never
# occurs in any subject is introduced by reindex as an all-NaN column, so
# doing fillna/astype first would leave that column as float NaN instead
# of integer zeros.
stages_per_sub_df = (
    pd.DataFrame.from_dict(stages_per_sub, orient='index')
    .reindex(columns=stage_order)
    .fillna(0)
    .astype(int)
    .rename(columns={'Artefact': 'Artifact'})
)

stages_per_sub_df

Unnamed: 0,Wake,N1,N2,N3,REM,A,Movement,Artifact
1,254,72,364,25,2,1,0,0
2,628,73,350,39,16,12,0,0
3,147,357,384,22,54,10,0,0
4,104,105,446,177,175,19,0,0
5,59,87,421,154,108,1,0,0
...,...,...,...,...,...,...,...,...
96,385,17,329,80,29,1,0,1
97,224,62,472,61,101,1,0,0
98,227,332,271,0,132,9,0,0
99,144,120,394,30,188,34,0,0


In [134]:
# Stages for all subjects
#
# Sum each stage column over every subject, then express each total as a
# percentage of all counted epochs (rounded to 3 decimals).

all_stages = stages_per_sub_df.sum(axis=0)
total_stages_df = all_stages.rename_axis('Stage').reset_index(name='Total Stages')

total_epochs = total_stages_df['Total Stages'].sum()
total_stages_df['Percentage'] = (
    total_stages_df['Total Stages'].div(total_epochs).mul(100).round(3)
)

total_stages_df

Unnamed: 0,Stage,Total Stages,Percentage
0,Wake,25076,26.256
1,N1,9471,9.917
2,N2,39329,41.18
3,N3,8328,8.72
4,REM,11226,11.754
5,A,2009,2.104
6,Movement,1,0.001
7,Artifact,65,0.068


In [135]:
# Bar chart of the total count per stage; each bar carries that stage's
# percentage of all epochs as its text label.
totals_bar = go.Bar(
    x=total_stages_df['Stage'],
    y=total_stages_df['Total Stages'],
    text=total_stages_df['Percentage'],
    textposition='auto',
)
fig = go.Figure(data=totals_bar)
fig.update_layout(
    title='Number of Sleep Stages for Total Subjects',
    xaxis={'title': 'Sleep Stages'},
    yaxis={'title': 'Number of Stages'},
    width=800,
    height=500,
)
fig.show()

In [136]:
# Stages for each subject
#
# Keep only the five sleep-stage columns for the per-subject plots below.
# The drop is done by rebinding with errors='ignore' rather than in place,
# so re-running this cell is harmless; the in-place version raised
# KeyError on a second run because the columns were already gone.
# Note: the totals cell above reads this same variable, so re-running it
# after this cell will no longer include these three columns.

stages_per_sub_df = stages_per_sub_df.drop(
    columns=['Movement', 'Artifact', 'A'],
    errors='ignore',
)

stages_per_sub_df

In [147]:
# Stacked bar plot: one bar per subject, split into segments by stage count.

stacked_traces = [
    go.Bar(name=stage, x=stages_per_sub_df.index, y=stages_per_sub_df[stage])
    for stage in ('Wake', 'N1', 'N2', 'N3', 'REM')
]
fig = go.Figure(data=stacked_traces)
fig.update_layout(
    title='Sleep Stage Distribution by Subjects',
    xaxis={'title': 'Subjects'},
    yaxis={'title': 'Sleep Stages'},
    barmode='stack',
    height=700,
)
fig.show()

In [152]:
# Grouped bar plot: for each subject, one bar per stage placed side by side.

grouped_traces = [
    go.Bar(name=stage, x=stages_per_sub_df.index, y=stages_per_sub_df[stage])
    for stage in ('Wake', 'N1', 'N2', 'N3', 'REM')
]
fig = go.Figure(data=grouped_traces)
fig.update_layout(
    title='Sleep Stage Distribution by Subjects',
    xaxis={'title': 'Subjects'},
    yaxis={'title': 'Sleep Stages'},
    barmode='group',
    height=700,
)
fig.show()

In [158]:
# Box plot: one box per remaining column of the per-subject table, showing
# how that stage's count varies across subjects.

fig = go.Figure(data=[
    go.Box(y=stages_per_sub_df[column], name=column)
    for column in stages_per_sub_df.columns
])
fig.update_layout(
    title='Distribution of Sleep Stages Across Subjects',
    xaxis={'title': 'Sleep Stages'},
    yaxis={'title': 'Number of Stages'},
    width=800,
    height=600,
)
fig.show()

In [163]:
# Heatmap of the pairwise correlation between the per-subject stage counts
# (DataFrame.corr with its default settings).

correlation_matrix = stages_per_sub_df.corr()
stage_labels = stages_per_sub_df.columns

heatmap = go.Heatmap(
    z=correlation_matrix.values,
    x=stage_labels,
    y=stage_labels,
    colorscale='Viridis',
)
fig = go.Figure(data=heatmap)
fig.update_layout(
    title='Correlation between Sleep Stages',
    xaxis={'title': 'Sleep Stages'},
    yaxis={'title': 'Sleep Stages'},
    width=600,
    height=600,
)
fig.show()

In [166]:
# Violin plot: one violin per remaining column of the per-subject table,
# with the inner box and the mean line switched on.

fig = go.Figure()

for stage_name, counts in stages_per_sub_df.items():
    violin = go.Violin(
        y=counts,
        name=stage_name,
        box_visible=True,
        meanline_visible=True,
    )
    fig.add_trace(violin)

fig.update_layout(
    title='Distribution of Sleep Stages',
    xaxis={'title': 'Sleep Stages'},
    yaxis={'title': 'Number of Stages'},
    height=800,
)
fig.show()