In [None]:
import numpy as np
import pandas as pd
import datetime
# import dcarte
import pytz
#%load_ext autoreload
import pyarrow as pa
import pyarrow.parquet as pq

from random import randrange
import matplotlib.pyplot as plt
import seaborn as sns
# Current seaborn colour palette, kept in `color` (not referenced by the cells visible in this chunk).
color = sns.color_palette()
pd.options.mode.chained_assignment = None  # default='warn'; None switches the chained-assignment warning off

# Display limits for rendered DataFrames: up to 500 columns, but only 6 rows.
pd.set_option('display.max_columns', 500)
pd.set_option('display.max_rows', 6)

In [None]:
# Load the cleaned sleep records and drop the column named 'duplicates'.
sleep = pd.read_parquet('clean_sleep_data.parquet', engine='pyarrow')
sleep = sleep.drop('duplicates', axis=1)

# Work on a copy of the start timestamp, expressed in Europe/London local time.
sleep['datetime'] = sleep['start_date_tz']
# Compare the zone *name*: a tzinfo object never equals a plain string, so comparing
# `.dt.tz` directly against 'Europe/London' is always True and hides the real check.
if str(sleep['datetime'].dt.tz) != 'Europe/London':
    sleep['datetime'] = sleep['datetime'].dt.tz_convert(pytz.timezone('Europe/London'))
    
def process_sleep_mat(patient_id, df):
    """Build a gap-free, minute-by-minute record for one participant.

    Parameters
    ----------
    patient_id : value compared against the ``ID`` column to select the participant.
    df : DataFrame with at least the columns ``ID``, ``datetime`` (timezone-aware),
        ``state``, ``snoring``, ``heart_rate`` and ``respiratory_rate``.

    Returns
    -------
    DataFrame with one row per minute between the participant's first and last
    record, columns ``ID, datetime, state, value, snoring, heart_rate,
    respiratory_rate``.  ``value`` is ``'bed_out'`` where ``state`` is missing
    (including the minutes inserted to fill gaps) and ``'bed_in'`` otherwise.

    Raises
    ------
    TypeError
        From ``tz_convert`` if ``datetime`` is timezone-naive.
    ValueError
        From ``asfreq`` if the participant has duplicated timestamps.
    """
    df = df[df['ID'] == patient_id].reset_index(drop=True)

    df.index = pd.DatetimeIndex(df['datetime'])
    df = df.drop(columns=['datetime'])
    # '1min' is the supported spelling of a one-minute frequency; the 'T' alias is deprecated.
    df = df.resample('1min').asfreq()
    df = df.rename_axis('datetime').reset_index()
    # Compare zone names: a tzinfo object is never equal to a plain string.
    if str(df['datetime'].dt.tz) != 'Europe/London':
        df['datetime'] = df['datetime'].dt.tz_convert('Europe/London')
    df['value'] = np.where(df['state'].isna(), 'bed_out', 'bed_in')
    # Minutes inserted by asfreq() are all-NaN, including ID; every row belongs to this participant.
    df['ID'] = patient_id
    return df[['ID', 'datetime', 'state', 'value', 'snoring', 'heart_rate', 'respiratory_rate']]

In [None]:
# Preview the first three rows of the loaded sleep table.
sleep.head(3)

In [None]:
# NOTE(review): `br` is not defined in any cell visible here, so evaluating it raises NameError.
# Presumably a deliberate stop so that "Run All" halts at this point — confirm before removing.
br

In [None]:
# Cluster table: drop incomplete rows, divide `length` by 60, and use the
# snake_case participant key expected by the merge further down.
data_1 = (
    pd.read_parquet('clusters_hour_gap.parquet', engine='pyarrow')
    .dropna()
    .assign(length=lambda clusters: clusters['length'] / 60)
    .rename(columns={"Participant ID": "participant_id"})
)
# Number of distinct participants left after dropping incomplete rows.
print(data_1['participant_id'].nunique())

In [None]:
# Per participant-day in-bed window (onset / offset) from the cluster-based metrics file.
tib_columns = ['participant_id', 'date', 'tib_onset', 'tib_offset']
dates = pd.read_parquet('in_bed_metrics_using_cluster.parquet', engine='pyarrow').loc[:, tib_columns]
dates.info()

# Left-join every cluster onto its participant-day, then make sure both timestamps
# used for the onset comparison are datetimes.
# NOTE(review): if the key dtypes differ between the two frames, cast
# 'participant_id' and 'date' to str on both sides before merging.
dates_used = (
    dates
    .merge(data_1, on=['participant_id', 'date'], how='left')
    .assign(
        tib_onset=lambda merged: pd.to_datetime(merged['tib_onset']),
        datetime=lambda merged: pd.to_datetime(merged['datetime']),
    )
)

dates_used

In [None]:
# Label every cluster:
#   NTIB        - the cluster starts exactly at the day's tib_onset
#   short_NTIB  - any other cluster classified as 'nocturnal'
#   in_bed_nap  - everything else
starts_at_tib_onset = dates_used['tib_onset'] == dates_used['datetime']
is_nocturnal = dates_used['classification'] == 'nocturnal'

dates_used['its_a'] = 'in_bed_nap'
dates_used.loc[starts_at_tib_onset, 'its_a'] = 'NTIB'
dates_used.loc[~starts_at_tib_onset & is_nocturnal, 'its_a'] = 'short_NTIB'

# Row counts per label, in the order (NTIB, short_NTIB, in_bed_nap).
tuple(int((dates_used['its_a'] == label).sum()) for label in ('NTIB', 'short_NTIB', 'in_bed_nap'))

In [None]:
# Count clusters of each label per participant-day, one column per label
# (labels absent for a given day are filled with 0).
label_counts = (
    dates_used[['participant_id', 'date', 'cluster', 'its_a']]
    .groupby(by=['participant_id', 'date', 'its_a'])
    .count()
    .unstack(fill_value=0)
    .reset_index(level=[0, 1])
)
# The column index is two-level at this point; flatten it to tuples so the
# tuples can be mapped to plain names.
label_counts.columns = label_counts.columns.to_flat_index()

flat_column_names = {
    ('participant_id', ''): 'participant_id',
    ('date', ''): 'date',
    ('cluster', 'in_bed_nap'): 'in_bed_nap',
    ('cluster', 'short_NTIB'): 'short_NTIB',
    ('cluster', 'NTIB'): 'NTIB',
}
ntib_and_naps_count = label_counts.rename(columns=flat_column_names)
ntib_and_naps_count

In [None]:
def start_naps_table():
    """Return an empty DataFrame that has the nap-table columns and no rows."""
    identity_cols = ['participant_id', 'date', 'cluster_number', 'nap_classification']
    in_bed_cols = ['time_in_bed_duration', 'tib_onset', 'tib_offset', 'in_bed_bouts']
    sleep_cols = ['sleep_duration', 'sleep_onset', 'sleep_offset', 'sleep_bouts']
    return pd.DataFrame(columns=identity_cols + in_bed_cols + sleep_cols)

def append_row(df, row):
    """Return a new frame made of `df` followed by `row` (a Series), with a fresh 0..n-1 index.

    `df` itself is not modified.
    """
    row_frame = pd.DataFrame([row], columns=row.index)
    return pd.concat([df, row_frame], ignore_index=True)

def find_bows(df, col_to_find_mismatch, desired_value):
    """Return the duration, in hours, of every run of `desired_value` in a column.

    A run starts at each row whose value differs from the previous row (the
    first row always starts a run) and lasts until the start of the next run.

    Parameters
    ----------
    df : DataFrame with a datetime-typed ``datetime`` column and the column
        named by `col_to_find_mismatch`.  Rows are read in their given order,
        so the caller is expected to pass them sorted by ``datetime``.
        The frame is not modified.
    col_to_find_mismatch : name of the column whose value changes delimit runs.
    desired_value : only runs holding this value are reported.

    Returns
    -------
    list of float
        One duration per matching run, in order.  The final run in `df` has no
        successor, so if it holds `desired_value` its duration is NaN.
    """
    values = df[col_to_find_mismatch]
    # Boolean indexing returns a new frame, so no helper column is written to the caller's df.
    run_starts = df[values != values.shift(1)].reset_index(drop=True)
    run_hours = (run_starts['datetime'].shift(-1) - run_starts['datetime']).dt.total_seconds() / 3600
    return run_hours[run_starts[col_to_find_mismatch] == desired_value].to_list()

def prepare_for_bows(df, start, end, col_to_find_mismatch, undesired_value):
    """Cut `df` to [start, end] and bracket it with two sentinel rows.

    The sentinels sit one minute before `start` and one minute after `end` and
    carry `undesired_value` in `col_to_find_mismatch`; every other column of
    the sentinel rows is left missing.  The result is sorted by ``datetime``
    and keeps the (non-unique) index labels produced by the concatenation.
    """
    one_minute = pd.Timedelta(minutes=1)
    in_window = df['datetime'].between(start, end)
    window_rows = df[in_window].reset_index(drop=True)

    sentinel_values = {start - one_minute: undesired_value,
                       end + one_minute: undesired_value}
    sentinels = pd.DataFrame(list(sentinel_values.items()),
                             columns=['datetime', col_to_find_mismatch])

    bracketed = pd.concat([window_rows, sentinels])
    return bracketed.sort_values(by='datetime')

def update_naps_table(old_df, i_df, patient, cluster_df):
    """Append one summary row per usable cluster of `cluster_df` to `old_df`.

    Parameters
    ----------
    old_df : DataFrame of rows collected so far; it is not modified.
    i_df : minute-by-minute record of one participant with the columns
        ``datetime``, ``state`` and ``value`` (``'bed_in'`` / ``'bed_out'``).
    patient : identifier stored in the ``participant_id`` field of each new row.
    cluster_df : one row per cluster with the columns ``datetime`` (start),
        ``end_datetime``, ``date``, ``cluster`` and ``classification``.

    Returns
    -------
    DataFrame
        `old_df` followed by the new rows, re-indexed 0..n-1.  When no cluster
        yields a row, `old_df` itself is returned.

    A cluster is skipped when
    * fewer than two of its minutes are in a sleep state (DEEP / LIGHT / REM), or
    * the summed bout lengths differ from the minute-count duration by more than
      0.008 h (about 29 s); a message with the cluster's index label is printed.
    """
    one_minute = pd.Timedelta(minutes=1)
    tolerance_hours = 0.008
    # Rows are gathered here and concatenated once at the end; concatenating
    # inside the loop would re-copy the whole table for every cluster.
    new_rows = []

    for index, row in cluster_df.iterrows():

        start_dt = row['datetime']
        end_dt = row['end_datetime']  # participant gets out of bed in this minute, which starts the out-of-bed cluster

        # The end minute already belongs to the out-of-bed period, hence the one-minute correction.
        time_in_bed = i_df.loc[(i_df['datetime'] >= start_dt) &
                               (i_df['datetime'] <= (end_dt - one_minute))].reset_index(drop=True)
        time_in_bed['in_sleep'] = np.where(time_in_bed['state'].isin(['DEEP', 'LIGHT', 'REM']), 'yes', 'no')

        sleep_minutes = time_in_bed[time_in_bed['in_sleep'] == 'yes']['datetime']
        if len(sleep_minutes) < 2:
            continue

        sleep_onset = sleep_minutes.iloc[0]
        # Offset is the minute after the last sleeping minute.
        sleep_offset = sleep_minutes.iloc[-1] + one_minute
        asleep = time_in_bed.loc[(time_in_bed['datetime'] >= sleep_onset) &
                                 (time_in_bed['datetime'] <= (sleep_offset - one_minute))].reset_index(drop=True)

        # Durations are minute counts expressed in hours.
        time_in_bed_duration = len(time_in_bed[time_in_bed['state'].isin(['DEEP', 'LIGHT', 'REM', 'AWAKE'])]) / 60
        sleep_duration = len(asleep[asleep['in_sleep'] == 'yes']) / 60

        # In-bed bouts over the whole cluster window.
        just_periods = time_in_bed[['datetime', 'state', 'value']]
        for_inbed_bows = prepare_for_bows(just_periods, time_in_bed['datetime'].iloc[0],
                                          time_in_bed['datetime'].iloc[-1], 'value', 'bed_out')
        inbed_bows = find_bows(for_inbed_bows, 'value', 'bed_in')

        # Sleep bouts between the first and the last row of the sleep period.
        just_periods = time_in_bed[['datetime', 'state', 'in_sleep']]
        for_sleep_bows = prepare_for_bows(just_periods, asleep['datetime'].iloc[0],
                                          asleep['datetime'].iloc[-1], 'in_sleep', 'no')
        sleep_bows = find_bows(for_sleep_bows, 'in_sleep', 'yes')

        # Consistency checks: the bouts must add up to the minute-count durations.
        if abs(sum(inbed_bows) - time_in_bed_duration) > tolerance_hours:
            print('error in bed bouts in day ', index)
            continue

        if abs(sum(sleep_bows) - sleep_duration) > tolerance_hours:
            print('error in sleep bouts in day ', index)
            continue

        new_rows.append({'participant_id': patient,
                         'date': row['date'],
                         'cluster_number': row['cluster'],
                         'nap_classification': row['classification'],
                         'time_in_bed_duration': time_in_bed_duration,
                         'tib_onset': start_dt,
                         'tib_offset': end_dt,
                         'in_bed_bouts': inbed_bows,
                         'sleep_onset': sleep_onset,
                         'sleep_offset': sleep_offset,
                         'sleep_duration': sleep_duration,
                         'sleep_bouts': sleep_bows})

    if not new_rows:
        return old_df
    return pd.concat([old_df, pd.DataFrame(new_rows)]).reset_index(drop=True)

In [None]:
# Keep only the clusters labelled as in-bed naps, with the columns needed downstream.
nap_columns = ['participant_id', 'date', 'cluster', 'datetime', 'end_datetime', 'length', 'classification']
naps = dates_used.loc[dates_used['its_a'] == 'in_bed_nap', nap_columns]
naps

In [None]:
# Build the nap table one participant at a time, starting from an empty table.
naps_info = start_naps_table()
ids_list = naps['participant_id'].unique()

for idx, i in enumerate(ids_list):
    # Participants with an empty-string id are skipped.
    if i in ['']:
        continue

    print('current length table:', len(naps_info))
    print(idx, i, len(naps_info))

    i_df = process_sleep_mat(i, sleep)
    naps_info = update_naps_table(naps_info, i_df, i, naps[naps['participant_id'] == i])

    # The table collected so far is written out after every participant.
    df2 = pa.Table.from_pandas(naps_info)
    pq.write_table(df2, 'naps_info_22_JAN_2024.parquet', compression='BROTLI')

# Total number of nap rows collected.
len(naps_info)

In [None]:
def get_info(x,classification,column_to_sort,column_to_evaluate,largest_place):
    """Value of `column_to_evaluate` on the row ranked `largest_place` by `column_to_sort`.

    Only rows of `x` whose ``nap_classification`` equals `classification` are
    ranked, in descending order of `column_to_sort` (place 1 is the largest).
    Returns ``np.nan`` when fewer than `largest_place` such rows exist.
    """
    of_this_class = x.loc[x['nap_classification'] == classification]
    ranked = of_this_class.sort_values(by=[column_to_sort], ascending=False)
    ranked_values = ranked[column_to_evaluate].to_numpy()
    return ranked_values[largest_place - 1] if len(ranked_values) >= largest_place else np.nan
    
def _daily_nap_summary(x):
    """Summarise all nap rows of one participant-day into a single Series."""
    tib = x['time_in_bed_duration']
    slept = x['sleep_duration']

    def diurnal(sort_col, value_col, place):
        # value_col of the diurnal nap ranked `place` by sort_col (NaN if there is none)
        return get_info(x, 'diurnal', sort_col, value_col, place)

    return pd.Series({
        'total_time_in_bed_during_naps': tib.sum(),
        'total_time_in_sleep_during_naps': slept.sum(),
        'mean_time_in_bed_during_naps': tib.mean(),
        'mean_time_in_sleep_during_naps': slept.mean(),
        'min_time_in_bed_during_naps': tib.min(),
        'min_time_in_sleep_during_naps': slept.min(),
        'number_naps': len(x[x['nap_classification'] == 'diurnal']),

        # longest diurnal nap, ranked by time in bed and by time asleep
        'largest_tib_during_nap': diurnal('time_in_bed_duration', 'time_in_bed_duration', 1),
        'largest_tib_during_nap_onset': diurnal('time_in_bed_duration', 'tib_onset', 1),
        'largest_sleep_during_nap': diurnal('sleep_duration', 'sleep_duration', 1),
        'largest_sleep_during_nap_onset': diurnal('sleep_duration', 'sleep_onset', 1),

        # second longest diurnal nap
        '2largest_tib_during_nap': diurnal('time_in_bed_duration', 'time_in_bed_duration', 2),
        '2largest_tib__during_nap_onset': diurnal('time_in_bed_duration', 'tib_onset', 2),
        '2largest_sleep_during_nap': diurnal('sleep_duration', 'sleep_duration', 2),
        '2largest_sleep_during_nap_onset': diurnal('sleep_duration', 'sleep_onset', 2),
    })


metrics = naps_info.groupby(['participant_id', 'date']).apply(_daily_nap_summary).reset_index()

# Express each onset timestamp as a decimal hour of the day (hour + minute / 60).
for onset_col in ('largest_tib_during_nap_onset',
                  'largest_sleep_during_nap_onset',
                  '2largest_tib__during_nap_onset',
                  '2largest_sleep_during_nap_onset'):
    onset = metrics[onset_col]
    metrics[onset_col + '_numeric'] = onset.dt.hour + (onset.dt.minute / 60)

# Write the metrics to parquet, then load them back from that file.
df2 = pa.Table.from_pandas(metrics)
pq.write_table(df2, 'naps_metrics_using_cluster_22_JAN_2024.parquet', compression='BROTLI')

metrics = pd.read_parquet('naps_metrics_using_cluster_22_JAN_2024.parquet')
metrics

In [None]:
# List the column names available in the metrics table.
metrics.columns

In [None]:
# Distribution of the number of in-bed naps per participant-day.
plt.figure(figsize=(6,4))
# Unit-wide bins centred on the integers 1..5 (edges 0.5, 1.5, ..., 5.5) so that every
# count gets its own bar.  With integer edges 1..5 there are only four bins and the last
# one is closed on both sides, which merges the counts for 4 and 5 into a single bar.
plt.hist(metrics['number_naps'], bins=np.arange(0.5, 6.5, 1), rwidth=0.9, color='tomato')

plt.ylim([0,3000])
# Ticks sit on the bin centres, i.e. on the nap counts themselves.
plt.xticks([1,2,3,4,5],
           ['1','2','3','4','5'], rotation=15)
plt.xlabel('Number of In-Bed Naps')
plt.show()

In [None]:
# Total time in bed across a day's in-bed naps, in unit-wide bins spanning 0-15.
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(metrics['total_time_in_bed_during_naps'], bins=range(0, 16, 1), rwidth=0.9, color='tomato')
ax.set_ylim([0, 2000])
ax.set_xlabel('Total TIB during In-Bed Naps \n for current day (h)')
plt.show()

In [None]:
# Total time asleep across a day's in-bed naps, in unit-wide bins spanning 0-15.
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(metrics['total_time_in_sleep_during_naps'], bins=range(0, 16, 1), rwidth=0.9, color='tomato')
ax.set_ylim([0, 2000])
ax.set_xlabel('Total time in sleep during In-Bed Naps \n for current day (h)')
plt.show()

In [None]:
# Mean time in bed per in-bed nap for each participant-day, in unit-wide bins spanning 0-15.
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(metrics['mean_time_in_bed_during_naps'], bins=range(0, 16, 1), rwidth=0.9, color='tomato')
ax.set_xlabel('Mean TIB during \n In-Bed Naps (h)')
ax.set_ylim([0, 2000])
plt.show()

In [None]:
# Mean time asleep per in-bed nap for each participant-day, in unit-wide bins spanning 0-15.
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(metrics['mean_time_in_sleep_during_naps'], bins=range(0, 16, 1), rwidth=0.9, color='tomato')
ax.set_xlabel('Mean time in sleep during \n In-Bed Naps (h)')
ax.set_ylim([0, 2000])
plt.show()

In [None]:
# Time in bed of each day's longest diurnal nap, in unit-wide bins spanning 0-15.
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(metrics['largest_tib_during_nap'], bins=range(0, 16, 1), rwidth=0.9, color='tomato')
ax.set_xlabel('Largest TIB during \n In-Bed Naps (h)')
ax.set_ylim([0, 2000])
plt.show()

In [None]:
# Time asleep in each day's longest-sleep diurnal nap, in unit-wide bins spanning 0-15.
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(metrics['largest_sleep_during_nap'], bins=range(0, 16, 1), rwidth=0.9, color='tomato')
ax.set_xlabel('Largest time in sleep during \n In-Bed Naps (h)')
ax.set_ylim([0, 2000])
plt.show()

In [None]:
# Decimal hour at which each day's longest (by time in bed) nap began;
# unit-wide bins with edges from 6 to 22, so values outside that range are not drawn.
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(metrics['largest_tib_during_nap_onset_numeric'], bins=range(6, 23, 1), rwidth=0.9, color='tomato')
ax.set_xlabel('Onset of largest TIB during \n In-Bed Naps')
ax.set_ylim([0, 700])
plt.show()

In [None]:
# Decimal hour at which sleep began in each day's longest-sleep nap;
# unit-wide bins with edges from 6 to 22, so values outside that range are not drawn.
fig, ax = plt.subplots(figsize=(6, 4))
ax.hist(metrics['largest_sleep_during_nap_onset_numeric'], bins=range(6, 23, 1), rwidth=0.9, color='tomato')
ax.set_xlabel('Onset of largest time in sleep during \n In-Bed Naps')
ax.set_ylim([0, 700])
plt.show()

In [None]:
# The four participant-days whose longest-sleep nap started latest in the day.
onset_review_columns = [
    'number_naps',
    'largest_tib_during_nap', 'largest_tib_during_nap_onset',
    'largest_sleep_during_nap', 'largest_sleep_during_nap_onset',
    'largest_sleep_during_nap_onset_numeric',
]
(
    metrics[onset_review_columns]
    .sort_values(by=['largest_sleep_during_nap_onset_numeric'], ascending=False)
    .head(4)
)

In [None]:
# NOTE(review): `br` is not defined in any cell visible here, so evaluating it raises NameError.
# Presumably a deliberate stop so that "Run All" halts at this point — confirm before removing.
br

In [None]:
fig = plt.figure(figsize=(6,14))
ax1 = fig.add_subplot(111)

# sns.boxplot(data=naps_stats[naps_stats['nap_classification']=='nocturnal'], x='time_in_bed_duration', y='participant_id',
#             hue='nap_classification', ax=ax1,showfliers =True)
sns.boxplot(data=metrics,x='number_naps', y='participant_id', ax=ax1,showfliers =True)

ax1.set_xlabel('Number of In-Bed Naps per Day')