In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.gridspec import GridSpec

import pyarrow as pa
import pyarrow.parquet as pq

import seaborn as sns
pd.set_option('display.max_columns', None)

### ADD % NREM DURING SLEEP PERIOD

In [None]:
# Load the per-room light metrics and report the rooms present and the number of participants.
data_df = pd.read_parquet('metrics/light_metrics.parquet', engine='pyarrow')
light_participant_ids = sorted(data_df['participant_id'].unique())
print(data_df['room'].unique(), len(light_participant_ids))

# Every "tib" light metric is renamed to its "ntib" counterpart
# (only the first 'tib_' occurrence in each name is replaced).
tib_light_columns = [
    'tib_mean_light', 'tib_geometric_mean_light',
    'before_tib_mean_light', 'before_tib_geometric_mean_light',
    'after_tib_mean_light', 'after_tib_geometric_mean_light',
]
data_df = data_df.rename(
    columns={old_name: old_name.replace('tib_', 'ntib_', 1) for old_name in tib_light_columns}
)


In [None]:
# Columns that describe the row itself rather than a room-specific metric; they keep their name.
light_id_columns = ['participant_id', 'room', 'date', 'tib_cluster_number', 'daily_start',
                    'metereological_season']

# Metrics copied from every other room before being merged onto the Lounge table.
light_metric_columns = [
    'mean_daily_light', 'geometric_mean_daily_light',
    'half_light', 'time_half_light', 'half_loglight', 'time_half_loglight',
    'hours_below_10', 'hours_10-100', 'hours_100-500', 'hours_over_500', 'hours_over_1000',
    'ntib_mean_light', 'ntib_geometric_mean_light',
    'before_ntib_mean_light', 'before_ntib_geometric_mean_light',
    'after_ntib_mean_light', 'after_ntib_geometric_mean_light',
]

# Base table: Lounge rows, every metric suffixed with "_lounge", bookkeeping columns removed.
light = data_df[data_df['room'] == 'Lounge']
light = light.rename(
    columns=lambda name: name if name in light_id_columns else name + '_lounge'
)
light = light.drop(columns=['room', 'daily_start', 'metereological_season', 'tib_cluster_number'])

# One outer merge per remaining room, so a (participant, date) pair seen in any room is kept.
for room_name in ['Bedroom', 'Kitchen', 'Hallway', 'Bathroom']:
    in_room = data_df['room'] == room_name
    light_i = data_df[in_room][['participant_id', 'date'] + light_metric_columns]

    # Suffix the metrics with the room name and lowercase every column name.
    light_i.columns = [
        (name if name in light_id_columns else name + '_' + room_name).lower()
        for name in light_i.columns
    ]

    light = light.merge(light_i, how='outer', on=['participant_id', 'date'])

print(len(light), len(light['participant_id'].unique()))

In [None]:
# Preview the first three rows of the wide light table.
light.head(n=3)

In [None]:
# Load the temperature metrics, drop the bookkeeping columns and rename the
# "tib" temperature metrics to their "ntib" counterparts.
temp = (
    pd.read_parquet('metrics/temperature_metrics_bedroom_using_cluster.parquet', engine='pyarrow')
    .drop(columns=['metereological_season', 'daily_start', 'tib_cluster_number'])
    .rename(columns={
        'tib_mean_temperature': 'ntib_mean_temperature',
        'before_tib_mean_temperature': 'before_ntib_mean_temperature',
        'after_tib_mean_temperature': 'after_ntib_mean_temperature',
    })
)

# Row count and the rooms present in the file.
print(len(temp), temp['room'].unique())

In [None]:
# Columns that identify a row; they are never suffixed with the room name.
temperature_id_columns = ['participant_id', 'room', 'date']

# Metrics copied from every other room before being merged onto the Lounge table.
temperature_metric_columns = [
    'mean_daily_temperature', 'max_daily_temperature', 'min_daily_temperature',
    'time_max_daily_temperature', 'time_min_daily_temperature',
    'ntib_mean_temperature', 'before_ntib_mean_temperature', 'after_ntib_mean_temperature',
]

# Base table: Lounge rows. `.copy()` yields an independent frame, so adding the amplitude
# column below does not assign into a slice of `temp` (which raises SettingWithCopyWarning).
temperature = temp[temp['room'] == 'Lounge'].copy()
temperature['amplitude_temperature_lounge'] = (
    temperature['max_daily_temperature'] - temperature['min_daily_temperature']
)
# Suffix every metric with "_lounge"; the amplitude column already carries the suffix.
temperature.columns = [
    col_name if col_name in temperature_id_columns + ['amplitude_temperature_lounge']
    else col_name + '_lounge'
    for col_name in temperature.columns
]
temperature = temperature.drop(columns=['room'])

# One outer merge per remaining room, so a (participant, date) pair seen in any room is kept.
for i in ['Bedroom', 'Kitchen', 'Hallway', 'Bathroom']:
    # Independent copy for the same reason as above: a new column is added right after.
    temperature_i = temp.loc[
        temp['room'] == i, temperature_id_columns + temperature_metric_columns
    ].copy()

    # Daily amplitude = daily maximum minus daily minimum.
    amplitude_column = 'amplitude_temperature_' + i
    temperature_i[amplitude_column] = (
        temperature_i['max_daily_temperature'] - temperature_i['min_daily_temperature']
    )

    # Suffix the metrics with the room name, drop the now-redundant room column and
    # lowercase every column name (the room names are capitalised).
    temperature_i.columns = [
        col_name if col_name in temperature_id_columns + [amplitude_column]
        else col_name + '_' + i
        for col_name in temperature_i.columns
    ]
    temperature_i = temperature_i.drop(columns=['room'])
    temperature_i.columns = [x.lower() for x in temperature_i.columns]

    temperature = temperature.merge(temperature_i, on=['participant_id', 'date'], how='outer')

print(len(temperature), len(temperature['participant_id'].unique()))

In [None]:
# Load the in-bed metrics and rename the two time-in-bed columns to their "nocturnal_" names.
sleep_df = pd.read_parquet('metrics/in_bed_metrics_using_cluster.parquet', engine='pyarrow')
sleep_df = sleep_df.rename(columns={
    'time_in_bed_duration': 'nocturnal_time_in_bed_duration',
    'time_in_bed_period': 'nocturnal_time_in_bed_period',
})

# For each bout column, store the median of the bouts and the number of bouts per row.
bout_summaries = [
    ('sleep_bouts', 'median_sleep_bouts', 'number_of_sleep_bouts'),
    ('out_bed_bouts', 'median_duration_of_bed_exits', 'number_of_bed_exits'),
]
for bouts_column, median_column, count_column in bout_summaries:
    sleep_df[median_column] = sleep_df[bouts_column].apply(lambda bouts: np.median(bouts))
    sleep_df[count_column] = sleep_df[bouts_column].apply(lambda bouts: len(bouts))

# Bed exits divided by the nocturnal time-in-bed period.
sleep_df['bed_exit_rate'] = (
    sleep_df['number_of_bed_exits'] / sleep_df['nocturnal_time_in_bed_period']
)
print(len(sleep_df), len(sleep_df['participant_id'].unique()))

In [None]:
# Median and number of the wake bouts in each row.
wake_bouts = sleep_df['wake_bouts']
sleep_df['median_wake_bouts'] = wake_bouts.apply(lambda bouts: np.median(bouts))
sleep_df['number_of_wake_bouts'] = wake_bouts.apply(lambda bouts: len(bouts))

# Sleep duration as a percentage of the nocturnal time-in-bed period.
sleep_to_bed_ratio = sleep_df['sleep_duration'] / sleep_df['nocturnal_time_in_bed_period']
sleep_df['sleep_efficiency'] = 100 * sleep_to_bed_ratio

# Withings NREM sleep duration as a percentage of the sleep period.
nrem_to_sleep_ratio = sleep_df['withings_nrem_sleep_duration'] / sleep_df['sleep_period']
sleep_df['nrem_sleep_perc'] = 100 * nrem_to_sleep_ratio

# Show the four columns derived above.
derived_columns = ['median_wake_bouts', 'number_of_wake_bouts', 'sleep_efficiency', 'nrem_sleep_perc']
sleep_df[derived_columns]

In [None]:
def _nocturnal_clock_hours(timestamps):
    """Convert a datetime Series to decimal clock hours wrapped around midnight.

    Hour and minute are combined into a decimal hour (seconds are ignored).
    Values strictly greater than 18 are shifted back by 24, so evening times
    become negative and sort before times after midnight.
    """
    clock_hours = timestamps.dt.hour + (timestamps.dt.minute / 60)
    return clock_hours.apply(lambda x: x - 24 if x > 18 else x)


# Target column -> datetime column it is derived from. The same conversion is applied to
# all five, so it lives in one helper instead of five copy-pasted pairs of statements.
nocturnal_clock_columns = {
    'midpoint_sleep_numeric': 'midpoint_sleep',
    'ntib_onset_numeric': 'tib_onset',
    'ntib_offset_numeric': 'tib_offset',
    'sleep_onset_numeric': 'sleep_onset',
    'sleep_offset_numeric': 'sleep_offset',
}
for numeric_column, datetime_column in nocturnal_clock_columns.items():
    sleep_df[numeric_column] = _nocturnal_clock_hours(sleep_df[datetime_column])

sleep_ids = sleep_df['participant_id'].unique()

# The intermediate tib_onset_numeric / tib_offset_numeric columns are no longer created,
# so only check_percs has to be dropped.
sleep_df = sleep_df.drop(columns=['check_percs'])
print(len(sleep_ids))

In [None]:
# `light` holds the light metrics of every room and has not been joined with the sleep
# data yet, so the label describes exactly what is counted.
print('participants with light data ', len(light['participant_id'].unique()))

In [None]:
# Outer joins keep every (participant, date) pair present in ANY of the tables, so the
# participant counts below are unions, not intersections; the labels say so explicitly.
whole_df = sleep_df.merge(temperature, on=['participant_id', 'date'], how='outer')
print('participants with sleep or temperature data ', len(whole_df['participant_id'].unique()))
whole_df = whole_df.merge(light, on=['participant_id', 'date'], how='outer')
print('participants with sleep, temperature or light data ', len(whole_df['participant_id'].unique()))

In [None]:
# Load the `naps` and `short_ntibs` metric tables and outer-join them onto the master
# table one after the other; the column count is printed before and after each join.
naps = pd.read_parquet('metrics/naps_metrics_using_cluster_22_JAN_2024.parquet', engine='pyarrow')
short_ntibs = pd.read_parquet('metrics/short_ntibs_metrics_using_cluster_22_JAN_2024.parquet', engine='pyarrow')
print(len(whole_df.columns))

for extra_metrics in (naps, short_ntibs):
    whole_df = whole_df.merge(extra_metrics, how='outer', on=['participant_id', 'date'])
    print(len(whole_df.columns))

In [None]:
# Spaces and hyphens become underscores; parentheses are removed from column names.
column_name_table = str.maketrans({' ': '_', '-': '_', ')': None, '(': None})
whole_df.columns = [name.translate(column_name_table) for name in whole_df.columns]

# Split the bedroom ntib mean temperature into three equal-width bins.
whole_df['temperature_category'] = pd.cut(
    x=whole_df['ntib_mean_temperature_bedroom'],
    bins=3,
    labels=["cold", "medium", "hot"],
)
print(len(whole_df))

In [None]:
# Preview of the rows that contain at least one missing value. Only the last expression
# of a cell is rendered, so the preview is shown explicitly with display().
display(whole_df[whole_df.isnull().any(axis=1)].head(3))

# Names of the columns that contain at least one missing value (computed once).
whole_df.columns[whole_df.isna().any()].tolist()

In [None]:
# Normalise column names: spaces and hyphens become underscores, parentheses are
# removed, and the result is lowercased.
normalise_table = str.maketrans({' ': '_', '-': '_', ')': None, '(': None})
whole_df.columns = [name.translate(normalise_table).lower() for name in whole_df.columns]

In [None]:
# First recorded date per participant, kept as a small lookup table.
initial_dates = (
    whole_df.groupby('participant_id')['date'].min()
    .to_frame()
    .rename(columns={'date': 'min_date'})
    .reset_index()
)

# transform('min') broadcasts each participant's first date back onto their rows.
# Unlike merging `initial_dates` in, re-running this cell simply overwrites `min_date`
# instead of producing min_date_x / min_date_y and then failing on the lookup below.
whole_df['min_date'] = whole_df.groupby('participant_id')['date'].transform('min')

# Whole days elapsed since the participant's first recorded date.
whole_df['time_in_study'] = (whole_df['date'] - whole_df['min_date']).dt.days

# Shorten the long Withings column names (reassignment rather than inplace=True).
whole_df = whole_df.rename(columns={
    'withings_wake_state_during_sleep_period': 'w_wake_st_dur_sleep_period',
    'withings_nulls_during_sleep_period': 'w_nulls_dur_sleep_period',
    'withings_nulls_during_sleep_period_percentage': 'w_nulls_dur_sleep_period_perc',
})

In [None]:
record_dates = pd.to_datetime(whole_df['date'])

# month_name() returns English names regardless of the process locale, whereas
# strftime('%B') follows the locale and would silently break the bimonth labels.
whole_df['month'] = record_dates.dt.month_name()

# Two-month buckets derived from the month NUMBER: 1-2 -> JanFeb, 3-4 -> MarApr, ...
bimonth_labels = ['JanFeb', 'MarApr', 'MayJun', 'JulAug', 'SepOct', 'NovDec']
month_number_to_bimonth = {
    month_number: bimonth_labels[(month_number - 1) // 2] for month_number in range(1, 13)
}
# Rows with a missing date get a missing bimonth instead of defaulting to 'JanFeb'.
whole_df['bimonth'] = record_dates.dt.month.map(month_number_to_bimonth)

# Sanity check: the months that ended up in each bimonth bucket.
whole_df.groupby('bimonth')['month'].apply(lambda x: list(np.unique(x)))

In [None]:
# Number of non-missing dates in each bimonth bucket.
whole_df.groupby('bimonth')[['date']].count()

In [None]:
# Number of non-missing dates in each month.
whole_df.groupby('month')[['date']].count()

In [None]:
def season_metereological(date):
    """Return the season label for the calendar day of ``date``.

    The year is split into four fixed windows (both ends inclusive):

    * 4 Feb - 5 May  -> 'around-spring-equinox'
    * 6 May - 6 Aug  -> 'around-summer-solstice'
    * 7 Aug - 7 Nov  -> 'around-autumn-equinox'
    * any other day  -> 'around-winter-solstice'

    Only the month and day of ``date`` are used. The result therefore does not
    depend on the time of day or on whether the timestamp is timezone-aware;
    with exact membership in midnight-UTC date ranges, such inputs fall through
    to the winter label.

    Parameters
    ----------
    date : object with ``month`` and ``day`` attributes (e.g. ``pd.Timestamp``)

    Returns
    -------
    str
        One of the four labels listed above.

    Raises
    ------
    ValueError
        If ``date`` is missing (``NaT``/``NaN``/``None``).
    """
    if pd.isna(date):
        raise ValueError('cannot assign a season to a missing date')

    # Tuples compare lexicographically, i.e. in calendar order within a year.
    month_day = (date.month, date.day)
    if (2, 4) <= month_day <= (5, 5):
        return 'around-spring-equinox'
    if (5, 6) <= month_day <= (8, 6):
        return 'around-summer-solstice'
    if (8, 7) <= month_day <= (11, 7):
        return 'around-autumn-equinox'
    return 'around-winter-solstice'

In [None]:
def _season_for(raw_date):
    """Parse the string form of ``raw_date`` and return its season label."""
    return season_metereological(pd.to_datetime(str(raw_date)))


# Label every row with the season of its date.
whole_df['metereological_season'] = whole_df['date'].apply(_season_for)

In [None]:
# Number of non-missing dates in each season.
whole_df.groupby(['metereological_season'])[['date']].count()

In [None]:
# Preview the first three rows of the master table.
whole_df.head(n=3)

In [None]:
# Write the master table to CSV without the row index.
master_csv_path = 'master_06_FEB_2024.csv'
whole_df.to_csv(master_csv_path, index=False)

In [None]:
##FINAL PROCESS TO SAVE file

In [None]:
# Names of the columns that contain at least one missing value.
has_missing = whole_df.isna().any()
whole_df.columns[has_missing].tolist()