In [None]:
import dcarte

import pandas as pd
import numpy as np
# import glob
# import os

import pyarrow as pa
import pyarrow.parquet as pq

import datetime
import pytz

from astral import LocationInfo
from astral.sun import sun

import seaborn as sns
pd.options.mode.chained_assignment = None  # default='warn'

In [None]:
# Load the per-night in-bed metrics and collect the distinct participant ids;
# the cell displays how many participants there are.
sleep_df = pd.read_parquet('in_bed_metrics_using_cluster.parquet', engine='pyarrow')
ids = sleep_df['participant_id'].unique()
len(ids)

In [None]:
# Peek at the first three rows of the sleep table.
sleep_df.head(3)

In [None]:
# Number of rows in the sleep table.
print(len(sleep_df))

In [None]:
# Lookup from lower-level location labels to the display names used for rooms.
# NOTE(review): nothing in the visible part of this notebook reads `mapping`;
# presumably it is applied where the temperature data is prepared - confirm.
mapping = {
    'bathroom1': 'Bathroom',
    'WC1': 'Bathroom',
    'kitchen': 'Kitchen',
    'hallway': 'Hallway',
    'corridor1': 'Hallway',
    'dining room': 'Dining Room',
    'living room': 'Living Room',
    'lounge': 'Lounge',
    'study': 'Study',
    'office': 'Office',
    'conservatory': 'Conservatory',
    'bedroom1': 'Bedroom',
    'main door': 'Front door',
    'front door': 'Front door',
    'back door': 'Back door',
    'cellar': 'Cellar',
    'garage': 'Garage',
    'secondary': 'Secondary',
    'fridge door': 'Fridge Door',
}

In [None]:
# Load the ambient-temperature readings. This frame is later passed to
# update_temp_table / prepare_for_stats, which read its 'ID', 'location_name',
# 'start_date' and 'value' columns.
temp = pd.read_parquet('Ambient_Temperature.parquet', engine='pyarrow')

In [None]:
def prepare_for_stats(df, location, participant):
    """Build a minute-by-minute series of readings for one participant and room.

    Processing steps:
      1. Keep the rows whose ``location_name`` equals ``location`` and whose
         ``ID`` equals ``participant``.
      2. Read ``start_date`` as UTC, round it to the minute and convert it to
         Europe/London; rows are then put in chronological order.
      3. If any reading is negative, drop every negative reading that differs
         from the reading after it by more than 3.
      4. Keep only the first reading for each minute.
      5. Forward-fill onto a 1-minute grid, discarding every minute that
         belongs to a reading followed by a gap of 24 hours or more. The last
         raw reading has no successor, so it is discarded as well.

    Parameters
    ----------
    df : pandas.DataFrame
        Readings with at least ``ID``, ``location_name``, ``start_date`` and
        ``value`` columns.
    location : str
        Value of ``location_name`` to select.
    participant : str
        Value of ``ID`` to select.

    Returns
    -------
    pandas.DataFrame
        Columns ``ID``, ``datetime`` (tz-aware, Europe/London),
        ``location_name`` and ``value`` with a fresh 0..n-1 index. Empty when
        no usable reading remains.
    """
    output_columns = ['ID', 'datetime', 'location_name', 'value']

    selected = (df['location_name'] == location) & (df['ID'] == participant)
    readings = df[selected].reset_index(drop=True)

    # utc=True localises naive timestamps as UTC (what the notebook assumed)
    # and also accepts input that is already tz-aware instead of raising.
    stamps = pd.to_datetime(readings['start_date'], utc=True).dt.round('min')
    readings['datetime'] = stamps.dt.tz_convert('Europe/London')

    # The "next reading" logic below relies on chronological order. A stable
    # sort keeps same-minute rows in their input order, so keep='first' still
    # picks the same row for input that was already sorted.
    readings = readings.sort_values('datetime', kind='mergesort').reset_index(drop=True)

    negative = readings['value'] < 0
    if negative.any():
        print('Found negative value, original data size', len(readings))
        jump_to_next = (readings['value'].shift(-1) - readings['value']).abs()
        readings = readings[~(negative & (jump_to_next > 3))]
        print('after aapplying conditions ', len(readings))

    if readings.duplicated(['datetime']).any():
        print('Found some duplicate times, removing')
        readings = readings.drop_duplicates(subset=['datetime'], keep='first')

    if readings.empty:
        # Nothing left to resample; hand back an empty frame of the same shape.
        return readings[output_columns].reset_index(drop=True)

    # Hours until the next raw reading (NaN for the last one). Forward-filling
    # copies this onto every minute generated from that reading, which lets
    # the filter below reject whole stretches that span a long sensor outage.
    gap_to_next = readings['datetime'].shift(-1) - readings['datetime']
    readings = readings.assign(gap_hours=gap_to_next.dt.total_seconds() / 3600)

    per_minute = (
        readings.set_index('datetime')
        .resample('min')
        .ffill()
        .rename_axis('datetime')
        .reset_index()
    )
    per_minute = per_minute[per_minute['gap_hours'] < 24]

    return per_minute[output_columns].reset_index(drop=True)


def append_row(df, row):
    """Return a new frame made of ``df`` followed by ``row`` as one extra line.

    ``row`` is a pandas Series; its index supplies the column names of the
    appended line. The result is renumbered 0..n-1 and ``df`` is not modified.
    """
    single_line = pd.DataFrame([row], columns=row.index)
    stacked = pd.concat([df, single_line])
    return stacked.reset_index(drop=True)
    
def season_metereological(date):
    """Return the season label for a timestamp.

    The timestamp is rounded to the nearest day and the resulting calendar
    day is classified as:

    * 4 Feb - 5 May  -> ``'around-spring-equinox'``
    * 6 May - 6 Aug  -> ``'around-summer-solstice'``
    * 7 Aug - 7 Nov  -> ``'around-autumn-equinox'``
    * anything else  -> ``'around-winter-solstice'``

    The comparison uses the (month, day) of the rounded timestamp in its own
    timezone, so tz-naive, UTC and Europe/London timestamps all work. Testing
    membership in UTC-midnight date ranges, as was done before, silently sent
    every non-UTC or naive timestamp to the winter branch.

    Parameters
    ----------
    date : pandas.Timestamp
        Moment to classify.

    Returns
    -------
    str
        One of the four labels listed above.
    """
    nearest_day = date.round('1D')
    month_day = (nearest_day.month, nearest_day.day)

    if (2, 4) <= month_day <= (5, 5):
        return 'around-spring-equinox'
    if (5, 6) <= month_day <= (8, 6):
        return 'around-summer-solstice'
    if (8, 7) <= month_day <= (11, 7):
        return 'around-autumn-equinox'
    return 'around-winter-solstice'

In [None]:
def start_row(participant_id, room, daily_data, date, cluster_number):
    """Summarise one 24-hour stretch of minute-level readings as a dict.

    Parameters
    ----------
    participant_id, room, date, cluster_number
        Copied unchanged into the ``participant_id``, ``room``, ``date`` and
        ``tib_cluster_number`` entries of the result.
    daily_data : pandas.DataFrame
        Readings with ``datetime`` and ``value`` columns. The statistics are
        only computed when it holds exactly 1440 rows (24 h at one row per
        minute); otherwise they are reported as NaN.

    Returns
    -------
    dict
        Keys, in order: ``participant_id``, ``room``, ``tib_cluster_number``,
        ``date``, ``daily_start``, ``metereological_season``,
        ``mean_daily_temperature``, ``max_daily_temperature``,
        ``min_daily_temperature``, ``time_max_daily_temperature``,
        ``time_min_daily_temperature``.
    """
    summary = {
        'participant_id': participant_id,
        'room': room,
        'tib_cluster_number': cluster_number,
        'date': date,
        'daily_start': np.nan,
        # Placeholder text: the season lookup is not wired in here.
        'metereological_season': 'season',
        'mean_daily_temperature': np.nan,
        'max_daily_temperature': np.nan,
        'min_daily_temperature': np.nan,
        'time_max_daily_temperature': np.nan,
        'time_min_daily_temperature': np.nan,
    }

    if len(daily_data) == 24 * 60:
        stamps = daily_data['datetime']
        # Renumber so idxmax/idxmin return positions. The timestamps can then
        # be fetched with .iloc whatever index daily_data arrived with.
        values = daily_data['value'].reset_index(drop=True)
        summary.update({
            'daily_start': stamps.iloc[0],
            'mean_daily_temperature': values.mean(),
            'max_daily_temperature': values.max(),
            'min_daily_temperature': values.min(),
            # First occurrence wins when the extreme is reached more than once.
            'time_max_daily_temperature': stamps.iloc[values.idxmax()],
            'time_min_daily_temperature': stamps.iloc[values.idxmin()],
        })

    return summary


def compute_ntib_averages(ntib_data, time_in_bed_period):
    """Mean reading over the time-in-bed window, or NaN if the window is incomplete.

    Parameters
    ----------
    ntib_data : pandas.DataFrame
        Minute-level readings with a ``value`` column.
    time_in_bed_period : float
        Expected window length. It is compared against ``len(ntib_data) / 60``,
        i.e. it is treated as a number of hours at one row per minute.

    Returns
    -------
    dict
        ``{'tib_mean_temperature': mean}`` when the row count matches the
        expected length, otherwise ``{'tib_mean_temperature': nan}``.
    """
    hours_covered = len(ntib_data) / 60
    # Compare with a tiny absolute tolerance instead of ``==``: a period that
    # was derived through float arithmetic may differ from len/60 in the last
    # bit. Missing periods (NaN/None) never match.
    window_complete = pd.notna(time_in_bed_period) and bool(
        np.isclose(hours_covered, time_in_bed_period, rtol=0, atol=1e-9)
    )
    if window_complete:
        return {'tib_mean_temperature': ntib_data['value'].mean()}
    return {'tib_mean_temperature': np.nan}

    
def compute_before_ntib_averages(data):
    """Mean of ``data['value']`` for the window preceding time in bed.

    The mean is only reported when ``data`` holds exactly 180 rows; any other
    row count yields NaN. The result is a one-entry dict keyed
    ``'before_tib_mean_temperature'``.
    """
    has_full_window = len(data) == 180
    average = data['value'].mean() if has_full_window else np.nan
    return {'before_tib_mean_temperature': average}


def compute_after_ntib_averages(data):
    """Mean of ``data['value']`` for the window following time in bed.

    The mean is only reported when ``data`` holds exactly 180 rows; any other
    row count yields NaN. The result is a one-entry dict keyed
    ``'after_tib_mean_temperature'``.
    """
    has_full_window = len(data) == 180
    average = data['value'].mean() if has_full_window else np.nan
    return {'after_tib_mean_temperature': average}


def compute_temp_stats(daily_averages, ntib_averages,bf_ntib_averages, af_ntib_averages):
    """Combine the four partial result dicts into a single pandas Series.

    Entries are laid out in argument order. If a key occurs in more than one
    dict, the value from the later argument is kept at the position where the
    key first appeared.
    """
    combined = {}
    for partial in (daily_averages, ntib_averages, bf_ntib_averages, af_ntib_averages):
        combined.update(partial)
    return pd.Series(combined)

In [None]:
def start_temp_table():
    """Return an empty results table holding the fourteen output columns in order."""
    identity_columns = [
        'participant_id', 'room', 'tib_cluster_number',
        'date', 'daily_start', 'metereological_season',
    ]
    daily_columns = [
        'mean_daily_temperature', 'max_daily_temperature', 'min_daily_temperature',
        'time_max_daily_temperature', 'time_min_daily_temperature',
    ]
    in_bed_columns = [
        'tib_mean_temperature',
        'before_tib_mean_temperature', 'after_tib_mean_temperature',
    ]
    return pd.DataFrame(columns=identity_columns + daily_columns + in_bed_columns)
    
def update_temp_table(home_number, all_df, participant, sleep_info, stats_df):
    """Append one participant's per-night temperature statistics to ``stats_df``.

    For each room in a fixed list, and for each night recorded for the
    participant, four windows of the room's minute-level readings are
    summarised:

    * the 24 h starting at 07:00 Europe/London on the night's ``date``;
    * the time in bed, from ``tib_onset`` to one minute before ``tib_offset``;
    * the 180 minutes ending at ``tib_onset`` (inclusive);
    * the 180 minutes starting at ``tib_offset`` (inclusive).

    Parameters
    ----------
    home_number
        Not used by this function; kept so existing calls keep working.
    all_df : pandas.DataFrame
        Raw readings, passed on to ``prepare_for_stats``.
    participant
        Identifier matched against ``sleep_info['participant_id']`` and
        ``all_df['ID']``.
    sleep_info : pandas.DataFrame
        Nights with ``participant_id``, ``date``, ``tib_onset``,
        ``tib_offset``, ``time_in_bed_period`` and ``cluster_number`` columns.
    stats_df : pandas.DataFrame
        Table collected so far; it is not modified.

    Returns
    -------
    pandas.DataFrame
        ``stats_df`` followed by one row per (room, night), renumbered
        0..n-1. ``stats_df`` itself is returned when there is nothing to add.
    """
    # Work on a private copy: the helper columns below must not be written to
    # a slice of the caller's frame.
    sleep = sleep_info[sleep_info['participant_id'] == participant].copy()

    london = pytz.timezone('Europe/London')
    one_minute = pd.Timedelta(minutes=1)
    three_hours = pd.Timedelta(hours=3)

    sleep['day_start'] = pd.to_datetime(sleep['date']).dt.tz_localize(london) + pd.Timedelta(hours=7)
    sleep['day_end'] = sleep['day_start'] + pd.Timedelta(hours=24) - one_minute
    sleep['before_onset'] = sleep['tib_onset'] - three_hours + one_minute
    sleep['after_offset'] = sleep['tib_offset'] + three_hours - one_minute

    locations = ['Bedroom', 'Lounge', 'Kitchen', 'Hallway', 'Bathroom']
    # One-row frames are collected here and concatenated once at the end;
    # growing the table with a concat per row copies it on every iteration.
    row_frames = []
    for room in locations:
        room_has_data = ((all_df['location_name'] == room) & (all_df['ID'] == participant)).any()
        if not room_has_data:
            print('not environmental info at all for ', participant, room)
            continue

        new_df = prepare_for_stats(all_df, room, participant)
        stamps = new_df['datetime']

        for i in range(len(sleep)):
            night = sleep.iloc[i]  # fetch the row once per night

            daily_data = new_df.loc[stamps.between(night['day_start'], night['day_end'])].reset_index(drop=True)
            daily_averages = start_row(participant, room, daily_data, night['date'], night['cluster_number'])

            ntib_data = new_df.loc[stamps.between(night['tib_onset'], night['tib_offset'] - one_minute)].reset_index(drop=True)
            ntib_avg = compute_ntib_averages(ntib_data, night['time_in_bed_period'])

            before_ntib_data = new_df.loc[stamps.between(night['before_onset'], night['tib_onset'])].reset_index(drop=True)
            before_ntib_avg = compute_before_ntib_averages(before_ntib_data)

            after_ntib_data = new_df.loc[stamps.between(night['tib_offset'], night['after_offset'])].reset_index(drop=True)
            after_ntib_avg = compute_after_ntib_averages(after_ntib_data)

            new_row = compute_temp_stats(daily_averages, ntib_avg, before_ntib_avg, after_ntib_avg)
            row_frames.append(pd.DataFrame([new_row], columns=new_row.index))

    if not row_frames:
        return stats_df

    return pd.concat([stats_df, *row_frames]).reset_index(drop=True)

In [None]:
# Build the temperature table one participant at a time. The parquet file is
# rewritten after every participant (presumably as a checkpoint for this long
# run - confirm), so it always holds the participants processed so far.
table = start_temp_table()
for num_participant, participant in enumerate(ids):
    print('current length table:', len(table))
    print('.....................now doing participant: ', num_participant, participant)
    table = update_temp_table(num_participant, temp, participant, sleep_df, table)
    df = pa.Table.from_pandas(table)
    pq.write_table(df, 'temperature_metrics_bedroom_using_cluster.parquet', compression='BROTLI')

table

Unnamed: 0,participant_id,room,tib_cluster_number,date,daily_start,metereological_season,mean_daily_temperature,max_daily_temperature,min_daily_temperature,time_max_daily_temperature,time_min_daily_temperature,tib_mean_temperature,before_tib_mean_temperature,after_tib_mean_temperature
0,Mhy2u,Bedroom,0,2019-03-31,,season,,,,,,,,
1,Mhy2u,Bedroom,1,2019-04-01,,season,,,,,,,,
2,Mhy2u,Bedroom,2,2019-04-02,,season,,,,,,,,
3,Mhy2u,Bedroom,3,2019-04-03,,season,,,,,,,,
4,Mhy2u,Bedroom,5,2019-04-04,,season,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239398,QbsYB,Bathroom,35,2023-06-15,2023-06-15 07:00:00+01:00,season,27.311528,28.1,26.3,2023-06-15 15:52:00+01:00,2023-06-15 07:00:00+01:00,26.964706,27.710000,26.807778
239399,QbsYB,Bathroom,36,2023-06-16,2023-06-16 07:00:00+01:00,season,27.405833,28.3,26.6,2023-06-16 20:15:00+01:00,2023-06-17 06:01:00+01:00,26.926503,27.696111,26.675556
239400,QbsYB,Bathroom,37,2023-06-17,2023-06-17 07:00:00+01:00,season,27.279722,27.9,26.6,2023-06-17 17:33:00+01:00,2023-06-17 07:00:00+01:00,27.037321,27.575000,26.872222
239401,QbsYB,Bathroom,38,2023-06-18,2023-06-18 07:00:00+01:00,season,27.268056,28.1,26.3,2023-06-18 13:55:00+01:00,2023-06-19 05:56:00+01:00,26.611737,27.267778,26.376667


In [None]:
# Number of distinct participants present in the output table.
len(table['participant_id'].unique())

In [None]:
# Percentage of output rows that contain at least one missing value.
len(table[table.isna().any(axis=1)])*100/len(table)