In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from matplotlib.gridspec import GridSpec
from collections import Counter

# import pyarrow as pa
# import pyarrow.parquet as pq

import seaborn as sns
# Limit how many DataFrame columns pandas renders in cell output.
pd.options.display.max_columns = 12

In [None]:
# Read the metrics CSV and show its column names as a plain list.
# Original author's note kept for reference: '2largest_tib__during_short_ntib'
metrics_csv_path = '70_participants_metrics_06_FEB_2024.csv'
metrics = pd.read_csv(metrics_csv_path)
list(metrics.columns)

In [None]:
# Preview: parse the 'time_half_light_bathroom' column as datetimes and show the first 3 values.
# NOTE(review): the conversion loop in the following cell drops this column, so this
# preview raises KeyError if it is re-run after that cell.
pd.to_datetime(metrics['time_half_light_bathroom']).head(3)

In [None]:
# Time-of-day columns. Each one is replaced by a float '<col>_numeric' column holding
# the same time as decimal hours (hour + minute / 60, e.g. 13:30 -> 13.5).
# NOTE(review): 'time_min_daily_temperature_bathroom' is not listed although the other
# four rooms are — confirm whether that column exists in the CSV.
time_of_day_cols = [
    'time_half_light_bathroom', 'time_half_light_bedroom', 'time_half_light_hallway',
    'time_half_light_kitchen', 'time_half_light_lounge',
    'time_half_loglight_bathroom', 'time_half_loglight_bedroom', 'time_half_loglight_hallway',
    'time_half_loglight_kitchen', 'time_half_loglight_lounge',
    'time_max_daily_temperature_bathroom', 'time_max_daily_temperature_bedroom',
    'time_max_daily_temperature_hallway', 'time_max_daily_temperature_kitchen',
    'time_max_daily_temperature_lounge',
    'time_min_daily_temperature_bedroom', 'time_min_daily_temperature_hallway',
    'time_min_daily_temperature_kitchen', 'time_min_daily_temperature_lounge',
]
for col in time_of_day_cols:
    # errors='coerce' turns missing or unparseable entries into NaT.
    parsed = pd.to_datetime(metrics[col], errors='coerce')
    # The .dt accessors return NaN for NaT, so missing times stay NaN. This replaces a
    # per-element lambda whose isinstance(x, float) guard never matched, because the
    # coerced values are NaT rather than float NaN.
    metrics[col + '_numeric'] = parsed.dt.hour + parsed.dt.minute / 60
    # The original datetime column is not kept; only the numeric version remains.
    metrics = metrics.drop(columns=[col])

In [None]:
# Preview the first 3 values of the decimal-hour column created by the conversion loop.
metrics['time_half_light_bathroom_numeric'].head(3)

In [None]:
# Remove the listed columns, printing the column count before and after the drop.
print(metrics.shape[1])
columns_to_drop = [
    'tib_onset',
    'tib_offset',
    'midpoint_sleep',
    'sleep_onset',
    'sleep_offset',
    'largest_tib_during_nap',
    'largest_tib_during_nap_onset',
    'largest_sleep_during_nap_onset',
    '2largest_tib__during_nap_onset',
    '2largest_sleep_during_nap_onset',
    'largest_tib_during_short_ntib_onset',
    'largest_sleep_during_short_ntib_onset',
    '2largest_tib__during_short_ntib_onset',
    '2largest_sleep_during_short_ntib_onset',
    'in_bed_bouts',
    'out_bed_bouts',
    'sleep_bouts',
    'wake_bouts',
    'new_id',
    'Z-Scores_ntib',
    'Unnamed: 0',
]
metrics = metrics.drop(columns_to_drop, axis='columns')
print(metrics.shape[1])

In [None]:
# Work on an independent copy: plain assignment would only alias `metrics`, so the
# column assignments made on `whole_df` in later cells would also modify `metrics`.
whole_df = metrics.copy()

In [None]:
# Sanity check: print each observed column mean next to a hard-coded reference number.
# NOTE(review): 8.91, 3.82 and 195.12 are presumably values from an earlier run or
# report — confirm their source.
mean_time_in_bed = whole_df['total_time_in_bed'].mean()
mean_midpoint_sleep = whole_df['midpoint_sleep_numeric'].mean()
print(mean_time_in_bed, 8.91, mean_midpoint_sleep, 3.82)
mean_bedroom_light = whole_df['mean_daily_light_bedroom'].mean()
print(mean_bedroom_light, 195.12)

In [None]:
# Replace missing medians with 0.0 in both columns.
# fillna() is the direct way to do this; the earlier replace(np.nan, 0.0, regex=True)
# passed a regex flag that has no meaning for a NaN target.
# NOTE(review): presumably a missing median means no bed exits / wake bouts were
# recorded for that row — confirm that 0.0 is the intended value.
for median_col in ['median_duration_of_bed_exits', 'median_wake_bouts']:
    whole_df[median_col] = whole_df[median_col].fillna(0.0)

In [None]:
# List every 31-character column-name prefix shared by more than one column,
# together with how many columns share it.
prefix_counts_31 = Counter(whole_df.columns.str[:31].to_list())
[(prefix, n_cols) for prefix, n_cols in prefix_counts_31.items() if n_cols > 1]

In [None]:
# Shorten long column names. Keys that are not present in the frame are ignored by
# rename() (its default errors='ignore').
shortened_names = {
    'after_ntib_geometric_mean_light_bathroom': 'after_ntib_geomean_light_bathroom',
    'after_ntib_geometric_mean_light_bedroom': 'after_ntib_geomean_light_bedroom',
    'after_ntib_geometric_mean_light_hallway': 'after_ntib_geomean_light_hallway',
    'after_ntib_geometric_mean_light_kitchen': 'after_ntib_geomean_light_kitchen',
    'after_ntib_geometric_mean_light_lounge': 'after_ntib_geomean_light_lounge',
    'before_ntib_geometric_mean_light_bathroom': 'before_ntib_geomean_light_bathroom',
    'before_ntib_geometric_mean_light_bedroom': 'before_ntib_geomean_light_bedroom',
    'before_ntib_geometric_mean_light_hallway': 'before_ntib_geomean_light_hallway',
    'before_ntib_geometric_mean_light_kitchen': 'before_ntib_geomean_light_kitchen',
    'before_ntib_geometric_mean_light_lounge': 'before_ntib_geomean_light_lounge',
    'largest_sleep_during_short_ntib_onset_numeric': 'lgt_sl_dur_shntib_onsetnum',
    '2largest_sleep_during_short_ntib_onset_numeric': '2lgt_sl_dur_shntib_onsetnum',
}
whole_df = whole_df.rename(shortened_names, axis='columns')

In [None]:
# List every 37-character column-name prefix shared by more than one column,
# together with how many columns share it.
prefix_counts_37 = Counter(whole_df.columns.str[:37].to_list())
[(prefix, n_cols) for prefix, n_cols in prefix_counts_37.items() if n_cols > 1]

In [None]:
# Read the demographics spreadsheet, then print the name and NaN count of every
# column that has at least one missing value. Missing values are only reported here;
# nothing is replaced.
demographics = pd.read_excel('table_demographics_24_JAN_2024.xlsx')
missing_per_column = demographics.isna().sum()
for col_name, n_missing in missing_per_column[missing_per_column > 0].items():
    print(col_name, n_missing)

In [None]:
# Show the column index of the demographics table (the merge in the next cell selects from it).
demographics.columns

In [None]:
# Demographic fields attached to every metrics row. Names are used exactly as they
# are spelled in the spreadsheet (including 'age_ate_start' and 'period_of collection').
demographic_cols = [
    'participant_id', 'birth_year', 'gender', 'diagnosis',
    'alzheimer_diagnosis', 'baseline_mmse',
    'baseline_npi_total', 'baseline_npi_sleep',
    'date_final_npi', 'final_npi_total', 'final_npi_sleep',
    'age_ate_start', 'days_with_metrics', 'period_of collection',
]

# The left join keeps every metrics row. validate='many_to_one' makes pandas raise a
# MergeError if a participant_id occurs more than once in `demographics`, which would
# otherwise silently duplicate metrics rows.
whole_df = whole_df.merge(
    demographics[demographic_cols],
    on=['participant_id'],
    how='left',
    validate='many_to_one',
)

# Ages are calendar-year differences (year of the date column minus birth_year), so
# they ignore month and day.
whole_df['age_at_metrics_date'] = pd.to_datetime(whole_df['date']).dt.year - whole_df['birth_year']
whole_df['initial_age'] = pd.to_datetime(whole_df['min_date']).dt.year - whole_df['birth_year']

whole_df[['age_at_metrics_date','initial_age']]

In [None]:
# Read the clinical-vs-research diagnosis comparison sheet.
new_diagnoses = pd.read_excel('clinical_data/comparison_between_clinical_research_diagnosis_13_FEB_2024.xlsx')

# Diagnosis fields to attach to every row, keyed on participant_id.
diagnosis_cols = ['participant_id', 'Research Diagnosis', 'Analysis Diagnosis',
                  'research_alzheimer_diagnosis', 'analysis_alzheimer_diagnosis']

# The left join keeps every existing row. validate='many_to_one' raises a MergeError
# if a participant_id is repeated in `new_diagnoses`, instead of silently duplicating rows.
whole_df = whole_df.merge(
    new_diagnoses[diagnosis_cols],
    on=['participant_id'],
    how='left',
    validate='many_to_one',
)

# Show the demographics-sheet 'alzheimer_diagnosis' next to the newly merged columns.
whole_df[['participant_id','Research Diagnosis', 'Analysis Diagnosis','alzheimer_diagnosis',
          'research_alzheimer_diagnosis','analysis_alzheimer_diagnosis']]

In [None]:
# Number of distinct participant_id values. dropna=False counts NaN as a value, which
# matches len(Series.unique()). It is printed explicitly because a bare expression
# that is not the last line of a cell displays nothing.
print(whole_df['participant_id'].nunique(dropna=False))

# One summary of dtype and non-null count for every column. Looping over columns with
# print(col, series.info()) emitted a stray "None" per column, because info() prints
# its report itself and returns None.
whole_df.info(verbose=True, show_counts=True)

In [None]:
# Write the merged table to CSV without the index column.
# NOTE(review): the original trailing note "26523" is presumably the row count at the
# time of export — confirm.
analysis_csv_path = 'analysis_table_with_demographics_13_FEB_2024.csv'
whole_df.to_csv(analysis_csv_path, index=False)

In [None]:
# Final look at the merged table: its shape plus the first rows, instead of rendering
# the whole frame.
print(whole_df.shape)
whole_df.head()