# Stats from health events

Copyright 2023 Maria Lima  
(mr3418@ic.ac.uk)

Last updated: 10/06/2023

**Aim:** investigate whether households with PLWD continued using Alexa during the week following the occurrence of health events. 
- We used the dates of individual health events (e.g., falls, infections, hospitalisations) logged by a monitoring team in regular contact with participants. 
- We considered user-initiated triggers of both the questionnaire and random Alexa interactions, specifically in
the 7 days after the occurrence of a health event. 
- 38 health events were evaluated across the cohort

### Dependencies

In [1]:
import pickle
import numpy as np
import pandas as pd
import pylab as plt
import ipynb
import gzip
import datetime
import pingouin as pg
from datetime import timedelta
from scipy.stats import wilcoxon
from statsmodels.stats.multitest import multipletests
from utils import filter_data_events as eve

### Load and process data

In [2]:
PATH = './datasets/'
df = pickle.load(open(PATH + 'df_alexa.pkl', 'rb'))
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 8491 entries, 0 to 831
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   patient_id  8491 non-null   object        
 1   timeframe   8491 non-null   datetime64[ns]
 2   int_type    8491 non-null   object        
 3   date        8491 non-null   object        
dtypes: datetime64[ns](1), object(3)
memory usage: 331.7+ KB


##### 1) Define health events which can be used for analysis per participant

In [3]:
# all events
health_events_p1 = [(datetime.date(2021, 9, 28), datetime.date(2021, 9, 28)),
                (datetime.date(2021, 10, 14), datetime.date(2021, 10, 14))]

health_events_p2 = [(datetime.date(2021, 7, 12), datetime.date(2021, 7, 18)), # HOSP
                 (datetime.date(2021, 9, 1), datetime.date(2021, 9, 1)), 
                 (datetime.date(2021, 9, 18), datetime.date(2021, 9, 18)), 
                 (datetime.date(2021, 11, 21), datetime.date(2021, 11, 21)),
                 (datetime.date(2021, 12, 30), datetime.date(2021, 12, 30)),
                 (datetime.date(2022, 2, 3), datetime.date(2022, 2, 23)), # HOSP
                 (datetime.date(2022, 3, 6), datetime.date(2022, 3, 10)),
                 (datetime.date(2022, 3, 10), datetime.date(2022, 3, 31)), # HOSP
                 (datetime.date(2022, 3, 15), datetime.date(2022, 3, 15)) 
                 ] # HOSP

health_events_p3 = [(datetime.date(2021, 8, 17), datetime.date(2021, 9, 13)),
                (datetime.date(2021, 8, 24), datetime.date(2021, 8, 26)), # HOSP
                (datetime.date(2022, 3, 6), datetime.date(2022, 3, 8))] # HOSP

health_events_p6 = [(datetime.date(2021, 11, 6), datetime.date(2021, 11, 13))]


health_events_p7 = [(datetime.date(2021, 9, 9), datetime.date(2021, 9, 9)),
                   (datetime.date(2021, 9, 21), datetime.date(2021, 9, 30)),
                    (datetime.date(2022, 3, 19), datetime.date(2022, 3, 19)),
                    (datetime.date(2022, 4, 5), datetime.date(2022, 4, 5))]
# (datetime.date(2021, 9, 9), datetime.date(2021, 9, 9)), wont consider this one 
# because event happened 1 day after Alexa usage, so there are no 7 days to look at 'before'

health_events_p9 = [(datetime.date(2021, 11, 6), datetime.date(2021, 11, 6)),
                    (datetime.date(2021, 12, 8), datetime.date(2021, 12, 8))
                    ]

health_events_p10 = [(datetime.date(2021, 11, 28), datetime.date(2021, 11, 28)), # HOSP
                    (datetime.date(2021, 12, 7), datetime.date(2021, 12, 7)) # HOSP
                    ]

health_events_p11 = [(datetime.date(2021, 9, 27), datetime.date(2021, 9, 27))
                    ]

health_events_p12 = [
                (datetime.date(2021, 12, 7), datetime.date(2021, 12, 7)),
                (datetime.date(2022, 2, 27), datetime.date(2022, 2, 27)),
                (datetime.date(2022, 3, 24), datetime.date(2022, 3, 24)),
                (datetime.date(2022, 4, 28), datetime.date(2022, 4, 28))]

health_events_p13 = [(datetime.date(2021, 10, 29), datetime.date(2021, 11, 5)),
                (datetime.date(2021, 11, 30), datetime.date(2021, 12, 10)),
                (datetime.date(2021, 12, 29), datetime.date(2021, 12, 29)),
                (datetime.date(2022, 1, 18), datetime.date(2022, 1, 31)),
                (datetime.date(2022, 2, 11), datetime.date(2022, 2, 18)),
                (datetime.date(2022, 2, 22), datetime.date(2022, 3, 8)),
                (datetime.date(2022, 3, 12), datetime.date(2022, 3, 29)),
                (datetime.date(2022, 3, 29), datetime.date(2022, 4, 12)),
                (datetime.date(2022, 5, 10), datetime.date(2022, 5, 10)),
                (datetime.date(2022, 5, 31), datetime.date(2022, 5, 31))]
len(health_events_p1) + len(health_events_p2) + len(health_events_p3) + len(health_events_p6) +\
     len(health_events_p7) + len(health_events_p9)+ len(health_events_p10) +len(health_events_p11) +\
        len(health_events_p12) +len(health_events_p13) 

# hospitalizations only across cohort
health_events_p2_hosp = [(datetime.date(2021, 7, 12), datetime.date(2021, 7, 18)), # HOSP
                (datetime.date(2022, 2, 3), datetime.date(2022, 2, 23)), # HOSP
                (datetime.date(2022, 3, 10), datetime.date(2022, 3, 31))] # HOSP

health_events_p3_hosp = [(datetime.date(2021, 8, 24), datetime.date(2021, 8, 26)), # HOSP
                (datetime.date(2022, 3, 6), datetime.date(2022, 3, 8))] # HOSP

health_events_p10_hosp = [(datetime.date(2021, 11, 28), datetime.date(2021, 11, 28)), # HOSP
                    (datetime.date(2021, 12, 7), datetime.date(2021, 12, 7))] # HOSP


# other events (not hospitalization) across cohort
health_events_p1_NO = [(datetime.date(2021, 9, 28), datetime.date(2021, 9, 28)),
                (datetime.date(2021, 10, 14), datetime.date(2021, 10, 14))]

health_events_p2_NO = [
                 (datetime.date(2021, 9, 1), datetime.date(2021, 9, 1)), 
                 (datetime.date(2021, 9, 18), datetime.date(2021, 9, 18)), 
                 (datetime.date(2021, 11, 21), datetime.date(2021, 11, 21)),
                 (datetime.date(2021, 12, 30), datetime.date(2021, 12, 30)),
                 (datetime.date(2022, 3, 6), datetime.date(2022, 3, 10)),
                 (datetime.date(2022, 3, 15), datetime.date(2022, 3, 15))
                ]


health_events_p3_NO = [(datetime.date(2021, 8, 17), datetime.date(2021, 9, 13))]

health_events_p6_NO = [(datetime.date(2021, 11, 6), datetime.date(2021, 11, 13))]

health_events_p7_NO = [
                    (datetime.date(2021, 9, 9), datetime.date(2021, 9, 9)),
                   (datetime.date(2021, 9, 21), datetime.date(2021, 9, 30)),
                    (datetime.date(2022, 3, 19), datetime.date(2022, 3, 19)),
                    (datetime.date(2022, 4, 5), datetime.date(2022, 4, 5))]

health_events_p9_NO = [(datetime.date(2021, 11, 6), datetime.date(2021, 11, 6)),
                    (datetime.date(2021, 12, 8), datetime.date(2021, 12, 8))
                    ]

health_events_p11_NO = [(datetime.date(2021, 9, 27), datetime.date(2021, 9, 27))
                    ]

health_events_p12_NO = [
                (datetime.date(2021, 12, 7), datetime.date(2021, 12, 7)),
                (datetime.date(2022, 2, 27), datetime.date(2022, 2, 27)),
                (datetime.date(2022, 3, 24), datetime.date(2022, 3, 24)),
                (datetime.date(2022, 4, 28), datetime.date(2022, 4, 28))]

health_events_p13_NO = [(datetime.date(2021, 10, 29), datetime.date(2021, 11, 5)),
                (datetime.date(2021, 11, 30), datetime.date(2021, 12, 10)),
                (datetime.date(2021, 12, 29), datetime.date(2021, 12, 29)),
                (datetime.date(2022, 1, 18), datetime.date(2022, 1, 31)),
                (datetime.date(2022, 2, 11), datetime.date(2022, 2, 18)),
                (datetime.date(2022, 2, 22), datetime.date(2022, 3, 8)),
                (datetime.date(2022, 3, 12), datetime.date(2022, 3, 29)),
                (datetime.date(2022, 3, 29), datetime.date(2022, 4, 12)),
                (datetime.date(2022, 5, 10), datetime.date(2022, 5, 10)),
                (datetime.date(2022, 5, 31), datetime.date(2022, 5, 31))]

##### 2) Stats with all health events

In [4]:
p1, d1 = eve.seven_days(df, 'P1', health_events_p1)
p2, d2 = eve.seven_days(df, 'P2', health_events_p2)
p3, d3 = eve.seven_days(df, 'P3', health_events_p3)
p6, d6 = eve.seven_days(df, 'P6', health_events_p6)
p7, d7 = eve.seven_days(df, 'P7', health_events_p7)
p9, d9 = eve.seven_days(df, 'P9', health_events_p9)
p10, d10 = eve.seven_days(df, 'P10', health_events_p10)
p11, d11 = eve.seven_days(df, 'P11', health_events_p11)
p12, d12 = eve.seven_days(df, 'P12', health_events_p12)
p13, d13 = eve.seven_days(df, 'P13', health_events_p13)
all_intervals = p1 + p2 + p3 + p6 + p7 + p9 + p10 + p11 + p12 + p13
diff = d1 + d2 + d3+ d6 + d7 + d9 + d10 + d11 + d12 + d13

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after['health_event'] = 'after'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after['health_event'] = 'after'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  before['health_event'] = 'before'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] 

In [5]:
bef_all, aft_all = eve.get_bef_aft_arrays(all_intervals)
stat_all, p_all = wilcoxon(diff, alternative='greater')
wilcoxon(aft_all, alternative='greater')

WilcoxonResult(statistic=561.0, pvalue=2.687399847392446e-07)

In [6]:
x = np.array(bef_all)
y = np.array(aft_all)
xx = np.array(diff)
pg.wilcoxon(y, alternative='greater')

Unnamed: 0,W-val,alternative,p-val,RBC,CLES
Wilcoxon,561.0,greater,2.815059e-07,1.0,


##### 3) Stats with all events but hospitalizations

In [7]:
p1_NO, d1_NO = eve.seven_days(df, 'P1', health_events_p1_NO)
p2_NO, d2_NO = eve.seven_days(df, 'P2', health_events_p2_NO)
p3_NO, d3_NO = eve.seven_days(df, 'P3', health_events_p3_NO)
p6_NO, d6_NO = eve.seven_days(df, 'P6', health_events_p6_NO)
p7_NO, d7_NO = eve.seven_days(df, 'P7', health_events_p7_NO)
p9_NO, d9_NO = eve.seven_days(df, 'P9', health_events_p9_NO)
p11_NO, d11_NO = eve.seven_days(df, 'P11', health_events_p11_NO)
p12_NO, d12_NO = eve.seven_days(df, 'P12', health_events_p12_NO)
p13_NO, d13_NO = eve.seven_days(df, 'P13', health_events_p13_NO)
all_intervals_NO = p1_NO + p2_NO + p3_NO + p6_NO + p7_NO + p9_NO + p11_NO + p12_NO + p13_NO
diff_NO = d1_NO + d2_NO + d3_NO + d6_NO + d7_NO + d9_NO + d11_NO + d12_NO + d13_NO

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after['health_event'] = 'after'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after['health_event'] = 'after'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  before['health_event'] = 'before'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] 

In [8]:
bef_NO, aft_NO = eve.get_bef_aft_arrays(all_intervals_NO)
stat_NO, p_NO = wilcoxon(aft_NO, alternative='greater')
wilcoxon(aft_NO, alternative='greater')

WilcoxonResult(statistic=435.0, pvalue=1.277232687837986e-06)

In [9]:
pg.wilcoxon(y, alternative='greater')

Unnamed: 0,W-val,alternative,p-val,RBC,CLES
Wilcoxon,561.0,greater,2.815059e-07,1.0,


##### 4) Stats with hospitalizations events

In [10]:
p2_hosp, d2_hosp = eve.seven_days(df, 'P2', health_events_p2_hosp)
p3_hosp, d3_hosp = eve.seven_days(df, 'P3', health_events_p3_hosp)
p10_hosp, d10_hosp = eve.seven_days(df, 'P10', health_events_p10_hosp)
all_intervals_hosp = p2_hosp + p3_hosp + p10_hosp
diff_hosp = d2_hosp + d3_hosp + d10_hosp

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  before['health_event'] = 'before'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after['health_event'] = 'after'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  after['health_event'] = 'after'
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] 

In [11]:
bef_WITH, aft_WITH = eve.get_bef_aft_arrays(all_intervals_hosp)
stat_WITH, p_WITH = wilcoxon(aft_WITH, alternative='greater')
wilcoxon(aft_WITH, alternative='greater')



WilcoxonResult(statistic=10.0, pvalue=0.033944577430914495)

In [12]:
x = np.array(bef_WITH)
y = np.array(aft_WITH)
xx = np.array(diff_hosp)
pg.wilcoxon(y, alternative='greater')

Unnamed: 0,W-val,alternative,p-val,RBC,CLES
Wilcoxon,10.0,greater,0.050174,1.0,


##### 5) Corrected p-values

In [13]:
pvals = [p_all, p_NO, p_WITH]
# as seen. corrrected p-value to use is 0.017
multipletests(pvals, alpha=0.05, method='fdr_bh', is_sorted=False, returnsorted=False)

(array([False,  True, False]),
 array([8.23135840e-01, 3.83169806e-06, 5.09168661e-02]),
 0.016952427508441503,
 0.016666666666666666)