# Run this notebook to examine: 

4. Load in Audiogram, RFTs. Get first RFT for Hearing Instruments, remove any that are newer than 15 Nov 2019. 
5. Load in ADLs. Get diff between ADLs and first RFT. 
6. Get monthly ADLS - first month, month 3, 6, 9 and 12.
8. Resample daily usetime across these time periods.
9. compare usetime by HL severity and time period. 

In [1]:
#import files and libraries to create stats
%config IPCompleter.greedy=True
import os
os.chdir('C:\\Projects\\sws_machine_learning')
os.environ['PYTHON_ENV'] = 'PRODUCTION' 
os.environ['PYTHONPATH'] = 'C:\\Projects\\sws_machine_learning'
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd
import glob
from libs.DatalakeIO.DownloadAndRead import LoadDataFromDisk

In [2]:
from libs.DatalakeIO.DataLakeAccess import DataLakeAccess
from libs.DatalakeIO.DownloadAndRead import LoadDataFromDisk
from pathlib import Path
import pandas as pd
import plotly
import plotly.graph_objs as go
import numpy as np
import pickle

In [3]:
from libs.ADL.v2.ADLFiltering import ADLFiltering
from libs.ADL.v2.ADLProcessing import ADLProcessing
from libs.ADL.v2.libUtils import is_datetime_object
from libs.ADL.v2.libADL import ADL

import numpy as np
import pandas as pd
import warnings

In [4]:
Audiogram = LoadDataFromDisk.load_pickl('C:\\mnt\\stognhwhiterabbitweupoc\\audiogrampickl6Oct.pkl')

Loading: C:\mnt\stognhwhiterabbitweupoc\audiogrampickl6Oct.pkl


In [5]:
# get latest audiogram per hearing instrument
print('Number of audiograms: '+str(len(Audiogram)))
audio_latest = Audiogram.sort_values('CreatedDate').groupby('HearingInstrumentId').last().reset_index()
print('Last audiogram per hearing instrument: '+str(len(audio_latest)))

Number of audiograms: 620249
Last audiogram per hearing instrument: 223582


In [6]:
RFT = LoadDataFromDisk.load_pickl('C:\\mnt\\stognhwhiterabbitweupoc\\LatestRFTSetting.pkl')

Loading: C:\mnt\stognhwhiterabbitweupoc\LatestRFTSetting.pkl


In [7]:
# Get first RFT for each hearing instrument
print('Number of RFTs: '+str(len(RFT)))
rft_first = RFT.sort_values('CreatedDate').groupby('HearingInstrumentId').first().reset_index()
print('First RFT per hearing instrument: '+str(len(rft_first)))



Number of RFTs: 710027
First RFT per hearing instrument: 249911


In [8]:
rft_first[['HearingInstrumentId', 'CreatedDate']].head()

Unnamed: 0,HearingInstrumentId,CreatedDate
0,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,2018-04-05T08:10:44.1956758+00:00
1,00012C6BBD0FF06548E5EB79CC3F4438F2D10AFD9EF420...,2020-06-24T15:35:46.0338482+00:00
2,00013F0B2DEE6F6013F2A6385EC90EFFEB0EE43235C77D...,2019-02-15T00:40:46.0874177+00:00
3,0001544FAF49727176FAA1148E3F6A268930841EFF55DE...,2018-11-20T23:37:17.3905974+00:00
4,00015F1CA3F0D69CD4EA38278CFDF629C26B6865472E45...,2020-06-15T16:41:38.9345797+00:00


In [142]:
import datetime
# change created date to datetime
rft_first['Date'] = pd.to_datetime(rft_first['CreatedDate']).dt.date
# limit to HI's that had a fitting before 15 November 2019
print('before limiting to HIs that were first fit before October 2019: '+str(len(rft_first)))
rft_first_2019 = rft_first[rft_first['Date']<datetime.date(2019,11,10)]
print('after limiting to HIs that were first fit before Nov 2019: '+str(len(rft_first_2019)))

rft_first_2019.sort_values('CreatedDate')[['HearingInstrumentId', 'CreatedDate']].tail()

before limiting to HIs that were first fit before October 2019: 249911
after limiting to HIs that were first fit before Nov 2019: 106368


Unnamed: 0,HearingInstrumentId,CreatedDate
10189,0AA0D080F3FD96E347975EC39C51260A9DEE656CE8DB4B...,2019-11-09T18:11:45.5756503+00:00
172817,B102D4E5605B1E140651484DCC9D7CB3219B803341D199...,2019-11-09T18:11:45.5756503+00:00
115637,768A4CC07D6848E865CF187F42EABAE7E5AFD621E1F4F4...,2019-11-09T19:43:40.7940292+00:00
152674,9C6B6DDA2048B8C517F3601EAA2120B0A6E5A0ECD420FC...,2019-11-09T19:43:40.7940292+00:00
118402,7977B5837F7EC67101A889527BBCA0061B0491AEC49364...,2019-11-09T22:18:28.9708796+00:00


In [143]:
# now merge audiograms with rfts
print('Before Merge')
print('hearing instruments with audiograms: '+str(len(audio_latest)))
print('hearing instruments that were fit before Nov 2019: '+str(len(rft_first_2019)))
df = audio_latest.merge(rft_first_2019, on='HearingInstrumentId')
print('after merge: '+str(len(df)))

Before Merge
hearing instruments with audiograms: 223582
hearing instruments that were fit before Nov 2019: 106368
after merge: 99671


In [144]:
# Load in ADLs
#adl = LoadDataFromDisk.load_pickl('C:\\mnt\\stognhwhiterabbitweupoc\\LatestADL.pkl')

In [145]:
# drop duplicates
print('ADLs before dropping duplicates: '+str(len(adl)))

#adls = adl.sort_values('ADLRawData_CreationTime').drop_duplicates('ADLRawData_ADLRawDataId', keep='last')
print('After dropping duplicates: '+str(len(adls)))

ADLs before dropping duplicates: 16124116
After dropping duplicates: 15183607


In [146]:
adls.keys()

Index(['ADLRawData_ADLRawDataId', 'ADLRawData_HearingInstrumentId',
       'ADLRawData_BleAddress', 'ADLRawData_Side',
       'ADLRawData_FittingSessionId', 'ADLRawData_MobileDeviceId',
       'ADLRawData_CreationTime', 'ADLPackageData_FittedPrograms',
       'ADLPackageData_ProgramEnvironmentIds_0',
       'ADLPackageData_ProgramEnvironmentIds_1',
       ...
       'ADLProperties_MeanVolumeControlLevelPerEnvironmentPreset1_4',
       'ADLProperties_MeanVolumeControlLevelPerEnvironmentPreset1_5',
       'ADLProperties_MeanVolumeControlLevelPerEnvironmentPreset1_6',
       'ADLProperties_Bass3StateHistogram_0',
       'ADLProperties_Bass3StateHistogram_1',
       'ADLProperties_Bass3StateHistogram_2',
       'ADLProperties_Bass3StateHistogram_3',
       'ADLProperties_SatisfyProgressPercentage_0',
       'ADLProperties_SatisfyProgressPercentage_1', 'IsRechargeable'],
      dtype='object', length=168)

In [147]:
# merge with RFTs
print('HIs before merge: '+str(len(df)))
print('All adls before merge: '+str(len(adls)))

data = df.merge(adls, left_on='HearingInstrumentId', right_on='ADLRawData_HearingInstrumentId')
print('ADLs after merge: '+str(len(data)))
print('Per HI: '+str(len(data.groupby('ADLRawData_HearingInstrumentId'))))

HIs before merge: 99671
All adls before merge: 15183607
ADLs after merge: 1957942
Per HI: 80147


In [148]:
# Get days between RFT and ADL package 

data['ADLDate'] = pd.to_datetime(data['ADLRawData_CreationTime']).dt.date
data.head()

Unnamed: 0,HearingInstrumentId,RftAudiogramId,CreatedDate_x,TraditionalLossValue,TraditionalLossLabel,LowFreqLossValue,LowFreqLossLabel,HighFreqLossValue,HighFreqLossLabel,SlopeDegreeValue,...,ADLProperties_MeanVolumeControlLevelPerEnvironmentPreset1_5,ADLProperties_MeanVolumeControlLevelPerEnvironmentPreset1_6,ADLProperties_Bass3StateHistogram_0,ADLProperties_Bass3StateHistogram_1,ADLProperties_Bass3StateHistogram_2,ADLProperties_Bass3StateHistogram_3,ADLProperties_SatisfyProgressPercentage_0,ADLProperties_SatisfyProgressPercentage_1,IsRechargeable,ADLDate
0,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,8A236F6C488C95C864B3EDBEB9F0425874842A9519953D...,2020-09-10T09:07:55.8561605+00:00,46.25,Moderate,17.5,Normal,58.0,Moderately severe,40.5,...,0.066746,0.016814,55.820038,50.15929,0.848557,19.807066,0.0,0.0,False,2018-04-05
1,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,8A236F6C488C95C864B3EDBEB9F0425874842A9519953D...,2020-09-10T09:07:55.8561605+00:00,46.25,Moderate,17.5,Normal,58.0,Moderately severe,40.5,...,-0.006269,-0.013512,137.35232,116.400883,1.887379,46.61785,0.0,0.0,False,2019-01-09
2,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,8A236F6C488C95C864B3EDBEB9F0425874842A9519953D...,2020-09-10T09:07:55.8561605+00:00,46.25,Moderate,17.5,Normal,58.0,Moderately severe,40.5,...,0.276903,0.063115,345.729261,271.595117,4.317779,113.77744,0.0,0.0,False,2020-01-12
3,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,8A236F6C488C95C864B3EDBEB9F0425874842A9519953D...,2020-09-10T09:07:55.8561605+00:00,46.25,Moderate,17.5,Normal,58.0,Moderately severe,40.5,...,0.432372,0.081323,401.818726,328.558138,5.673248,132.633178,0.0,0.0,False,2020-01-22
4,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,8A236F6C488C95C864B3EDBEB9F0425874842A9519953D...,2020-09-10T09:07:55.8561605+00:00,46.25,Moderate,17.5,Normal,58.0,Moderately severe,40.5,...,0.452219,0.085349,446.253382,389.455629,7.318976,146.042042,0.0,0.0,False,2020-01-30


In [149]:
data['Diff'] = data['ADLDate'] - data['Date'] 
data['Diff'] = data['Diff'].dt.days
data.head(20)

Unnamed: 0,HearingInstrumentId,RftAudiogramId,CreatedDate_x,TraditionalLossValue,TraditionalLossLabel,LowFreqLossValue,LowFreqLossLabel,HighFreqLossValue,HighFreqLossLabel,SlopeDegreeValue,...,ADLProperties_MeanVolumeControlLevelPerEnvironmentPreset1_6,ADLProperties_Bass3StateHistogram_0,ADLProperties_Bass3StateHistogram_1,ADLProperties_Bass3StateHistogram_2,ADLProperties_Bass3StateHistogram_3,ADLProperties_SatisfyProgressPercentage_0,ADLProperties_SatisfyProgressPercentage_1,IsRechargeable,ADLDate,Diff
0,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,8A236F6C488C95C864B3EDBEB9F0425874842A9519953D...,2020-09-10T09:07:55.8561605+00:00,46.25,Moderate,17.5,Normal,58.0,Moderately severe,40.5,...,0.016814,55.820038,50.15929,0.848557,19.807066,0.0,0.0,False,2018-04-05,0
1,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,8A236F6C488C95C864B3EDBEB9F0425874842A9519953D...,2020-09-10T09:07:55.8561605+00:00,46.25,Moderate,17.5,Normal,58.0,Moderately severe,40.5,...,-0.013512,137.35232,116.400883,1.887379,46.61785,0.0,0.0,False,2019-01-09,279
2,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,8A236F6C488C95C864B3EDBEB9F0425874842A9519953D...,2020-09-10T09:07:55.8561605+00:00,46.25,Moderate,17.5,Normal,58.0,Moderately severe,40.5,...,0.063115,345.729261,271.595117,4.317779,113.77744,0.0,0.0,False,2020-01-12,647
3,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,8A236F6C488C95C864B3EDBEB9F0425874842A9519953D...,2020-09-10T09:07:55.8561605+00:00,46.25,Moderate,17.5,Normal,58.0,Moderately severe,40.5,...,0.081323,401.818726,328.558138,5.673248,132.633178,0.0,0.0,False,2020-01-22,657
4,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,8A236F6C488C95C864B3EDBEB9F0425874842A9519953D...,2020-09-10T09:07:55.8561605+00:00,46.25,Moderate,17.5,Normal,58.0,Moderately severe,40.5,...,0.085349,446.253382,389.455629,7.318976,146.042042,0.0,0.0,False,2020-01-30,665
5,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,8A236F6C488C95C864B3EDBEB9F0425874842A9519953D...,2020-09-10T09:07:55.8561605+00:00,46.25,Moderate,17.5,Normal,58.0,Moderately severe,40.5,...,0.114604,496.618214,463.405062,8.30919,162.89513,0.0,0.0,False,2020-02-11,677
6,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,8A236F6C488C95C864B3EDBEB9F0425874842A9519953D...,2020-09-10T09:07:55.8561605+00:00,46.25,Moderate,17.5,Normal,58.0,Moderately severe,40.5,...,0.1146,497.169568,463.487002,9.611885,162.965958,0.0,0.0,False,2020-02-19,685
7,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,8A236F6C488C95C864B3EDBEB9F0425874842A9519953D...,2020-09-10T09:07:55.8561605+00:00,46.25,Moderate,17.5,Normal,58.0,Moderately severe,40.5,...,0.115606,518.072397,482.051091,11.686752,167.72121,0.0,0.0,False,2020-02-29,695
8,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,8A236F6C488C95C864B3EDBEB9F0425874842A9519953D...,2020-09-10T09:07:55.8561605+00:00,46.25,Moderate,17.5,Normal,58.0,Moderately severe,40.5,...,0.152616,557.139341,514.057376,12.60336,178.995488,0.0,0.0,False,2020-03-08,703
9,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,8A236F6C488C95C864B3EDBEB9F0425874842A9519953D...,2020-09-10T09:07:55.8561605+00:00,46.25,Moderate,17.5,Normal,58.0,Moderately severe,40.5,...,0.149022,649.073734,561.546003,14.543514,196.441594,0.0,0.0,False,2020-03-22,717


In [150]:
data[['HearingInstrumentId', 'Date', 'ADLDate', 'Diff']]

Unnamed: 0,HearingInstrumentId,Date,ADLDate,Diff
0,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,2018-04-05,2018-04-05,0
1,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,2018-04-05,2019-01-09,279
2,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,2018-04-05,2020-01-12,647
3,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,2018-04-05,2020-01-22,657
4,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,2018-04-05,2020-01-30,665
5,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,2018-04-05,2020-02-11,677
6,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,2018-04-05,2020-02-19,685
7,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,2018-04-05,2020-02-29,695
8,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,2018-04-05,2020-03-08,703
9,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,2018-04-05,2020-03-22,717


In [151]:
# get first week 
print('Initial ADLs and HI count - HIs that were fit with a hearing aid before Nov 2019 & have an audiogram ')
print('ADL entries: '+str(len(data)))
print('ADLs by hearing instruments: '+str(len(data.groupby('HearingInstrumentId'))))

# First month
print('Month 1 output ------------------')
month1 = data[(data['Diff']>=0) & (data['Diff']<30)]
print('ADLs entries: '+str(len(month1)))
print('HIs that sent ADLs: '+str(len(month1.groupby('HearingInstrumentId'))))

# Month 3 
print('Month 3 output ------------------')
month3 = data[(data['Diff']>=90) & (data['Diff']<120)]
print('ADLs entries: '+str(len(month3)))
print('HIs that sent ADLs: '+str(len(month3.groupby('HearingInstrumentId'))))

# Month 6
print('Month 6 output ------------------')
month6 = data[(data['Diff']>=182) & (data['Diff']<213)]
print('ADLs entries: '+str(len(month6)))
print('HIs that sent ADLs: '+str(len(month6.groupby('HearingInstrumentId'))))

# Month 9
print('Month 9 output ------------------')
month9 = data[(data['Diff']>=274) & (data['Diff']<304)]
print('ADLs entries: '+str(len(month9)))
print('HIs that sent ADLs: '+str(len(month9.groupby('HearingInstrumentId'))))


# Month 12
print('Month 12 output --------------------')
month12 = data[(data['Diff']>=364) & (data['Diff']<395)]
print('ADLs entries: '+str(len(month12)))
print('HIs that sent ADLs: '+str(len(month12.groupby('HearingInstrumentId'))))


Initial ADLs and HI count - HIs that were fit with a hearing aid before Nov 2019 & have an audiogram 
ADL entries: 1957942
ADLs by hearing instruments: 80147
Month 1 output ------------------
ADLs entries: 196954
HIs that sent ADLs: 68602
Month 3 output ------------------
ADLs entries: 93079
HIs that sent ADLs: 35824
Month 6 output ------------------
ADLs entries: 77341
HIs that sent ADLs: 30066
Month 9 output ------------------
ADLs entries: 77839
HIs that sent ADLs: 25776
Month 12 output --------------------
ADLs entries: 70627
HIs that sent ADLs: 21072


In [152]:
mth1prep = ADL.use_time_histogram_across_grouped_adls(
                adls=month1,
                hist_bin_size=0.25, # This was originally 24. But that means you are creating one bin for every 24 hours. 
                group_focus=True, inter_freq='1D')

mth3prep = ADL.use_time_histogram_across_grouped_adls(
                adls=month3,
                hist_bin_size=0.25, # This was originally 24. But that means you are creating one bin for every 24 hours. 
                group_focus=True, inter_freq='1D')

mth6prep = ADL.use_time_histogram_across_grouped_adls(
                adls=month6,
                hist_bin_size=0.25, # This was originally 24. But that means you are creating one bin for every 24 hours. 
                group_focus=True, inter_freq='1D')

mth9prep = ADL.use_time_histogram_across_grouped_adls(
                adls=month9,
                hist_bin_size=0.25, # This was originally 24. But that means you are creating one bin for every 24 hours. 
                group_focus=True, inter_freq='1D')

mth12prep = ADL.use_time_histogram_across_grouped_adls(
                adls=month12,
                hist_bin_size=0.25, # This was originally 24. But that means you are creating one bin for every 24 hours. 
                group_focus=True, inter_freq='1D')



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



verbose=True. This is default behavior and increases library transparency. However, for performance focused applications, consider setting verbose=False.

Too few ADLs found for 17148 groups. 17148 ADL(s) (8.7066%) removed.
Reset check [------------------->] 100%
993 ADL(s) (0.5523%) removed.

Reset detected on ADLProperties_TotalUseTime. 993 ADL(s) (0.5523%) removed.

Info: ADLRawData_CreationTime has been converted to datetime. Re-grouping will now be performed and stored in self.adl_grped_df
Making diff. ADL packages for 49672 groups. [------------------->] 100%] 100%verbose=True. This is default behavior and increases library transparency. However, for performance focused applications, consider setting verbose=False.

Too few ADLs found for 10272 groups. 10272 ADL(s) (11.0358%) removed.
Reset check [------------------->] 100%
140 ADL(s) (0.1691%) removed.

Reset detected on ADLProperties_TotalUseTime. 140 ADL(s) (0.1691%) removed.

Info: ADLRawData_CreationTime has been converted t

In [153]:
def maxelements_s(seq):
    maxindex=[]
    for i in seq:
        i = np.asarray(i)
        maxindex.append(np.where(i==i.max())[0])
    return maxindex

# Because delta colums are lists, here is a function to make them not lists. 
def replacelist(hi, column):
    if len(hi[column]) >0:
        return hi[column][0]
    else:
        return np.nan

In [154]:
# Get most common usetime mode
mth1prep['UseTimeMode'] = maxelements_s(mth1prep['counts'])
mth3prep['UseTimeMode'] = maxelements_s(mth3prep['counts'])
mth6prep['UseTimeMode'] = maxelements_s(mth6prep['counts'])
mth9prep['UseTimeMode'] = maxelements_s(mth9prep['counts'])
mth12prep['UseTimeMode'] = maxelements_s(mth12prep['counts'])

# convert from list
mth1prep['UseTimeMode'] = mth1prep.apply(lambda e: replacelist(e, 'UseTimeMode'),axis=1)
mth3prep['UseTimeMode'] = mth3prep.apply(lambda e: replacelist(e, 'UseTimeMode'),axis=1)
mth6prep['UseTimeMode'] = mth6prep.apply(lambda e: replacelist(e, 'UseTimeMode'),axis=1)
mth9prep['UseTimeMode'] = mth9prep.apply(lambda e: replacelist(e, 'UseTimeMode'),axis=1)
mth12prep['UseTimeMode'] = mth12prep.apply(lambda e: replacelist(e, 'UseTimeMode'),axis=1)

# change value into meaningful daily usetime. 
mth1prep['UT_1'] = mth1prep['UseTimeMode']/4
mth3prep['UT_3'] = mth3prep['UseTimeMode']/4
mth6prep['UT_6'] = mth6prep['UseTimeMode']/4
mth9prep['UT_9'] = mth9prep['UseTimeMode']/4
mth12prep['UT_12'] = mth12prep['UseTimeMode']/4

# reset index ready to merge 
mth1reset = mth1prep.reset_index()
mth1reset = mth1reset[['ADLRawData_HearingInstrumentId', 'UseTimeMode', 'UT_1']]

mth3reset = mth3prep.reset_index()
mth3reset = mth3reset[['ADLRawData_HearingInstrumentId', 'UseTimeMode', 'UT_3']]

mth6reset = mth6prep.reset_index()
mth6reset = mth6reset[['ADLRawData_HearingInstrumentId', 'UseTimeMode', 'UT_6']]

mth9reset = mth9prep.reset_index()
mth9reset = mth9reset[['ADLRawData_HearingInstrumentId', 'UseTimeMode', 'UT_9']]

mth12reset = mth12prep.reset_index()
mth12reset = mth12reset[['ADLRawData_HearingInstrumentId', 'UseTimeMode', 'UT_12']]



In [173]:
# merge into one DF
print('HI in mth1: '+str(len(mth1reset)))
print('HI in mth3: '+str(len(mth3reset)))
print('HI in mth6: '+str(len(mth6reset)))
print('HI in mth9: '+str(len(mth9reset)))

df1 = mth1reset.merge(mth3reset, on='ADLRawData_HearingInstrumentId')
print('HI with use time mth1 and mth 3: '+str(len(df1)))

df2 = df1.merge(mth6reset, on='ADLRawData_HearingInstrumentId')
print('HI with use time mth 1, 3 & 6: '+str(len(df2)))

df3 = df2.merge(mth9reset, on='ADLRawData_HearingInstrumentId')
print('HI with use time mth 1, 3, 6 & 9: '+str(len(df3)))

df4 = df3.merge(mth12reset, on='ADLRawData_HearingInstrumentId')
print('HI with use time mth 1, 3, 6, 9 & 12: '+str(len(df4)))

df5 = mth1reset.merge(mth12reset, on='ADLRawData_HearingInstrumentId')
print('HI with usetime month1 and month12: '+str(len(df5)))


HI in mth1: 49374
HI in mth3: 24799
HI in mth6: 20129
HI in mth9: 16814
HI with use time mth1 and mth 3: 21164
HI with use time mth 1, 3 & 6: 12691
HI with use time mth 1, 3, 6 & 9: 8062
HI with use time mth 1, 3, 6, 9 & 12: 4321
HI with usetime month1 and month12: 10196


In [156]:
df4.head(10)

Unnamed: 0,ADLRawData_HearingInstrumentId,UseTimeMode_x,UT_1,UseTimeMode_y,UT_3,UseTimeMode_x.1,UT_6,UseTimeMode_y.1,UT_9,UseTimeMode,UT_12
0,00078CDC958A880B7F89B4C94245C0E079C47218296797...,40,10.0,43,10.75,43,10.75,26,6.5,6,1.5
1,000913F3A22519547F0A488CD5F745AA3801DB179F6927...,58,14.5,56,14.0,62,15.5,61,15.25,60,15.0
2,002346C469EEFC6B8658018B713348E53B1D8190FCB458...,21,5.25,32,8.0,20,5.0,43,10.75,39,9.75
3,0027619CC771F8E9750C84EBC46F19EF056CBF1E8CA705...,62,15.5,60,15.0,62,15.5,61,15.25,62,15.5
4,0036D5FD302CA0A1F141D677513E8A086F0B620FA33665...,53,13.25,61,15.25,57,14.25,50,12.5,51,12.75
5,003EE73C372BCA15781048A09C8503DF9B1F99BB8BA291...,44,11.0,51,12.75,48,12.0,48,12.0,46,11.5
6,005589C1FAE904650065E0863A64D838D1517A8A7F120A...,56,14.0,47,11.75,51,12.75,56,14.0,56,14.0
7,005653093B7C8796C94CFB5605418770C41B2E9AE2DEEE...,55,13.75,57,14.25,54,13.5,48,12.0,43,10.75
8,007EAAE75919B172551FF8A08822DAABF52ACBF165ADCC...,45,11.25,31,7.75,41,10.25,39,9.75,41,10.25
9,00AEF93F590BE610A9E3B406F486D12C554FB5E5BB4756...,66,16.5,64,16.0,65,16.25,63,15.75,50,12.5


In [157]:
#Get last ADL, because now you have calculated use time, you only want to merge in with HL 
data_audio = data[['ADLRawData_HearingInstrumentId', 'ADLDate', 'TraditionalLossLabel']]

df_last = data_audio.groupby('ADLRawData_HearingInstrumentId').last().reset_index()
print('HI with HL and ADLs before extracting usetime information: '+str(len(df_last)))

df_last.head()

HI with HL and ADLs before extracting usetime information: 80147


Unnamed: 0,ADLRawData_HearingInstrumentId,ADLDate,TraditionalLossLabel
0,0000484C61C2C6BB99AE8E549A72BF70AB371B8C6DCDDD...,2020-10-31,Moderate
1,0001544FAF49727176FAA1148E3F6A268930841EFF55DE...,2020-10-05,Slight
2,000190C73F9614EF4E12D94C3793B12524705C537913CD...,2020-10-31,Moderate
3,00026BEBC6AA952E2D18E9418E0695715484DAD04567CD...,2018-04-12,Severe
4,0003790AEA69C20A094CC4DFDFA37BE58ADA727F597F44...,2020-04-24,Moderate


In [174]:
# merge 
print(len(df_last))
print(len(df1))
print(len(df2))
print(len(df3))
print(len(df4))
df1_3 = df_last.merge(df1, on='ADLRawData_HearingInstrumentId')
print(len(df1_3))
df1_6 = df_last.merge(df2, on='ADLRawData_HearingInstrumentId')
print(len(df1_6))
df1_9 = df_last.merge(df3, on='ADLRawData_HearingInstrumentId')
print(len(df1_9))
df1_12 = df_last.merge(df4, on='ADLRawData_HearingInstrumentId')
print(len(df1_12))


80147
21164
12691
8062
4321
21164
12691
8062
4321


In [159]:
df1_12[['UT_1', 'UT_3', 'UT_6', 'UT_9', 'UT_12']].describe()

Unnamed: 0,UT_1,UT_3,UT_6,UT_9,UT_12
count,4321.0,4321.0,4321.0,4321.0,4321.0
mean,11.87555,11.679357,11.475179,11.449086,11.29698
std,3.572011,3.786936,4.019693,4.150923,4.250928
min,0.0,0.0,0.0,0.0,0.0
25%,10.25,9.75,9.25,9.25,9.0
50%,12.75,12.5,12.5,12.5,12.5
75%,14.25,14.25,14.25,14.25,14.25
max,24.0,24.0,24.0,24.0,24.0


In [160]:
# split out data by hearing loss groups
df1_12['TraditionalLossLabel'].value_counts()

Moderate    2036
Slight      1113
Severe       785
Profound     196
Normal       191
Name: TraditionalLossLabel, dtype: int64

In [161]:
normal = df1_12[df1_12['TraditionalLossLabel']=='Normal']
slight = df1_12[df1_12['TraditionalLossLabel']=='Slight']
moderate = df1_12[df1_12['TraditionalLossLabel']=='Moderate']
severe = df1_12[df1_12['TraditionalLossLabel']=='Severe']
profound = df1_12[df1_12['TraditionalLossLabel']=='Profound']

In [162]:
profound[['UT_1', 'UT_3', 'UT_6', 'UT_9', 'UT_12']].describe()

Unnamed: 0,UT_1,UT_3,UT_6,UT_9,UT_12
count,196.0,196.0,196.0,196.0,196.0
mean,12.832908,13.207908,13.247449,13.211735,12.771684
std,3.879245,3.575264,3.469001,3.830604,4.09164
min,0.0,1.0,2.75,0.75,1.0
25%,10.75,11.75,11.5,12.0,11.4375
50%,13.5,13.75,13.75,13.75,13.5
75%,15.0,15.0,15.0,15.25,15.0
max,24.0,24.0,24.0,24.0,24.0


# Histograms 

In [163]:
import plotly.graph_objects as go

import numpy as np
x0 = df1_12['UT_1']
x1 = df1_12['UT_3']
x2 = df1_12['UT_6']
x3 = df1_12['UT_9']
x4 = df1_12['UT_12']

fig = go.Figure()
fig.add_trace(go.Histogram(
    x=x0,
    histnorm='percent',
    name='Month 1', # name used in legend and hover labels
    marker_color='#fdfd96', nbinsx=15,
))
fig.add_trace(go.Histogram(
    x=x1,
    histnorm='percent',
    name='Month 3',
    marker_color='#ffc501', nbinsx=15,
))

fig.add_trace(go.Histogram(
    x=x2,
    histnorm='percent',
    name='Month 6', # name used in legend and hover labels
    marker_color='#ff9801', nbinsx=15,
))
fig.add_trace(go.Histogram(
    x=x3,
    histnorm='percent',
    name='Month 9',
    marker_color='#037d50', nbinsx=15
))
fig.add_trace(go.Histogram(
    x=x4,
    histnorm='percent',
    name='Month 12',
    marker_color='#024b30', nbinsx=15
))


fig.update_layout(
    title_text='Daily Use Time', # title of plot
    xaxis_title_text='Daily use time (hrs)', # xaxis label
    yaxis_title_text='% of hearing instruments ', # yaxis label
    bargap=0.2, # gap between bars of adjacent location coordinates
    bargroupgap=0.1, # gap between bars of the same location coordinates
    template = "plotly_white", # set template to white
)

fig.show()
print('Descriptive Statistics')
df1_12[['UT_1', 'UT_3', 'UT_6', 'UT_9', 'UT_12']].describe()

Descriptive Statistics


Unnamed: 0,UT_1,UT_3,UT_6,UT_9,UT_12
count,4321.0,4321.0,4321.0,4321.0,4321.0
mean,11.87555,11.679357,11.475179,11.449086,11.29698
std,3.572011,3.786936,4.019693,4.150923,4.250928
min,0.0,0.0,0.0,0.0,0.0
25%,10.25,9.75,9.25,9.25,9.0
50%,12.75,12.5,12.5,12.5,12.5
75%,14.25,14.25,14.25,14.25,14.25
max,24.0,24.0,24.0,24.0,24.0


In [175]:
import plotly.graph_objects as go

import numpy as np
x0 = normal['UT_1']
x1 = normal['UT_3']
x2 = normal['UT_6']
x3 = normal['UT_9']
x4 = normal['UT_12']

fig = go.Figure()
fig.add_trace(go.Histogram(
    x=x0,
    histnorm='percent',
    name='Month 1', # name used in legend and hover labels
    marker_color='#f6e2c3', nbinsx=15,
))
fig.add_trace(go.Histogram(
    x=x1,
    histnorm='percent',
    name='Month 3',
    marker_color='#4c616a', nbinsx=15,
))

fig.add_trace(go.Histogram(
    x=x2,
    histnorm='percent',
    name='Month 6', # name used in legend and hover labels
    marker_color='#de5023', nbinsx=15,
))
fig.add_trace(go.Histogram(
    x=x3,
    histnorm='percent',
    name='Month 9',
    marker_color='#b13a15', nbinsx=15
))
fig.add_trace(go.Histogram(
    x=x4,
    histnorm='percent',
    name='Month 12',
    marker_color='#00243e', nbinsx=15
))


fig.update_layout(
    title_text='Daily Use Time - Normal Hearing ', # title of plot
    xaxis_title_text='Daily use time (hrs)', # xaxis label
    yaxis_title_text='% of hearing instruments ', # yaxis label
    bargap=0.2, # gap between bars of adjacent location coordinates
    bargroupgap=0.1, # gap between bars of the same location coordinates
    template = "plotly_white", # set template to white
)

fig.show()
print('Descriptive Statistics')
normal[['UT_1', 'UT_3', 'UT_6', 'UT_9', 'UT_12']].describe()

Descriptive Statistics


Unnamed: 0,UT_1,UT_3,UT_6,UT_9,UT_12
count,191.0,191.0,191.0,191.0,191.0
mean,10.816754,9.943717,9.701571,9.493455,9.191099
std,3.854653,4.216014,4.233864,4.353117,4.779493
min,1.0,0.5,0.0,0.0,0.0
25%,8.75,6.75,6.875,6.625,5.5
50%,11.75,11.0,9.75,9.5,10.25
75%,13.25,12.875,12.75,12.875,13.125
max,23.75,23.75,23.75,23.75,23.75


In [176]:
import plotly.graph_objects as go

import numpy as np
x0 = slight['UT_1']
x1 = slight['UT_3']
x2 = slight['UT_6']
x3 = slight['UT_9']
x4 = slight['UT_12']

fig = go.Figure()
fig.add_trace(go.Histogram(
    x=x0,
    histnorm='percent',
    name='Month 1', # name used in legend and hover labels
    marker_color='#a4cbca', nbinsx=15,
))
fig.add_trace(go.Histogram(
    x=x1,
    histnorm='percent',
    name='Month 3',
    marker_color='#e4aaa6', nbinsx=15,
))

fig.add_trace(go.Histogram(
    x=x2,
    histnorm='percent',
    name='Month 6', # name used in legend and hover labels
    marker_color='#b54918', nbinsx=15,
))
fig.add_trace(go.Histogram(
    x=x3,
    histnorm='percent',
    name='Month 9',
    marker_color='#f1a000', nbinsx=15
))
fig.add_trace(go.Histogram(
    x=x4,
    histnorm='percent',
    name='Month 12',
    marker_color='#2b2e39', nbinsx=15
))


fig.update_layout(
    title_text='Daily Use Time - Slight Hearing Loss ', # title of plot
    xaxis_title_text='Daily use time (hrs)', # xaxis label
    yaxis_title_text='% of hearing instruments ', # yaxis label
    bargap=0.2, # gap between bars of adjacent location coordinates
    bargroupgap=0.1, # gap between bars of the same location coordinates
    template = "plotly_white", # set template to white
)

fig.show()
print('Descriptive Statistics')
slight[['UT_1', 'UT_3', 'UT_6', 'UT_9', 'UT_12']].describe()

Descriptive Statistics


Unnamed: 0,UT_1,UT_3,UT_6,UT_9,UT_12
count,1113.0,1113.0,1113.0,1113.0,1113.0
mean,11.193172,10.74708,10.265049,10.302785,10.094789
std,3.643647,3.988665,4.183333,4.302661,4.400964
min,0.25,0.0,0.0,0.0,0.0
25%,9.0,8.25,7.5,7.5,7.0
50%,12.0,11.75,11.0,11.25,11.0
75%,13.75,13.75,13.5,13.75,13.5
max,23.75,23.75,23.75,23.75,23.75


In [177]:
import plotly.graph_objects as go

import numpy as np
x0 = moderate['UT_1']
x1 = moderate['UT_3']
x2 = moderate['UT_6']
x3 = moderate['UT_9']
x4 = moderate['UT_12']

fig = go.Figure()
fig.add_trace(go.Histogram(
    x=x0,
    histnorm='percent',
    name='Month 1', # name used in legend and hover labels
    marker_color='#056d83', nbinsx=15,
))
fig.add_trace(go.Histogram(
    x=x1,
    histnorm='percent',
    name='Month 3',
    marker_color='#2c1847', nbinsx=15,
))

fig.add_trace(go.Histogram(
    x=x2,
    histnorm='percent',
    name='Month 6', # name used in legend and hover labels
    marker_color='#daa520', nbinsx=15,
))
fig.add_trace(go.Histogram(
    x=x3,
    histnorm='percent',
    name='Month 9',
    marker_color='#ffc3a0', nbinsx=15
))
fig.add_trace(go.Histogram(
    x=x4,
    histnorm='percent',
    name='Month 12',
    marker_color='#ff7f50', nbinsx=15
))


fig.update_layout(
    title_text='Daily Use Time - Moderate Hearing Loss ', # title of plot
    xaxis_title_text='Daily use time (hrs)', # xaxis label
    yaxis_title_text='% of hearing instruments ', # yaxis label
    bargap=0.2, # gap between bars of adjacent location coordinates
    bargroupgap=0.1, # gap between bars of the same location coordinates
    template = "plotly_white", # set template to white
)

fig.show()
print('Descriptive Statistics')
moderate[['UT_1', 'UT_3', 'UT_6', 'UT_9', 'UT_12']].describe()

Descriptive Statistics


Unnamed: 0,UT_1,UT_3,UT_6,UT_9,UT_12
count,2036.0,2036.0,2036.0,2036.0,2036.0
mean,11.834971,11.687623,11.509332,11.485634,11.477407
std,3.452614,3.615993,3.892762,4.040445,4.022027
min,0.0,0.0,0.0,0.0,0.0
25%,10.25,10.0,9.5,9.5,9.5
50%,12.75,12.5,12.5,12.75,12.5
75%,14.25,14.25,14.25,14.25,14.25
max,23.75,23.75,23.75,23.75,24.0


In [178]:
import plotly.graph_objects as go

import numpy as np
x0 = severe['UT_1']
x1 = severe['UT_3']
x2 = severe['UT_6']
x3 = severe['UT_9']
x4 = severe['UT_12']

fig = go.Figure()
fig.add_trace(go.Histogram(
    x=x0,
    histnorm='percent',
    name='Month 1', # name used in legend and hover labels
    marker_color='#220c10', nbinsx=15,
))
fig.add_trace(go.Histogram(
    x=x1,
    histnorm='percent',
    name='Month 3',
    marker_color='#506c64', nbinsx=15,
))

fig.add_trace(go.Histogram(
    x=x2,
    histnorm='percent',
    name='Month 6', # name used in legend and hover labels
    marker_color='#77cbb9', nbinsx=15,
))
fig.add_trace(go.Histogram(
    x=x3,
    histnorm='percent',
    name='Month 9',
    marker_color='#75b8c8', nbinsx=15
))
fig.add_trace(go.Histogram(
    x=x4,
    histnorm='percent',
    name='Month 12',
    marker_color='#cdd3d5', nbinsx=15
))


fig.update_layout(
    title_text='Daily Use Time - Severe Hearing Loss ', # title of plot
    xaxis_title_text='Daily use time (hrs)', # xaxis label
    yaxis_title_text='% of hearing instruments ', # yaxis label
    bargap=0.2, # gap between bars of adjacent location coordinates
    bargroupgap=0.1, # gap between bars of the same location coordinates
    template = "plotly_white", # set template to white
)

fig.show()
print('Descriptive Statistics')
severe[['UT_1', 'UT_3', 'UT_6', 'UT_9', 'UT_12']].describe()

Descriptive Statistics


Unnamed: 0,UT_1,UT_3,UT_6,UT_9,UT_12
count,785.0,785.0,785.0,785.0,785.0
mean,12.966879,13.020382,13.091401,13.015287,12.677707
std,3.290905,3.267525,3.3603,3.46892,3.839445
min,0.0,0.0,0.25,0.0,0.0
25%,12.0,11.75,11.75,12.0,11.5
50%,13.5,13.75,13.75,13.75,13.5
75%,14.75,15.0,15.25,15.0,15.0
max,23.75,23.5,23.75,23.75,24.0


In [179]:
import plotly.graph_objects as go

import numpy as np
x0 = profound['UT_1']
x1 = profound['UT_3']
x2 = profound['UT_6']
x3 = profound['UT_9']
x4 = profound['UT_12']

fig = go.Figure()
fig.add_trace(go.Histogram(
    x=x0,
    histnorm='percent',
    name='Month 1', # name used in legend and hover labels
    marker_color='#ae1e1e', nbinsx=15,
))
fig.add_trace(go.Histogram(
    x=x1,
    histnorm='percent',
    name='Month 3',
    marker_color='#f54141', nbinsx=15,
))

fig.add_trace(go.Histogram(
    x=x2,
    histnorm='percent',
    name='Month 6', # name used in legend and hover labels
    marker_color='#de9494', nbinsx=15,
))
fig.add_trace(go.Histogram(
    x=x3,
    histnorm='percent',
    name='Month 9',
    marker_color='#594b4b', nbinsx=15
))
fig.add_trace(go.Histogram(
    x=x4,
    histnorm='percent',
    name='Month 12',
    marker_color='#000000', nbinsx=15
))


fig.update_layout(
    title_text='Daily Use Time - Profound Hearing Loss ', # title of plot
    xaxis_title_text='Daily use time (hrs)', # xaxis label
    yaxis_title_text='% of hearing instruments ', # yaxis label
    bargap=0.2, # gap between bars of adjacent location coordinates
    bargroupgap=0.1, # gap between bars of the same location coordinates
    template = "plotly_white", # set template to white
)

fig.show()
print('Descriptive Statistics')
profound[['UT_1', 'UT_3', 'UT_6', 'UT_9', 'UT_12']].describe()

Descriptive Statistics


Unnamed: 0,UT_1,UT_3,UT_6,UT_9,UT_12
count,196.0,196.0,196.0,196.0,196.0
mean,12.832908,13.207908,13.247449,13.211735,12.771684
std,3.879245,3.575264,3.469001,3.830604,4.09164
min,0.0,1.0,2.75,0.75,1.0
25%,10.75,11.75,11.5,12.0,11.4375
50%,13.5,13.75,13.75,13.75,13.5
75%,15.0,15.0,15.0,15.25,15.0
max,24.0,24.0,24.0,24.0,24.0


# Paired t-test
- compared for whole dataset and then for sub group analysis

In [181]:
import pingouin as pg
pg.normality(df1_12['UT_1'])


Unnamed: 0,W,pval,normal
UT_1,0.939905,1.0184699999999999e-38,False


In [182]:
import pingouin as pg
pg.normality(df1_12['UT_12'])

Unnamed: 0,W,pval,normal
UT_12,0.929525,4.114493e-41,False


In [None]:
# Normality failed, so you want to use a non-parametric test 

In [184]:
x = df1_12['UT_1']
y = df1_12['UT_3']
pg.wilcoxon(x, y, tail='greater')

# RBC effect size
# CLES = common language effect size 

Unnamed: 0,W-val,tail,p-val,RBC,CLES
Wilcoxon,4487055.5,greater,3.554741e-08,0.097758,0.496531


# actually, you want to do an ANOVA to compare different time periods