# External Factors Merge

Copyright ©2021-2022. Stephen Rigden. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

In [65]:
import pandas
from pathlib import Path

import utilities

In [66]:
# Locate the processed-data pickles relative to the current working directory.
# The project root is taken to be two levels above the working directory.
project_path = Path.cwd().parent.parent
extra_data_pickle = project_path.joinpath('data', 'processed', 'extra_data_preprocessed.pickle')
heart_data_pickle = project_path.joinpath('data', 'processed', 'heart_preprocessed.pickle')
heart_and_externals_pickle = project_path.joinpath('data', 'processed', 'heart_and_externals.pickle')

### Load and view the data

In [67]:
# Read the preprocessed heart dataset (hds) and external-factors dataset (eds)
# back into DataFrames, heart data first.
hds, eds = map(pandas.read_pickle, (heart_data_pickle, extra_data_pickle))

In [68]:
# Summarise the heart dataset: index range, columns, dtypes and non-null counts.
hds.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 179803 entries, 5 to 179807
Data columns (total 3 columns):
 #   Column  Non-Null Count   Dtype         
---  ------  --------------   -----         
 0   value   179803 non-null  float64       
 1   type    179803 non-null  object        
 2   date    179803 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 5.5+ MB


In [69]:
# Preview the first rows of the heart dataset.
hds.head()

Unnamed: 0,value,type,date
5,67.0,HKQuantityTypeIdentifierHeartRate,2020-03-30 20:11:49
6,67.0,HKQuantityTypeIdentifierHeartRate,2020-03-30 20:16:17
7,61.0,HKQuantityTypeIdentifierHeartRate,2020-03-30 20:21:09
8,64.0,HKQuantityTypeIdentifierHeartRate,2020-03-30 20:21:13
9,63.0,HKQuantityTypeIdentifierHeartRate,2020-03-30 20:21:18


In [70]:
# Summarise the external-factors dataset: index range, columns, dtypes and non-null counts.
eds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28 entries, 0 to 27
Data columns (total 4 columns):
 #   Column      Non-Null Count  Dtype         
---  ------      --------------  -----         
 0   date        28 non-null     datetime64[ns]
 1   metoprolol  28 non-null     Int64         
 2   alcohol     28 non-null     Int64         
 3   notes       28 non-null     object        
dtypes: Int64(2), datetime64[ns](1), object(1)
memory usage: 1.1+ KB


In [71]:
# Preview the first rows of the external-factors dataset.
eds.head()

Unnamed: 0,date,metoprolol,alcohol,notes
0,2021-12-01,2,0,
1,2021-12-02,2,177,
2,2021-12-03,2,0,
3,2021-12-04,2,0,
4,2021-12-05,2,30,


### Extract blood pressure data and merge with extra data file

In [72]:
# Build the blood pressure dataset from the heart records using the project helper.
# NOTE(review): the helper's implementation is not visible in this notebook; going by
# the summary printed below it yields date, systolic, diastolic and pulse pressure
# columns -- confirm against utilities.create_blood_pressure_dataset.
bpds = utilities.create_blood_pressure_dataset(hds)
bpds.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 480 entries, 0 to 479
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   date            480 non-null    datetime64[ns]
 1   systolic        480 non-null    float64       
 2   diastolic       480 non-null    float64       
 3   pulse pressure  480 non-null    float64       
dtypes: datetime64[ns](1), float64(3)
memory usage: 18.8 KB


In [73]:
# Preview the first blood pressure readings.
bpds.head()

Unnamed: 0,date,systolic,diastolic,pulse pressure
0,2021-08-06 20:53:00,153.0,79.0,74.0
1,2021-08-06 20:58:00,136.0,85.0,51.0
2,2021-08-06 23:13:00,135.0,77.0,58.0
3,2021-08-07 01:14:00,121.0,73.0,48.0
4,2021-08-07 22:21:00,132.0,71.0,61.0


In [74]:
# Add a 'day' column holding each reading's timestamp truncated to midnight, so
# readings can be matched against the external-factors dataset, whose 'date'
# column holds day-level values.
# normalize() keeps the values as datetime64[ns] throughout, rather than
# round-tripping through an object column of Python date values.
bpds['day'] = bpds['date'].dt.normalize()
bpds.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 480 entries, 0 to 479
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   date            480 non-null    datetime64[ns]
 1   systolic        480 non-null    float64       
 2   diastolic       480 non-null    float64       
 3   pulse pressure  480 non-null    float64       
 4   day             480 non-null    datetime64[ns]
dtypes: datetime64[ns](2), float64(3)
memory usage: 22.5 KB


In [75]:
# Preview the first readings to check the 'day' column against the 'date' timestamps.
bpds.head()

Unnamed: 0,date,systolic,diastolic,pulse pressure,day
0,2021-08-06 20:53:00,153.0,79.0,74.0,2021-08-06
1,2021-08-06 20:58:00,136.0,85.0,51.0,2021-08-06
2,2021-08-06 23:13:00,135.0,77.0,58.0,2021-08-06
3,2021-08-07 01:14:00,121.0,73.0,48.0,2021-08-07
4,2021-08-07 22:21:00,132.0,71.0,61.0,2021-08-07


In [76]:
# Preview the last readings to see where the blood pressure data ends.
bpds.tail()

Unnamed: 0,date,systolic,diastolic,pulse pressure,day
475,2021-12-26 22:16:00,128.0,80.0,48.0,2021-12-26
476,2021-12-26 23:10:00,131.0,79.0,52.0,2021-12-26
477,2021-12-27 00:05:00,142.0,80.0,62.0,2021-12-27
478,2021-12-27 01:00:00,138.0,82.0,56.0,2021-12-27
479,2021-12-28 00:24:00,126.0,78.0,48.0,2021-12-28


In [77]:
# Attach each day's external factors (metoprolol, alcohol) to every blood pressure
# reading taken on that day. The default inner join keeps only readings whose day
# has a record in the external-factors dataset.
# Renaming the external 'date' column to 'day' gives both frames a shared key, so
# the merge produces no 'date_x'/'date_y' suffixed columns that would need
# re-selecting and renaming afterwards.
daily_factors = eds[['date', 'metoprolol', 'alcohol']].rename(columns={'date': 'day'})
# validate raises a MergeError if a day appears more than once in the external data,
# which would otherwise silently duplicate blood pressure readings.
bpds = bpds.merge(daily_factors, on='day', validate='many_to_one')
# 'day' was only needed as the join key.
bpds = bpds.drop(columns='day')
bpds.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 96 entries, 0 to 95
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   date            96 non-null     datetime64[ns]
 1   systolic        96 non-null     float64       
 2   diastolic       96 non-null     float64       
 3   pulse pressure  96 non-null     float64       
 4   metoprolol      96 non-null     Int64         
 5   alcohol         96 non-null     Int64         
dtypes: Int64(2), datetime64[ns](1), float64(3)
memory usage: 5.4 KB


In [78]:
# Preview the first rows of the merged blood pressure and external-factors data.
bpds.head()

Unnamed: 0,date,systolic,diastolic,pulse pressure,metoprolol,alcohol
0,2021-12-01 22:30:00,131.0,86.0,45.0,2,0
1,2021-12-01 23:38:00,144.0,79.0,65.0,2,0
2,2021-12-02 00:22:00,140.0,80.0,60.0,2,177
3,2021-12-02 01:16:00,135.0,78.0,57.0,2,177
4,2021-12-02 22:19:00,131.0,70.0,61.0,2,177


In [79]:
# Preview the last rows of the merged blood pressure and external-factors data.
bpds.tail()

Unnamed: 0,date,systolic,diastolic,pulse pressure,metoprolol,alcohol
91,2021-12-26 22:16:00,128.0,80.0,48.0,1,0
92,2021-12-26 23:10:00,131.0,79.0,52.0,1,0
93,2021-12-27 00:05:00,142.0,80.0,62.0,2,25
94,2021-12-27 01:00:00,138.0,82.0,56.0,2,25
95,2021-12-28 00:24:00,126.0,78.0,48.0,0,0


In [80]:
# Save the merged dataset as a pickle in the project's processed-data folder.
bpds.to_pickle(heart_and_externals_pickle)