Copyright ©2022. Stephen Rigden. This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program. If not, see http://www.gnu.org/licenses/.

In [1]:
import pandas
from pathlib import Path

from heart_health import utilities

In [2]:
# Locate the project's processed-data pickles. The project root is taken to be
# two directory levels above the current working directory, so the notebook
# must be run from its own folder for these paths to resolve.
project_path = Path.cwd().parent.parent
processed_dir = project_path.joinpath('data', 'processed')
extra_data_pickle = processed_dir / 'extra_data_preprocessed.pickle'
heart_data_pickle = processed_dir / 'heart_preprocessed.pickle'
heart_and_externals_pickle = processed_dir / 'heart_and_externals.pickle'

### Load and view the data

In [3]:
# Read the preprocessed heart dataset (hds) and the extra dataset (eds), in
# that order. Unpickling can execute arbitrary code, so these files should only
# ever be ones this project wrote itself.
hds, eds = (
    pandas.read_pickle(pickle_path)
    for pickle_path in (heart_data_pickle, extra_data_pickle)
)

In [4]:
# Column dtypes and non-null counts for the heart dataset.
hds.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 181400 entries, 5 to 181404
Data columns (total 3 columns):
 #   Column  Non-Null Count   Dtype         
---  ------  --------------   -----         
 0   value   181400 non-null  float64       
 1   type    181400 non-null  object        
 2   date    181400 non-null  datetime64[ns]
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 5.5+ MB


In [5]:
# First rows of the heart dataset: one row per measurement (value, type, date).
hds.head()

Unnamed: 0,value,type,date
5,67.0,HKQuantityTypeIdentifierHeartRate,2020-03-30 20:11:49
6,67.0,HKQuantityTypeIdentifierHeartRate,2020-03-30 20:16:17
7,61.0,HKQuantityTypeIdentifierHeartRate,2020-03-30 20:21:09
8,64.0,HKQuantityTypeIdentifierHeartRate,2020-03-30 20:21:13
9,63.0,HKQuantityTypeIdentifierHeartRate,2020-03-30 20:21:18


In [6]:
# Last rows of the heart dataset. Compared with the first rows, these carry a
# different `type`, i.e. the frame mixes several kinds of measurement.
hds.tail()

Unnamed: 0,value,type,date
181400,78.0,HKQuantityTypeIdentifierBloodPressureDiastolic,2021-12-31 01:19:00
181401,70.0,HKQuantityTypeIdentifierBloodPressureDiastolic,2021-12-31 21:59:00
181402,81.0,HKQuantityTypeIdentifierBloodPressureDiastolic,2021-12-31 23:13:00
181403,79.0,HKQuantityTypeIdentifierBloodPressureDiastolic,2022-01-01 00:11:00
181404,81.0,HKQuantityTypeIdentifierBloodPressureDiastolic,2022-01-01 01:25:00


In [7]:
# Column dtypes and non-null counts for the extra dataset.
eds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 33 entries, 0 to 32
Data columns (total 4 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   date           33 non-null     datetime64[ns]
 1   heart_tablets  33 non-null     Int64         
 2   alcohol        33 non-null     Int64         
 3   notes          33 non-null     object        
dtypes: Int64(2), datetime64[ns](1), object(1)
memory usage: 1.2+ KB


In [8]:
# First rows of the extra dataset (the rows shown hold one record per date).
eds.head()

Unnamed: 0,date,heart_tablets,alcohol,notes
0,2021-12-01,2,0,
1,2021-12-02,2,177,
2,2021-12-03,2,0,
3,2021-12-04,2,0,
4,2021-12-05,2,30,


### Extract blood pressure data and merge with extra data file

In [9]:
# Derive the blood pressure frame from the heart dataset via the project helper.
# Per the recorded output, the result has one row per reading with the columns
# date, systolic, diastolic and 'pulse pressure'.
# NOTE(review): how the helper pairs systolic and diastolic rows is not visible
# from this notebook; see heart_health.utilities.
bpds = utilities.create_blood_pressure_dataset(hds)
bpds.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 496 entries, 0 to 495
Data columns (total 4 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   date            496 non-null    datetime64[ns]
 1   systolic        496 non-null    float64       
 2   diastolic       496 non-null    float64       
 3   pulse pressure  496 non-null    float64       
dtypes: datetime64[ns](1), float64(3)
memory usage: 19.4 KB


In [10]:
# Preview the extracted blood pressure readings.
bpds.head()

Unnamed: 0,date,systolic,diastolic,pulse pressure
0,2021-08-06 20:53:00,153.0,79.0,74.0
1,2021-08-06 20:58:00,136.0,85.0,51.0
2,2021-08-06 23:13:00,135.0,77.0,58.0
3,2021-08-07 01:14:00,121.0,73.0,48.0
4,2021-08-07 22:21:00,132.0,71.0,61.0


In [11]:
# Attach each date's extra data (heart_tablets, alcohol, notes) to every blood
# pressure reading taken on that calendar date.
#
# The join key is the reading timestamp truncated to midnight. normalize() keeps
# the datetime64 dtype, so it compares directly with eds['date'] without a
# round-trip through Python date objects.
# NOTE(review): a reading taken shortly after midnight is matched to the new
# calendar date's record; confirm that is the intended attribution.
day = bpds['date'].dt.normalize()

# Inner join: readings on dates with no extra-data record are dropped.
# validate='many_to_one' makes the merge raise if `eds` ever holds two rows for
# the same date, which would otherwise silently duplicate readings.
# Because the key Series is named 'date' and both frames also have a 'date'
# column, the result carries the key as 'date', the reading timestamp as
# 'date_x' and the extra-data date as 'date_y'.
bpds = bpds.merge(
    eds,
    how='inner',
    left_on=[day],
    right_on=['date'],
    validate='many_to_one',
)
bpds.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 112 entries, 0 to 111
Data columns (total 9 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   date            112 non-null    datetime64[ns]
 1   date_x          112 non-null    datetime64[ns]
 2   systolic        112 non-null    float64       
 3   diastolic       112 non-null    float64       
 4   pulse pressure  112 non-null    float64       
 5   date_y          112 non-null    datetime64[ns]
 6   heart_tablets   112 non-null    Int64         
 7   alcohol         112 non-null    Int64         
 8   notes           112 non-null    object        
dtypes: Int64(2), datetime64[ns](3), float64(3), object(1)
memory usage: 9.0+ KB


In [12]:
# Keep the reading timestamp plus the measurement and extra-data columns, and
# restore the timestamp's name to 'date'. The join key, 'date_y' and 'notes'
# are dropped here.
bpds = (
    bpds[['date_x', 'systolic', 'diastolic', 'pulse pressure', 'heart_tablets', 'alcohol']]
    .rename(columns={'date_x': 'date'})
)
bpds.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 112 entries, 0 to 111
Data columns (total 6 columns):
 #   Column          Non-Null Count  Dtype         
---  ------          --------------  -----         
 0   date            112 non-null    datetime64[ns]
 1   systolic        112 non-null    float64       
 2   diastolic       112 non-null    float64       
 3   pulse pressure  112 non-null    float64       
 4   heart_tablets   112 non-null    Int64         
 5   alcohol         112 non-null    Int64         
dtypes: Int64(2), datetime64[ns](1), float64(3)
memory usage: 6.3 KB


In [13]:
# Preview the merged frame: each reading now carries the heart_tablets and
# alcohol values recorded for its calendar date.
bpds.head()

Unnamed: 0,date,systolic,diastolic,pulse pressure,heart_tablets,alcohol
0,2021-12-01 22:30:00,131.0,86.0,45.0,2,0
1,2021-12-01 23:38:00,144.0,79.0,65.0,2,0
2,2021-12-02 00:22:00,140.0,80.0,60.0,2,177
3,2021-12-02 01:16:00,135.0,78.0,57.0,2,177
4,2021-12-02 22:19:00,131.0,70.0,61.0,2,177


In [14]:
# Check the end of the merged frame.
bpds.tail()

Unnamed: 0,date,systolic,diastolic,pulse pressure,heart_tablets,alcohol
107,2021-12-31 01:19:00,137.0,78.0,59.0,2,30
108,2021-12-31 21:59:00,114.0,70.0,44.0,2,30
109,2021-12-31 23:13:00,134.0,81.0,53.0,2,30
110,2022-01-01 00:11:00,135.0,79.0,56.0,2,106
111,2022-01-01 01:25:00,137.0,81.0,56.0,2,106


In [15]:
# Persist the merged blood pressure / extra-data frame to the processed-data
# folder. An existing file at this path is overwritten.
bpds.to_pickle(heart_and_externals_pickle)