Copyright ©2022. Stephen Rigden.
This program is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation, either version 3 of the License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more details.
You should have received a copy of the GNU General Public License along with this program.  If not, see <http://www.gnu.org/licenses/>.

# Create Dummy Data

The created data file is intended to assist new users by providing data similar to
that used to create the notebooks of this project.
This will be helpful in cases where the user's data is markedly different in shape or size and
requires substantial modification of the notebook. The dummy data will provide confidence in the
correctness of the notebooks as a basis for modification for the user's non-conforming data.

In [67]:
from pathlib import Path

import numpy
import pandas

In [68]:
# --- Shape of the generated dataset ---
# Length of one period in days.
LUNAR_MONTH = 28
# Number of consecutive periods to generate.
PERIODS = 6
# Number of readings generated for each day of a period.
DAILY_PRESSURE_REFORDS = 4

# --- Record type identifiers for the two kinds of reading ---
APPLE_SYSTOLIC_TYPE = 'HKQuantityTypeIdentifierBloodPressureSystolic'
APPLE_DIASTOLIC_TYPE = 'HKQuantityTypeIdentifierBloodPressureDiastolic'

# --- Fixed values copied into every generated record ---
UNIT = 'mm Hg'
DEVICE = 'NOVICE'
SOURCE_NAME = 'Mock Data Generator'
SOURCE_VERSION = '1e-googolplex'

In [69]:
# The project root is taken to be two levels above the current working directory.
working_dir = Path.cwd()
project_path = working_dir.parent.parent

# Destination of the generated export file inside the project's raw-data folder.
iphone_file = project_path.joinpath('data', 'raw', 'export.xml')

# Meta control dataset

Create a dataset with six 28-day periods.

In [70]:
# Start timestamps of the periods: PERIODS entries spaced LUNAR_MONTH days apart.
period_spacing = f'{LUNAR_MONTH}D'
ts = pandas.date_range(start='2020-06-01', periods=PERIODS, freq=period_spacing)
ts

DatetimeIndex(['2020-06-01', '2020-06-29', '2020-07-27', '2020-08-24',
               '2020-09-21', '2020-10-19'],
              dtype='datetime64[ns]', freq='28D')

In [71]:
# Meta dataset: one row per period holding its start timestamp and the
# mean (μ) / standard deviation (σ) columns for systolic and diastolic pressure.
mds = pandas.DataFrame({'period_start': ts})
mds['systolic_μ'] = [120, 130, 140, 140, 130, 120]
mds['systolic_σ'] = [12, 12, 12, 12, 12, 12]
mds['diastolic_μ'] = [80, 85, 90, 90, 85, 80]
mds['diastolic_σ'] = [6, 6, 6, 6, 6, 6]
mds

Unnamed: 0,period_start,systolic_μ,systolic_σ,diastolic_μ,diastolic_σ
0,2020-06-01,120,12,80,6
1,2020-06-29,130,12,85,6
2,2020-07-27,140,12,90,6
3,2020-08-24,140,12,90,6
4,2020-09-21,130,12,85,6
5,2020-10-19,120,12,80,6


In [72]:
def _mock_readings(timestamps, mean, std_dev, record_type, rng):
    """Return a DataFrame with one mock pressure record per timestamp.

    Values are drawn from a normal distribution with the given mean and
    standard deviation, then truncated to integers. The timestamp is used for
    the creation, start and end dates of each record; the remaining columns
    are filled from the module-level constants.
    """
    values = rng.normal(mean, std_dev, len(timestamps)).astype('int')
    return pandas.DataFrame({'creationDate': timestamps, 'startDate': timestamps,
                             'endDate': timestamps, 'value': values,
                             'type': record_type, 'sourceName': SOURCE_NAME,
                             'sourceVersion': SOURCE_VERSION, 'unit': UNIT, 'device': DEVICE})


# One random generator is shared by every draw instead of creating a new one per call.
rng = numpy.random.default_rng()

# Readings are spread evenly over each day, so the spacing follows from the
# number of daily records rather than being hard-coded.
readings_per_period = LUNAR_MONTH * DAILY_PRESSURE_REFORDS
reading_interval = pandas.Timedelta(days=1) / DAILY_PRESSURE_REFORDS

period_frames = []
for period in mds.itertuples(index=False):
    # Timestamps of all readings in this period.
    pts = pandas.date_range(period.period_start, periods=readings_per_period,
                            freq=reading_interval)

    # Use the mean and standard deviation of *this* period's row.
    sds = _mock_readings(pts, period.systolic_μ, period.systolic_σ, APPLE_SYSTOLIC_TYPE, rng)
    dds = _mock_readings(pts, period.diastolic_μ, period.diastolic_σ, APPLE_DIASTOLIC_TYPE, rng)

    # Interleave the two series by time. A stable sort keeps the systolic record
    # ahead of the diastolic record that shares its timestamp.
    ds = pandas.concat([sds, dds], ignore_index=True)
    ds = ds.sort_values('startDate', kind='mergesort')
    period_frames.append(ds)

# Concatenate once at the end; an empty meta dataset yields an empty frame.
if period_frames:
    pressure_ds = pandas.concat(period_frames, ignore_index=True)
else:
    pressure_ds = pandas.DataFrame()

pressure_ds.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1344 entries, 0 to 1343
Data columns (total 9 columns):
 #   Column         Non-Null Count  Dtype         
---  ------         --------------  -----         
 0   creationDate   1344 non-null   datetime64[ns]
 1   startDate      1344 non-null   datetime64[ns]
 2   endDate        1344 non-null   datetime64[ns]
 3   value          1344 non-null   int64         
 4   type           1344 non-null   object        
 5   sourceName     1344 non-null   object        
 6   sourceVersion  1344 non-null   object        
 7   unit           1344 non-null   object        
 8   device         1344 non-null   object        
dtypes: datetime64[ns](3), int64(1), object(5)
memory usage: 94.6+ KB


In [73]:
# Preview the first five generated records.
pressure_ds.head(n=5)

Unnamed: 0,creationDate,startDate,endDate,value,type,sourceName,sourceVersion,unit,device
0,2020-06-01 00:00:00,2020-06-01 00:00:00,2020-06-01 00:00:00,120,HKQuantityTypeIdentifierBloodPressureSystolic,Mock Data Generator,1e-googolplex,mm Hg,NOVICE
1,2020-06-01 00:00:00,2020-06-01 00:00:00,2020-06-01 00:00:00,75,HKQuantityTypeIdentifierBloodPressureDiastolic,Mock Data Generator,1e-googolplex,mm Hg,NOVICE
2,2020-06-01 06:00:00,2020-06-01 06:00:00,2020-06-01 06:00:00,105,HKQuantityTypeIdentifierBloodPressureSystolic,Mock Data Generator,1e-googolplex,mm Hg,NOVICE
3,2020-06-01 06:00:00,2020-06-01 06:00:00,2020-06-01 06:00:00,81,HKQuantityTypeIdentifierBloodPressureDiastolic,Mock Data Generator,1e-googolplex,mm Hg,NOVICE
4,2020-06-01 12:00:00,2020-06-01 12:00:00,2020-06-01 12:00:00,112,HKQuantityTypeIdentifierBloodPressureSystolic,Mock Data Generator,1e-googolplex,mm Hg,NOVICE


In [74]:
# Create the destination folder first: writing fails if it does not exist.
# NOTE: a file already present at this path is overwritten.
iphone_file.parent.mkdir(parents=True, exist_ok=True)
pressure_ds.to_xml(iphone_file, index=False, parser='lxml')