# Download Apple Watch Data

Example of how to download accelerometry, rotation, tremor, and dyskinesia data from an Apple Watch.

In [1]:
import io
import os
import pandas as pd
import matplotlib.pyplot as plt
import datetime
from runeq import Config, stream

In [2]:
def make_full_df(accessor):
    """Collect every page of an API response into a single dataframe.

    Args:
        accessor: object whose ``iter_csv_text()`` method yields one
            CSV-formatted string per page of results.

    Returns:
        A pandas DataFrame holding the rows of all pages, in page order,
        with a fresh 0..n-1 index. An empty DataFrame if there are no pages.
    """

    # DataFrame.append was removed in pandas 2.0, and it re-copied the
    # accumulated frame on every page. Parse all pages, then concatenate once.
    page_frames = [
        pd.read_csv(io.StringIO(page)) for page in accessor.iter_csv_text()
    ]

    if not page_frames:
        # pd.concat raises on an empty sequence; keep the old empty result.
        return pd.DataFrame()

    return pd.concat(page_frames, ignore_index=True)


def get_accel(client, params, save_filepath):
    """
    Download accelerometer data, write it to a CSV file, and return it.

    Args:
        client: API client exposing an ``Accel`` accessor factory.
        params: dict of query parameters, forwarded as keyword arguments.
        save_filepath: destination path of the CSV file (no index column).

    Returns:
        Dataframe holding every page of the response.
    """

    accel_df = make_full_df(client.Accel(**params))
    accel_df.to_csv(save_filepath, index=False)
    return accel_df


def get_rotation(client, params, save_filepath):
    """
    Download rotation data, write it to a CSV file, and return it.

    Args:
        client: API client exposing a ``Rotation`` accessor factory.
        params: dict of query parameters, forwarded as keyword arguments.
        save_filepath: destination path of the CSV file (no index column).

    Returns:
        Dataframe holding every page of the response.
    """

    rotation_df = make_full_df(client.Rotation(**params))
    rotation_df.to_csv(save_filepath, index=False)
    return rotation_df


def get_tremor(client, params, save_filepath):
    """
    Download tremor symptom data, write it to a CSV file, and return it.

    Args:
        client: API client exposing a ``ProbabilitySymptom`` accessor factory.
        params: dict of query parameters, forwarded as keyword arguments
            alongside ``symptom`` and ``severity``.
        save_filepath: destination path of the CSV file (no index column).

    Returns:
        Dataframe holding every page of the response.
    """

    # severity='*' presumably requests every severity level -- TODO confirm
    # against the API documentation.
    tremor_accessor = client.ProbabilitySymptom(symptom='tremor', severity='*', **params)

    tremor_df = make_full_df(tremor_accessor)
    tremor_df.to_csv(save_filepath, index=False)
    return tremor_df


def get_dyskinesia(client, params, save_filepath):
    """
    Download dyskinesia symptom data, write it to a CSV file, and return it.

    Args:
        client: API client exposing a ``ProbabilitySymptom`` accessor factory.
        params: dict of query parameters, forwarded as keyword arguments
            alongside ``symptom``.
        save_filepath: destination path of the CSV file (no index column).

    Returns:
        Dataframe holding every page of the response.
    """

    # Unlike the tremor request, no severity argument is passed here.
    dyskinesia_accessor = client.ProbabilitySymptom(symptom='dyskinesia', **params)

    dyskinesia_df = make_full_df(dyskinesia_accessor)
    dyskinesia_df.to_csv(save_filepath, index=False)
    return dyskinesia_df

In [3]:
# Define a function here to download all desired data by days...
#  It should slice up the timestamps into days and call the get
#  function for each desired data stream.

def get_data_by_days(client, params, BASE_PATH, base_filename, data_streams):
    """
    Download the requested data streams one day at a time, saving each day
    of each stream to its own CSV file.

    The window from params['start_time'] up to (but not including)
    params['end_time'] is cut into consecutive 24-hour chunks that begin at
    the start time. A final chunk shorter than 24 hours -- which happens when
    the window is not a whole number of days, or is shorter than one day --
    is still downloaded, with its end clamped to params['end_time'].

    Because the end time is exclusive, a window meant to run *through* a
    given calendar date should pass midnight of the following date as
    params['end_time'].

    Each file is written into BASE_PATH and named
    <base_filename><YYYY_MM_DD>_<stream>.csv, where the date is the local
    date on which the chunk starts.

    Args:
        client: API client, passed through to the get_* download functions.
        params: dict of query parameters. Must contain 'start_time' and
            'end_time' as unix timestamps; every other entry is forwarded
            unchanged. The dict itself is not modified.
        BASE_PATH: directory in which the CSV files are saved.
        base_filename: prefix for every file name.
        data_streams: container of stream names to download. Recognized
            names are 'accel', 'rotation', 'tremor' and 'dyskinesia'; any
            other name is ignored.

    Returns:
        None
    """
    # (stream name, download function) pairs, in the order files are written.
    stream_getters = (
        ('accel', get_accel),
        ('rotation', get_rotation),
        ('tremor', get_tremor),
        ('dyskinesia', get_dyskinesia),
    )

    # Work on a copy so the caller's params are left untouched:
    day_params = params.copy()

    # Convert the timestamps to (naive, local-time) datetimes for manipulation:
    start_time = datetime.datetime.fromtimestamp(params['start_time'])
    end_time = datetime.datetime.fromtimestamp(params['end_time'])
    one_day = datetime.timedelta(days=1)

    # Step through the window a day at a time. Looping until the end time is
    #  reached (rather than over a truncated whole-day count) keeps a
    #  trailing partial day from being dropped.
    start_day = start_time
    while start_day < end_time:
        end_day = min(start_day + one_day, end_time)
        day_params['start_time'] = start_day.timestamp()
        day_params['end_time'] = end_day.timestamp()
        date_tag = start_day.strftime('%Y_%m_%d')

        for stream_name, getter in stream_getters:
            if stream_name not in data_streams:
                continue

            filename = base_filename + date_tag + '_' + stream_name + '.csv'
            # The getter saves the CSV; the dataframe it returns is not needed.
            getter(client, day_params, os.path.join(BASE_PATH, filename))
            print(f'Finished downloading: {filename}')

        start_day += one_day

    return None

In [4]:
# Set up the API client that every download call in this notebook uses.
# NOTE(review): Config() is called with no arguments, so the credentials are
#  presumably read from a default location -- confirm against the runeq docs.

cfg = Config()
client = stream.V1Client(cfg)

## Set up structure for breaking a long window of time into days

The user should be able to set a range of time (up to months), and download all data from that range, with each day being saved to a separate, but intuitively-named .csv file.

(so that we get something like: rcs02L_2020_10_01_accel.csv)

Times of interest:

**RCS02**
* Open Loop:
Left: 5/26/20-5/30/20
* Closed Loop:
Left: 6/10/20-6/15/20

**RCS05**:
* Left: 6/16/20 - 6/20/20
* Right: 6/16/20, 6/18/20, 6/19/20

**RCS06**:
* Open Loop:
Both sides: 6/4/20-6/5/20
* Closed Loop:
Left: 6/15/20

**RCS07**:
* Both: 6/23/20-6/27/20

**RCS08**:
* Both: 6/23/20-6/27/20


**+ Everything from this week (week of Sept 20) from**: 
RCS02, RCS05, RCS07, RCS08 

*(I'm guessing the week is sun-sat, Sept. 20-26... and "everything" is both watches?)*

**Define the desired list of data streams to download:**

Possible options include:
* Acceleration: 'accel'
* Rotation: 'rotation'
* Symptom probability for tremor: 'tremor'
* Symptom probability for dyskinesia: 'dyskinesia'

(Later add symptom prob by severity, heart rate, etc.)

In [5]:
# Streams to download; get_data_by_days checks for exactly these four names.
data_streams = ['accel','rotation','tremor','dyskinesia']

In [6]:
# Specify patient ids and device ids.

# Subjects to download in this run; un-comment an entry to include it.
#  Each name is used as the key into patient_ids, device_ids and
#  patient_time_windows, and as the prefix of the saved file names.
subjects = [
    # 'rcs02',
    # 'rcs05',
    # 'rcs06',
    # 'rcs07',
    'rcs08'
]

# Subject name -> value sent as the 'patient_id' query parameter.
# NOTE(review): the formats differ (upper case, lower case, and a hex string
#  for rcs08) -- confirm each one against the API.
patient_ids = {
    'rcs02': 'RCS02',
    'rcs05': 'RCS05',
    'rcs06': 'rcs06',
    'rcs07': 'rcs07',
    'rcs08': 'ee8ad14f051f4ff0bccab5c98171067f'
}

# Subject name -> value sent as the 'device_id' query parameter, keyed by
#  side: 'L' (left) or 'R' (right).
device_ids = {
    'rcs02': {
        'L': 'S9Nj2M1Q',
        'R': 'KrCk6N6q'
    },
    'rcs05': {
        'L': 'fkFVvXvO',
        'R': 'yFZe7BQZ'
    },
    'rcs06': {
        'L': 'CWTFFG3f',
        'R': 'EK5cDAZ1'
    },
    'rcs07': {
        'L': '0Z48CCdv',
        'R': 'RElEtNme'
    },
    'rcs08': {
        # NOTE(review): this ID contains '*', unlike every other ID here --
        #  confirm it is not a typo.
        'L': 'WJzJ4*NA',
        'R': 'fMQ45ejQ'
    }
}

For now I'm testing code below, but will then polish it into a function that cuts a time window into days and stores the timestamps in a usable structure.

In [7]:
# Starting with brute force to build this structure.
#  Write code to more elegantly build it from user input later...
#
# Each subject has three parallel lists: 'start', 'stop' and 'side'.
#  Entry i of each list describes one download window.
#
# get_data_by_days treats 'stop' as exclusive, so a window that should run
#  *through* a calendar date must stop at midnight of the following date.
#  The 'stop' entries below therefore name the LAST date wanted and
#  _day_after() converts it. (Using the last date itself as the stop would
#  drop that date, and would make a single-day window download nothing.)


def _day_start(year, month, day):
    """Return the unix timestamp of local midnight starting the given date."""
    return datetime.datetime(year, month, day).timestamp()


def _day_after(year, month, day):
    """Return the unix timestamp of local midnight following the given date."""
    following = datetime.datetime(year, month, day) + datetime.timedelta(days=1)
    return following.timestamp()


patient_time_windows = {
    'rcs02': {
        'start': [
            _day_start(2020, 5, 26),
            _day_start(2020, 6, 10),
            _day_start(2020, 9, 20)
        ],
        'stop': [
            _day_after(2020, 5, 30),
            _day_after(2020, 6, 15),
            _day_after(2020, 9, 26)
        ],
        'side': [
            'L',
            'L',
            'Both'
        ]
    },
    'rcs05': {
        'start': [
            _day_start(2020, 6, 16),
            _day_start(2020, 6, 16),
            _day_start(2020, 6, 18),
            _day_start(2020, 6, 19),
            _day_start(2020, 9, 20)
        ],
        'stop': [
            _day_after(2020, 6, 20),
            _day_after(2020, 6, 16),
            _day_after(2020, 6, 18),
            _day_after(2020, 6, 19),
            _day_after(2020, 9, 26)
        ],
        'side': [
            'L',
            'R',
            'R',
            'R',
            'Both'
        ]
    },
    'rcs06': {
        'start': [
            _day_start(2020, 6, 4),
            _day_start(2020, 6, 15)
        ],
        'stop': [
            _day_after(2020, 6, 5),
            _day_after(2020, 6, 15)
        ],
        'side': [
            'Both',
            'L'
        ]
    },
    'rcs07': {
        'start': [
            _day_start(2020, 6, 23),
            _day_start(2020, 9, 20)
        ],
        'stop': [
            _day_after(2020, 6, 27),
            _day_after(2020, 9, 26)
        ],
        'side': [
            'Both',
            'Both'
        ]
    },
    'rcs08': {
        'start': [
            _day_start(2020, 6, 23),
            _day_start(2020, 9, 20)
        ],
        'stop': [
            _day_after(2020, 6, 27),
            _day_after(2020, 9, 26)
        ],
        'side': [
            'Both',
            'Both'
        ]
    },
    # Example below of including finer resolution with hour/minute/etc. keyword args
    #'rcs05': {'start': datetime.datetime(2020, 10, 3, hour=10, minute=30).timestamp(),
    #          'stop': datetime.datetime(2020, 10, 3, hour=13, minute=30).timestamp()
    #         }
}

# Example for indexing of the above structure:
#patient_time_windows['rcs02']['start'][0]

In [8]:
# specify base path for saving data
# NOTE(review): nothing visible in this notebook creates this directory --
#  presumably it must already exist before the download loop runs; confirm.

#BASE_PATH = '/Users/roee/Downloads/watch'
BASE_PATH = '~/Documents/api_data/'

In [None]:
# Walk every subject in subjects and each of that subject's entries in
#  patient_time_windows, downloading all desired data streams
#  day by day for the watch (or watches) the entry names.

for subject in subjects:
    windows = patient_time_windows[subject]

    # 'side' drives the iteration; 'start' and 'stop' are read at the same
    #  index (the three lists are assumed to have the same length).
    for idx, side in enumerate(windows['side']):
        thisstart = windows['start'][idx]
        thisstop = windows['stop'][idx]

        # 'Both' means the left watch followed by the right watch;
        #  anything else is taken as the single side to download.
        device_sides = ('L', 'R') if side == 'Both' else (side,)

        # Parameters common to every request in this window
        #  ('device_id' is filled in per watch below):
        params = {
            'patient_id': patient_ids[subject],
            'start_time': thisstart,
            'end_time': thisstop,
            'timestamp': 'datetime'
            # alternative: 'timestamp': 'unix'
        }

        for device_side in device_sides:
            params['device_id'] = device_ids[subject][device_side]
            base_filename = subject + device_side + '_'
            get_data_by_days(
                client,
                params,
                BASE_PATH,
                base_filename,
                data_streams
            )

Finished downloading: rcs08L_2020_06_23_accel.csv
Finished downloading: rcs08L_2020_06_23_rotation.csv
Finished downloading: rcs08L_2020_06_23_tremor.csv
Finished downloading: rcs08L_2020_06_23_dyskinesia.csv
