In [19]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pathlib
import pickle
from scipy.stats import skew, kurtosis, pearsonr
from scipy.signal import butter, welch, filtfilt, resample
import copy
import time
import datetime
from multiprocessing.dummy import Pool as ThreadPool
from PreprocessFcns import *

%matplotlib inline

In [20]:
# Set path to folder containing Subject Records
# (used later in this notebook as the parent of the 'Table8' watch acceleration files)
# NOTE(review): the raw string mixes '//', '\\' (two literal backslashes inside r'...') and a
# single '\' as separators -- presumably tolerated by the Windows share; confirm before normalizing
path = r'//FS2.smpp.local\\RTO\\CIS-PD Study\MJFF Curation\Finalized Dataset'
# Set path to Destination Folder
# (parent of the per-record-type sub-folders that the consolidated timepoint CSVs are read from
#  and that the extracted watch-data CSVs are written to)
dest = r'//FS2.smpp.local\\RTO\\CIS-PD Study\Patient Record Correlation'
#---------------------------------------------------------------------------------------------------------

In [21]:
# Per-record-type settings, one line per record type:
#   source table file name, destination sub-folder, consolidated timepoints file name

# Medication Reports
table_med, dest_ext_med, file_name_med = 'Table10.csv', 'Medication Reports', 'med_timepoints.csv'

# Symptom Reports
table_symt, dest_ext_symt, file_name_symt = 'Table11.csv', 'Symptom Reports', 'symt_timepoints.csv'

# Diaries
table_diar, dest_ext_diar, file_name_diar = 'Table12.csv', 'Diaries', 'diar_timepoints.csv'

In [22]:
# Read the consolidated timepoint table of every record type.
# Each one lives at <dest>/<record-type sub-folder>/<timepoints file name>;
# the files are read in the order medication, symptom, diary.
timepoints_med, timepoints_symt, timepoints_diar = [
    pd.read_csv(os.path.join(dest, subfolder, csv_name))
    for subfolder, csv_name in (
        (dest_ext_med, file_name_med),
        (dest_ext_symt, file_name_symt),
        (dest_ext_diar, file_name_diar),
    )
]

# Collect the three tables so the extraction cell can walk through all record types in one loop
records = [timepoints_med, timepoints_symt, timepoints_diar]

In [None]:
# Number of rows read per chunk when streaming a monthly watch acceleration file
WATCH_CHUNK_ROWS = 100000
# Columns copied from the watch file into every extracted window
WATCH_COLUMNS = ['SubjID', 'Timestamp', 'X', 'Y', 'Z']
# Watch data is always kept starting this long BEFORE the record entry
WINDOW_BEFORE = pd.Timedelta('30 min')

# One row per record type: (record table, destination sub-folder, time kept AFTER the entry).
# Medication reports keep 30 min on both sides of the entry; symptom reports and diaries keep
# only the 30 min leading up to the entry.
# The tables are matched by identity instead of by their row count: the former length test
# (`len(timepoints) > 75280`) was never true for the 75280-row symptom table, so symptom
# windows were silently written into the folder left over from the medication pass.
RECORD_SETTINGS = [
    (timepoints_med, dest_ext_med, pd.Timedelta('30 min')),
    (timepoints_symt, dest_ext_symt, pd.Timedelta('0 min')),
    (timepoints_diar, dest_ext_diar, pd.Timedelta('0 min')),
]


def _settings_for(timepoints):
    """Return ``(destination sub-folder, time kept after the entry)`` for a record table.

    Raises
    ------
    ValueError
        If ``timepoints`` is not one of the tables listed in ``RECORD_SETTINGS``.
    """
    for frame, folder, after in RECORD_SETTINGS:
        if frame is timepoints:
            return folder, after
    raise ValueError('Unrecognised record table in `records`; add it to RECORD_SETTINGS')


def _month_labels(window_start, window_end):
    """Return the 'YYYY-MM' label of every calendar month touched by the window, in order."""
    labels = []
    year, month = window_start.year, window_start.month
    while (year, month) <= (window_end.year, window_end.month):
        labels.append('{:04d}-{:02d}'.format(year, month))
        year, month = (year + 1, 1) if month == 12 else (year, month + 1)
    return labels


def _extract_watch_window(subj_id, record_stamp, after):
    """Collect the watch acceleration samples recorded around one patient record entry.

    Parameters
    ----------
    subj_id
        Subject identifier of the record entry (used to build the watch file name).
    record_stamp
        Timestamp of the record entry as stored in the timepoints table.
    after : pd.Timedelta
        How long after the entry samples are still kept (the window always starts
        ``WINDOW_BEFORE`` ahead of the entry). Both window ends are inclusive.

    Returns
    -------
    pd.DataFrame
        Columns ``WATCH_COLUMNS`` with a fresh 0..n-1 index; empty (columns only) when no
        sample falls inside the window.

    Raises
    ------
    FileNotFoundError
        If the watch file for the entry's own month does not exist.
    """
    record_time = pd.Timestamp(record_stamp)
    window_start = record_time - WINDOW_BEFORE
    window_end = record_time + after

    # The entry's own month is taken from the stored text so the file name stays exactly
    # 'Table8_<SubjID>_<first 7 characters of the timestamp>.csv'.
    record_month = str(record_stamp)[:7]
    # A window close to a month boundary also needs the neighbouring month's file.
    months = _month_labels(window_start, window_end)
    if record_month not in months:
        # Timestamp text is not in 'YYYY-MM...' form: fall back to the single-file lookup
        months = [record_month]

    pieces = []
    for month in months:
        watch_file = os.path.join(path, 'Table8', 'Table8_' + str(subj_id) + '_' + month + '.csv')
        # The entry's own month file is required; a neighbouring month may simply have no recording
        if month != record_month and not os.path.exists(watch_file):
            continue

        # Stream the (large) monthly file chunk by chunk and filter each chunk with one mask
        for chunk in pd.read_csv(watch_file, chunksize=WATCH_CHUNK_ROWS):
            # Parse value by value with pd.Timestamp so parsing matches a per-row conversion
            chunk_times = chunk['Timestamp'].map(pd.Timestamp)
            in_window = (chunk_times >= window_start) & (chunk_times <= window_end)
            if not in_window.any():
                continue
            selected = chunk.loc[in_window, WATCH_COLUMNS].copy()
            # Store Timestamp objects (object dtype) so the CSV text is str(Timestamp) per row
            selected['Timestamp'] = chunk_times[in_window].astype(object)
            pieces.append(selected)

    if not pieces:
        return pd.DataFrame(columns=WATCH_COLUMNS)
    return pd.concat(pieces, ignore_index=True)


for timepoints in records:

    # Destination folder and trailing window length depend on the record type
    dest_ext, after = _settings_for(timepoints)

    for subj_id, record_stamp in zip(timepoints['SubjID'], timepoints['Timestamp']):

        watch_timepoint = _extract_watch_window(subj_id, record_stamp, after)

        # Save Each Compiled DataFrame of Acc Data Corresponding to a Patient Record Entry.
        # File name: '<SubjID> <timestamp>' with the two ':' of the time removed.
        stamp_text = str(record_stamp)
        out_name = (str(subj_id) + ' ' + stamp_text[:13] + stamp_text[14:16] +
                    stamp_text[17:] + '.csv')
        watch_timepoint.to_csv(os.path.join(dest, dest_ext, out_name), index = False)



## Windows that carry across months are handled in _extract_watch_window, which also reads the neighbouring month's Table8 file.

In [17]:
### TEST CELL

# Row counts of the condensed medication, symptom and diary record tables, one per line
print(*map(len, (timepoints_med, timepoints_symt, timepoints_diar)), sep='\n')

24742
75280
4718
