### Subject data handling

In [139]:
# Import pickle and open Subject 1 file
# using 'latin1' encoding to pickled objects
import pickle
import pandas as pd
import numpy as np

with open('../ppg_heart_rate_estimation/Subjects/S4/S4.pkl', 'rb') as f:
    raw_data = pickle.load(f, encoding='latin1')

In [140]:
temp_raw_data = raw_data['signal']['wrist']['TEMP']
acc_raw_data = raw_data['signal']['wrist']['ACC']
ppg_raw_data = raw_data['signal']['wrist']['BVP']
label_raw_data = raw_data['label']

In [141]:
temp_data_len = len(temp_raw_data)
acc_data_len = len(acc_raw_data)
ppg_data_len = len(ppg_raw_data)
label_data_len = len(label_raw_data)
temp_data_len, acc_data_len, ppg_data_len, label_data_len

(36600, 292800, 585600, 4572)

In [142]:
acc_raw_data.shape, acc_raw_data

((292800, 3),
 array([[-0.484375, -0.09375 ,  0.875   ],
        [-0.484375, -0.09375 ,  0.875   ],
        [-0.484375, -0.09375 ,  0.875   ],
        ...,
        [-1.046875,  0.      ,  0.078125],
        [-0.9375  ,  0.296875,  0.28125 ],
        [-0.75    ,  0.28125 ,  0.421875]]))

In [143]:
# getting the 3 channels values to the first dimension
acc_data_converted = acc_raw_data.reshape(3*acc_data_len,1)
# converting it to a 1 dimensional array
acc_data_one_dim = acc_data_converted.flatten()
# # converting it to a non numpy array
acc_data_arr = acc_data_one_dim.tolist()
acc_data_arr[:9]

[-0.484375,
 -0.09375,
 0.875,
 -0.484375,
 -0.09375,
 0.875,
 -0.484375,
 -0.09375,
 0.875]

In [144]:
# matching all data to the label array length
(temp_data_len*4)/(4*8), (acc_data_len*4)/(32*8), (ppg_data_len*4)/(64*8), label_data_len

(4575.0, 4575.0, 4575.0, 4572)

In [145]:
upd = {}
for k in raw_data["signal"]["wrist"].keys():
    upd[k] = [a[0] for a in raw_data["signal"]["wrist"][k]]

temp_one_dim = upd['TEMP']
ppg_one_dim = upd['BVP']

In [146]:
# S2 - temperature dataframe
array_of_numbers = temp_one_dim
len(array_of_numbers)

length = 32
shift = 8 # 4 Hz * 2

temp_df = pd.DataFrame()

chunks = []

for i in range(0, len(array_of_numbers), shift):
    dat = array_of_numbers[i:i + length]
    chunks.append(dat)
    temp_df = pd.concat([temp_df, pd.DataFrame([dat])], ignore_index=True)
    
temp_df.shape

(4575, 32)

In [147]:
# checking
temp_raw_data[:9], temp_df

(array([[33.07],
        [33.07],
        [33.07],
        [33.07],
        [33.07],
        [33.07],
        [33.03],
        [33.03],
        [33.03]]),
          0      1      2      3      4      5      6      7      8      9   \
 0     33.07  33.07  33.07  33.07  33.07  33.07  33.03  33.03  33.03  33.03   
 1     33.03  33.03  33.05  33.05  33.05  33.05  33.05  33.05  33.05  33.05   
 2     33.05  33.05  33.05  33.05  33.05  33.05  33.07  33.07  33.07  33.07   
 3     33.07  33.07  33.09  33.09  33.09  33.09  33.05  33.05  33.05  33.05   
 4     33.05  33.05  33.05  33.05  33.05  33.05  33.03  33.03  33.03  33.03   
 ...     ...    ...    ...    ...    ...    ...    ...    ...    ...    ...   
 4570  34.21  34.21  34.23  34.23  34.23  34.23  34.21  34.21  34.21  34.21   
 4571  34.21  34.21  34.16  34.16  34.16  34.16  34.21  34.21  34.21  34.21   
 4572  34.21  34.21  34.16  34.16  34.16  34.16  34.18  34.18  34.18  34.18   
 4573  34.18  34.18  34.16  34.16  34.16  34.16  34.16 

In [148]:
# S2 - accelerometer dataframe
array_of_numbers = acc_data_arr

length = 768 # 256 data points in 8 seconds * 3 channels
shift = 192 # 32 Hz * 3 channels * 2 shifts

acc_df = pd.DataFrame()

for i in range(0, len(array_of_numbers), shift):
    dat = array_of_numbers[i:i + length]
    # chunks.append(dat)
    acc_df = pd.concat([acc_df, pd.DataFrame([dat])], ignore_index=True)
    
acc_df.shape

(4575, 768)

In [149]:
# checking
acc_raw_data[:6], acc_df

(array([[-0.484375, -0.09375 ,  0.875   ],
        [-0.484375, -0.09375 ,  0.875   ],
        [-0.484375, -0.09375 ,  0.875   ],
        [-0.484375, -0.09375 ,  0.859375],
        [-0.484375, -0.09375 ,  0.875   ],
        [-0.46875 , -0.09375 ,  0.875   ]]),
            0         1         2         3         4         5         6    \
 0    -0.484375 -0.093750  0.875000 -0.484375 -0.093750  0.875000 -0.484375   
 1    -0.687500  0.265625  0.750000 -0.703125  0.265625  0.718750 -0.625000   
 2    -0.406250  0.421875  0.828125 -0.406250  0.421875  0.812500 -0.406250   
 3    -0.671875  0.515625  0.546875 -0.687500  0.531250  0.515625 -0.718750   
 4    -0.640625  0.703125  0.578125 -0.718750  0.796875  0.656250 -0.750000   
 ...        ...       ...       ...       ...       ...       ...       ...   
 4570 -1.046875 -0.156250 -0.296875 -1.171875 -0.109375 -0.796875 -1.234375   
 4571 -0.984375 -0.281250  0.125000 -0.968750 -0.281250  0.125000 -0.968750   
 4572 -0.968750 -0.281250  0.

In [150]:
# ppg dataframe
array_of_numbers = ppg_one_dim
len(array_of_numbers)

length = 512 # data points in 8 seconds
shift = 128 # 64 Hz * 2 shifts

ppg_df = pd.DataFrame()

for i in range(0, len(array_of_numbers), shift):
    dat = array_of_numbers[i:i + length]
    # chunks.append(dat)
    ppg_df = pd.concat([ppg_df, pd.DataFrame([dat])], ignore_index=True)
    
ppg_df.shape

(4575, 512)

In [151]:
#checking
ppg_raw_data[:9], ppg_df

(array([[ 8.83],
        [10.94],
        [12.34],
        [13.02],
        [13.04],
        [12.54],
        [11.75],
        [10.93],
        [10.31]]),
          0       1       2      3      4      5      6      7       8    \
 0       8.83   10.94   12.34  13.02  13.04  12.54  11.75  10.93   10.31   
 1      -0.44   -1.53   -2.14  -2.32  -2.30  -2.36  -2.76  -3.61   -4.81   
 2      59.35   56.68   52.72  48.04  43.34  39.11  35.38  31.80   27.93   
 3     -48.54  -46.37  -46.52 -47.65 -48.33 -47.49 -44.72 -40.26  -34.78   
 4     -14.19  -12.43   -9.53  -5.55  -0.98   3.27   5.97   5.96    2.48   
 ...      ...     ...     ...    ...    ...    ...    ...    ...     ...   
 4570   -1.46   -1.48   -2.00  -3.08  -4.67  -6.59  -8.63 -10.53  -12.17   
 4571  138.45  125.94  111.83  94.29  71.05  39.84  -0.30 -49.96 -105.37   
 4572   -9.37   -5.16   -1.41   1.64   3.92   5.46   6.41   6.91    7.06   
 4573  109.31   84.75   56.48  29.08   5.79 -12.11 -24.92 -33.57  -38.85   
 4574    

In [152]:
# label dataframe
# converting to one dimension
label_arr = label_raw_data.tolist()
#creating the dataframe
label_df = pd.DataFrame(label_arr)
label_df.shape

(4572, 1)

In [153]:
#checking
label_raw_data[:9], label_df

(array([69.33133569, 74.23985397, 80.68656756, 83.98705209, 86.64274123,
        87.26296543, 85.31018458, 81.27098202, 75.13336681]),
               0
 0     69.331336
 1     74.239854
 2     80.686568
 3     83.987052
 4     86.642741
 ...         ...
 4567  94.803254
 4568  86.073423
 4569  83.092390
 4570  84.577227
 4571  88.221438
 
 [4572 rows x 1 columns])

In [154]:
frames = [ppg_df, acc_df, temp_df, label_df]

result_df = pd.concat(frames, axis=1, ignore_index=True)
result_df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,1303,1304,1305,1306,1307,1308,1309,1310,1311,1312
0,8.83,10.94,12.34,13.02,13.04,12.54,11.75,10.93,10.31,10.02,...,33.07,33.07,33.07,33.09,33.09,33.09,33.09,33.05,33.05,69.331336
1,-0.44,-1.53,-2.14,-2.32,-2.30,-2.36,-2.76,-3.61,-4.81,-6.06,...,33.05,33.05,33.05,33.05,33.05,33.05,33.05,33.03,33.03,74.239854
2,59.35,56.68,52.72,48.04,43.34,39.11,35.38,31.80,27.93,23.51,...,33.03,33.03,33.03,33.07,33.07,33.07,33.07,33.05,33.05,80.686568
3,-48.54,-46.37,-46.52,-47.65,-48.33,-47.49,-44.72,-40.26,-34.78,-29.07,...,33.05,33.05,33.05,33.05,33.05,33.05,33.05,33.00,33.00,83.987052
4,-14.19,-12.43,-9.53,-5.55,-0.98,3.27,5.97,5.96,2.48,-4.70,...,33.00,33.00,33.00,33.03,33.03,33.03,33.03,33.00,33.00,86.642741
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4570,-1.46,-1.48,-2.00,-3.08,-4.67,-6.59,-8.63,-10.53,-12.17,-13.56,...,34.18,34.18,34.18,34.16,34.16,34.16,34.16,34.16,34.16,84.577227
4571,138.45,125.94,111.83,94.29,71.05,39.84,-0.30,-49.96,-105.37,-160.51,...,34.16,34.16,34.16,34.21,34.21,34.21,34.21,34.16,34.16,88.221438
4572,-9.37,-5.16,-1.41,1.64,3.92,5.46,6.41,6.91,7.06,6.90,...,34.16,,,,,,,,,
4573,109.31,84.75,56.48,29.08,5.79,-12.11,-24.92,-33.57,-38.85,-41.19,...,,,,,,,,,,


In [174]:
# result_df.to_csv('../ppg_heart_rate_estimation/Subjects/S4/S4.csv', index=False)