## Loading libs

In [None]:
# import libs
import numpy as np
import pandas as pd
import pickle
import matplotlib.pylab as plt
from matplotlib.backends.backend_pdf import PdfPages
%run 'split.py'

## Process Data

In [None]:
# Split the raw data into train / val / test partitions and scale it.
# Every tunable is declared here and forwarded to SplitData by keyword.
PATH_DATA = 'UM_data.pkl'  # passed as SplitData's first positional argument
RATIOS = [40, 2, 2]        # forwarded as ratio_list (presumably train/val/test proportions -- TODO confirm)
LEN_SEQ = 200              # forwarded as enc_length
LEN_PRED = 12              # forwarded as frc_length
SCALE = 5                  # forwarded as scale

split_data = SplitData(
    PATH_DATA,
    ratio_list=RATIOS,
    enc_length=LEN_SEQ,
    frc_length=LEN_PRED,
    scale=SCALE,
)
# Calling the splitter instance returns the three partitions, unpacked in this order.
train_data, val_data, test_data = split_data()

In [None]:
# Check the minimum segment length in the train dataset.
# Every subject in train_data is scanned (no hard-coded subject count), and the
# length that is recorded is the length of the segment actually being compared.
# The result is stored in `min_len` so the builtin `min` is not shadowed for the
# rest of the notebook.
min_len = min(
    (len(segment)
     for i in range(len(train_data))
     for segment in train_data[i]),
    default=None,  # None when train_data holds no segments at all
)

print(min_len)

In [None]:
# plot train data
%%capture
pdf = PdfPages(foldername+'/train_plots.pdf')

for i in range(38):
  for j in range(len(train_data[i])):
    fig, ax = plt.subplots(figsize=(20,10))
    plt.plot(np.arange(train_data[i][j].values.shape[0]), train_data[i][j].values)
    pdf.savefig(fig)

pdf.close()

In [None]:
# plot val data
%%capture
pdf = PdfPages(foldername+'/val_plots.pdf')

for i in range(38):
  for j in range(len(val_data[i])):
    fig, ax = plt.subplots(figsize=(20,10))
    plt.plot(np.arange(val_data[i][j].values.shape[0]), val_data[i][j].values)
    pdf.savefig(fig)

pdf.close()

In [None]:
# plot test data
%%capture
pdf = PdfPages(foldername+'/test_plots.pdf')

for i in range(38):
  for j in range(len(test_data[i])):
    fig, ax = plt.subplots(figsize=(20,10))
    plt.plot(np.arange(test_data[i][j].values.shape[0]), test_data[i][j].values)
    pdf.savefig(fig)

pdf.close()

In [None]:
# Transform each timestamp into a vector of scaled floats, flatten the segments
# of all subjects into one list per split, and keep the subject index with
# every segment.
#
# Each calendar component is divided by the constant below and then shifted by
# OFFSET, which centres the feature around zero (roughly within [-1, 1]).
DAYS_YEAR = 182.5
DAYS_MONTH = 15.5
DAYS_WEEK = 3.5
HOURS_DAY = 12.0
MINUTES_HOUR = 30.0
OFFSET = 1


def encode_segments(data):
    """Flatten per-subject segments into a list of encoded records.

    Parameters
    ----------
    data : sequence
        Indexed by subject position; ``data[i]`` is an iterable of segments.
        Each segment must expose ``.values`` and ``.index`` (presumably a
        pandas Series with a datetime index -- TODO confirm against SplitData).

    Returns
    -------
    list of list
        One ``[subject_id, values, time_features, index]`` entry per segment:
        ``subject_id`` is the position of the subject in ``data``; ``values``
        is ``segment.values`` with a trailing axis added; ``time_features`` is
        a float array with one row per index entry and five columns (day of
        year, day of month, day of week, hour, minute, each scaled and
        shifted); ``index`` is the untouched ``segment.index``.

    Notes
    -----
    ``np.vstack`` raises ``ValueError`` on an empty list, so a segment with an
    empty index is not supported (same as the inline version this replaces).
    """
    encoded = []
    for subject_id in range(len(data)):
        for seg_subj in data[subject_id]:
            time_features = np.vstack([
                np.array([date.dayofyear / DAYS_YEAR - OFFSET,
                          date.day / DAYS_MONTH - OFFSET,
                          date.dayofweek / DAYS_WEEK - OFFSET,
                          date.hour / HOURS_DAY - OFFSET,
                          date.minute / MINUTES_HOUR - OFFSET], dtype=float)
                for date in seg_subj.index
            ])
            encoded.append([
                subject_id,
                seg_subj.values[:, np.newaxis],
                time_features,
                seg_subj.index,
            ])
    return encoded


# One shared helper replaces three copy-pasted comprehensions, so the three
# splits are guaranteed to be encoded the same way.
train_data_enc = encode_segments(train_data)
val_data_enc = encode_segments(val_data)
test_data_enc = encode_segments(test_data)

In [None]:
# Persist the three encoded splits as pickle files under foldername.
# NOTE(review): `foldername` is not defined in the visible cells, and the
# data/ sub-directory is assumed to exist already -- confirm both.
encoded_outputs = [
    ('/data/train_data.pkl', train_data_enc),
    ('/data/val_data.pkl', val_data_enc),
    ('/data/test_data.pkl', test_data_enc),
]
for path_suffix, encoded_split in encoded_outputs:
    with open(foldername + path_suffix, 'wb') as handle:
        pickle.dump(encoded_split, handle)