# Overview
The task is to identify sleep patterns given samples of accelerometer data. 

In [1]:
# Render our plots inline
%matplotlib inline

import numpy as np
import h5py
import pandas as pd
import datetime 
import matplotlib.pyplot as plt
from pandas import HDFStore, DataFrame
import itertools

# Functions for Loading data
- load_pa_data gets the sensor data from the h5 file after it has been processed using the LSTM model; it converts it to a dataframe with the accelerometer data and the prediction
- load_sleep_timings gets the start and end timings from a separate file. Those times are matched to the "Sleep profile.txt" file that comes in PSG_analysis/Sleep profile.txt
- load_move_timings gets the amount by which the accelerometer data has shifted (with regard to timestamps) from the original raw PSG data. It is found in a separate file called 'Move...txt'.
- load_pa_data_match_sleep_class loads the pa_data (via load_pa_data), selects only the night for which scoring is available (using the manually set timings), and then loads the shift score and the sleep scores; the pa_data is shifted according to the move score, and the sleep scores are then merged into the pa_data so that each row gets the most recent score at or before its timestamp, with a tolerance of 30 s (the sleep scoring interval); at the beginning there are some "A" labels, which are removed

In [5]:
def load_pa_data(subject_number):
    """Load the timestamped prediction data for one subject.

    Reads ``PSG_<subject_number>_timestamped_predictions.h5`` from the
    subject's ``ID<subject_number>`` folder, indexes the frame by its
    ``timestamp`` column and keeps only the prediction and the six
    accelerometer columns.

    Parameters
    ----------
    subject_number : str or int
        Subject identifier as it appears in the folder and file names.

    Returns
    -------
    pandas.DataFrame
        Indexed by ``timestamp`` with columns ``prediction``, ``back_x``,
        ``back_y``, ``back_z``, ``thigh_x``, ``thigh_y``, ``thigh_z``.

    Raises
    ------
    KeyError
        If ``timestamp`` or any of the selected columns is missing.
    """
    filename = 'D:/path/ID{0}/PSG_{0}_timestamped_predictions.h5'.format(subject_number)

    # pandas opens and closes the HDF5 file on its own, so no separate file
    # handle is held open around the read (nothing to leak if the read fails).
    data = pd.read_hdf(filename)

    # Set the timestamp as index
    data = data.set_index('timestamp')

    # Selecting by name keeps exactly these columns, in this order; everything
    # else in the file (e.g. a confidence column) is dropped for further
    # processing.
    return data[['prediction', 'back_x', 'back_y', 'back_z',
                 'thigh_x', 'thigh_y', 'thigh_z']]

def load_sleep_timings():
    """Read the sleep guide CSV and return it as a frame indexed by ``id``."""
    sleep_guide = pd.read_csv(
        'D:/path/sleep_guide.csv',
        index_col='id',
    )
    return sleep_guide

def load_move_timings():
    """Read the move guide CSV and return it as a frame indexed by ``id``."""
    move_guide = pd.read_csv(
        'D:/path/move_guide.csv',
        index_col='id',
    )
    return move_guide


def load_pa_data_match_sleep_class(subject_id, sampling_rate_hz=100):
    """Return one subject's scored night: sensor rows labelled with a sleep class.

    Steps:

    1. Load the subject's data with ``load_pa_data`` and keep the rows between
       the subject's ``start`` and ``end`` entries of the sleep guide.
    2. Shift the rows by the subject's ``shift`` entry of the move guide
       (read as whole seconds and converted to a row count).
    3. Read ``PSG_analysis/Sleep profile.txt`` and give its rows timestamps
       spaced 30 s apart from ``start`` to ``end``.
    4. Attach to every sensor row the sleep class of the latest sleep-profile
       timestamp at or before it, provided it is at most 30 s away
       (``merge_asof`` with its default direction).
    5. Drop rows labelled ``' A'`` and rows with any missing value.

    Parameters
    ----------
    subject_id : int
        Key into the ``id`` index of the sleep and move guides; also used to
        build the subject's folder name.
    sampling_rate_hz : int, optional
        Number of data rows per second of shift. Defaults to 100, the value
        that was previously hard-coded (presumably the sensor sampling rate;
        confirm against the recording setup).

    Returns
    -------
    pandas.DataFrame
        Indexed by the merged ``timestamp`` column, with the columns returned
        by ``load_pa_data`` plus ``sleep_class``.

    Raises
    ------
    ValueError
        If the number of sleep-profile rows differs from the number of 30 s
        steps between ``start`` and ``end``.
    """
    timing = load_sleep_timings()
    move = load_move_timings()

    subject_timing = timing.loc[subject_id]
    night_start = subject_timing['start']
    night_end = subject_timing['end']

    pa_data = load_pa_data(str(subject_id))
    subj_night = pa_data.loc[night_start:night_end]

    # Shift data. The shift is row based: seconds * rows-per-second.
    # ``shift`` returns a new frame, so no defensive copy is required; rows
    # vacated by the shift hold NaN and are removed by ``dropna`` below.
    shift_x_in_sec = int(move.loc[subject_id, 'shift'])
    shifted_data = subj_night.shift(-int(shift_x_in_sec * sampling_rate_hz))

    # Get sleep scores
    sleep_profile = pd.read_csv(
        'D:/path/ID{}/PSG_analysis/Sleep profile.txt'.format(subject_id),
        header=None, sep=';', names=['timestamp', 'sleep_class'], skiprows=7)

    # Generate timestamps from start to end with a 30 s frequency (one sleep
    # classification per 30 s). They replace the timestamps read from the file.
    epoch_times = pd.date_range(start=night_start, end=night_end, freq='30s')
    if len(epoch_times) != len(sleep_profile):
        # The column assignment below would fail with a generic pandas length
        # error; report which subject and which counts disagree instead.
        raise ValueError(
            'Sleep profile of subject {} has {} rows, but the start/end timing '
            'spans {} steps of 30 s'.format(
                subject_id, len(sleep_profile), len(epoch_times)))
    sleep_profile['timestamp'] = epoch_times

    pa_sc = pd.merge_asof(shifted_data, sleep_profile, left_index=True,
                          right_on='timestamp', tolerance=pd.Timedelta('30s'))
    # The label is compared including its leading space, as it appears in the
    # parsed file.
    pa_sc = pa_sc[pa_sc.sleep_class != ' A']
    pa_sc = pa_sc.set_index('timestamp')
    return pa_sc.dropna()

# Merge data

In [3]:
def merge_scores(subject_ids=None):
    """Build and save the merged sensor/sleep-score data for each subject.

    For every subject the merged frame is produced by
    ``load_pa_data_match_sleep_class`` and written to
    ``PSG<subject_id>_merged_data.h5`` (key ``'data'``) in the current
    working directory.

    Parameters
    ----------
    subject_ids : iterable of int, optional
        Subjects to process. When omitted, the original fixed list of
        subjects is used. An empty iterable processes nothing.

    Returns
    -------
    None
    """
    if subject_ids is None:
        subject_ids = (1, 6, 14, 15, 18, 19, 20, 23, 24, 25, 27, 28, 29, 30,
                       31, 32, 35, 37, 39)

    # ``subject_id`` rather than ``id`` so the builtin is not shadowed.
    for subject_id in subject_ids:
        data = load_pa_data_match_sleep_class(subject_id)

        filename = 'PSG{}_merged_data.h5'.format(subject_id)
        data.to_hdf(filename, key='data')
        

In [4]:
#merge_scores()