# Overview
The task is to identify sleep patterns given samples of accelerometer data. 

In [1]:
# Render our plots inline
%matplotlib inline

import numpy as np
import h5py
import pandas as pd
import datetime 
import matplotlib.pyplot as plt
from pandas import HDFStore, DataFrame
import itertools

# Functions for Loading data


In [2]:
#Loads the target information
def load_targets(subject_number):
    """Load the stored target table for one subject.

    Args:
        subject_number: Subject identifier; it is inserted into both the
            folder name (``ID<n>``) and the file name (``PSG<n>_targets.h5``).

    Returns:
        The object produced by ``pd.read_hdf`` for that file (callers in this
        file treat it as a DataFrame).
    """
    path = 'D:/path/ID'+str(subject_number)+'/PSG'+ str(subject_number) +'_targets.h5'

    # Hold the h5py handle only for the duration of the read. The ``with``
    # block closes it even when ``pd.read_hdf`` raises, which the previous
    # manual open/close pair did not guarantee.
    with h5py.File(path, 'r'):
        targets = pd.read_hdf(path)

    return targets

#Loads the timestamps
def load_timestamps(subject_number):
    """Load the stored timestamp table for one subject.

    Args:
        subject_number: Subject identifier; it is inserted into both the
            folder name (``ID<n>``) and the file name
            (``PSG<n>_timestamps.h5``).

    Returns:
        The object produced by ``pd.read_hdf`` for that file (callers in this
        file treat it as a DataFrame).
    """
    path = 'D:/path/ID'+str(subject_number)+'/PSG'+ str(subject_number) +'_timestamps.h5'

    # Hold the h5py handle only for the duration of the read. The ``with``
    # block closes it even when ``pd.read_hdf`` raises, which the previous
    # manual open/close pair did not guarantee.
    with h5py.File(path, 'r'):
        timestamps = pd.read_hdf(path)

    return timestamps

# Merge scores

In [3]:
#Edits the start time so that it represents the 30 second epoch it is within
def edit_start_time(x):
    """Snap a ``HH:MM:SS`` string down to the start of its 30-second epoch.

    Args:
        x: Colon-separated time string; the third field must parse as an int.

    Returns:
        The first two fields unchanged, followed by ``'00'`` when the seconds
        are below 30 and ``'30'`` otherwise, joined with ``':'``.
    """
    parts = x.split(':')
    epoch_seconds = '00' if int(parts[2]) < 30 else '30'
    return ':'.join((parts[0], parts[1], epoch_seconds))

#Edits the end time so that it represents the 30 second epoch it is within
def edit_end_time(x):
    """Extract the end time from a raw field and snap it to its 30-second epoch.

    The time is taken from the segment that follows the first ``'-'`` in *x*.

    Args:
        x: Raw string of the form ``<prefix>-HH:MM:SS``; the seconds field
            must parse as an int.

    Returns:
        ``HH:MM:00`` when the seconds are below 30, ``HH:MM:30`` otherwise.
    """
    after_dash = x.split('-')[1]
    fields = after_dash.split(':')

    # Seconds collapse onto the start of the half-minute they fall in.
    if int(fields[2]) >= 30:
        snapped = '30'
    else:
        snapped = '00'

    return fields[0] + ':' + fields[1] + ':' + snapped

#Edits the type and returns it in the wanted format
def edit_type(x):
    """Return the first word of the third ``';'``-separated field of *x*.

    This picks the leading word, which is what the arousal / PLM labels need.
    For EEG or EMG labels the last word of the same field would be wanted
    instead.

    Args:
        x: Raw string containing at least three ``';'``-separated fields.

    Returns:
        The first whitespace-delimited token of the third field.
    """
    third_field = x.split(';')[2]
    return third_field.split()[0]
    
#Loads the arousal data and edits it so that only the relevant information is returned
def load_arousals(subject_id):
    """Read one subject's arousal classification file and normalise its columns.

    The file is parsed as comma-separated text without a header, skipping the
    first five rows, into the columns ``start``, ``end`` and ``type``. Each
    column is then cleaned with its matching ``edit_*`` helper.

    Args:
        subject_id: Subject identifier inserted into the folder name.

    Returns:
        DataFrame with the cleaned ``start``, ``end`` and ``type`` columns.
    """
    source = 'D:/Masteroppgave/Data/ID'+str(subject_id)+'/PSG_analysis/Classification Arousals.txt'
    table = pd.read_csv(source, header=None, sep=',',
                        names=['start','end','type'], skiprows=5)

    # Clean the columns in the same order as before: start, end, then type.
    cleaners = (
        ('start', edit_start_time),
        ('end', edit_end_time),
        ('type', edit_type),
    )
    for column, cleaner in cleaners:
        table[column] = table[column].apply(cleaner)

    return table

#Returns a list containing the timestamps of when the wanted arousal type occurred
def get_arousals(subject_id, wanted_type):
    """Collect the distinct epoch times at which a given event type occurred.

    For every row of the subject's arousal table whose ``type`` equals
    *wanted_type*, both the ``start`` and the ``end`` value are collected.
    Duplicates are removed while keeping the order of first appearance.

    Args:
        subject_id: Subject identifier passed on to ``load_arousals``.
        wanted_type: Event type to keep (compared with ``==``).

    Returns:
        List of unique start/end values. The list is empty when no row matches;
        previously this case raised ``KeyError`` because the intermediate
        DataFrame had no column ``0``.
    """
    events = load_arousals(subject_id)

    matching_times = []
    rows = zip(events['start'].values, events['end'].values, events['type'].values)
    for start, end, event_type in rows:
        # Drop this filter to keep every arousal type.
        if event_type == wanted_type:
            matching_times.append(start)
            matching_times.append(end)

    # dict keys are unique and keep insertion order, so this de-duplicates
    # exactly like drop_duplicates(keep='first') but also copes with no matches.
    return list(dict.fromkeys(matching_times))
    

In [4]:
import copy
# Updates the target (sleep_class) list based on the list of arousal occurrences. Arousals are represented by a 0
def get_updated_targets(subject_id, arousals):
    """Overwrite the targets of every sample whose clock time is in *arousals*.

    Each timestamp is converted to text, and the part after the first space
    and before the first ``'.'`` is looked up in *arousals*. On a match the
    first entry of the corresponding target row is set to ``0``.

    Args:
        subject_id: Subject identifier passed to ``load_targets`` and
            ``load_timestamps``.
        arousals: Iterable of hashable time values (the strings returned by
            ``get_arousals``).

    Returns:
        The targets as a list of rows, with matched rows marked ``0``.
    """
    targets = load_targets(subject_id).values.tolist()
    timestamps = load_timestamps(subject_id)

    # Build the lookup once: set membership is O(1), whereas testing against
    # the list was O(len(arousals)) for every single timestamp.
    arousal_times = set(arousals)
    stamp_column = timestamps[0]

    for i in range(len(timestamps)):
        # str() of the timestamp is split into date and time at the space;
        # everything from the first '.' onwards is dropped.
        clock = str(stamp_column[i]).split(' ')[1].split('.')[0]

        if clock in arousal_times:
            # Add a `targets[i][0] == 1` check here if samples already
            # labelled as wake should not be marked as arousals.
            targets[i][0] = 0

    return targets
    

In [5]:
# Subject numbers to process; intended as the argument to merge_data below.
subject_ids =[1, 6 ,14, 15, 18, 19, 20, 23, 24, 25, 27, 28, 29, 30, 31, 32, 35, 37, 39]

def merge_data(subject_ids):
    """Write a PLM-marked targets file for every subject in *subject_ids*.

    For each subject the 'PLM' event times are fetched, the targets are
    updated with them, and the result is stored as
    ``PSG<subject>_targets_PLMs.h5`` under the key ``targetsPLM``. The file
    name is relative, so it resolves against the current working directory.

    Args:
        subject_ids: Iterable of subject identifiers.
    """
    for subject_id in subject_ids:
        plm_times = get_arousals(subject_id, 'PLM')
        updated_targets = get_updated_targets(subject_id, plm_times)

        out_name = 'PSG' + str(subject_id) + '_targets_PLMs.h5'
        pd.DataFrame(data=updated_targets).to_hdf(out_name, key = 'targetsPLM')

In [6]:
#merge_data(subject_ids)