In [1]:
ref='nb7-' #Note to matt

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
import seaborn as sns
import copy


import random

import matplotlib

plt.style.use('seaborn-whitegrid')
matplotlib.rcParams.update({
    'font.family': 'serif',
    'axes.titlesize':16,
    'axes.labelsize':16,
    'axes.xmargin':0.1,
    'axes.ymargin':0.1,
    'legend.fontsize':16,
    'xtick.labelsize' : 16,
    'ytick.labelsize' : 16,
    'lines.markersize': 10,
    'lines.linewidth' : 3,
    'font.size': 16
})



# Importing Data

In [2]:

raw_data=pd.read_csv("../data/processed/gathered_w_rms.csv",index_col=False )

raw_data

Unnamed: 0,stamp,SubjectId,RunId,x,y,z,speed,walk,rms
0,1.527700e+09,52.0,1.0,-426.0,-102.0,124.0,0.133031,52-1,262.840890
1,1.527700e+09,52.0,1.0,-413.0,-81.0,124.0,0.133031,52-1,253.315350
2,1.527700e+09,52.0,1.0,-394.0,-69.0,126.0,0.133031,52-1,242.124624
3,1.527700e+09,52.0,1.0,-367.0,-52.0,122.0,0.133031,52-1,225.297581
4,1.527700e+09,52.0,1.0,-340.0,-36.0,118.0,0.133031,52-1,208.822093
...,...,...,...,...,...,...,...,...,...
2079219,1.528365e+09,988.0,2.0,336.0,4.0,0.0,0.056256,988-2,194.003436
2079220,1.528365e+09,988.0,2.0,332.0,6.0,2.0,0.056256,988-2,191.715066
2079221,1.528365e+09,988.0,2.0,328.0,3.0,6.0,0.056256,988-2,189.410489
2079222,1.528365e+09,988.0,2.0,330.0,6.0,0.0,0.056256,988-2,190.557078


In [3]:
cleaned_data=pd.read_csv("../data/processed/cleaned_w_interp.csv",index_col=False )
cleaned_data

Unnamed: 0,stamp,SubjectId,RunId,walk,speed,x,y,z,rms
0,1.527700e+09,52.0,1.0,52-1,0.1,-426.0,-102.0,124.0,262.840890
1,1.527700e+09,52.0,1.0,52-1,0.1,-413.0,-81.0,124.0,253.315350
2,1.527700e+09,52.0,1.0,52-1,0.1,-394.0,-69.0,126.0,242.124624
3,1.527700e+09,52.0,1.0,52-1,0.1,-367.0,-52.0,122.0,225.297581
4,1.527700e+09,52.0,1.0,52-1,0.1,-340.0,-36.0,118.0,208.822093
...,...,...,...,...,...,...,...,...,...
2039963,1.528365e+09,988.0,2.0,988-2,0.1,336.0,4.0,0.0,194.003436
2039964,1.528365e+09,988.0,2.0,988-2,0.1,332.0,6.0,2.0,191.715066
2039965,1.528365e+09,988.0,2.0,988-2,0.1,328.0,3.0,6.0,189.410489
2039966,1.528365e+09,988.0,2.0,988-2,0.1,330.0,6.0,0.0,190.557078


In [4]:
def eliminate_discontinuity(df):
    # Extract individual walks
    subject_id=df['SubjectId'].values
    walks=list(df.drop_duplicates(subset=['walk'])['walk'].values)
    lags=[]
    for walk in walks:
        walk_data=df[df['walk']==walk]
        stamps=walk_data['stamp'].values
        diff=np.diff(stamps)
        time_lag=np.where(diff<0.015,0,diff )
        lag_idx=np.nonzero(time_lag)
        lags.append(lag_idx)
        if time_lag.sum()>0:
            # Drop data beyond the first discontinuity 
            walk_idx=np.array(walk_data.index)
            sliced_walk=walk_idx[lag_idx[0][0]:] #sliced from the first discontinuity to the end 
            df=df.drop(sliced_walk) #drop the sliced data from the df
    return df.reset_index(drop=True)

         

In [5]:
raw_data=eliminate_discontinuity(raw_data)
raw_data.to_csv("../data/processed/dataset_raw.csv", index=False)
raw_data

Unnamed: 0,stamp,SubjectId,RunId,x,y,z,speed,walk,rms
0,1.527700e+09,52.0,1.0,-426.0,-102.0,124.0,0.133031,52-1,262.840890
1,1.527700e+09,52.0,1.0,-413.0,-81.0,124.0,0.133031,52-1,253.315350
2,1.527700e+09,52.0,1.0,-394.0,-69.0,126.0,0.133031,52-1,242.124624
3,1.527700e+09,52.0,1.0,-367.0,-52.0,122.0,0.133031,52-1,225.297581
4,1.527700e+09,52.0,1.0,-340.0,-36.0,118.0,0.133031,52-1,208.822093
...,...,...,...,...,...,...,...,...,...
2041815,1.528365e+09,988.0,2.0,336.0,4.0,0.0,0.056256,988-2,194.003436
2041816,1.528365e+09,988.0,2.0,332.0,6.0,2.0,0.056256,988-2,191.715066
2041817,1.528365e+09,988.0,2.0,328.0,3.0,6.0,0.056256,988-2,189.410489
2041818,1.528365e+09,988.0,2.0,330.0,6.0,0.0,0.056256,988-2,190.557078


In [6]:

cleaned_data=eliminate_discontinuity(cleaned_data)
cleaned_data.to_csv("../data/processed/dataset_interp.csv", index=False)
cleaned_data

Unnamed: 0,stamp,SubjectId,RunId,walk,speed,x,y,z,rms
0,1.527700e+09,52.0,1.0,52-1,0.1,-426.0,-102.0,124.0,262.840890
1,1.527700e+09,52.0,1.0,52-1,0.1,-413.0,-81.0,124.0,253.315350
2,1.527700e+09,52.0,1.0,52-1,0.1,-394.0,-69.0,126.0,242.124624
3,1.527700e+09,52.0,1.0,52-1,0.1,-367.0,-52.0,122.0,225.297581
4,1.527700e+09,52.0,1.0,52-1,0.1,-340.0,-36.0,118.0,208.822093
...,...,...,...,...,...,...,...,...,...
2039963,1.528365e+09,988.0,2.0,988-2,0.1,336.0,4.0,0.0,194.003436
2039964,1.528365e+09,988.0,2.0,988-2,0.1,332.0,6.0,2.0,191.715066
2039965,1.528365e+09,988.0,2.0,988-2,0.1,328.0,3.0,6.0,189.410489
2039966,1.528365e+09,988.0,2.0,988-2,0.1,330.0,6.0,0.0,190.557078
