In [60]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import os

In [61]:
outliers = pd.read_csv("outliers.csv")
outliers = outliers.drop(["Unnamed: 0", "Unnamed: 0.1"], 1)
print(sum(outliers["outliers"]))
outliers.head()

147


Unnamed: 0,subject,task,sensor,start,stop,length,outliers
0,PDMotion_0020_Test,PS,right_1,11.4125,55.575,44.1625,True
1,PDMotion_0020_Test,PS,right_2,11.375,55.625,44.25,True
2,PDMotion_0020_Test,PS,left_1,65.9625,76.0125,10.05,False
3,PDMotion_0020_Test,PS,left_2,66.575,75.9625,9.3875,False
4,PDMotion_0020_Test,HM,right_1,15.05,19.8875,4.8375,False


In [62]:
#less than 10 seconds
extended = []
for i, r in outliers.iterrows():
    if r["stop"] - r["start"] < 10:
        extended.append(r["start"] + 10)
    else:
        extended.append(-1)
outliers["extended"] = extended
print(np.sum(outliers["extended"].count()))
outliers.head()


944


Unnamed: 0,subject,task,sensor,start,stop,length,outliers,extended
0,PDMotion_0020_Test,PS,right_1,11.4125,55.575,44.1625,True,-1.0
1,PDMotion_0020_Test,PS,right_2,11.375,55.625,44.25,True,-1.0
2,PDMotion_0020_Test,PS,left_1,65.9625,76.0125,10.05,False,-1.0
3,PDMotion_0020_Test,PS,left_2,66.575,75.9625,9.3875,False,76.575
4,PDMotion_0020_Test,HM,right_1,15.05,19.8875,4.8375,False,25.05


In [63]:
#Set up for visual continuous check
continuous = []
for i, r in outliers.iterrows():
    if r["outliers"]:
        continuous.append('')
    else:
        continuous.append(-1)
outliers["continuous"] = continuous
print(np.sum(outliers["continuous"].count()))
outliers.head()

#-1: already valid, between 10 and 20s
#2: longer than 20 but continuous, use stop column
#3: longer than 20, not continuous, but clear outlying spike need to keep going
#4: longer than 20, not continuous, unclear period

944


Unnamed: 0,subject,task,sensor,start,stop,length,outliers,extended,continuous
0,PDMotion_0020_Test,PS,right_1,11.4125,55.575,44.1625,True,-1.0,
1,PDMotion_0020_Test,PS,right_2,11.375,55.625,44.25,True,-1.0,
2,PDMotion_0020_Test,PS,left_1,65.9625,76.0125,10.05,False,-1.0,-1.0
3,PDMotion_0020_Test,PS,left_2,66.575,75.9625,9.3875,False,76.575,-1.0
4,PDMotion_0020_Test,HM,right_1,15.05,19.8875,4.8375,False,25.05,-1.0


In [64]:
oc = pd.read_csv("outliers_continuous.csv")
oc = oc.drop(["Unnamed: 0", "Unnamed: 10"], axis=1)
oc.head()

Unnamed: 0,subject,task,sensor,start,stop,length,outliers,extended,continuous
0,PDMotion_0020_Test,PS,right_1,11.4125,55.575,44.1625,True,-1.0,3
1,PDMotion_0020_Test,PS,right_2,11.375,55.625,44.25,True,-1.0,3
2,PDMotion_0020_Test,PS,left_1,65.9625,76.0125,10.05,False,-1.0,-1
3,PDMotion_0020_Test,PS,left_2,66.575,75.9625,9.3875,False,76.575,-1
4,PDMotion_0020_Test,HM,right_1,15.05,19.8875,4.8375,False,25.05,-1


In [65]:
oc.head()

Unnamed: 0,subject,task,sensor,start,stop,length,outliers,extended,continuous
0,PDMotion_0020_Test,PS,right_1,11.4125,55.575,44.1625,True,-1.0,3
1,PDMotion_0020_Test,PS,right_2,11.375,55.625,44.25,True,-1.0,3
2,PDMotion_0020_Test,PS,left_1,65.9625,76.0125,10.05,False,-1.0,-1
3,PDMotion_0020_Test,PS,left_2,66.575,75.9625,9.3875,False,76.575,-1
4,PDMotion_0020_Test,HM,right_1,15.05,19.8875,4.8375,False,25.05,-1


In [66]:
#set up fixed outliers for 2s
fixed = []
final_start = []
final_stop = []

for i, r in oc.iterrows():
    if float(r["continuous"]) == 2:
        fixed.append("continuous")
        final_start.append(r["start"])
        final_stop.append(r["stop"])
    elif float(r["extended"]) != -1:
        fixed.append("extended")
        final_start.append(r["start"])
        final_stop.append(r["extended"])
    elif float(r["continuous"]) == -1:
        fixed.append("regular")
        final_start.append(r["start"])
        final_stop.append(r["stop"])
    else:
        fixed.append(False)
        final_start.append(-1)
        final_stop.append(-1)

print(len(fixed), len(final_start), len(final_stop))

oc["fixed"] = fixed
oc["final_start"] = final_start
oc["final_stop"] = final_stop



944 944 944


In [67]:
#Check for pairs
for i, r in oc.iterrows():
    if not r["fixed"]:
        if r["continuous"] == 3 or r["continuous"] == 4:
            if r["sensor"] == "right_1" or r["sensor"] == "left_1":
                r2 = oc.loc[i+1]
                if r2["fixed"]:
                    oc.loc[i, "fixed"] = "pair"
                    oc.loc[i, "final_start"] = r2["final_start"]
                    oc.loc[i, "final_stop"] = r2["final_stop"]
            else:
                r2 = oc.loc[i-1]
                if r2["fixed"]:
                    oc.loc[i, "fixed"] = "pair"
                    oc.loc[i, "final_start"] = r2["final_start"]
                    oc.loc[i, "final_stop"] = r2["final_stop"]
oc.head()                    

Unnamed: 0,subject,task,sensor,start,stop,length,outliers,extended,continuous,fixed,final_start,final_stop
0,PDMotion_0020_Test,PS,right_1,11.4125,55.575,44.1625,True,-1.0,3,False,-1.0,-1.0
1,PDMotion_0020_Test,PS,right_2,11.375,55.625,44.25,True,-1.0,3,False,-1.0,-1.0
2,PDMotion_0020_Test,PS,left_1,65.9625,76.0125,10.05,False,-1.0,-1,regular,65.9625,76.0125
3,PDMotion_0020_Test,PS,left_2,66.575,75.9625,9.3875,False,76.575,-1,extended,66.575,76.575
4,PDMotion_0020_Test,HM,right_1,15.05,19.8875,4.8375,False,25.05,-1,extended,15.05,25.05


In [68]:
td = 0
for i, r in oc.iterrows():
    if r["fixed"] == False:
        td+=1
print(td)

#146->133->70
#

70


In [76]:
oc.to_csv("outliers_fixed_3.csv")

In [71]:
#fix other cases with largest continous chunk
chunk_start = []
chunk_end = []

for i, r in oc.iterrows():
    if not r["fixed"]:
        s_data = pd.read_csv("series/"+r["subject"]+"_"+r["task"]+"_"+r["sensor"]+".csv")
        s_data = s_data[["relative_time", "threshold"]].dropna()
        fixed_start, fixed_stop, pairs_list = largest_chunk(s_data)
        
        oc.loc[i, "fixed"] = "chunk"
        oc.loc[i, "final_start"] = fixed_start
        oc.loc[i, "final_stop"] = fixed_stop



399.0
11.4125 11.4125 2
if
11.4125 11.4125
11.425 0.012500000000001066 2
11.4375 0.01249999999999929 2
11.45 0.01249999999999929 2
11.4625 0.012500000000001066 2
11.475 0.01249999999999929 2
11.4875 0.012500000000001066 2
11.575 0.08749999999999858 2
11.5875 0.012500000000001066 2
11.6 0.01249999999999929 2
11.6125 0.012500000000001066 2
11.625 0.01249999999999929 2
11.6375 0.01249999999999929 2
11.65 0.012500000000001066 2
11.6625 0.01249999999999929 2
11.85 0.1875 2
11.8625 0.012500000000001066 2
11.8875 0.02499999999999858 2
11.9 0.012500000000001066 2
11.9125 0.01249999999999929 2
11.925 0.012500000000001066 2
11.9375 0.01249999999999929 2
11.95 0.01249999999999929 2
11.9625 0.012500000000001066 2
11.975 0.01249999999999929 2
11.9875 0.012500000000001066 2
12.0 0.01249999999999929 2
12.0125 0.01249999999999929 2
12.025 0.012500000000001066 2
12.0375 0.01249999999999929 2
12.05 0.012500000000001066 2
12.0625 0.01249999999999929 2
12.075 0.01249999999999929 2
12.0875 0.01250000000000

In [91]:
s_data
s_data.query('threshold == 1' ).head(1).relative_time.values[0]

17.9375

In [69]:
def largest_chunk(s_data, max_time = 2):
    print(sum(s_data["threshold"]))
    pairs = []
    cstart = 0 #s_data.query('threshold == 1').head(1).relative_time.values[0]
    cend = 0
    for i, r in s_data.iterrows():
        if r["threshold"] == 1:
            print(r["relative_time"],r["relative_time"] - cend, max_time)
            if r["relative_time"] - cend > max_time:
                print("if")
                pairs.append((cstart, cend))
                cstart = r["relative_time"]
                cend = r["relative_time"]
                print(cstart, cend)
            else:
                cend = r["relative_time"]
    
    pairs.append((cstart, cend)) #if the series ends with a pair
    
    max_pair = pairs[0]
    max_time = pairs[0][1]-pairs[0][0]
    for p in pairs:
        if p[1]-p[0] > max_time:
            max_pair = p
            max_time = p[1] - p[0]
    return max_pair[0], max_pair[1], pairs


In [58]:
#oc.head()

In [59]:
s_data = pd.read_csv("series/"+"PDMotion_0025_Test"+"_"+"TT"+"_"+"left_2"+".csv")
s_data = s_data[["relative_time", "threshold"]].dropna()
#print(s_data)
stime, etime, pairlist = largest_chunk(s_data, max_time=2)
print(stime, etime, pairlist)


349.0
46.5375 46.5375 2
if
46.5375 46.5375
46.55 0.012499999999995737 2
46.5625 0.012500000000002842 2
46.575 0.012500000000002842 2
46.5875 0.012499999999995737 2
46.6 0.012500000000002842 2
46.6125 0.012499999999995737 2
46.625 0.012500000000002842 2
46.6375 0.012500000000002842 2
46.975 0.3374999999999986 2
46.9875 0.012499999999995737 2
47.0 0.012500000000002842 2
47.0125 0.012500000000002842 2
47.025 0.012499999999995737 2
47.0375 0.012500000000002842 2
47.05 0.012499999999995737 2
47.0625 0.012500000000002842 2
47.075 0.012500000000002842 2
47.0875 0.012499999999995737 2
47.3625 0.2749999999999986 2
47.375 0.012500000000002842 2
47.3875 0.012500000000002842 2
47.4 0.012499999999995737 2
47.4125 0.012500000000002842 2
47.425 0.012499999999995737 2
47.4375 0.012500000000002842 2
47.45 0.012500000000002842 2
47.4625 0.012499999999995737 2
47.475 0.012500000000002842 2
47.4875 0.012499999999995737 2
47.7375 0.25 2
47.75 0.012500000000002842 2
47.7625 0.012500000000002842 2
47.775 0.0

349.0

In [73]:
#check for pairs again
for i, r in oc.iterrows():
    if r["final_stop"] - r["final_start"] < 5:
        if r["sensor"] == "right_1" or r["sensor"] == "left_1":
            r2 = oc.loc[i+1]
            if r2["fixed"]:
                oc.loc[i, "fixed"] = "chunk_pair"
                oc.loc[i, "final_start"] = r2["final_start"]
                oc.loc[i, "final_stop"] = r2["final_stop"]
        else:
            r2 = oc.loc[i-1]
            if r2["fixed"]:
                oc.loc[i, "fixed"] = "chunk_pair"
                oc.loc[i, "final_start"] = r2["final_start"]
                oc.loc[i, "final_stop"] = r2["final_stop"]

In [75]:
#final extended lengths
extended_2 = 0
for i, r in oc.iterrows():
    if r["final_stop"] - r["final_start"] < 10:
        oc.loc[i, "final_stop"] = r["final_start"] + 10
        if r["fixed"] == "chunk":
            oc.loc[i, "fixed"] = "chunk_extend"
        if r["fixed"] == "chunk_pair":
            oc.loc[i, "fixed"] = "chunk_pair_extended"
        extended_2+=1
print(extended_2)

83
