### General Imports

In [11]:
import pandas as pd
import numpy as np
from datetime import datetime as dt
import os
import glob
from cleanUp import cleanUp
from fillDf import fillDf
from fixYearStamp import fixYearStamp


### Data Cleaning
Passing the sensor data through the cleanUp function to get fix timestamps and delete null timestamps.

In [18]:
all_csv_files = glob.glob("./Data/*.txt")
# insert the desired start time
cutOffTime = '3/22/2021 9:30'
# insert the time rectifying offsets. default of for nothing {'':0}
sensorConditions = {'S-01':7,'S-02':7,'S-03':7,'S-04':7,'S-05':7,'S-06':7,'S-15':7,'S-19':7}
#This indicates which columns to keep. Here we're taking all of the dP info and the timestamps
columns = [0,1,6,7,8,9,10,11]

Changed this to markdown so it won't run twice, had to fix the timestamps on S-12
filePath        = all_csv_files[11]
incorrectString = '21/3/22'
date            = '3/22/2021'
charTimeStart   = 11
charTimeEnd     = 21
offset          = 0
fixYearStamp(filePath,incorrectString,date,charTimeStart,charTimeEnd,offset)

In [21]:
data = cleanUp(cutOffTime,sensorConditions,all_csv_files,columns)

S-01     2021-03-22 09:30:00      2021-03-22 13:00:19
S-02     2021-03-22 09:30:01      2021-03-22 12:59:21
S-03     2021-03-22 09:30:00      2021-03-22 12:59:09
S-04     2021-03-22 09:30:00      2021-03-22 12:59:00
S-05     2021-03-22 09:30:00      2021-03-22 12:59:29
S-06     2021-03-22 09:30:00      2021-03-22 13:00:49
S-07     2021-03-22 09:30:05      2021-03-22 12:58:50
S-08     2021-03-22 09:30:05      2021-03-22 12:58:46
S-09     2021-03-22 09:30:16      2021-03-22 12:59:26
S-10     2021-03-22 09:30:06      2021-03-22 12:59:32
S-11     2021-03-22 09:30:03      2021-03-22 12:59:53
S-12     2021-03-22 09:30:08      2021-03-22 12:59:30
S-13     2021-03-22 09:30:05      2021-03-22 12:59:15
S-15     2021-03-22 09:30:00      2021-03-22 12:59:03
S-16     2021-03-22 09:30:00      2021-03-22 13:32:35
S-17     2021-03-22 09:30:00      2021-03-22 13:26:45
S-18     2021-03-22 11:23:18      2021-03-22 13:01:22
S-19     2021-03-22 09:30:07      2021-03-22 13:01:21


### Exporting Data
Here we can export the organized data frames as csv files

In [23]:
directory = './proccessedData'
for x in data:
    temp=data[x]
    if not os.path.exists(directory):
        os.makedirs(directory)
    location = os.path.join(directory,x+'.csv')
    temp.to_csv(location,index=False)

### Checking Data
Here we scan through the data for irregularities in data recording.

In [52]:
fout = open('./dataInfo/time_Frequency_Error_Log.txt','wt')
errors = {}
errorCount = {}
# Enter the expected interval here
interval = 10
for x in data:
    # errors keeps track of length of each time interval error that occurs
    errors[x] = set(())
    # errorCount keeps track of how many times each time interval error occured
    errorCount[x] = {}
    # counter keeps track of the total time interval errors per sensor
    counter = 0
    #shows the total
    temp = data[x]
    for idx,i in enumerate(temp['Date_Time']):
        try:
            if not ((temp['Date_Time'][idx+1] - i) == pd.Timedelta(seconds=interval)):
                timeErr = temp['Date_Time'][idx+1] - i
                if str(timeErr.seconds) in errorCount[x]:
                    errorCount[x][str(timeErr.seconds)] +=1
                else:
                    errorCount[x][str(timeErr.seconds)] = 1

                errors[x].add(timeErr)


                counter += 1
        except:
            continue

    print(str(round(counter/len(temp)*100,2)),'% potential error in ', x)
    fout.write('potential error in '+ x +'\n' + str(round(counter/len(temp)*100,2))+'%'+'\n')

    # display the different types of errors
    lst = [i.seconds for i in errors[x]]
    frmt = "{:>4}"*len(lst)
    print(frmt.format(*lst))
    fout.write("Time Errors" + frmt.format(*lst)+ '\n')

    # display the quantity of each type of error
    lst = [errorCount[x][str(i.seconds)] for i in errors[x]]
    frmt = "{:>4}"*len(lst)
    print(frmt.format(*lst))
    fout.write("# Observed " + frmt.format(*lst)+ '\n')

    print()
    fout.write('\n')


fout.close()

0.08 % potential error in  S-01
  19
   1

0.0 % potential error in  S-02



0.16 % potential error in  S-03
  19  20
   1   1

0.24 % potential error in  S-04
  21   9  20
   1   1   1

1.02 % potential error in  S-05
  19 850  11   9
   1   1   5   5

0.08 % potential error in  S-06
   9
   1

20.02 % potential error in  S-07
  26  19  20
   1   1 207

20.31 % potential error in  S-08
  21  11  20  19
   1   3 205   3

49.88 % potential error in  S-09
  20
 418

26.97 % potential error in  S-10
  26 130   9  19   3  20   4  27  11
  22   1   3   2  43 145  16  38  10

20.0 % potential error in  S-11
  20
 210

99.68 % potential error in  S-12
  211319  19   0  20  11
   6   1   4  61 551   1

19.96 % potential error in  S-13
  20
 209

0.64 % potential error in  S-15
  38  14  12  24  51   0  13  11
   1   1   1   1   1   1   1   1

0.28 % potential error in  S-16
  14  66   1
   2   1   1

0.21 % potential error in  S-17
  14   9  12
   1   1   1

0.17 % potential error in  S-18
  1

Notice there are quite a few repeating errors here in our data set. We can either choose to interpolate the data inbetween or pad it with 0s. For gaps <40s i will interpolate, but for gaps >40 i will 0 pad.

In [66]:
fout = open('./dataInfo/interpolation_Effect_Log.txt','wt')
interpDF = {}

for x in data:
    df = data[x]
    cutoff = 40
    freq = '10S'
    try:
        interpDF[x],accuracy = fillDf(df,freq,'3/22/2021 9:30','3/22/2021 13:00',cutoff)
        print(x,' ',accuracy)
        fout.write(x+' '+ '\n' + accuracy[0]+ '\n'+ accuracy[1]+ '\n'+ accuracy[2] +'\n\n')
    except IndexError:
        print(x,'NO DATA')
        fout.write(x+'NO DATA'+'\n')
fout.close()        

S-01   ['% of values from interpolation : 0.0', '% of values from 0-padding : 0.0', '% of values not changed : 100.0']
S-02   ['% of values from interpolation : 0.0', '% of values from 0-padding : 0.0', '% of values not changed : 100.0']
S-03   ['% of values from interpolation : 0.159', '% of values from 0-padding : 0.0', '% of values not changed : 99.841']
S-04   ['% of values from interpolation : 0.398', '% of values from 0-padding : 0.0', '% of values not changed : 99.602']
S-05   ['% of values from interpolation : 0.477', '% of values from 0-padding : 6.762', '% of values not changed : 92.761']
S-06   ['% of values from interpolation : 0.0', '% of values from 0-padding : 0.0', '% of values not changed : 100.0']
S-07   ['% of values from interpolation : 33.413', '% of values from 0-padding : 0.0', '% of values not changed : 66.587']
S-08   ['% of values from interpolation : 33.36', '% of values from 0-padding : 0.0', '% of values not changed : 66.64']
S-09   ['% of values from inter

### Export Data
export the newly interpolated data

In [67]:
directory = './interpolatedData'
for x in interpDF:
    temp=interpDF[x]
    if not os.path.exists(directory):
        os.makedirs(directory)
    location = os.path.join(directory,x+'.csv')
    temp.to_csv(location,index=False)

./interpolatedData\S-19.csv


### Merge the DataFrames
Also remove 'S-02' from the dictionary as it has no real data
and find the least common index

In [70]:
# interpDF.pop('S-02',None)
# interpDF.pop('S-BU2',None)
# interpDF.pop('S-BU1',None)
length = []
for x in interpDF:
    length.append(len(interpDF[x]))
index = min(length)
print(index)

1253


In [107]:
tempList = temp[15:19]
tempList

[0, 0, 0, 0]

In [83]:
for count,key in enumerate(list(interpDF.keys())):
    print(count+1,key,temp[count+1])

1 S-01 0
2 S-02 0
3 S-03 9
4 S-04 0
5 S-05 0
6 S-06 0
7 S-07 0
8 S-08 4
9 S-09 0
10 S-10 0
11 S-11 0
12 S-12 0
13 S-13 0
14 S-15 0
15 S-16 0
16 S-17 0
17 S-18 0
18 S-19 0


In [261]:
dfMerged = []
columns = list(interpDF.keys())
columns.extend(['Average',
'Variance',
'Zone 1',
'Var Z1',
'Zone 2',
'Var Z2',
'Zone 3',
'Var Z3'])
# 'Zone 4',
# 'Var Z4'])

for idx,i in enumerate(interpDF[columns[0]].values[:index]):
    temp = []
    temp.append(i[0])
    for x in interpDF:
        temp.append(interpDF[x].values[idx][1])
    #So we now have a list with the timestamp and then sensors
    
    #here we add the overall average and variance columns
    temp.append(np.average(temp[1:16]))
    temp.append(np.std(temp[1:16]))

    #here we're segregating the zones in the file giving their variance and avg

    #Zone 1 the 2 sensors right on top of the nebulizer
    lst = temp[1:7]
    temp.append(np.average(lst))
    temp.append(np.std(lst))
    # #Zone 2 the perimiter of the bed
    # lst = [temp[2],temp[3],temp[5],temp[6]]
    # temp.append(np.average(lst))
    # temp.append(np.std(lst))
    #Zone 3 the perimeter of the room
    lst = temp[7:16]
    temp.append(np.average(lst))
    temp.append(np.std(lst))
    #Zone 4 is just the outside sensor
    lst = temp[16:19]
    temp.append(np.average(lst))
    temp.append(np.std(lst))
    dfMerged.append(temp)
columns.insert(0,'Date_Time')

In [262]:
mergedData = pd.DataFrame(dfMerged,columns = columns)

### Increase Resolution on mergedData

In [263]:

for i in mergedData:
    tempFrame = mergedData.values
    tempList = []
    for idx,x in enumerate(tempFrame):
        try:
            increment = (tempFrame[idx+1] - x)/10
            for count in range(10):
                tempList.append(x+increment*count)
        except IndexError:
            tempList.append(x)
            continue
    hiResMergedDF = pd.DataFrame(tempList, columns = mergedData.keys())

### Export Merged Frames

In [264]:
directory = './mergedData/'
if not os.path.exists(directory):

    os.makedirs(directory)

location = os.path.join(directory+'mergedFrame.csv')
hiResMergedDF.to_csv(location,index=False)

### Create csv files for each animation
We have 3 expirements in each that we want to average across the range

In [265]:
expTRange = {

    'EE502 Door Closed':
    [pd.Timestamp('3/22/2021 9:40'),
    pd.Timestamp('3/22/2021 10:05:23'),
    pd.Timestamp('3/22/2021 10:23:52')],
    'EE502 Door Open':
    [pd.Timestamp('3/22/2021 10:42:43'),
    pd.Timestamp('3/22/2021 10:59:23'),
    pd.Timestamp('3/22/2021 11:15:37')],
    'EE502 Negative Pressure':
    [pd.Timestamp('3/22/2021 11:32:21'),
    pd.Timestamp('3/22/2021 11:42:27'),
    pd.Timestamp('3/22/2021 11:53:47')],
    'EE504 Door Open':
    [pd.Timestamp('3/22/2021 12:19:12'),
    pd.Timestamp('3/22/2021 12:30:10'),
    pd.Timestamp('3/22/2021 12:40:15')],
}

#enter in the expirement length as seconds/10
expTLen = {
    'EE502 Door Closed' : 15*6,
    'EE502 Door Open':15*6,
    'EE502 Negative Pressure':10*6,
    'EE504 Door Open':10*6,
}

In [266]:
# mergedData = pd.read_csv('./mergedData/mergedFrame.csv',parse_dates=[0])

In [267]:
time = mergedData['Date_Time']
expIndexes = {}
for i in expTRange:
    expIndexes[i] = []
    for x in expTRange[i]:
        for start,n in enumerate(time):
           if n >= x:
               expIndexes[i].append(start)
               break

In [268]:
expTLen[label]

60

In [269]:
# controls how many seconds of data before each experiment to include
preCursorFactor = 6
averagedFrame = {}
expirementFrame = {}

for label in expIndexes:

    df1Index1 = expIndexes[label][0] - preCursorFactor
    df1Index2 = expIndexes[label][0] + expTLen[label]
    df1 = mergedData.iloc[df1Index1 : df1Index2 , 1: ].reset_index(drop = True)

    df2Index1 = expIndexes[label][1] - preCursorFactor
    df2Index2 = expIndexes[label][1] + expTLen[label]
    df2 = mergedData.iloc[df2Index1 : df2Index2 , 1: ].reset_index(drop = True)

    df3Index1 = expIndexes[label][2] - preCursorFactor
    df3Index2 = expIndexes[label][2] + expTLen[label]
    df3 = mergedData.iloc[df3Index1 : df3Index2 , 1: ].reset_index(drop = True)

    averagedFrame[label] = (df1 + df2 + df3)/3

    expirementFrame[label+' Exp1'] = df1
    expirementFrame[label+' Exp2'] = df2
    expirementFrame[label+' Exp3'] = df3
    
#assuming there were 3 expirements for each one

In [270]:
directory = './averagedData'
if not os.path.exists(directory):
    os.makedirs(directory)
for x in averagedFrame:
    temp=averagedFrame[x]
    location = os.path.join(directory,x+'.csv')
    temp.to_csv(location,index=False)



In [271]:
directory = './expirementData'
if not os.path.exists(directory):
    os.makedirs(directory)
for x in expirementFrame:
    temp=expirementFrame[x]
    location = os.path.join(directory,x+'.csv')
    temp.to_csv(location,index=False)

### Increase the Resolution
pad out the dataframes to have values for every second.

In [272]:
stretchedDF = {}
for i in averagedFrame:
    tempFrame = averagedFrame[i].values
    tempList = []
    for idx,x in enumerate(tempFrame):
        try:
            increment = (tempFrame[idx+1] - x)/10
            for count in range(10):
                tempList.append(x+increment*count)
        except IndexError:
            tempList.append(x)
            continue
    stretchedDF[i] = pd.DataFrame(tempList, columns = expirementFrame[list(expirementFrame.keys())[0]].columns)     

In [273]:
stretchExpDf = {}
for i in expirementFrame:
    tempFrame = expirementFrame[i].values
    tempList = []
    for idx,x in enumerate(tempFrame):
        try:
            increment = (tempFrame[idx+1] - x)/10
            for count in range(10):
                tempList.append(x+increment*count)
        except IndexError:
            tempList.append(x)
            continue
    stretchExpDf[i] = pd.DataFrame(tempList, columns = expirementFrame[list(expirementFrame.keys())[0]].columns)

In [274]:
directory = './stretchedAvgData'
if not os.path.exists(directory):
    os.makedirs(directory)
for x in stretchedDF:
    temp=stretchedDF[x]
    location = os.path.join(directory,x+'.csv')
    temp.to_csv(location,index=False)

In [275]:
directory = './stretchedExpirementData'
if not os.path.exists(directory):
    os.makedirs(directory)
for x in stretchExpDf:
    temp=stretchExpDf[x]
    location = os.path.join(directory,x+'.csv')
    temp.to_csv(location,index=False)