### General Imports

In [7]:
import pandas as pd
import numpy as np
from datetime import datetime as dt
import os
import glob
from cleanUp import cleanUp
from fillDf import fillDf
from fixYearStamp import fixYearStamp


### Data Cleaning
Passing the sensor data through the cleanUp function to get fix timestamps and delete null timestamps.

In [9]:
all_csv_files = sorted(glob.glob("./Data/*.txt"))
# insert the desired start time
cutOffTime = '4/19/2021 17:00:00'
endTime = '4/19/2021 20:00:00'
# insert the time rectifying offsets. default of for nothing {'':0}
sensorConditions = {'S-01':7,'S-02':7,'S-03':7,'S-04':7,'S-05':7,'S-06':7,'S-15':7,'S-19':7}
#This indicates which columns to keep. Here we're taking all of the dP info and the timestamps
columns = [0,1,6,7,8,9,10,11]

In [11]:
all_csv_files

['./Data/S-01.txt',
 './Data/S-02.txt',
 './Data/S-03.txt',
 './Data/S-04.txt',
 './Data/S-05.txt',
 './Data/S-06.txt',
 './Data/S-07.txt',
 './Data/S-08.txt',
 './Data/S-09.txt',
 './Data/S-10.txt',
 './Data/S-11.txt',
 './Data/S-12.txt',
 './Data/S-13.txt',
 './Data/S-14.txt',
 './Data/S-15.txt',
 './Data/S-16.txt',
 './Data/S-18.txt',
 './Data/S-19.txt']

Changed this to markdown so it won't run twice, had to fix the timestamps on S-12
filePath        = all_csv_files[11]
incorrectString = '21/3/22'
date            = '3/22/2021'
charTimeStart   = 11
charTimeEnd     = 21
offset          = 0
fixYearStamp(filePath,incorrectString,date,charTimeStart,charTimeEnd,offset)

In [13]:
data = cleanUp(cutOffTime,sensorConditions,all_csv_files,columns)

S-01     2021-04-19 17:22:10      2021-04-19 19:58:59
S-02     2021-04-19 17:03:05      2021-04-19 19:59:29
S-03     2021-04-19 17:19:10      2021-04-19 19:59:09
S-04     2021-04-19 17:39:10      2021-04-19 19:58:59
S-05     2021-04-19 17:06:10      2021-04-19 19:58:29
S-06     2021-04-19 17:11:10      2021-04-19 19:59:09
S-07     2021-04-19 17:07:34      2021-04-19 19:59:18
S-08     2021-04-19 17:05:08      2021-04-19 19:59:11
S-09     2021-04-19 17:10:11      2021-04-19 19:58:58
S-10     2021-04-19 17:09:59      2021-04-19 19:58:36
S-11     2021-04-19 17:19:22      2021-04-19 19:59:09
S-12     2021-04-19 17:11:50      2021-04-19 19:58:57
S-13     2021-04-19 17:21:07      2021-04-19 19:59:14
S-14     2021-04-19 17:05:18      2021-04-19 19:59:15
S-15     2021-04-19 17:12:10      2021-04-19 20:01:55
S-16     2021-04-19 17:07:36      2021-04-19 19:58:50
S-18     2021-04-19 17:11:07      2021-04-19 20:02:02
S-19     2021-04-19 17:10:16      2021-04-19 20:02:01


### Exporting Data
Here we can export the organized data frames as csv files

In [16]:
directory = './proccessedData'
for x in data:
    temp=data[x]
    if not os.path.exists(directory):
        os.makedirs(directory)
    location = os.path.join(directory,x+'.csv')
    temp.to_csv(location,index=False)

### Checking Data
Here we scan through the data for irregularities in data recording.

In [18]:
directory = './dataInfo'
if not os.path.exists(directory):
    os.makedirs(directory)
fout = open('./dataInfo/time_Frequency_Error_Log.txt','wt')
errors = {}
errorCount = {}
# Enter the expected interval here
interval = 10
for x in data:
    # errors keeps track of length of each time interval error that occurs
    errors[x] = set(())
    # errorCount keeps track of how many times each time interval error occured
    errorCount[x] = {}
    # counter keeps track of the total time interval errors per sensor
    counter = 0
    #shows the total
    temp = data[x]
    for idx,i in enumerate(temp['Date_Time']):
        try:
            if not ((temp['Date_Time'][idx+1] - i) == pd.Timedelta(seconds=interval)):
                timeErr = temp['Date_Time'][idx+1] - i
                if str(timeErr.seconds) in errorCount[x]:
                    errorCount[x][str(timeErr.seconds)] +=1
                else:
                    errorCount[x][str(timeErr.seconds)] = 1

                errors[x].add(timeErr)


                counter += 1
        except:
            continue

    print(str(round(counter/len(temp)*100,2)),'% potential error in ', x)
    fout.write('potential error in '+ x +'\n' + str(round(counter/len(temp)*100,2))+'%'+'\n')

    # display the different types of errors
    lst = [i.seconds for i in errors[x]]
    frmt = "{:>4}"*len(lst)
    print(frmt.format(*lst))
    fout.write("Time Errors" + frmt.format(*lst)+ '\n')

    # display the quantity of each type of error
    lst = [errorCount[x][str(i.seconds)] for i in errors[x]]
    frmt = "{:>4}"*len(lst)
    print(frmt.format(*lst))
    fout.write("# Observed " + frmt.format(*lst)+ '\n')

    print()
    fout.write('\n')


fout.close()

0.21 % potential error in  S-01
   9  20
   1   1

0.09 % potential error in  S-02
  14
   1

0.21 % potential error in  S-03
   9  20
   1   1

0.6 % potential error in  S-04
  11   9
   2   3

0.68 % potential error in  S-05
   9  16   4  30  40  20
   1   1   1   1   1   2

0.2 % potential error in  S-06
   9  20
   1   1

19.9 % potential error in  S-07
 627  27  20
   1   1 159

49.86 % potential error in  S-08
  27  59  20
   2   1 343

50.0 % potential error in  S-09
  21  22  11  18  19  26  20
   2   1   1   1   2   1 330

49.85 % potential error in  S-10
  17  20
   1 336

19.88 % potential error in  S-11
  27  20
   1 158

99.8 % potential error in  S-12
  27  20
   1 500

20.08 % potential error in  S-13
  27  23  19  58  20
   1   1   1   1 154

49.92 % potential error in  S-14
 939  27  21  20
   1   1   1 314

0.49 % potential error in  S-15
   9  17  15  11  13
   1   1   1   1   1

0.1 % potential error in  S-16
  14
   1

0.39 % potential error in  S-18
  17   9  14  

Notice there are quite a few repeating errors here in our data set. We can either choose to interpolate the data inbetween or pad it with 0s. For gaps <40s i will interpolate, but for gaps >40 i will 0 pad.

In [19]:
fout = open('./dataInfo/interpolation_Effect_Log.txt','wt')
interpDF = {}

for x in data:
    df = data[x]
    cutoff = 40
    freq = '10S'
    try:
        interpDF[x],accuracy = fillDf(df,freq,cutOffTime,endTime,cutoff)
        print(x,' ',accuracy)
        fout.write(x+' '+ '\n' + accuracy[0]+ '\n'+ accuracy[1]+ '\n'+ accuracy[2] +'\n\n')
    except IndexError:
        print(x,'NO DATA')
        fout.write(x+'NO DATA'+'\n')
fout.close()        

S-01   ['% of values from interpolation : 0.186', '% of values from 0-padding : 12.477', '% of values not changed : 87.337']
S-02   ['% of values from interpolation : 0.0', '% of values from 0-padding : 1.764', '% of values not changed : 98.236']
S-03   ['% of values from interpolation : 0.186', '% of values from 0-padding : 10.791', '% of values not changed : 89.023']
S-04   ['% of values from interpolation : 0.372', '% of values from 0-padding : 21.974', '% of values not changed : 77.654']
S-05   ['% of values from interpolation : 1.027', '% of values from 0-padding : 3.548', '% of values not changed : 95.425']
S-06   ['% of values from interpolation : 0.186', '% of values from 0-padding : 6.326', '% of values not changed : 93.488']
S-07   ['% of values from interpolation : 29.74', '% of values from 0-padding : 10.13', '% of values not changed : 60.13']
S-08   ['% of values from interpolation : 64.312', '% of values from 0-padding : 3.439', '% of values not changed : 32.249']
S-09   

### Export Data
export the newly interpolated data

In [21]:
directory = './interpolatedData'
for x in interpDF:
    temp=interpDF[x]
    if not os.path.exists(directory):
        os.makedirs(directory)
    location = os.path.join(directory,x+'.csv')
    temp.to_csv(location,index=False)

### Merge the DataFrames
Also remove 'S-02' from the dictionary as it has no real data
and find the least common index

In [25]:
# interpDF.pop('S-02',None)
# interpDF.pop('S-BU2',None)
# interpDF.pop('S-BU1',None)
length = []
for x in interpDF:
    length.append(len(interpDF[x]))
index = min(length)
print(index)

1071


In [14]:
tempList = temp[15:19]
tempList

Unnamed: 0,Date_Time,Dp>0.3,Dp>0.5,Dp>1.0,Dp>2.5,Dp>5.0,Dp>10.0
15,2021-04-19 17:12:46,111,37,9,3,0,0
16,2021-04-19 17:12:56,39,13,0,0,0,0
17,2021-04-19 17:13:06,81,27,0,0,0,0
18,2021-04-19 17:13:16,177,59,0,0,0,0


In [12]:
for count,key in enumerate(list(interpDF.keys())):
    print(count+1,key,temp[count+1])

KeyError: 1

In [20]:
dfMerged = []
columns = list(interpDF.keys())
columns.extend(['Average',
'Variance'])
# 'Zone 1',
# 'Var Z1',
# 'Zone 2',
# 'Var Z2',
# 'Zone 3',
# 'Var Z3'])
# 'Zone 4',
# 'Var Z4'])

for idx,i in enumerate(interpDF[columns[0]].values[:index]):
    temp = []
    temp.append(i[0])
    for x in interpDF:
        temp.append(interpDF[x].values[idx][1])
    #So we now have a list with the timestamp and then sensors
    
    #here we add the overall average and variance columns
    temp.append(np.average(temp[1:16]))
    temp.append(np.std(temp[1:16]))

    #here we're segregating the zones in the file giving their variance and avg

    # #Zone 1 the 2 sensors right on top of the nebulizer
    # lst = temp[1:7]
    # temp.append(np.average(lst))
    # temp.append(np.std(lst))
    # # #Zone 2 the perimiter of the bed
    # # lst = [temp[2],temp[3],temp[5],temp[6]]
    # # temp.append(np.average(lst))
    # # temp.append(np.std(lst))
    # #Zone 3 the perimeter of the room
    # lst = temp[7:16]
    # temp.append(np.average(lst))
    # temp.append(np.std(lst))
    # #Zone 4 is just the outside sensor
    # lst = temp[16:19]
    # temp.append(np.average(lst))
    # temp.append(np.std(lst))
    dfMerged.append(temp)
columns.insert(0,'Date_Time')

In [22]:
mergedData = pd.DataFrame(dfMerged,columns = columns)

### Increase Resolution on mergedData

In [23]:

for i in mergedData:
    tempFrame = mergedData.values
    tempList = []
    for idx,x in enumerate(tempFrame):
        try:
            increment = (tempFrame[idx+1] - x)/10
            for count in range(10):
                tempList.append(x+increment*count)
        except IndexError:
            tempList.append(x)
            continue
    hiResMergedDF = pd.DataFrame(tempList, columns = mergedData.keys())

### Export Merged Frames

In [24]:
directory = './mergedData/'
if not os.path.exists(directory):

    os.makedirs(directory)

location = os.path.join(directory+'mergedFrame.csv')
hiResMergedDF.to_csv(location,index=False)

### Create csv files for each animation
We have 3 expirements in each that we want to average across the range

In [265]:
day = '4/19/2021'
expTRange = {

    'OR 5 Unblocked':
    [
    pd.Timestamp(day + ' 5:23:24 PM'),
    pd.Timestamp(day + ' 5:32:20 PM'),
    pd.Timestamp(day + ' 5:42:00 PM'),
    pd.Timestamp(day + ' 5:52:00 PM'),
    pd.Timestamp(day + ' 5:58:00 PM'),
    pd.Timestamp(day + ' 6:25:20 PM')],
    ],
    'OR 5 Blocked':
    [
    pd.Timestamp(day + ' 6:08:50 PM'),
    pd.Timestamp(day + ' 6:16:50 PM'),
    pd.Timestamp(day + ' 6:25:20 PM')],
    'OR 12 Unblocked':
    [
    pd.Timestamp(day + ' 6:52:50 PM'),
    pd.Timestamp(day + ' 7:03:30 PM'),
    pd.Timestamp(day + ' 7:13:30 PM')],
    'OR 12 Blocked':
    [
    pd.Timestamp(day + ' 7:25:24 PM'),
    pd.Timestamp(day + ' 7:34:45 PM'),
    pd.Timestamp(day + ' 7:38:24 PM')],
}

#enter in the expirement length as seconds/10
expTLen = {
    'OR 5 Unblocked' : 15*6,
    'OR 5 Blocked':15*6,
    'OR 12 Unblocked':10*6,
    'OR 12 Blocked':10*6,
}

In [266]:
# mergedData = pd.read_csv('./mergedData/mergedFrame.csv',parse_dates=[0])

In [267]:
time = mergedData['Date_Time']
expIndexes = {}
for i in expTRange:
    expIndexes[i] = []
    for x in expTRange[i]:
        for start,n in enumerate(time):
           if n >= x:
               expIndexes[i].append(start)
               break

In [268]:
expTLen[label]

60

In [269]:
# controls how many seconds of data before each experiment to include
preCursorFactor = 6
averagedFrame = {}
expirementFrame = {}

for label in expIndexes:

    df1Index1 = expIndexes[label][0] - preCursorFactor
    df1Index2 = expIndexes[label][0] + expTLen[label]
    df1 = mergedData.iloc[df1Index1 : df1Index2 , 1: ].reset_index(drop = True)

    df2Index1 = expIndexes[label][1] - preCursorFactor
    df2Index2 = expIndexes[label][1] + expTLen[label]
    df2 = mergedData.iloc[df2Index1 : df2Index2 , 1: ].reset_index(drop = True)

    df3Index1 = expIndexes[label][2] - preCursorFactor
    df3Index2 = expIndexes[label][2] + expTLen[label]
    df3 = mergedData.iloc[df3Index1 : df3Index2 , 1: ].reset_index(drop = True)

    averagedFrame[label] = (df1 + df2 + df3)/3

    expirementFrame[label+' Exp1'] = df1
    expirementFrame[label+' Exp2'] = df2
    expirementFrame[label+' Exp3'] = df3
    
#assuming there were 3 expirements for each one

In [270]:
directory = './averagedData'
if not os.path.exists(directory):
    os.makedirs(directory)
for x in averagedFrame:
    temp=averagedFrame[x]
    location = os.path.join(directory,x+'.csv')
    temp.to_csv(location,index=False)



In [271]:
directory = './expirementData'
if not os.path.exists(directory):
    os.makedirs(directory)
for x in expirementFrame:
    temp=expirementFrame[x]
    location = os.path.join(directory,x+'.csv')
    temp.to_csv(location,index=False)

### Increase the Resolution
pad out the dataframes to have values for every second.

In [272]:
stretchedDF = {}
for i in averagedFrame:
    tempFrame = averagedFrame[i].values
    tempList = []
    for idx,x in enumerate(tempFrame):
        try:
            increment = (tempFrame[idx+1] - x)/10
            for count in range(10):
                tempList.append(x+increment*count)
        except IndexError:
            tempList.append(x)
            continue
    stretchedDF[i] = pd.DataFrame(tempList, columns = expirementFrame[list(expirementFrame.keys())[0]].columns)     

In [273]:
stretchExpDf = {}
for i in expirementFrame:
    tempFrame = expirementFrame[i].values
    tempList = []
    for idx,x in enumerate(tempFrame):
        try:
            increment = (tempFrame[idx+1] - x)/10
            for count in range(10):
                tempList.append(x+increment*count)
        except IndexError:
            tempList.append(x)
            continue
    stretchExpDf[i] = pd.DataFrame(tempList, columns = expirementFrame[list(expirementFrame.keys())[0]].columns)

In [274]:
directory = './stretchedAvgData'
if not os.path.exists(directory):
    os.makedirs(directory)
for x in stretchedDF:
    temp=stretchedDF[x]
    location = os.path.join(directory,x+'.csv')
    temp.to_csv(location,index=False)

In [275]:
directory = './stretchedExpirementData'
if not os.path.exists(directory):
    os.makedirs(directory)
for x in stretchExpDf:
    temp=stretchExpDf[x]
    location = os.path.join(directory,x+'.csv')
    temp.to_csv(location,index=False)