### General Imports

In [1]:
import pandas as pd
import numpy as np
from datetime import datetime as dt
import os
import glob
from cleanUp import cleanUp
from fillDf import fillDf
from fixYearStamp import fixYearStamp
from sklearn.cluster import KMeans
import time as clock


In [2]:
# Wall-clock start of the run; the final cell subtracts this to print the elapsed seconds.
begin = clock.time()

### Data Cleaning
Pass the sensor data through the `cleanUp` function to fix the timestamps and delete null timestamps.

In [3]:
# Raw sensor logs to process.
all_csv_files = glob.glob("./Data/*.txt")

# Desired start and end of the analysis window.
cutOffTime = '12/22/2020 12:49:00'
endTime = '12/22/2020 16:10:00'

# Per-sensor time-rectifying offsets handed to cleanUp; use {'':0} for none.
sensorConditions = {
    'S-01': 1, 'S-02': 1, 'S-03': 1, 'S-04': 1, 'S-05': 1,
    'S-06': 1, 'S-07': 1, 'S-08': 1, 'S-09': 1, 'S-10': 1,
    'S-11': 1, 'S-12': 1, 'S-13': 1, 'S-14': 1, 'S-15': 1,
    'S-BU1': 8, 'S-BU2': 8,
}

# Column positions to keep from each raw file: the timestamps plus all of the dP info.
columns = [0, 1, 6, 7, 8, 9, 10, 11]

# Turn the timestamp-spacing check on or off.
DataChecking = True

# Observed junk timestamps that must be removed from the data.
badTimes = ['     0/0/0      0:0:0','2165/165/165 165:165:85']

# True  -> zones are derived with k-means clustering.
# False -> zones come from the manual zoneList.
ZoneAutomation = False

# Number of k-means clusters; replaced by len(zoneList) when zones are manual.
numberOfZones = 4

# Sensors left out of the automatic zoning.
outdoorSensors = ['S-16', 'S-17', 'S-18', 'S-19']

# Number of 10-second samples before each trial start to include in the experiment csv files.
preCursorFactor = 6

# Particle-size column to analyze.
particle = 'Dp>0.3'

In [4]:
# For every experiment: the start time of each of its three trials.
expTRange = {
    label: [pd.Timestamp(stamp) for stamp in stamps]
    for label, stamps in {
        'OR7 Unblocked':  ('12-22-2020 13:08:00', '12-22-2020 13:16:30', '12-22-2020 13:34:15'),
        'OR7 Blocked':    ('12-22-2020 13:44:30', '12-22-2020 13:53:00', '12-22-2020 13:59:00'),
        'OR16 Unblocked': ('12-22-2020 14:38:00', '12-22-2020 14:44:00', '12-22-2020 14:50:00'),
        'OR16 Blocked 1': ('12-22-2020 14:58:30', '12-22-2020 15:05:15', '12-22-2020 15:11:00'),
        'OR16 Blocked 2': ('12-22-2020 15:17:30', '12-22-2020 15:23:00', '12-22-2020 15:30:00'),
    }.items()
}

# Experiment length in 10-second samples (seconds/10); 5*6 samples is five minutes.
expTLen = dict.fromkeys(
    ['OR7 Unblocked', 'OR7 Blocked', 'OR16 Unblocked', 'OR16 Blocked 1', 'OR16 Blocked 2'],
    5*6,
)

# Manual zone set-up: maps each zone name to the sensors that belong to it.
# Only used when ZoneAutomation is False.
# NOTE(review): an earlier remark here said S-14 was missing, but S-14 is listed under 'Zone 1'.
zoneList = {
    'Zone 1' : ['S-13','S-14'],
    'Zone 2' : ['S-09','S-11','S-12'],
    'Zone 3' : ['S-01','S-03','S-04','S-05','S-06','S-07','S-08','S-15'],
    'Zone 4' : ['S-BU1']
}
# With manual zones the zone count is taken from zoneList, overriding the configured numberOfZones.
if not ZoneAutomation:
    numberOfZones = len(zoneList)

In [5]:
# Show which raw sensor files the glob pattern picked up.
all_csv_files

['./Data\\S-01.txt',
 './Data\\S-03.txt',
 './Data\\S-04.txt',
 './Data\\S-05.txt',
 './Data\\S-06.txt',
 './Data\\S-07.txt',
 './Data\\S-08.txt',
 './Data\\S-09.txt',
 './Data\\S-11.txt',
 './Data\\S-12.txt',
 './Data\\S-13.txt',
 './Data\\S-14.txt',
 './Data\\S-15.txt',
 './Data\\S-BU1.txt']

Changed this cell to markdown so it won't run twice; it was needed once to fix the timestamps on S-12.
filePath        = all_csv_files[11]
incorrectString = '21/3/22'
date            = '3/22/2021'
charTimeStart   = 11
charTimeEnd     = 21
offset          = 0
fixYearStamp(filePath,incorrectString,date,charTimeStart,charTimeEnd,offset)

In [6]:
# cleanUp is a project helper whose source is not shown here. Judging by how `data` is used
# in the cells below, it returns a mapping of sensor name -> DataFrame with a 'Date_Time'
# column — TODO confirm against cleanUp.py.
data = cleanUp(cutOffTime,sensorConditions,all_csv_files,columns,badTimes)

S-01     2020-12-22 12:49:10      2020-12-22 16:09:31       mod: yes
S-03     2020-12-22 12:49:03      2020-12-22 16:09:35       mod: yes
S-04     2020-12-22 12:49:07      2020-12-22 16:09:34       mod: yes
S-05     2020-12-22 12:49:01      2020-12-22 16:10:50       mod: yes
S-06     2020-12-22 12:49:12      2020-12-22 16:09:25       mod: yes
S-07     2020-12-22 12:49:07      2020-12-22 16:12:53       mod: yes
S-08     2020-12-22 12:49:24      2020-12-22 16:57:24       mod: yes
S-09     2020-12-22 12:49:00      2020-12-22 16:11:30       mod: yes
S-11     2020-12-22 13:02:00      2020-12-22 16:09:43       mod: yes
S-12     2020-12-22 12:49:06      2020-12-22 16:10:06       mod: yes
S-13     2020-12-22 12:49:06      2020-12-22 16:08:44       mod: yes
S-14     2020-12-22 12:49:09      2020-12-22 16:13:45       mod: yes
S-15     2020-12-22 12:49:03      2020-12-22 16:10:00       mod: yes
S-BU1     2020-12-22 12:49:00      2020-12-22 16:11:40       mod: yes


### Exporting Data
Here we can export the organized data frames as csv files

In [7]:
directory = './proccessedData'
# Create the output folder once, before the loop; exist_ok makes re-runs safe and
# avoids the check-then-create race of os.path.exists() followed by os.makedirs().
os.makedirs(directory, exist_ok=True)
# Write one csv per sensor, named after the sensor.
for x in data:
    temp = data[x]
    location = os.path.join(directory,x+'.csv')
    temp.to_csv(location,index=False)

### Checking Data
Here we scan through the data for irregularities in data recording.

In [8]:
# Scan every sensor frame for gaps between consecutive timestamps that differ from the
# expected sampling interval, print a summary and write it to a log file.
if DataChecking:
    directory = './dataInfo'
    # exist_ok avoids the check-then-create race of exists() + makedirs()
    os.makedirs(directory, exist_ok=True)
    errors = {}
    errorCount = {}
    # Enter the expected interval here
    interval = 10
    expectedGap = pd.Timedelta(seconds=interval)
    # the with-block closes the log even if one of the sensor frames raises part-way through
    with open('./dataInfo/time_Frequency_Error_Log.txt','wt') as fout:
        for x in data:
            # errors keeps track of the length of each time interval error that occurs
            errors[x] = set()
            # errorCount keeps track of how many times each time interval error occurred
            errorCount[x] = {}
            # counter keeps track of the total time interval errors per sensor
            counter = 0
            temp = data[x]
            # Compare each timestamp with its positional successor. Pairing the values
            # directly removes the need for a catch-all except at the last row and does
            # not depend on the frame having a 0..n-1 index.
            stamps = list(temp['Date_Time'])
            for current,following in zip(stamps,stamps[1:]):
                timeErr = following - current
                if timeErr != expectedGap:
                    # keyed by the seconds component of the gap, as in the log format
                    gapKey = str(timeErr.seconds)
                    errorCount[x][gapKey] = errorCount[x].get(gapKey,0) + 1
                    errors[x].add(timeErr)
                    counter += 1

            # an empty frame has no gaps to report; avoid dividing by zero
            percent = str(round(counter/len(temp)*100,2)) if len(temp) else '0.0'
            print(percent,'% potential error in ', x)
            fout.write('potential error in '+ x +'\n' + percent+'%'+'\n')

            # display the different types of errors
            lst = [i.seconds for i in errors[x]]
            frmt = "{:>4}"*len(lst)
            print(frmt.format(*lst))
            fout.write("Time Errors" + frmt.format(*lst)+ '\n')

            # display the quantity of each type of error
            lst = [errorCount[x][str(i.seconds)] for i in errors[x]]
            frmt = "{:>4}"*len(lst)
            print(frmt.format(*lst))
            fout.write("# Observed " + frmt.format(*lst)+ '\n')

            print()
            fout.write('\n')

20.86 % potential error in  S-01
  26  21  17  13  19  30  20  11
   1   1   6   1   2   1 195   1

22.86 % potential error in  S-03
  21  12  25  27  16  18  62  84  51  20  31  11  22   9  13   0  90  15  70  59  17  19  63
   1   2   1   2   1   7   2   1   1 171   1   3   2   1   1   1   1   1   1   1  12   4   1

22.59 % potential error in  S-04
  21  22  17  23  60  18  35  24  78  19  20  13  25  11  49
   1   2  12   1   1  10   3   1   1   2 180   1   1   3   1

22.07 % potential error in  S-05
  26  21  16  17  23  12  18   7  19  24  25  20  15
   1   4   1  10   2   1   7   1   2   1   1 188   1

19.98 % potential error in  S-06
  28  20  85
   1 197   1

20.89 % potential error in  S-07
   9  16  22  14  12  34  18  24  19  30  25  20  15  17  13  11
   1   1   2   1   1   1   1   1   9   2   1 184   2   2   1   2

99.76 % potential error in  S-08
  34  35 138 139 140  37
 164 241   3   2   1   1

0.41 % potential error in  S-09
   9  17   3  31  20
   1   1   1   1   1

0

Notice there are quite a few repeating errors in our data set. We can either interpolate the data in between or pad it with 0s. For gaps <40 s I will interpolate, but for gaps >40 s I will zero-pad.

In [9]:

# Resample every sensor frame onto a regular grid with fillDf and log how much of each
# frame was interpolated, zero-padded or left untouched.
# The log folder is otherwise only created by the data-checking cell, so make sure it
# exists here too (needed when DataChecking is False).
os.makedirs('./dataInfo', exist_ok=True)
interpDF = {}
# gap threshold and grid frequency handed to fillDf; the same for every sensor
cutoff = 40
freq = '10S'

# the with-block closes the log even if fillDf raises something other than IndexError
with open('./dataInfo/interpolation_Effect_Log.txt','wt') as fout:
    for x in data:
        df = data[x]
        try:
            interpDF[x],accuracy = fillDf(df,freq,cutOffTime,endTime,cutoff)
            print(x,' ',accuracy)
            fout.write(x+' '+ '\n' + accuracy[0]+ '\n'+ accuracy[1]+ '\n'+ accuracy[2] +'\n\n')
        except IndexError:
            # reported as a sensor with no usable data
            print(x,'NO DATA')
            fout.write(x+'NO DATA'+'\n')

S-01   ['% of values from interpolation : 34.219', '% of values from 0-padding : 0.0', '% of values not changed : 65.781']
S-03   ['% of values from interpolation : 33.306', '% of values from 0-padding : 4.402', '% of values not changed : 62.292']
S-04   ['% of values from interpolation : 34.967', '% of values from 0-padding : 1.578', '% of values not changed : 63.455']
S-05   ['% of values from interpolation : 35.323', '% of values from 0-padding : 0.0', '% of values not changed : 64.677']
S-06   ['% of values from interpolation : 33.167', '% of values from 0-padding : 0.665', '% of values not changed : 66.168']
S-07   ['% of values from interpolation : 33.831', '% of values from 0-padding : 0.0', '% of values not changed : 66.169']
S-08   ['% of values from interpolation : 94.186', '% of values from 0-padding : 5.814', '% of values not changed : 0.0']
S-09   ['% of values from interpolation : 0.415', '% of values from 0-padding : 0.0', '% of values not changed : 99.585']
S-11   ['% o

### Export Data
Export the newly interpolated data.

In [10]:
directory = './interpolatedData'
# Create the output folder once, before the loop; exist_ok makes re-runs safe and
# avoids the check-then-create race of os.path.exists() followed by os.makedirs().
os.makedirs(directory, exist_ok=True)
# Write one csv per interpolated sensor frame, named after the sensor.
for x in interpDF:
    temp = interpDF[x]
    location = os.path.join(directory,x+'.csv')
    temp.to_csv(location,index=False)

### Merge the DataFrames

In [11]:
# Find the shortest interpolated frame: its position among interpDF's keys and its row count.
length = [len(frame) for frame in interpDF.values()]
index = min(length)
# list.index returns the first position holding the minimum
lowIDX = length.index(index)
lowValue = length[lowIDX]
print(lowIDX,lowValue)

10 1199


for count,key in enumerate(list(interpDF.keys())):
    print(count+1,key,temp[count+1])

In [12]:
# Build one wide frame: the timestamps of the shortest sensor frame plus, for every
# sensor, its readings for the selected particle size.
columns = list(interpDF.keys())
mergedData = pd.DataFrame({'Date_Time':interpDF[columns[lowIDX]]['Date_Time']})
for column in columns:
    mergedData[column] = interpDF[column][particle]
# Take the row statistics over the sensor columns only. Reducing the whole frame would
# also feed the 'Date_Time' column into the mean, which older pandas silently dropped
# and current pandas refuses to do.
sensorReadings = mergedData[columns]
Average = sensorReadings.mean(axis=1)
# ddof=0 keeps the population variance that np.var used
Variance = sensorReadings.var(axis=1,ddof=0)
mergedData['Average'] = Average
mergedData['Variance'] = Variance
mergedData

Unnamed: 0,Date_Time,S-01,S-03,S-04,S-05,S-06,S-07,S-08,S-09,S-11,S-12,S-13,S-14,S-15,S-BU1,Average,Variance
0,2020-12-22 12:49:00,0,0,0,0,18,0,0,9,0,0,0,21,0,78,9.000000,414.000000
1,2020-12-22 12:49:10,0,9,0,0,18,0,0,9,0,0,9,0,0,78,8.785714,397.882653
2,2020-12-22 12:49:20,9,9,0,0,9,0,0,9,0,102,18,0,0,49,14.642857,746.515306
3,2020-12-22 12:49:30,9,33,0,0,0,0,0,9,0,78,9,0,0,21,11.357143,432.229592
4,2020-12-22 12:49:40,0,33,0,9,0,0,0,0,0,9,0,9,0,10,5.000000,77.285714
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1194,2020-12-22 16:08:00,0,0,0,0,9,0,0,9,0,0,1023,0,0,4,74.642857,69193.229592
1195,2020-12-22 16:08:10,0,0,0,18,0,0,0,21,0,0,1319,9,0,0,97.642857,114794.943878
1196,2020-12-22 16:08:20,4,0,0,18,0,9,0,0,0,33,1615,0,9,0,120.571429,171877.959184
1197,2020-12-22 16:08:30,30,0,0,0,0,9,0,0,0,33,1911,0,9,0,142.285714,240759.918367


### Increase Resolution on mergedData

In [13]:

# Linearly interpolate between consecutive rows of mergedData so that every original row
# becomes ten rows; the final row has no successor and is kept as a single row.
# The computation is done once: it does not depend on the column being visited, so the
# former outer loop over the columns only repeated identical work.
tempFrame = mergedData.values
lastRow = len(tempFrame) - 1
tempList = []
for idx,x in enumerate(tempFrame):
    if idx < lastRow:
        increment = (tempFrame[idx+1] - x)/10
        for count in range(10):
            tempList.append(x+increment*count)
    else:
        tempList.append(x)
hiResMergedDF = pd.DataFrame(tempList, columns = mergedData.keys())

### Export Merged Frames

In [14]:
directory = './mergedData/'
# exist_ok makes re-runs safe without a separate existence check
os.makedirs(directory, exist_ok=True)

# Pass the folder and file name as separate components so the path no longer depends on
# `directory` ending with a slash.
location = os.path.join(directory,'mergedFrame.csv')
hiResMergedDF.to_csv(location,index=False)

### Create csv files for each animation
Each experiment has 3 trials that we want to average across the range.

In [15]:
# mergedData = pd.read_csv('./mergedData/mergedFrame.csv',parse_dates=[0])

In [16]:
# Display the merged frame again before it is sliced into experiments.
mergedData

Unnamed: 0,Date_Time,S-01,S-03,S-04,S-05,S-06,S-07,S-08,S-09,S-11,S-12,S-13,S-14,S-15,S-BU1,Average,Variance
0,2020-12-22 12:49:00,0,0,0,0,18,0,0,9,0,0,0,21,0,78,9.000000,414.000000
1,2020-12-22 12:49:10,0,9,0,0,18,0,0,9,0,0,9,0,0,78,8.785714,397.882653
2,2020-12-22 12:49:20,9,9,0,0,9,0,0,9,0,102,18,0,0,49,14.642857,746.515306
3,2020-12-22 12:49:30,9,33,0,0,0,0,0,9,0,78,9,0,0,21,11.357143,432.229592
4,2020-12-22 12:49:40,0,33,0,9,0,0,0,0,0,9,0,9,0,10,5.000000,77.285714
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1194,2020-12-22 16:08:00,0,0,0,0,9,0,0,9,0,0,1023,0,0,4,74.642857,69193.229592
1195,2020-12-22 16:08:10,0,0,0,18,0,0,0,21,0,0,1319,9,0,0,97.642857,114794.943878
1196,2020-12-22 16:08:20,4,0,0,18,0,9,0,0,0,33,1615,0,9,0,120.571429,171877.959184
1197,2020-12-22 16:08:30,30,0,0,0,0,9,0,0,0,33,1911,0,9,0,142.285714,240759.918367


In [17]:
# Translate every trial start time into the position of the first merged row whose
# timestamp is at or after it. A start time later than all rows contributes nothing.
time = mergedData['Date_Time']
expIndexes = {}
for label, trialStarts in expTRange.items():
    found = []
    for trialStart in trialStarts:
        firstRow = next((pos for pos, stamp in enumerate(time) if stamp >= trialStart), None)
        if firstRow is not None:
            found.append(firstRow)
    expIndexes[label] = found

## Determining Zones
Here we first create the 'averagedFrame's. These are dictionaries in which each 'label' (which corresponds to the name of an experiment) maps to a pandas DataFrame containing the results of all of the trials in that experiment summed, and then divided by the total number of trials.
Anytime you adjust the Zones, everything below here must be re-run, because the values of many of these DataFrames are mutated.

In [18]:
# Per experiment: the row position in mergedData at which each trial starts.
expIndexes

{'OR7 Unblocked': [114, 165, 272],
 'OR7 Blocked': [333, 384, 420],
 'OR16 Unblocked': [654, 690, 726],
 'OR16 Blocked 1': [777, 818, 852],
 'OR16 Blocked 2': [891, 924, 966]}

In [19]:
# preCursorFactor is defined at the start
# expirementFrame: one frame per trial, keyed '<experiment> Exp <n>'.
# averagedFrame:   per experiment, the element-wise mean of its trial frames.
averagedFrame = {}
expirementFrame = {}

for label in expIndexes:
    trialFrames = []
    for idx,trialStart in enumerate(expIndexes[label]):
        # clamp at 0: a negative start would make iloc count from the end of the frame
        start = max(trialStart - preCursorFactor, 0)
        end = trialStart + expTLen[label]
        # column 0 is Date_Time; keep only the sensor/statistics columns
        trial = mergedData.iloc[ start : end , 1: ].reset_index(drop = True)
        expirementFrame[label+' Exp '+str(idx+1)] = trial
        trialFrames.append(trial)

    if not trialFrames:
        # none of the trial start times fell inside the merged data
        raise ValueError('no trial start found in mergedData for experiment ' + repr(label))
    # sum() starts from 0, so the trial frames themselves are never modified
    runSumFrames = sum(trialFrames)
    averagedFrame[label] = runSumFrames/len(trialFrames)

Calculating the correct Zones for each experiment

In [20]:
# Automatic zoning: cluster the indoor sensors of every experiment with k-means on
# (log of peak value, row position of the peak).
if ZoneAutomation:
    # numberOfZones is defined at the start
    ZoneAssignments = {}
    for frame in averagedFrame:
        # at this point averagedFrame should just be the averaged sum of the expirementFrame trials.
        # The last two columns are the overall average and variance, so they are ignored.
        avgFrm = averagedFrame[frame]
        # outdoorSensors must have its spelling exactly match
        columns = list(set(avgFrm.keys()[:-2])- set(outdoorSensors))
        columns.sort()

        # one feature row per sensor, in the same order as `columns`
        X = []
        for column in columns:
            # largest reading and where it occurs; ties resolve to the latest row
            value,index = max([(value,index) for index,value in enumerate(avgFrm[column])])
            # NOTE(review): np.log gives -inf for a sensor whose peak is 0 — confirm that cannot happen
            X.append(np.array([np.log(value),index]))
        kmeans = KMeans(n_clusters=numberOfZones,random_state=0).fit(X)
        ZoneAssignments[frame] = kmeans.labels_
    z = numberOfZones
    ZDf = pd.DataFrame(ZoneAssignments)
    # One extra row per outdoor sensor, one column per experiment, all set to zone id z.
    # (The dimensions used to be the other way round, which only fit when the number of
    # outdoor sensors happened to equal the number of experiments.)
    outdoorRows = pd.DataFrame([[z]*len(ZDf.columns)]*len(outdoorSensors),columns = ZDf.columns)
    # DataFrame.append no longer exists in pandas 2; concat is the replacement
    ZDf = pd.concat([ZDf,outdoorRows],ignore_index=True)
    ZoneAssignments = ZDf

In [21]:
# Manual zoning: give every sensor column the zone number of the zoneList entry that
# names it. Sensors that no zone names keep the initial value 0.
if not ZoneAutomation:
    ZoneAssignments = {}
    for frame, avgFrm in averagedFrame.items():
        # The averaged frame ends with the overall average and variance columns; drop
        # those two and sort the remaining sensor names.
        columns = sorted(set(avgFrm.keys()[:-2]))
        assigned = [0]*len(columns)
        for value, members in enumerate(zoneList.values()):
            for sensor in members:
                assigned[columns.index(sensor)] = value
        ZoneAssignments[frame] = assigned
    ZDf = pd.DataFrame(ZoneAssignments)

In [22]:
# Display the manual zone layout.
zoneList

{'Zone 1': ['S-13', 'S-14'],
 'Zone 2': ['S-09', 'S-11', 'S-12'],
 'Zone 3': ['S-01', 'S-03', 'S-04', 'S-05', 'S-06', 'S-07', 'S-08', 'S-15'],
 'Zone 4': ['S-BU1']}

In [23]:
directory = './dataInfo'
# exist_ok makes re-runs safe without a separate existence check
os.makedirs(directory, exist_ok=True)
# Save the zone id of every sensor (rows) for every experiment (columns).
location = os.path.join(directory,'ZoneAssignments.csv')
ZDf.to_csv(location,index=False)

Zoning the experiment data.

In [24]:
# avgFrm is the loop variable left over from the zone-assignment cell, i.e. the averaged
# frame of the last experiment that loop visited.
avgFrm

Unnamed: 0,S-01,S-03,S-04,S-05,S-06,S-07,S-08,S-09,S-11,S-12,S-13,S-14,S-15,S-BU1,Average,Variance
0,3.0,20.0,46.0,3.0,3.0,3.0,0.0,0.0,6.0,0.0,56.0,16.0,6.0,1.333333,11.666667,468.8741
1,3.0,3.0,18.0,1.333333,4.333333,1.333333,0.0,3.0,3.0,3.0,23.0,29.0,24.0,1.333333,8.380952,300.983
2,4.666667,0.0,64.333333,0.0,10.0,6.0,0.0,3.0,15.0,14.0,0.0,26.0,17.0,3.0,11.642857,813.6582
3,10.0,0.0,60.0,0.0,0.0,9.0,0.0,0.0,15.0,20.0,16.0,6.0,6.0,1.333333,10.238095,786.1395
4,6.0,0.0,60.0,0.0,0.0,12.0,0.0,3.0,3.0,0.0,0.0,6.0,0.0,0.0,6.428571,725.9694
5,6.0,0.0,30.0,0.0,3.0,9.0,0.0,6.0,6.0,0.0,0.0,0.0,3.0,1.333333,4.595238,182.7262
6,0.0,3.0,3.0,0.0,9.0,9.0,0.0,9.0,6.0,0.0,412.0,1839.0,3.0,4.333333,164.095238,703809.9
7,0.0,3.0,3.0,0.0,6.0,4.333333,0.0,3.0,3.0,3.0,3336.666667,6637.0,0.0,5.666667,714.619048,10454240.0
8,1.333333,0.0,0.0,0.0,0.0,0.0,0.0,14.0,0.0,108.0,22511.0,5226.0,52.0,4.333333,1994.047619,39636670.0
9,0.0,0.0,25.0,0.0,0.0,0.0,0.0,60.0,0.0,421.0,40567.0,3472.0,22.0,3.0,3183.571429,140204700.0


## Zoning the Data

In [25]:
# Add one 'Zone N' column per zone to every averaged frame: the mean of the sensors
# assigned to that zone. zonedAvgFrame[key] is the same object as averagedFrame[key],
# so the zone columns also appear in averagedFrame.
zonedAvgFrame = {}
for key in ZoneAssignments:
    assignments = list(ZoneAssignments[key])
    # Count the sensors of each zone id explicitly so the divisor always lines up with
    # the running sum, even when a zone has no sensors.
    occourances = [assignments.count(zoneId) for zoneId in range(numberOfZones)]
    zoneRunSum = [0]*numberOfZones
    zonedAvgFrame[key] = averagedFrame[key]
    # relies on `columns` still holding the sorted sensor names from the zone-assignment cell
    for idx,column in enumerate(columns):
        zoneRunSum[assignments[idx]] += zonedAvgFrame[key][column]
    for idx in range(numberOfZones):
        if occourances[idx]:
            zonedAvgFrame[key]['Zone '+str(idx+1)] = zoneRunSum[idx]/occourances[idx]
        else:
            # a zone without sensors has no average
            zonedAvgFrame[key]['Zone '+str(idx+1)] = np.nan

In [26]:
# Spot check using `key` and `idx` left over from the loops in the previous cell.
# idx ends at numberOfZones-1, so with four zones this displays the 'Zone 3' column of
# the last experiment.
zonedAvgFrame[key]['Zone '+str(idx)]

0     10.500000
1      6.875000
2     12.750000
3     10.625000
4      9.750000
5      6.375000
6      3.375000
7      2.041667
8      6.666667
9      5.875000
10    11.458333
11    25.541667
12    32.916667
13    52.250000
14    56.041667
15    43.875000
16    45.500000
17    61.458333
18    52.166667
19    34.250000
20    39.625000
21    43.875000
22    32.000000
23    14.041667
24    21.000000
25    31.583333
26    18.125000
27    18.500000
28    16.333333
29    14.708333
30    15.000000
31    12.750000
32     3.666667
33     5.750000
34    10.791667
35    11.250000
Name: Zone 3, dtype: float64

In [27]:
# relies on columns still being the values of S-01 - last sensor

# Add one 'Zone N' column per zone to every single-trial frame: the mean of the sensors
# assigned to that zone. zonedExpFrame[exp] is the same object as expirementFrame[exp],
# so the zone columns also appear in expirementFrame.
zonedExpFrame = {}
# every trial label, e.g. '<experiment> Exp 1'
labels = list(expirementFrame.keys())
# the same labels with the ' Exp <n>' suffix removed, i.e. the experiment each trial belongs to
keyList = [x.split(' Exp')[0] for x in labels]

for index,exp in enumerate(labels):
    # experiment whose zone assignment applies to this trial
    key = keyList[index]
    assignments = list(ZoneAssignments[key])
    # Count the sensors of each zone id explicitly so the divisor always lines up with
    # the running sum, even when a zone has no sensors.
    occourances = [assignments.count(zoneId) for zoneId in range(numberOfZones)]
    # running per-zone sum of the sensor columns
    zoneRunSum = [0]*numberOfZones
    zonedExpFrame[exp] = expirementFrame[exp]
    for idx,column in enumerate(columns):
        zoneRunSum[assignments[idx]] += zonedExpFrame[exp][column]
    for idx in range(numberOfZones):
        if occourances[idx]:
            zonedExpFrame[exp]['Zone '+str(idx+1)] = zoneRunSum[idx]/occourances[idx]
        else:
            # a zone without sensors has no average
            zonedExpFrame[exp]['Zone '+str(idx+1)] = np.nan
        


In [28]:
# Leftover debugging: shows the last sensor name visited by the zoning loop.
column

'S-BU1'

In [29]:
# Leftover debugging (same check as the cell before): last sensor name visited by the zoning loop.
column

'S-BU1'

In [30]:
directory = './averagedData'
# exist_ok makes re-runs safe without a separate existence check
os.makedirs(directory, exist_ok=True)
# Write one csv per experiment with its trial-averaged frame.
for x in averagedFrame:
    temp = averagedFrame[x]
    location = os.path.join(directory,x+'.csv')
    temp.to_csv(location,index=False)



In [31]:
directory = './expirementData'
# exist_ok makes re-runs safe without a separate existence check
os.makedirs(directory, exist_ok=True)
# Write one csv per individual trial.
for x in expirementFrame:
    temp = expirementFrame[x]
    location = os.path.join(directory,x+'.csv')
    temp.to_csv(location,index=False)

### Increase the Resolution
Pad out the dataframes so they have a value for every second.

In [32]:
# Linearly interpolate between consecutive rows of every averaged frame so that each
# original row becomes ten rows; the final row has no successor and is kept once.
stretchedDF = {}
for i in averagedFrame:
    tempFrame = averagedFrame[i].values
    lastRow = len(tempFrame) - 1
    tempList = []
    for idx,x in enumerate(tempFrame):
        if idx < lastRow:
            increment = (tempFrame[idx+1] - x)/10
            tempList.extend(x+increment*count for count in range(10))
        else:
            tempList.append(x)
    # Label the result with this frame's own columns rather than borrowing them from
    # expirementFrame, so the cell does not depend on the trial frames having been zoned.
    stretchedDF[i] = pd.DataFrame(tempList, columns = averagedFrame[i].columns)

In [33]:
# Same ten-fold linear interpolation as for the averaged frames, applied to every
# individual trial frame. Each result is labelled with the columns of the first trial frame.
stretchExpDf = {}
for i, trial in expirementFrame.items():
    rows = trial.values
    finalRow = len(rows) - 1
    expanded = []
    for pos, row in enumerate(rows):
        if pos == finalRow:
            # no successor to interpolate towards: keep the row once
            expanded.append(row)
            continue
        step = (rows[pos+1] - row)/10
        expanded.extend(row + step*tenth for tenth in range(10))
    firstLabel = next(iter(expirementFrame))
    stretchExpDf[i] = pd.DataFrame(expanded, columns = expirementFrame[firstLabel].columns)

In [34]:
directory = './stretchedAvgData'
# exist_ok makes re-runs safe without a separate existence check
os.makedirs(directory, exist_ok=True)
# Write one csv per experiment with its interpolated, trial-averaged frame.
for x in stretchedDF:
    temp = stretchedDF[x]
    location = os.path.join(directory,x+'.csv')
    temp.to_csv(location,index=False)

In [35]:
directory = './stretchedExpirementData'
# exist_ok makes re-runs safe without a separate existence check
os.makedirs(directory, exist_ok=True)
# Write one csv per individual trial with its interpolated frame.
for x in stretchExpDf:
    temp = stretchExpDf[x]
    location = os.path.join(directory,x+'.csv')
    temp.to_csv(location,index=False)

In [36]:
# Total wall-clock seconds since `begin` was recorded at the top of the notebook.
end = clock.time()
print(end-begin)

17.962891578674316
