# Elanco Lyo Trials
Read in datafiles & plot relevant variables



In [None]:
# add libraries, etc.
%run -i commonRoutines.ipynb

In [None]:
%run -i pulseFunctions.ipynb

Merge datafiles

In [None]:
# Load every study folder's CSV exports into two combined frames:
#   sysFrame  - files whose name contains 'SYS'
#   probFrame - files whose name contains 'PRO'
# Each row is tagged with a running 'dataset' number (1-based, in sorted
# folder order) and the originating 'folderName'.

# each folder in directory contains files to merge
dirname = r"\\ion\ion.grp\ElancoGlobalEng\AugustaLyoTrials\1 Study Data Files (SYMYX)"
folders = os.listdir(dirname) # list of folders w datafiles
folders.sort() # sort to load data in StudyRun order
dataPath = r"Data\Exported" # datafiles to be stitched are in this folder
mergePath = r"Data" # stitched data will be written to this folder

# dataframes to read files into
sysFrame = pd.DataFrame()
probFrame = pd.DataFrame()
sysList_ = []
probList_ = []
dataIndex = 0

# directory entries whose name contains any of these markers are not study runs
skipMarkers = ('CopyPasteTemplate', '.db', 'New folder', 'PS')

# iterate through list of folders & load data
for folder_ in folders:

    if any(marker in folder_ for marker in skipMarkers):
        continue

    # increment dataIndex for each folder
    dataIndex += 1
    print('Loading %d: %s' % (dataIndex, folder_))

    # a folder counts as already merged when its Data directory holds an
    # '...Export...' csv; test the file name only so that directory names
    # can never trigger a false match
    checkPath = os.path.join(dirname, folder_, mergePath)
    checkFiles = glob.glob(checkPath + "/*.csv")
    merged = any('Export' in os.path.basename(file_) for file_ in checkFiles)

    if merged:
        print('%s already merged' % folder_)
        path = checkPath
        headerRows = 0 # merged files are written with the header on the first row
    else:
        # raw exports: header is read from row 9
        path = os.path.join(dirname, folder_, dataPath)
        headerRows = 9

    filenames = glob.glob(path + "/*.csv")

    # iterate through files & read in data, appending to appropriate list
    for file_ in filenames:

        df = pd.read_csv(file_, index_col=None, header=headerRows)

        # add datetime column; the exports use either 2-digit or 4-digit years
        df['datetime'] = df['Date    '] + " " + df[' Time    ']
        try:
            df['datetime'] = pd.to_datetime(df['datetime'], format='%m/%d/%y %H:%M:%S')
        except ValueError:
            # only a format mismatch should fall through to the 4-digit-year format
            df['datetime'] = pd.to_datetime(df['datetime'], format='%m/%d/%Y %H:%M:%S')

        # add column indexing each dataset read in
        df['dataset'] = dataIndex
        df['folderName'] = folder_

        # append dataset to list, classified by file name
        baseName = os.path.basename(file_)
        if 'SYS' in baseName:
            sysList_.append(df)
        elif 'PRO' in baseName:
            probList_.append(df)

# pd.concat raises ValueError if no matching files were found at all
sysFrame = pd.concat(sysList_)
probFrame = pd.concat(probList_)

print('Data loaded')


In [None]:
# Clean the combined frames: drop repeated timestamps and placeholder rows,
# coerce the measurement columns to numbers, sort by time and renumber rows.
print('...deleting duplicates...')
placeholder = '        ???          '  # text the export contains in place of a reading
sysFrame = sysFrame.drop_duplicates(subset=['datetime'])
probFrame = probFrame.drop_duplicates(subset=['datetime'])
sysHasReading = sysFrame[' ENCORE:VAC4.F_CV (S)'] != placeholder
probHasReading = probFrame[' ENCORE:TPAVG.F_CV (S)'] != placeholder
sysFrame = sysFrame[sysHasReading]
probFrame = probFrame[probHasReading]

# convert columns to correct datatype (unparseable entries become NaN)
print('...setting datatypes...')
numericColumns = [
    ' ENCORE:TEMPCOND1.F_CV (S)',
    ' ENCORE:TEMPSHELF.F_CV (S)',
    ' ENCORE:TEMPSREF.F_CV (S)',
    ' ENCORE:TPAVG.F_CV (S)',
    ' ENCORE:VAC.F_CV (S)',
    ' ENCORE:VAC2.F_CV (S)',
    ' ENCORE:VAC4.F_CV (S)',
]
sysFrame[numericColumns] = sysFrame[numericColumns].apply(pd.to_numeric, errors='coerce')

# difference between the VAC2 and VAC channels (renamed Pirani and MKS further down)
sysFrame['diff'] = sysFrame[' ENCORE:VAC2.F_CV (S)'] - sysFrame[' ENCORE:VAC.F_CV (S)']

print('...sorting data...')
sysFrame = sysFrame.sort_values('datetime')
probFrame = probFrame.sort_values('datetime')

# renumber rows 0..n-1 in time order; the index is named 'index'
print('...reindexing...')
sysFrame = sysFrame.assign(index=np.arange(len(sysFrame))).set_index('index')
probFrame = probFrame.assign(index=np.arange(len(probFrame))).set_index('index')

# elapsed time since the very first sample
sysFrame['dTime'] = sysFrame['datetime'] - sysFrame.loc[0, 'datetime']

# view available variables
sysFrame.columns

#sysFrame[-1:]

In [None]:
# Write one merged SYS csv and one merged PROB csv into each study folder's
# Data directory, unless an '...Export...' csv is already there.
#
# writeDataIndex must advance exactly like dataIndex did while loading, so the
# skip rules below have to match the loader's rules ('.db', not 'db');
# otherwise a folder whose name merely contains "db" would shift every later
# folder onto the wrong dataset.
writeDataIndex = 0

# group data by the dataset number assigned while loading; a scalar key keeps
# get_group(<int>) valid on old and new pandas alike
dataGroup = sysFrame.groupby('dataset')
probDataGroup = probFrame.groupby('dataset')

skipMarkers = ('CopyPasteTemplate', '.db', 'New folder', 'PS')

sysExportColumns = ['Date    ', ' Time    ', ' ENCORE:TEMPCOND1.F_CV (S)',
       ' ENCORE:TEMPSHELF.F_CV (S)', ' ENCORE:TEMPSREF.F_CV (S)',
       ' ENCORE:TPAVG.F_CV (S)', ' ENCORE:VAC.F_CV (S)',
       ' ENCORE:VAC2.F_CV (S)', ' ENCORE:VAC4.F_CV (S)',
       ' ENCORE:VACREF.F_CV (S)']
probExportColumns = ['Date    ', ' Time    ', ' ENCORE:TP01.F_CV (S)',
       ' ENCORE:TP02.F_CV (S)', ' ENCORE:TP03.F_CV (S)',
       ' ENCORE:TP04.F_CV (S)', ' ENCORE:TP05.F_CV (S)',
       ' ENCORE:TP06.F_CV (S)', ' ENCORE:TP07.F_CV (S)',
       ' ENCORE:TP08.F_CV (S)', ' ENCORE:TPAVG.F_CV (S)']

for folder_ in folders:

    # skip the same non-study entries the loader skipped
    if any(marker in folder_ for marker in skipMarkers):
        continue

    writeDataIndex += 1
    path = os.path.join(dirname, folder_)

    # check if merged file already exists
    checkPath = os.path.join(dirname, folder_, mergePath)
    checkFiles = glob.glob(checkPath + "/*.csv")
    merged = any('Export' in file_ for file_ in checkFiles)
    if merged:
        print('%s merge files already written' % folder_)
        continue

    # merge file not already created. write to csv
    # (named sysData/probData so the stdlib module name `sys` is not shadowed)
    sysData = dataGroup.get_group(writeDataIndex)
    probData = probDataGroup.get_group(writeDataIndex)

    # file names are prefixed with the date of the last SYS sample as DDMMYY
    lastDate = pd.to_datetime(sysData['datetime'].iloc[-1])
    dateString = '%02d%02d%d' % ( lastDate.day, lastDate.month, lastDate.year - 2000)
    sysFilename = os.path.join(checkPath, '%s%sSYSExport.csv' % (dateString, folder_))
    probFilename = os.path.join(checkPath, '%s%sPROBExport.csv' % (dateString, folder_))

    print('Writing %s' % folder_)
    sysData[sysExportColumns].to_csv(sysFilename, index=False)
    probData[probExportColumns].to_csv(probFilename, index=False)

print('*** Finished writing files ***')
    

In [None]:
# Give the SYS columns short names, then build the per-dataset groupings that
# the analysis cells iterate over.
columnLabels = {
    'Date    ': 'Date',
    ' Time    ': 'Time',
    ' ENCORE:TEMPCOND1.F_CV (S)': 'CondenserT',
    ' ENCORE:TEMPSHELF.F_CV (S)': 'ShelfT',
    ' ENCORE:TEMPSREF.F_CV (S)': 'RefT',
    ' ENCORE:TPAVG.F_CV (S)': 'AvgT',
    ' ENCORE:VAC.F_CV (S)': 'MKS',
    ' ENCORE:VAC2.F_CV (S)': 'Pirani',
    ' ENCORE:VAC4.F_CV (S)': 'Condenser',
    ' ENCORE:VACREF.F_CV (S)': 'VacRef',
}
sysFrame = sysFrame.rename(columns=columnLabels)

# grouped variable used to subset data by dataset.
# Group by the scalar key 'dataset' rather than the list ['dataset']: with a
# one-element list, recent pandas yields 1-tuples as group keys when iterating,
# which breaks the `index-1` arithmetic used by every later cell. A scalar key
# yields plain integers on all pandas versions.
print('...grouping datasets...')
dataGroup = sysFrame.groupby('dataset')
probDataGroup = probFrame.groupby('dataset')
print('Grouped')

Load Augusta run for reference

In [None]:
# Read the Augusta reference run, parse its timestamp column and shorten the
# column names used by the comparison plots.
augustaCsv = r"C:\Users\c213418\Documents\Elanco\Elanco\AUGUSTA_TEST_01.csv"
Augusta = pd.read_csv(augustaCsv, index_col=None, header=0)

# the timestamp is stored in the file's first, unnamed column
Augusta['datetime'] = pd.to_datetime(Augusta['Unnamed: 0'], infer_datetime_format=True)

# keys must match the csv header exactly (including the 'Condesner' spelling)
augustaLabels = {
    'Pulse Number': 'PulseNumber',
    'Chamber Vacuum': 'ChamberVac',
    'Condesner Coil Temp': 'CondenserT',
    'Shelf Inlet Temp': 'ShelfTIn',
    'Shelf Outlet Temp': 'ShelfTOut',
}
Augusta = Augusta.rename(columns=augustaLabels)

Augusta.columns

In [None]:
## Find Cycle Start Time & index
#
# Both searches are small state machines over a temperature trace:
#   0 - waiting for the temperature to fall below the drop threshold
#   1 - dropped; the first sample back above the threshold is the cycle start,
#       unless the temperature first falls below -50 (not a cycle start)
#   2 - (study runs only) false start; wait for the temperature to rise above
#       -20 before searching again

# Augusta data
searchStart = 0 # indicates status of search
cycleStart_Augusta = 0 # index of start of cycle

for i in Augusta.index:
    shelfIn = Augusta['ShelfTIn'][i]
    if searchStart == 0: # looking for initial drop in T
        if shelfIn < -43:
            searchStart = 1
    if searchStart == 1:
        if shelfIn < -50: # confirm not going to holding T
            searchStart = 0
        if shelfIn > -38: # start indicated by return up
            cycleStart_Augusta = i
            searchStart = 0
            break

Augusta['dTime'] = Augusta['datetime'] - Augusta['datetime'][cycleStart_Augusta]

##  StudyXRunY data
# initialize dataframe to hold results (one row per dataset, row = dataset number - 1)
cycleStart_runx = pd.DataFrame(index=np.arange(0,dataIndex), columns=('Study Number', 'cycleOffset', 'cycleStart', 'cycleStartTime'))
cycleStartIndex = 0

## compare cycle start to Justin/Karl's calculations
startTimes = ['2016-12-05 20:54:00', '2016-12-07 20:02:00', '2016-12-09 19:34:50', '2016-12-12 20:13:40', '2016-12-14 20:11:50',
              '2016-12-16 19:50:20', '2016-10-19 20:51:00', '2016-10-24 21:10:00', '2016-10-26 22:05:00', '2016-10-31 18:35:00',
              '2016-11-02 19:18:00', '2016-11-07 19:36:17', '2016-11-09 20:25:00', '2016-11-14 19:55:00', '2016-11-16 21:05:00',
              '2016-11-18 20:24:00', '2016-11-21 20:23:00', '2016-12-02 19:01:50']

startTimes = pd.to_datetime(startTimes)

# loop through each dataset to determine cycle start
for index, group in dataGroup:
    # Start every dataset from a clean state. Without this reset, a run whose
    # search never completes (or an Augusta search that ended in state 1)
    # leaks its state into the next run, which can then report a bogus cycle
    # start on its very first sample.
    searchStart = 0
    dropT = -43.9
    print('Searching %d for cycle start' %index)

    folderName = group['folderName'].iloc[-1]
    isStudy20Run1 = folderName == "Study20Run1"
    first = False
    if isStudy20Run1:
        # this run uses a higher drop threshold and its first drop is ignored
        dropT = -29.9
        first = True

    row = index - 1
    for i, refT in group['RefT'].items():
        if searchStart == 0: # looking for drop below dropT
            if refT < dropT:
                searchStart = 1
        if searchStart == 1: # checking that temp dropped & held before ramping up
            if refT < -50:
                searchStart = 2 # not start of cycle, look again
            elif isStudy20Run1 and first:
                first = False
                searchStart = 2
            elif refT > dropT: # start of cycle found
                cycleStartIndex = i
                # .loc writes into the frame itself; chained df[col][row] = ...
                # is not guaranteed to do so
                cycleStart_runx.loc[row, 'Study Number'] = folderName
                cycleStart_runx.loc[row, 'cycleOffset'] = i - group.index[0]
                cycleStart_runx.loc[row, 'cycleStart'] = i
                cycleStart_runx.loc[row, 'cycleStartTime'] = group['datetime'][i]
                searchStart = 0
                break
        if searchStart == 2: # drop was not start of cycle, reset to look again
            if refT > -20:
                searchStart = 0
run_shift = cycleStart_Augusta - cycleStart_runx['cycleOffset']

cycleStart_runx


In [None]:
# Per-dataset table of Pirani/MKS measurements (one row per dataset,
# row = dataset number - 1).
# NOTE(review): only 'Study Number' is filled in here; the other columns are
# presumably written by determineTargetDiff / determineTargetDiffNoPulse
# (loaded via %run), which appear to read the globals `group`, `index`,
# `shift` and `piraniData` - those names are therefore left unchanged.
piraniData = pd.DataFrame(index=np.arange(0,dataIndex), columns=('Study Number', 'Pirani Start', 'Pirani End', 'MKS_Start',
                                                                'diffStart', 'diffEnd', 'diff50', 'Offset', 'Target', 'Target2'))

for index, group in dataGroup:
    # collect variables for each dataset (studyName, pirani/diff data)
    print(group['folderName'].iloc[-1])
    # .loc writes into piraniData itself; chained piraniData[col][row] = ...
    # is not guaranteed to
    piraniData.loc[index-1, 'Study Number'] = group['folderName'].iloc[-1]
    #pirani_moving = movingAverage(group1['Pirani'], 20)
    group['piraniFiltered'] = firstOrder(group['Pirani'], 80, 0.2)
    group['MKSFiltered'] = firstOrder(group['MKS'], 80, 0.2)
    group['diffFiltered'] = firstOrder(group['diff'], 80, 0.2)
    shift = group.index[0] # to access correct index in looping (indexing does not start at 0 for each new set)
    cycleStart = cycleStart_runx['cycleStart'][index-1] - shift


    # scan for measurements...
    # minimum Pirani reading in a window 2000-5000 samples after cycle start
    # (positional slice within this dataset)
    pulseCheck = np.amin(group['Pirani'].iloc[cycleStart+2000:cycleStart+5000])
    if pulseCheck < 400: # cycle has pulses
        target = determineTargetDiff(cycleStart)
    else:
        target = determineTargetDiffNoPulse(cycleStart)

piraniData[['Study Number', 'diffStart', 'MKS_Start', 'Offset', 'Target2']]

In [None]:
# cycle data
#
# For every dataset: record the cycle start, locate the Pirani transition
# (delegated to pulseTransition / noPulseTransition) and find the end of the
# cycle, then compare the total cycle length to the Augusta average.
# NOTE(review): 'Primary Duration', 'Pir dry end time' and
# cycleIndices['Pirani Transition'] are not written in this cell; presumably
# the transition helpers fill them in - confirm in pulseFunctions.


def formatHoursMinutes(delta):
    """Return *delta* as '<h> hours <m> minutes'.

    Negative durations keep their sign on the hours and are otherwise
    formatted from the absolute value, so -1 h 30 min reads
    '-1 hours 30 minutes' rather than the '-2 hours 30 minutes' that plain
    floor division would give.
    """
    totalSeconds = delta.total_seconds()
    sign = '-' if totalSeconds < 0 else ''
    totalSeconds = abs(totalSeconds)
    return '%s%d hours %d minutes' % (sign, totalSeconds // 3600, (totalSeconds % 3600) // 60)


# initialize dataframes to hold results & indices of results
# (one row per dataset, row = dataset number - 1)

cycleData = pd.DataFrame(index=np.arange(0,dataIndex), columns=('Study Number', 'Cycle Start Time', 'Primary Duration',
                                                                'pirani', 'Pir dry end time', 'Pulse Count',
                                                                'Product Approach Shelf T', 'length', 'lengthTotal',
                                                                'Cycle Length', 'Cycle End Time', 'AugustaAvgTime',
                                                               'Augusta Average Time', 'TimeSaved', 'Time Saved'))
cycleIndices = pd.DataFrame(index=np.arange(0,dataIndex), columns=('Cycle Start', 'Pirani Transition',
                                                                'Product Approach Shelf T', 'Cycle End'))

AugustaTime = pd.Timedelta('32 hours 1 min')
cycleData['AugustaAvgTime'] = AugustaTime
cycleData['Augusta Average Time'] = formatHoursMinutes(AugustaTime)

# find transition from primary to secondary drying
for index, group in dataGroup:

    # .loc writes into the result frames themselves; chained
    # frame[col][row] = ... is not guaranteed to
    cycleData.loc[index-1, 'Study Number'] = group['folderName'].iloc[-1]
    cycleData.loc[index-1, 'Cycle Start Time'] = cycleStart_runx['cycleStartTime'][index-1]
    cycleIndices.loc[index-1, 'Cycle Start'] = cycleStart_runx['cycleStart'][index-1]
    shift = group.index[0]
    cycleStart = cycleStart_runx['cycleStart'][index-1] - shift
    MKStart = piraniData['MKS_Start'][index-1]
    diff50 = piraniData['Target2'][index-1]
    diffEnd = piraniData['diffEnd'][index-1]
    transitionPulse = pd.DataFrame()

    if group['folderName'].iloc[-1]=="Study8Run1" : #vacuum did not pull all the way down..
        diffEnd += 20

    # search for Pirani transition; runs whose Pirani reading falls below 400
    # in the window 2000-5000 samples after cycle start are treated as pulsed
    pulseCheck = np.amin(group['Pirani'].iloc[cycleStart+2000:cycleStart+5000])
    print(pulseCheck)
    if pulseCheck < 400:
        transition = pulseTransition(group, index, shift, cycleStart, MKStart, diff50, diffEnd)
    else:
        transition = noPulseTransition(group, index, shift, cycleStart, MKStart, diff50, diffEnd)


    # find end of cycle: first sample after the transition with ShelfT below 20
    print('Searching %d for end of cycle' %index)
    finalSearchIndex = transition - shift
    for i in group.index[finalSearchIndex:]:
        if group['ShelfT'][i] < 20:
            totalTime = group['datetime'][i] - cycleData.loc[index-1, 'Cycle Start Time']
            cycleData.loc[index-1, 'lengthTotal'] = totalTime
            cycleData.loc[index-1, 'Cycle Length'] = formatHoursMinutes(totalTime)
            cycleIndices.loc[index-1, 'Cycle End'] = i
            cycleData.loc[index-1, 'Cycle End Time'] = group['datetime'][i]

            timeSaved = cycleData.loc[index-1, 'AugustaAvgTime'] - totalTime
            cycleData.loc[index-1, 'TimeSaved'] = timeSaved
            cycleData.loc[index-1, 'Time Saved'] = formatHoursMinutes(timeSaved)
            break


cycleData[['Study Number', 'Cycle Start Time', 'Primary Duration', 'Pir dry end time', 'Cycle Length', 'Cycle End Time', 'Augusta Average Time', 'Time Saved']]

In [None]:
# Save the summary columns of cycleData as a csv in the working directory.
exportColumns = [
    'Study Number',
    'Cycle Start Time',
    'Primary Duration',
    'Pir dry end time',
    'Cycle Length',
    'Cycle End Time',
    'Augusta Average Time',
    'Time Saved',
]
cycleData[exportColumns].to_csv('primaryDuration2_08FEB2017.csv')
#piraniData[['Study Number', 'diffStart', 'MKS_Start', 'Offset', 'Target2']].to_csv('piraniTransition_23JAN.csv')
#a = [False, True, False]

#startTimes = cycleData['Cycle Start Time']
#startTimes = ['2016-12-05 20:54:00', '2016-12-07 20:02:00', '2016-12-09 19:34:50', '2016-12-12 20:13:40', '2016-12-14 20:11:50', '2016-12-16 19:50:20', 
#             '2016-10-19 20:51:00', '2016-10-24 21:10:00', '2016-10-26 22:05:00', '2016-10-31 18:35:00' , '2016-11-02 19:18:00', '2016-11-07 19:36:17', 
#             '2016-11-09 20:25:00', '2016-11-14 19:55:00', '2016-11-16 21:05:00', '2016-11-18 20:24:00', '2016-11-21 20:23:00', '2016-12-02 19:01:50', NaN]

#startTimes = pd.to_datetime(startTimes)

In [None]:
# Plot every study run against the Augusta reference (temperatures on the left
# axis, pressures on the right) and collect the pages in a single PDF.
# The PdfPages object is used as a context manager so the file is finalised
# and closed even if plotting one of the runs raises.
with PdfPages(r'\\ion\ion.grp\ElancoGlobalEng\AugustaLyoTrials\2 Results and Report File (SYMXYX)\Analysis\primaryDuration_22FEB2017.pdf') as pp:

    for index, group in dataGroup:
        studyName = group['folderName'].iloc[-1]

        # these two runs are not plotted; skip before doing any work on them
        if studyName=="Study9Run1" or studyName=="Study9Run2":
            continue

        cycleStartIndex = cycleStart_runx['cycleStart'][index-1]
        group['dTime'] = group['datetime'] - group['datetime'][cycleStartIndex]
        # kept for the optional start-marker line (currently not drawn)
        startCalc = cycleData['Cycle Start Time'][index-1] - group['datetime'][cycleStartIndex]
        group['dTime'] = group['dTime'] / timedelta(days=1) * 24 # convert timedelta object to hours
        diff50 = piraniData['Target2'][index-1]

        fig = plt.figure(figsize=(10,7))
        ax1 = fig.add_subplot(111)

        ax1.set_title('%s' %studyName)
        ax1.set_ylabel('Temperature (deg C)')
        ax1.set_ylim(-90, 70)
        ax1.set_xlabel('Time (hrs)')

        ax2 = ax1.twinx()
        ax2.set_ylabel('Pressure (mTorr)')
        ax2.set_ylim(0, 3000)

        start = cycleIndices['Cycle Start'][index-1]
        trans = cycleIndices['Pirani Transition'][index-1]
        end = cycleIndices['Cycle End'][index-1] # this run's value, not the whole column

        ln1 = ax1.plot(Augusta['dTime'] / timedelta(days=1) * 24, Augusta['ShelfTIn'], color="orange", label="Augusta")
        ln2 = ax1.plot(group['dTime'], group['ShelfT'], color="green", label='%s Shelf T' %studyName)
        ax1.plot(group['dTime'][start], group['ShelfT'][start], 'ro') # mark cycle start
        ln4 = ax2.plot(group['dTime'], group['Pirani'], color="red", label="Pirani")
        ln3 = ax1.plot(group['dTime'], group['AvgT'], color="blue", label="Product Temp")
        ln5 = ax2.plot(group['dTime'], group['MKS'], color = "blue", label = "MKS")
        ax2.plot(group['dTime'], group['diff'], color="green", label = "diff")
        if not (math.isnan(trans)):
            # mark the transition, drawn at the Target2 level
            ax2.plot(group['dTime'][trans], diff50, 'ro')

        ax2.axhline(piraniData['Target2'][index-1], color="red")
        ax2.axhline(piraniData['diffStart'][index-1])
        ax2.axhline(piraniData['Offset'][index-1])

        ax1.set_xlim(-10, 40)
        ax2.set_xlim(-10, 40)

        # one legend for lines from both axes (the 'diff' trace is left out)
        lns = ln1+ln2+ln3+ln4+ln5
        labs = [l.get_label() for l in lns]
        # Shrink current axis by 20% to make room for the legend on the right
        box = ax1.get_position()
        ax1.set_position([box.x0, box.y0, box.width * 0.8, box.height])
        ax2.set_position([box.x0, box.y0, box.width * 0.8, box.height])
        ax1.legend(lns, labs, bbox_to_anchor=(1.05, 1), loc=2)

        plt.savefig(pp, format='pdf')

print('Saved')