In [140]:
import numpy as np
import pandas as pd
import os
import scipy

from sklearn import preprocessing

import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec

import seaborn as sns
sns.set(style='whitegrid')

from os.path import join as pjoin

import glob

import itertools as it

%config InteractiveShell.ast_node_interactivity='all'


In [141]:
# Helper Functions

In [142]:
def extract_pbr_at_time(df, extract_time, time_delta='30min', return_single=False):
    """
    This function returns a time specific portion of the PBR DataFrame consisting
    of the previous 16min (as default) of the input extract_time and the post 5min.
    If return_single=True, a resampled mean of that portion is returned as single 
    row DF
    """
    
    # ensure that the arguments are in the proper format for manipulation
    st = pd.to_datetime(extract_time)    
    td = pd.to_timedelta(time_delta)
    m6 = pd.to_timedelta('5min')
    
    # create mask from relevant dates
    mask = (df.index > st-td) & (df.index <= st+m6)
    
    # extracted dates DF
    ext =  df.loc[mask]
    
    if not return_single:
        
        return ext.dropna(how='any')
    
    else:
        # down sample the extracted dates to 5min buckets and return the row
        # that is closest in time with the requested 'extract_time' parameter
        ext = ext.resample('15min', label='right').mean().dropna(how='any')
        
        # index of 'nearest' to requested time
        idx = ext.index.get_loc(st, method='nearest')#get index date
        
        # Series needs to be transformed for posterity
        single_row = pd.DataFrame(ext.iloc[idx]).T
        
        return single_row


# Data

In [162]:
pbr_path='/home/rdmtinez/Desktop/MScThesis/data_o/pbr/pbr_exp_preprocessed'
locs = []
for root, dirs, files in os.walk(pbr_path):
    for name in files:
        if 'daily' not in name:
            locs.append(pjoin(root, name))

locs =  [locs[2], locs[1], locs[0], locs[3]]

['/home/rdmtinez/Desktop/MScThesis/data_o/pbr/pbr_exp_preprocessed/pbr_exp_3_preprocesed.csv',
 '/home/rdmtinez/Desktop/MScThesis/data_o/pbr/pbr_exp_preprocessed/pbr_exp_2_preprocesed.csv',
 '/home/rdmtinez/Desktop/MScThesis/data_o/pbr/pbr_exp_preprocessed/pbr_exp_1_preprocesed.csv',
 '/home/rdmtinez/Desktop/MScThesis/data_o/pbr/pbr_exp_preprocessed/pbr_exp_4_preprocesed.csv']

['/home/rdmtinez/Desktop/MScThesis/data_o/pbr/pbr_exp_preprocessed/pbr_exp_1_preprocesed.csv',
 '/home/rdmtinez/Desktop/MScThesis/data_o/pbr/pbr_exp_preprocessed/pbr_exp_2_preprocesed.csv',
 '/home/rdmtinez/Desktop/MScThesis/data_o/pbr/pbr_exp_preprocessed/pbr_exp_3_preprocesed.csv',
 '/home/rdmtinez/Desktop/MScThesis/data_o/pbr/pbr_exp_preprocessed/pbr_exp_4_preprocesed.csv']

In [42]:
exp2_dates = ['2019-04-17 21:00:00', '2019-04-18 17:30:00',
               '2019-04-19 12:30:00', '2019-04-20 16:45:00',
               '2019-04-21 20:00:00', '2019-04-22 14:00:00',
               '2019-04-23 15:30:00', '2019-04-24 17:30:00',
               '2019-04-25 16:30:00', '2019-04-26 17:45:00',
               '2019-04-27 18:15:00', '2019-04-28 18:45:00',
               '2019-04-29 17:45:00']

exp3_dates = ['2019-05-11 19:00:00', '2019-05-12 19:30:00',
               '2019-05-13 19:45:00', '2019-05-14 20:30:00',
               '2019-05-15 17:15:00', '2019-05-16 16:45:00',
               '2019-05-17 14:30:00', '2019-05-20 15:15:00',
               '2019-05-21 14:45:00', '2019-05-22 15:15:00',
               '2019-05-23 15:00:00', '2019-05-24 16:45:00']


# comparison 1
# Extract the above dates from the the PBR respective exp # datasets using the date extraction tool
# Apply Old & New model to PBR data and see how these compare against each other


#comparison
# Create a tool which extracts the datetime row from the raw CC data files
# these dates will then be used to extract absorbance readings from their
# respective pbr data, depending on how comparison1 fares we could then
# say something about what we should have expected on exp4

In [128]:
# use too glob library to extract from each day's T1, the time at which it was
# measured, this will give you a list of datetime which you can then extract from
# the pbr preprocessed data.... 

# having done the comparison above: i.e. compara how the tecan measurements compare
# against the pbr data
path0 = '/home/rdmtinez/Desktop/MScThesis/data_o/pbr/exp?/cell_counter/*d?_t1_01.#m4' # this works
path1 = '/home/rdmtinez/Desktop/MScThesis/data_o/pbr/exp?/cell_counter/*_d??_t1_01.#m4'

In [129]:
# global dataframe

date_times = {'xp1':{},
              'xp2':{},
              'xp3':{},
              'xp4':{}}

In [133]:
for path in [path0, path1]:
    for path_str in glob.iglob(path):
        path_str

        splt = path_str.split(sep='_') 
        xpN = splt[-4]
        xpD = splt[-3]

        # load files
        f = open(path_str)
        lines = f.readlines()[31]

        # the files are standardized thus in the 32nd line
        # we selected the True values which encompass the
        # time at which these measurements were made, this
        # only works with numpy arrays, however
        try:

            selector = [False, False, False, True, False, True, True, True]
            #lines.strip('\n').split(' ')
            asarray = np.array(lines.strip('\n').split(' '))[selector]

        except:
            selector = [False, False, False, True, False, False, True, True, True]
            #lines.strip('\n').split(' ')
            asarray = np.array(lines.strip('\n').split(' '))[selector]

        # join the datetime informatio and convert to pd.datetime obj
        datetime = ' '.join([*asarray][::-1])
        datetime = pd.to_datetime(datetime)

        # append to dictionary
        date_times[xpN][xpD] = datetime

In [133]:
# The dates also exist in the all cellcounts datasheet, compare

In [None]:
# Once you have done this ask pepe for other tecan-measured data if he has it for exp4

In [148]:
# load TecMeasd Modeled Data
# for these exact experiemts x2 x3
# pull the PBR abs measurements for these experiments dates
# apply the model and compare them against each other,
# how closely do they align... is it regressable, if regressable, there is a high degree
# of correlation OR they match exactly  along the 1:1 LINE, this would mean the models
# are 'perfect'
# 
path = '/home/rdmtinez/Documents/B-IT MS Program/Masters Thesis/data_o/pbr/pbr_modeled_output_data'

tcx2= pd.read_csv(pjoin(path,'tec_measd_exp2_modeled_output.csv'))
tcx3= pd.read_csv(pjoin(path,'tec_measd_exp3_modeled_output.csv'))


tcx2.head()



# load PBR data


pbr1 = pd.read_csv(locs[0])
pbr2 = pd.read_csv(locs[1])
pbr3 = pd.read_csv(locs[2])
pbr4 = pd.read_csv(locs[3])





# extract dates from pbr_cell counts data because its more complete than what you extracted
# above for exp4, (above extracted dates should match).... you need to extract these dates
# so that you can:
    # 1. check the measurements given by PEPE with the averages extracted by your function
            # they should be very close
    # 2. apply the a680_a720 model to those dates rows
    # 3. IF the first analysis of tecmesd data values
    # match along the 1:1 line then its likely that these 
    # measurements in the tubes are fairly truthful (depending on the ratio analysis)
    # if there is a HIGH Degree of Correlation (which is what I'm thinking) then
    # do a regression and obtain kT, this kT is another coversion factor to the PBR
    # data, if this is the case can we ignore the other constants
    
path = '/home/rdmtinez/Documents/B-IT MS Program/Masters Thesis/data_o/pbr'
fname = 'all_pbr_cell_counter_results.csv'


# apply model extracted dates data to pbr
# compare

# apply the new models to the old calibration data

Unnamed: 0,date_time,T1680,T1730,T1750,T2680,T2730,T2750,T3680,T3730,T3750,...,T7B_p680_730_560,T7C_p680_730_560,T7B_p680_750_560,T7C_p680_750_560,T8B_p680_720_560,T8C_p680_720_560,T8B_p680_730_560,T8C_p680_730_560,T8B_p680_750_560,T8C_p680_750_560
0,2019-04-17 20:55:00,0.1221,0.104,0.1052,0.124,0.1043,0.1055,0.1158,0.1012,0.1029,...,0.14582,0.001201,0.144321,0.001964,0.113544,0.018788,0.134066,0.008342,0.133698,0.00853
1,2019-04-18 17:25:00,0.1917,0.172,0.172,0.2022,0.1835,0.1808,0.2101,0.1837,0.1799,...,0.143023,0.00122,0.144264,0.000589,0.11645,0.020665,0.137578,0.009911,0.137586,0.009907
2,2019-04-19 12:25:00,0.1526,0.131,0.1313,0.1594,0.1371,0.1374,0.2185,0.1853,0.1831,...,0.135463,0.02429,0.137453,0.023277,0.102202,0.06233,0.123283,0.051601,0.123969,0.051252
3,2019-04-20 16:40:00,0.132,0.1215,0.1225,0.1328,0.1199,0.1219,0.1625,0.1493,0.1486,...,0.142132,0.003871,0.140445,0.00473,0.101756,0.034428,0.121158,0.024553,0.122979,0.023626
4,2019-04-21 19:55:00,0.1302,,0.1211,0.1345,,0.1208,0.1476,,0.1383,...,,,0.143788,-0.001426,,,,,0.145497,-0.00071


['/home/rdmtinez/Desktop/MScThesis/data_o/pbr/pbr_exp_preprocessed/pbr_exp_3_preprocesed.csv',
 '/home/rdmtinez/Desktop/MScThesis/data_o/pbr/pbr_exp_preprocessed/pbr_exp_2_preprocesed.csv',
 '/home/rdmtinez/Desktop/MScThesis/data_o/pbr/pbr_exp_preprocessed/pbr_exp_1_preprocesed.csv',
 '/home/rdmtinez/Desktop/MScThesis/data_o/pbr/pbr_exp_preprocessed/pbr_exp_4_preprocesed.csv']