In [1]:
import numpy as np
import pandas as pd
import os
import scipy

import matplotlib.pyplot as plt
%matplotlib inline

import matplotlib.gridspec as gridspec

import seaborn as sns
sns.set(style='whitegrid')

import itertools as it

%config InteractiveShell.ast_node_interactivity='all'
%config InlineBackend.figure_format = 'svg'

from os.path import join as pjoin

# Helper Functions

In [2]:
def absfluo_extractor(df, wavelen='600', abs_only=False):
    """Select one wavelength from a long-format frame, indexed by well.

    Rows of ``df`` whose ``'wavelen'`` column equals ``wavelen`` are kept
    and re-indexed by ``'well'``, so the result can be concatenated
    column-wise with other per-well frames.

    Parameters
    ----------
    df : DataFrame
        Must contain ``'wavelen'``, ``'well'`` and ``'measurement'``
        columns, plus ``'sdv'`` when ``abs_only`` is False.
    wavelen : str
        Value matched against ``df['wavelen']`` with ``==``; also used to
        build the output column names.
    abs_only : bool
        If True, return only the measurement, in a column named
        ``wavelen``. Otherwise return the measurement as ``'A<wavelen>'``
        and ``'sdv'`` as ``'A<wavelen>_sdv'``.

    Returns
    -------
    DataFrame
        Indexed by ``'well'``; empty if no row matches ``wavelen``.
    """
    at_wavelen = df['wavelen'] == wavelen

    if abs_only:
        wanted = ['well', 'measurement']
        new_names = {'measurement': wavelen}
    else:
        wanted = ['well', 'measurement', 'sdv']
        prefix = 'A' + wavelen
        new_names = {'measurement': prefix, 'sdv': prefix + '_sdv'}

    selected = df.loc[at_wavelen, wanted].set_index('well')
    return selected.rename(columns=new_names)


# Modeling Data Location

In [4]:
# Data location.
# The directory defaults to the original author's local layout; set the
# CALIBRATION_DATA_DIR environment variable to run the notebook on another
# machine without editing this cell.
path = os.environ.get(
    'CALIBRATION_DATA_DIR',
    '/home/rdmtinez/Desktop/MScThesis/data_o/calibration/community_calibration/parsed_data/',
)

# main regressands
regrs = 'calibration_exp_2_mixture_ODs-multiple_measurements.tsv'

# spectral data
spect = 'calibration_exp_2_mixture_ODs-spectra.tsv'

# Pre-Processing

In [6]:
# Read the tab-separated regressand table from the data directory.
rdata = pd.read_csv(
    pjoin(path, regrs),
    sep='\t',
)
rdata.head(n=5)

Unnamed: 0,Well,Mean,StDev,0;2,1;2,2;2,2;1,1;1,0;1,0;0,1;0,2;0,wavelength,mode
0,A1,1.2992,0.0618,1.4091,1.297,1.3597,1.284,1.251,1.2275,1.3552,1.2525,1.2567,600,Absorbance
1,A2,1.2346,0.0746,1.3483,1.2459,1.346,1.159,1.182,1.1712,1.2821,1.1858,1.1915,600,Absorbance
2,A3,1.1739,0.0768,1.2982,1.1691,1.2929,1.115,1.1142,1.1078,1.2116,1.1108,1.1457,600,Absorbance
3,A4,1.116,0.0732,1.2291,1.1128,1.2285,1.0637,1.0524,1.0419,1.1592,1.0647,1.0919,600,Absorbance
4,A5,1.0906,0.0785,1.2256,1.0921,1.1983,1.0317,1.0157,1.0172,1.1325,1.0418,1.0605,600,Absorbance


In [7]:
# Keep the per-well summary columns only; the remaining columns of the raw
# table ('0;2', '1;2', ...) are dropped here.
keep_cols = ['Well', 'wavelength', 'mode', 'Mean', 'StDev']
rdata = rdata[keep_cols]

# Lower-case, analysis-friendly column names.
rename = {'Well':'well', 'Mean':'measurement', 'StDev':'sdv', 'mode':'type', 'wavelength':'wavelen'}
rdata = rdata.rename(columns=rename)

# Split the well ID into its first character and the remainder,
# e.g. 'A10' -> row 'A', col '10'. The vectorised .str accessor replaces a
# Python-level loop and propagates missing well IDs as NaN instead of raising.
rdata['row'] = rdata['well'].str[0]
rdata['col'] = rdata['well'].str[1:]

# Final column order ('type' is not carried forward). The wavelength is cast
# to str in the same step because later cells compare it against string
# literals such as '600'; doing it via astype avoids assigning into a
# column-subset frame.
rdata = rdata[['well', 'row', 'col', 'wavelen', 'measurement', 'sdv']].astype({'wavelen': str})

rdata.head()

Unnamed: 0,well,row,col,wavelen,measurement,sdv
0,A1,A,1,600,1.2992,0.0618
1,A2,A,2,600,1.2346,0.0746
2,A3,A,3,600,1.1739,0.0768
3,A4,A,4,600,1.116,0.0732
4,A5,A,5,600,1.0906,0.0785


### Absorbances DFs

In [8]:
# Main absorbance frame @ 600.
# Built by hand rather than with absfluo_extractor so that the 'row' and
# 'col' columns are retained alongside the renamed measurement columns.
rename = {'measurement': 'A600', 'sdv': 'A600_sdv'}
df600 = (
    rdata.loc[rdata['wavelen'] == '600']
    .set_index('well')
    .rename(columns=rename)
    .drop(columns='wavelen')
)

# Absorbance @ 680, 720 and 750 (measurement and sdv columns only)
df680, df720, df750 = (
    absfluo_extractor(rdata, wl) for wl in ('680', '720', '750')
)

In [9]:
# Main regressand DataFrame: the per-wavelength absorbance frames placed
# side by side, aligned on the shared 'well' index.
rdf = pd.concat(
    (df600, df680, df720, df750),
    axis='columns',
)
rdf.head(n=5)

Unnamed: 0_level_0,row,col,A600,A600_sdv,A680,A680_sdv,A720,A720_sdv,A750,A750_sdv
well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
A1,A,1,1.2992,0.0618,1.6864,0.0686,1.0664,0.066,1.0277,0.0693
A2,A,2,1.2346,0.0746,1.5865,0.0742,1.0232,0.0714,0.9858,0.0743
A3,A,3,1.1739,0.0768,1.4931,0.0818,0.9822,0.0758,0.9475,0.0797
A4,A,4,1.116,0.0732,1.3957,0.0791,0.9382,0.0742,0.9062,0.0787
A5,A,5,1.0906,0.0785,1.3394,0.0879,0.9184,0.0804,0.887,0.0857


# Spectral Data

In [11]:
# load spectral data
sdata = pd.read_csv(pjoin(path, spect), sep='\t')

## Pre-process

# Wavelengths become integer-valued strings (e.g. '500') so they can be
# matched with == against string literals and used as column names.
sdata['wavelength'] = sdata['wavelength'].astype(int).astype(str)

sdata = sdata.drop(labels='mode', axis=1)
sdata = sdata.rename({'Well':'well', 'wavelength':'wavelen', 'abs':'measurement'}, axis=1)

# Wells present at wavelength '500' seed the row index of the regressor table.
index = sdata[sdata['wavelen']=='500'].set_index('well').index

# create DataFrame of Regressors: one column per wavelength, one row per well.
# All per-wavelength frames are collected first and concatenated once;
# concatenating inside the loop re-copies the growing frame on every pass.
waves = sdata['wavelen'].unique()
sdf = pd.concat(
    [pd.DataFrame(index=index)]
    + [absfluo_extractor(sdata, wavelen=wav, abs_only=True) for wav in waves],
    axis=1,
)

In [12]:
# Preview the first rows of the per-well spectral regressor table.
sdf.head(n=5)

Unnamed: 0_level_0,500,510,520,530,540,550,560,570,580,590,...,660,670,680,690,700,710,720,730,740,750
well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A1,1.6336,1.4561,1.3134,1.2276,1.1818,1.1603,1.1416,1.145,1.1559,1.166,...,1.3647,1.4972,1.5497,1.314,1.0572,0.9754,0.9408,0.9182,0.907,0.8938
A2,1.546,1.4074,1.2509,1.1739,1.1342,1.1116,1.0942,1.0956,1.1052,1.1131,...,1.2893,1.4052,1.4503,1.2545,1.0092,0.9344,0.9016,0.8808,0.8697,0.8566
A3,1.4574,1.3309,1.1859,1.1145,1.0764,1.0547,1.0379,1.0384,1.0454,1.0511,...,1.2037,1.3148,1.3579,1.166,0.9529,0.8843,0.8539,0.8342,0.8238,0.8115
A4,1.3747,1.2601,1.1311,1.0658,1.0317,1.0103,0.9938,0.9928,0.9982,1.0019,...,1.1305,1.2287,1.2677,1.0978,0.9071,0.8442,0.8167,0.7978,0.7881,0.7764
A5,1.3221,1.202,1.1023,1.0411,1.0059,0.9874,0.9702,0.9677,0.9713,0.9735,...,1.0828,1.1719,1.2072,1.0539,0.8788,0.8212,0.7946,0.7767,0.7667,0.7552


# Append Labels

In [13]:
# Label values per plate position. These values are not arbitrary: they
# were obtained from a colleague.

# B label, keyed by plate row letter
B_labels = {
    'A': 0.40,
    'B': 0.35,
    'C': 0.30,
    'D': 0.25,
    'E': 0.20,
    'F': 0.15,
    'G': 0.10,
    'H': 0.00,
}

# C label, keyed by plate column number (as a string)
C_labels = {
    '1': 0.50,
    '2': 0.46,
    '3': 0.42,
    '4': 0.38,
    '5': 0.34,
    '6': 0.30,
    '7': 0.25,
    '8': 0.21,
    '9': 0.17,
    '10': 0.13,
    '11': 0.09,
    '12': 0.00,
}

# Frame of label values, built from the row/col of each well
ldf = rdf[['row', 'col']].assign(
    B_lbl=lambda frame: frame['row'].map(B_labels),
    C_lbl=lambda frame: frame['col'].map(C_labels),
)

In [14]:
# Only the label columns are needed from here on; row/col were lookup keys.
ldf = ldf.drop(columns=['row', 'col'])
ldf.head(n=5)

Unnamed: 0_level_0,B_lbl,C_lbl
well,Unnamed: 1_level_1,Unnamed: 2_level_1
A1,0.4,0.5
A2,0.4,0.46
A3,0.4,0.42
A4,0.4,0.38
A5,0.4,0.34


# Merge Onto Main DataFrame

In [16]:
# Assemble the final table column-wise, aligned on the 'well' index.
mdf = pd.concat(
    (
        rdf.iloc[:, :2],  # first two columns of rdf (row, col)
        ldf,              # label columns
        rdf.iloc[:, 2:],  # remaining rdf columns (absorbances and sdv)
        sdf,              # spectral regressors, one column per wavelength
    ),
    axis='columns',
)
mdf.head(n=5)

Unnamed: 0_level_0,row,col,B_lbl,C_lbl,A600,A600_sdv,A680,A680_sdv,A720,A720_sdv,...,660,670,680,690,700,710,720,730,740,750
well,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
A1,A,1,0.4,0.5,1.2992,0.0618,1.6864,0.0686,1.0664,0.066,...,1.3647,1.4972,1.5497,1.314,1.0572,0.9754,0.9408,0.9182,0.907,0.8938
A2,A,2,0.4,0.46,1.2346,0.0746,1.5865,0.0742,1.0232,0.0714,...,1.2893,1.4052,1.4503,1.2545,1.0092,0.9344,0.9016,0.8808,0.8697,0.8566
A3,A,3,0.4,0.42,1.1739,0.0768,1.4931,0.0818,0.9822,0.0758,...,1.2037,1.3148,1.3579,1.166,0.9529,0.8843,0.8539,0.8342,0.8238,0.8115
A4,A,4,0.4,0.38,1.116,0.0732,1.3957,0.0791,0.9382,0.0742,...,1.1305,1.2287,1.2677,1.0978,0.9071,0.8442,0.8167,0.7978,0.7881,0.7764
A5,A,5,0.4,0.34,1.0906,0.0785,1.3394,0.0879,0.9184,0.0804,...,1.0828,1.1719,1.2072,1.0539,0.8788,0.8212,0.7946,0.7767,0.7667,0.7552


# Export

In [13]:
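# NOTE: `d_loc` (the output directory) is not defined in the cells above;
# assign it before uncommenting the export below.
#f_name = 'community_calibration_dataframe.csv'
#mdf.to_csv(d_loc+f_name, sep=',')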