In [9]:
import pandas as pd
import glob
import dknlab_tools
import re

### First, get all the raw data files

In [10]:
maps = sorted(glob.glob('./data/*map*'))
data = sorted(glob.glob('./data/*fluor*'))

In [11]:
# Display the plate-map paths found by the glob, in sorted order.
maps

['./data/2021_10_19_compare_single_tlKOs_plate_map.xlsx',
 './data/2021_12_03_all_nitrate_reductase_tlKOs_map.xlsx',
 './data/2021_12_27_pregrowth_comp_map.xlsx',
 './data/2021_12_28_TEAs_and_pregrowth_and_PA14muts_map.xlsx',
 './data/2022_01_07_FumarateMuts_PCAcomp_map.xlsx',
 './data/2022_05_14_NitrateReductase_tlKOs_quinone_tlKOs_map.xlsx',
 './data/2022_05_18_frdA_menA_menAubiC_tlKOs_fumarate_nitrate_4HB_map.xlsx',
 './data/2022_06_06_alternativeTEAs_tlKOs_quinone_tlKOs_map.xlsx',
 './data/2022_06_28_RedoTMAO_DMSO_NO3_map.xlsx',
 './data/2022_07_13_NitrateReductase_tlKOs_Quinone_tlKOs_QuinoneNitrateReductaseSingle_tlKOs_standing_pregrowth_map.xlsx']

### Map out which experiments had technical and which had biological replicates

In [12]:
# Replicate design of each experiment, keyed by the date stamp that appears in
# the experiment's file names. Listed newest first.
replicate_dict_by_date = dict([
    ('2022_07_13', 'biological'),
    ('2022_06_28', 'biological'),
    ('2022_06_06', 'biological'),
    ('2022_05_18', 'technical'),
    ('2022_05_14', 'technical'),
    ('2022_01_07', 'biological'),
    ('2021_12_28', 'technical'),
    ('2021_12_27', 'technical'),
    ('2021_12_03', 'technical'),
    ('2021_10_19', 'technical'),
])

In [13]:
# Pregrowth condition of each experiment, keyed by the date stamp that appears
# in the experiment's file names. Listed newest first.
pregrowth_dict_by_date = dict([
    ('2022_07_13', 'standing'),
    ('2022_06_28', 'standing'),
    ('2022_06_06', 'standing'),
    ('2022_05_18', 'shaking'),
    ('2022_05_14', 'shaking'),
    # The next three runs had all pregrowth LB as 4.8 mL LB and 200 µL POBM.
    ('2022_01_07', 'standing*'),
    ('2021_12_28', 'comparison'),
    ('2021_12_27', 'comparison'),
    ('2021_12_03', 'shaking'),
    ('2021_10_19', 'shaking'),
])

### Make data frames for each date

In [14]:
# Build one annotated frame per experiment.
# Each data file is paired with its plate map by the date stamp in the file
# names rather than by position in the sorted lists, so an extra, missing or
# differently named file cannot silently attach the wrong map to a data file.
date_pattern = re.compile(r'[0-9]+_[0-9]+_[0-9]+')


def _date_stamp(path):
    """Return the date stamp (e.g. '2021_10_19') found in `path`, or None if there is none."""
    found = date_pattern.search(path)
    return found.group() if found else None


# Index the plate maps by date; refuse ambiguous dates instead of guessing.
maps_by_date = {}
for m in maps:
    map_date = _date_stamp(m)
    if map_date is None:
        continue
    if map_date in maps_by_date:
        raise ValueError(
            f'two plate maps share the date {map_date}: '
            f'{maps_by_date[map_date]!r} and {m!r}'
        )
    maps_by_date[map_date] = m

dfs = []

for d in data:
    date = _date_stamp(d)
    if date is None:
        # Not a dated raw export. For example, the combined CSV this notebook
        # writes into ./data also matches the '*fluor*' glob on a re-run.
        continue
    if date not in maps_by_date:
        raise KeyError(f'no plate map found for {d!r} (date {date})')
    # Only the first element of the importer's return value is used --
    # presumably the tidy DataFrame; TODO confirm what the other elements hold.
    df = dknlab_tools.biotek.import_growthcurves(d, maps_by_date[date])[0]
    # Tag every row with experiment-level metadata looked up by date.
    df['date'] = date
    df['replicate type'] = replicate_dict_by_date[date]
    df['pregrowth condition'] = pregrowth_dict_by_date[date]
    dfs.append(df)

In [15]:
# Spot-check the import by displaying the first frame in the list.
dfs[0]

Unnamed: 0,Time [hr],Well,PCAred fluorescence (AU),Strain,Medium,Condition,Condition Conc. (µM),date,replicate type,pregrowth condition
0,0.072,A1,38343.0,calibration,basal medium,PCAred,250,2021_10_19,technical,shaking
1,0.156,A1,38470.0,calibration,basal medium,PCAred,250,2021_10_19,technical,shaking
2,0.239,A1,38676.0,calibration,basal medium,PCAred,250,2021_10_19,technical,shaking
3,0.322,A1,38689.0,calibration,basal medium,PCAred,250,2021_10_19,technical,shaking
4,0.406,A1,38777.0,calibration,basal medium,PCAred,250,2021_10_19,technical,shaking
...,...,...,...,...,...,...,...,...,...,...
24271,23.739,G12,6003.0,,,,,2021_10_19,technical,shaking
24272,23.822,G12,5906.0,,,,,2021_10_19,technical,shaking
24273,23.906,G12,6034.0,,,,,2021_10_19,technical,shaking
24274,23.989,G12,5968.0,,,,,2021_10_19,technical,shaking


### Combine into one dataframe and write to a file

In [16]:
# Stack the per-experiment frames into one table and save it next to the raw data.
# NOTE(review): pd.concat keeps each frame's own row labels by default, so index
# values repeat across experiments and are written as the first CSV column;
# consider ignore_index=True if downstream readers want unique row labels.
tidy_csv_path = './data/tidy_fluorescence_data_all_experiments.csv'
all_data = pd.concat(dfs)
all_data.to_csv(tidy_csv_path)