<b> Sort the protoplast luminescence data from the xlsx output of the CLARIOstar plate reader.
Use two input Excel files at a time (one firefly, one NanoLuc). </b>

In [148]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
import glob

In [149]:
def xlsx_2_csv(xlsx):
    """Convert the 'Table End point' sheet of an xlsx workbook to a csv file.

    The csv is written next to the workbook, using the workbook's base name
    with a ``.csv`` extension, UTF-8 encoded and without the index column.

    Args:
        xlsx: Path to the xlsx workbook.

    Returns:
        None. The only effect is the csv file written to disk.
    """
    # The sheet is selected by name; index_col=None keeps every sheet column as data
    sheet = pd.read_excel(xlsx, 'Table End point', index_col=None)

    workbook = Path(xlsx)
    # Same folder as the workbook, same name, .csv instead of .xlsx
    csv_path = workbook.parent / f'{workbook.stem}.csv'
    sheet.to_csv(csv_path, encoding='utf-8', index=False)
    

In [150]:
def _load_labelled_plate(readings_csv, layout_csv, value_name, suffix=''):
    """Read one plate-reader csv and label every well using its layout csv.

    Args:
        readings_csv: csv with 'Well' and 'Content' columns; the measurement is
            taken from the third column, whatever it is called in the export.
        layout_csv: csv with 'well_row' and 'well_col' columns plus the labels
            (the callers rely on 'name' and 'condition' being present).
        value_name: Column name given to the measurement.
        suffix: Appended to the well / well_row / well_col key columns so that
            several plates can later be merged without their keys colliding.

    Returns:
        DataFrame with one row per well found in both the readings and the layout.
    """
    well_key = f'well{suffix}'
    row_key = f'well_row{suffix}'
    col_key = f'well_col{suffix}'

    readings = pd.read_csv(readings_csv, header=0)
    readings = readings.rename(columns={readings.columns[2]: value_name})
    plate = readings[['Well', 'Content', value_name]].copy()
    plate = plate.rename(columns={'Well': well_key, 'Content': 'content'})
    plate['content'] = plate['content'].astype(str)
    # Well labels are split into their first character (row) and last two characters (column)
    plate[row_key] = plate[well_key].str[:1]
    plate[col_key] = plate[well_key].str[-2:]

    layout = pd.read_csv(layout_csv, header=0)
    # Zero-pad the layout column so it matches the two-character column taken from the well label
    layout[col_key] = layout['well_col'].astype(str).str.zfill(width=2)
    layout = layout.rename(columns={'well_row': row_key})

    labelled = pd.merge(plate, layout, on=[row_key, col_key])
    # Store well_col as a string (builtin str: the np.str alias was removed in NumPy 1.24)
    labelled['well_col'] = labelled[col_key].astype(str)
    return labelled


def _first_matching_value(frame, mask, column, description):
    """Return the first value of `column` among the rows selected by `mask`.

    Raises:
        IndexError: If no row matches, with a message naming what was missing.
    """
    matches = frame.loc[mask, column].values
    if len(matches) == 0:
        raise IndexError(f'no {description} row found in the plate layout')
    return matches[0]


def combine_csvs(input_protein1,input_protein2,input_nluc,layout_csv,layout_protein1,layout_protein2,date,output_file_raw):
    """Combine two protein absorbance csvs with one nluc luminescence csv and write the labelled table.

    Each plate is labelled with its layout csv. Absorbance is normalised per
    protein plate to that plate's 'Col-0-1A' well (condition '10mM_nitrate' or
    'calibrator'); luminescence is normalised to the 'plate_calibrator' well.
    The nluc wells are then joined to the protein wells on name and condition,
    and the ratio of the two normalised values is stored as 'nluc/absorbance'.
    Rows without a ratio are dropped.

    Args:
        input_protein1: csv of absorbance readings for protein plate 1.
        input_protein2: csv of absorbance readings for protein plate 2.
        input_nluc: csv of nluc luminescence readings.
        layout_csv: Layout csv for the nluc plate.
        layout_protein1: Layout csv for protein plate 1.
        layout_protein2: Layout csv for protein plate 2.
        date: Value written to the 'date' column of every output row.
        output_file_raw: Path of the csv to write.

    Returns:
        None. The combined table is written to `output_file_raw`.

    Raises:
        IndexError: If a protein layout has no matching 'Col-0-1A' row or the
            nluc layout has no 'plate_calibrator' row.
    """
    protein_plates = []
    for readings_csv, plate_layout, suffix in (
        (input_protein1, layout_protein1, '_plate1'),
        (input_protein2, layout_protein2, '_plate2'),
    ):
        plate = _load_labelled_plate(readings_csv, plate_layout, 'absorbance', suffix)
        is_reference = (plate['name'] == 'Col-0-1A') & plate['condition'].isin(['10mM_nitrate', 'calibrator'])
        reference = _first_matching_value(plate, is_reference, 'absorbance', 'Col-0-1A (10mM_nitrate or calibrator)')
        #normalise to Col-0-1A absorbance value
        plate['norm_absorbance'] = plate['absorbance'] / reference
        protein_plates.append(plate)

    # Outer merge on all four shared value columns: rows from the two plates only
    # coincide when every one of these values is equal, otherwise they are stacked
    protein = pd.merge(protein_plates[0], protein_plates[1], on=['name','condition','absorbance','norm_absorbance'], how='outer')

    nluc = _load_labelled_plate(input_nluc, layout_csv, 'nluc_luminescence')
    plate_calibrator = _first_matching_value(nluc, nluc['name'] == 'plate_calibrator', 'nluc_luminescence', 'plate_calibrator')
    #normalise to plate calibrator value
    nluc['norm_nluc_luminescence'] = nluc['nluc_luminescence'] / plate_calibrator

    # Left merge keeps every nluc well; each one is paired with every protein row
    # that shares its name and condition
    combined = pd.merge(nluc, protein, on=['name','condition'], how='left')
    combined['content'] = combined['content'].astype(str)

    #add new column, nluc/absorbance
    combined['nluc/absorbance'] = combined['norm_nluc_luminescence'] / combined['norm_absorbance']
    # Wells without a protein match (or without a valid ratio) are NaN here and are dropped
    labelled = combined[combined['nluc/absorbance'].notnull()].copy()
    labelled['date'] = date
    labelled = labelled[['name','well','content','nluc_luminescence','well_row','well_col','condition','norm_nluc_luminescence','absorbance','norm_absorbance','nluc/absorbance','date']]

    # Only the per-well table is written; no mean / standard-error summary is produced here
    labelled.to_csv(output_file_raw, encoding='utf-8', index=False)
    

In [151]:
def main(plate_list, date, file_suffix):
    """Convert the xlsx exports of one experiment to csv and build one raw output csv per plate.

    Args:
        plate_list: Plate identifiers; each is substituted into the nluc input,
            layout and output file names.
        date: Name of the experiment folder under root_lucN_luminescence; also
            passed on to combine_csvs and used to name the plot folders.
        file_suffix: Text inserted into the nluc input file name after 'root_lucn'.

    Returns:
        None. csv files and the two plot folders are created on disk.
    """
    data_dir = f'../../data/luminescence/root_lucN_luminescence/{date}'

    # Convert every xlsx file sitting directly in the experiment folder
    # (the pattern has no '**', so sub-folders are not searched)
    for workbook in glob.glob(f'{data_dir}/*.xlsx'):
        xlsx_2_csv(workbook)

    # The protein plates and their layouts are the same for every nluc plate.
    # NOTE(review): these two file names contain the literal 13.10.22 rather than
    # {date} - confirm whether other experiments follow the same naming.
    input_protein1 = f'{data_dir}/root_protein_pierce660nm_13.10.22_plate1_h20blank_inclraw.csv'
    input_protein2 = f'{data_dir}/root_protein_pierce660nm_13.10.22_plate2_h20blank_inclraw.csv'
    layout_protein1 = f'{data_dir}/layout_protein_plate1.csv'
    layout_protein2 = f'{data_dir}/layout_protein_plate2.csv'

    for plate_letter in plate_list:
        input_nluc = f'{data_dir}/root_lucn{file_suffix}_plate{plate_letter}_2000gain_5ulcalibrator.csv'
        layout = f'{data_dir}/layout_plate{plate_letter}.csv'
        output_raw = f'{data_dir}/plate{plate_letter}_output_raw.csv'
        #combine the csvs
        combine_csvs(input_protein1,input_protein2,input_nluc,layout,layout_protein1,layout_protein2,date,output_raw)

    # Create the plot output folder and the plotting source folder for this experiment;
    # exist_ok makes re-runs harmless and missing parent folders are created too
    for folder in (f'../../data/plots/luminescence/{date}', f'../../src/plotting/luminescence/{date}'):
        os.makedirs(folder, exist_ok=True)





In [152]:
# Earlier run, kept for reference:
#main([1,2], '27.9.21',"_270921")
# Process plates A, B and C of the 13.10.22 experiment
main(plate_list=['A','B','C'], date='13.10.22', file_suffix="_131022")

Use os.scandir when scanning a directory; according to Matt, this is the fastest way.

rename columns

#### merge layout with combined

#### add new column, nluc/fluc

## plots

In [153]:
# #set style to ticks
# sns.set(style="ticks", color_codes=True)

## separate data by condition

In [154]:
# nitrate_free = combined_named_no_null[combined_named_no_null.condition == 'nitrate_free']
# #reset indexes so residuals can be calculated later
# nitrate_free.reset_index(inplace=True)

# nitrate_2hrs_morning = combined_named_no_null[combined_named_no_null.condition == 'nitrate_2hrs_morning']
# nitrate_2hrs_morning.reset_index(inplace=True)

# nitrate_overnight = combined_named_no_null[combined_named_no_null.condition == 'nitrate_overnight']
# nitrate_overnight.reset_index(inplace=True)

In [155]:
# pd.Categorical(combined_named_no_null.condition)
# names = combined_named_no_null.condition.unique()
# for name in names:
#     print(name)