<b>Sort the protoplast luminescence data from the xlsx output of the CLARIOstar plate reader.</b>
Use 2 input Excel files at a time (one firefly, one nanoluc).

In [1]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
import glob

In [2]:
def xlsx_2_csv(xlsx):
    """Convert the 'Table End point' sheet of an xlsx file to a csv file.

    The csv is written next to the input workbook, using the same base name
    with a ``.csv`` extension (e.g. ``folder/plate1.xlsx`` ->
    ``folder/plate1.csv``). An existing csv of that name is overwritten.

    Parameters
    ----------
    xlsx : str or path-like
        Path to the xlsx workbook to convert.

    Returns
    -------
    None
    """
    # The sheet is selected by name, not by position in the workbook
    table = pd.read_excel(xlsx, sheet_name='Table End point', index_col=None)

    # Same folder and base name as the workbook, only the extension changes
    csv_path = Path(xlsx).with_suffix('.csv')

    table.to_csv(csv_path, encoding='utf-8', index=False)
    

In [3]:
def combine_csvs(input_fluc,input_nluc,layout_csv,date,output_file_means,output_file_raw,min_luminescence=340):
    """Combine firefly and nanoluc luminescence csv files and label the wells.

    The two plate-reader csv files are joined on their ``Well`` and
    ``Content`` columns, each well is labelled using the plate layout csv,
    and the nluc/fluc ratio is calculated per well. Wells without a valid
    ratio are dropped. Two csv files are written: the per-well data and the
    mean / standard error of the ratio for each (name, condition) group.

    Parameters
    ----------
    input_fluc, input_nluc : str or path-like
        Csv files with ``Well`` and ``Content`` columns; the third column of
        each file is taken as the luminescence reading.
    layout_csv : str or path-like
        Plate layout csv with ``well_row``, ``well_col``, ``name`` and
        ``condition`` columns.
    date : str
        Value written to the ``date`` column of both output files.
    output_file_means : str or path-like
        Destination of the per-group mean and standard error csv.
    output_file_raw : str or path-like
        Destination of the per-well csv.
    min_luminescence : int or float, optional
        Readings below this value are replaced by NaN before the ratio is
        calculated (default 340).

    Returns
    -------
    None
    """
    #read in files
    fluc = pd.read_csv(input_fluc, header=0)
    nluc = pd.read_csv(input_nluc, header=0)
    layout = pd.read_csv(layout_csv, header=0)

    #the reading is the 3rd column of each file, whatever its header is
    fluc = fluc.rename(columns={fluc.columns[2]: "Luminescence"})
    nluc = nluc.rename(columns={nluc.columns[2]: "Luminescence"})

    #start from the fluc data, then attach the nluc reading of the same well
    combined = fluc[['Well','Content','Luminescence']].rename(
        columns={'Luminescence': 'fluc_luminescence'}
    )
    combined = pd.merge(combined, nluc, on=['Well','Content'], how='left')
    combined = combined.rename(
        columns={'Luminescence': 'nluc_luminescence', 'Well': 'well', 'Content': 'content'}
    )

    #split the well label into row letter and zero-padded column number,
    #so that both 'A01' and 'A1' give well_col '01' and match the layout
    combined['well_row'] = combined['well'].str[:1]
    combined['well_col'] = combined['well'].str[1:].str.zfill(2)

    #mask any readings below the threshold (turn into NaNs)
    for reading in ('fluc_luminescence', 'nluc_luminescence'):
        combined[reading] = combined[reading].mask(combined[reading] < min_luminescence)

    #prepend layout well col with a 0 so it matches the well_col format above
    layout['well_col'] = layout['well_col'].astype(str).str.zfill(2)
    #change df content data type to string
    combined['content'] = combined['content'].astype(str)

    #merge layout with combined (inner join: wells missing from the layout are dropped)
    combined_named = pd.merge(combined, layout, on=['well_row','well_col'])
    #keep well_col as a string (builtin str; the np.str alias was removed from NumPy)
    combined_named['well_col'] = combined_named['well_col'].astype(str)

    #add new column, nluc/fluc
    combined_named['nluc/fluc'] = combined_named['nluc_luminescence'] / combined_named['fluc_luminescence']
    #remove wells where the ratio could not be calculated
    valid_wells = combined_named.dropna(subset=['nluc/fluc']).copy()

    #mean and standard error of the ratio for each name/condition group
    group_stats = (
        valid_wells.groupby(['name','condition'])['nluc/fluc']
        .agg(mean_luminescence='mean', standard_error='sem')
        .reset_index()
    )

    #add date of experiment and write the per-well data
    valid_wells['date'] = date
    valid_wells.to_csv(output_file_raw, encoding='utf-8', index=False)

    #add date of experiment and write the per-group summary
    group_stats['date'] = date
    group_stats.to_csv(output_file_means, encoding='utf-8', index=False)
    

In [4]:
def main(plate_list, date, file_suffix):
    """Convert the xlsx files of one experiment and create the output csv files for each plate.

    All paths are relative to the current working directory.

    Parameters
    ----------
    plate_list : iterable
        Plate numbers to process; each one is inserted into the input,
        layout and output file names.
    date : str
        Name of the experiment folder inside ``to_be_sorted``; also passed
        on to ``combine_csvs`` as the date label.
    file_suffix : str
        Text between ``lucf``/``lucn`` and ``_plate<number>.csv`` in the
        input file names.

    Returns
    -------
    None
    """
    data_dir = f'../../data/luminescence/to_be_sorted/{date}'

    #convert every xlsx file directly inside this experiment's folder
    #(the pattern has no '**', so sub-folders are not searched)
    for xlsx_file in glob.glob(f'{data_dir}/*.xlsx'):
        xlsx_2_csv(xlsx_file)

    for plate_number in plate_list:
        input_fluc = f'{data_dir}/lucf{file_suffix}_plate{plate_number}.csv'
        input_nluc = f'{data_dir}/lucn{file_suffix}_plate{plate_number}.csv'
        layout = f'{data_dir}/layout_plate{plate_number}.csv'
        output = f'{data_dir}/plate{plate_number}_output_means.csv'
        output_raw = f'{data_dir}/plate{plate_number}_output_raw.csv'
        #combine the csvs
        combine_csvs(input_fluc,input_nluc,layout,date,output,output_raw)

    #make directory in plotting folder; exist_ok avoids the check-then-create
    #race and missing parent folders are created as well
    os.makedirs(f'../../data/plots/luminescence/{date}', exist_ok=True)
    #make another directory in the src folder
    os.makedirs(f'../../src/plotting/luminescence/{date}', exist_ok=True)





In [5]:
# Earlier run, kept for reference: main([1,2], '27.9.21',"_270921")
# Process plates 1-4 of the experiment stored in the '27.04.22' folder.
main(plate_list=[1, 2, 3, 4], date='27.04.22', file_suffix="_270422")

Use os.scandir when scanning a directory; according to Matt, this is the fastest way.

rename columns

#### merge layout with combined

#### add new column, nluc/fluc

## plots

In [77]:
#set style to ticks
# NOTE(review): `sns` is not imported in any of the cells shown here, and the
# recorded output of this cell is "NameError: name 'sns' is not defined".
# Presumably `import seaborn as sns` has to be run first — confirm and add it
# to the import cell.
sns.set(style="ticks", color_codes=True)

NameError: name 'sns' is not defined

## separate data by condition

In [41]:
# nitrate_free = combined_named_no_null[combined_named_no_null.condition == 'nitrate_free']
# #reset indexes so residuals can be calculated later
# nitrate_free.reset_index(inplace=True)

# nitrate_2hrs_morning = combined_named_no_null[combined_named_no_null.condition == 'nitrate_2hrs_morning']
# nitrate_2hrs_morning.reset_index(inplace=True)

# nitrate_overnight = combined_named_no_null[combined_named_no_null.condition == 'nitrate_overnight']
# nitrate_overnight.reset_index(inplace=True)

In [79]:
# Print each distinct value of the `condition` column, in order of first appearance.
# (The former `pd.Categorical(...)` call was dropped: its result was never
# assigned, so it did not change the DataFrame.)
# NOTE(review): in the cells shown here `combined_named_no_null` is only created
# inside combine_csvs — presumably it was also defined at notebook level in a
# cell that is not shown; confirm before running.
names = combined_named_no_null.condition.unique()
for name in names:
    print(name)

nitrate_free
nitrate_2hrs_morning
nitrate_overnight
