<b> Sort the protoplast luminescence data from the xlsx output from the Glariostar platereader. 
Use 2 input excels at a time (one firefly, one nanoluc)

In [14]:
import pandas as pd
import numpy as np
import os
from pathlib import Path
import glob

In [15]:
def xlsx_2_csv(xlsx):  
    """ Function to read and convert xlsx file to csv file. Also return the data (name of the folder the xlsx is in)"""
    
    # Read in the xlsx file, second sheet
    file = pd.read_excel(xlsx, 'Table End point', index_col=None) 
    
    filename = os.path.basename(xlsx)
    removed_extension = os.path.splitext(filename)[0]
    path = Path(xlsx).parent #find parent directory to the one the xlsx fields are in
    date = Path(xlsx)
    
    file.to_csv(f'{path}/{removed_extension}.csv', encoding='utf-8', index=False)
    

In [39]:
def combine_csvs(input_fluc,input_nluc,layout_csv,date,output_file_means,output_file_raw):
    """Function to combine two csv files containingg luminescence data, and label values using layout csv file (plate layout)"""
    #read in files
    fluc = pd.read_csv(input_fluc, header=0)
    nluc = pd.read_csv(input_nluc, header=0)
    print(nluc.columns)
    layout_df = pd.read_csv(layout_csv, header=0)
    #make new df with correct column names, including both fluc and nluc data
    #combined = fluc[['Well\nRow', 'Well\nCol', 'Content','Average over replicates based on Blank corrected (No filter)']].copy()
    combined = fluc[['Well','Content','Luminescence']].copy()  

    #combined.rename(columns = {'Well\nRow':'well_row', 'Well\nCol':'well_col', 'Content':'content', 'Average over replicates based on Blank corrected (No filter)':'fluc_luminescence'}, inplace = True)
    combined.rename(columns = {'Well':'well','Content':'content', 'Luminescence':'fluc_luminescence'}, inplace = True)
    combined['nluc_luminescence'] = nluc['Luminescence'].copy()
    #add well row and column columns
    combined['well_row'] = combined.well.str[:1]
    combined['well_col'] = combined.well.str[-2:]
    #mask any values less than 400 (turn into NaNs)  
    combined['fluc_luminescence'] = combined.fluc_luminescence.mask(combined.fluc_luminescence < 400)
    combined['nluc_luminescence'] = combined.nluc_luminescence.mask(combined.nluc_luminescence < 400)
    #prepend layout well col with a 0
    layout = layout_df.copy()
    layout['well_col'] = layout_df['well_col'].astype(str).str.zfill(width=2)
    #change df content data type to string
    combined.content = combined.content.astype(str)
    #merge layout with combined
    combined_named = pd.merge(combined, layout, on=['well_row','well_col'])
    #convert well_col column data type to string so it is excluded from the next bit
    combined_named.well_col = combined_named.well_col.astype(np.str)
    #add new column, nluc/fluc
    combined_named['nluc/fluc'] = combined_named.nluc_luminescence / combined_named.fluc_luminescence
    #remove NaNs
    combined_named_no_null = combined_named[pd.notnull(combined_named['nluc/fluc'])]
    #add date to the data
    combined_named_no_null_date = combined_named_no_null.copy()
    combined_named_no_null_date['date'] = date
    #make csv of raw data
    combined_named_no_null_date.to_csv(output_file_raw, encoding='utf-8', index=False)
    #make new df with mean luminescence
    mean = combined_named_no_null[['name','condition', 'nluc/fluc']].groupby(['name','condition']).mean().reset_index()
    ######mean = combined_named_no_null[['name', 'nluc/fluc']].groupby('name').mean().reset_index()
    mean.rename(columns = {'nluc/fluc':'mean_luminescence'}, inplace = True)
    #add standard error
    standard_error = combined_named_no_null[['name','condition', 'nluc/fluc']].groupby(['name','condition']).sem().reset_index()
    #####standard_error = combined_named_no_null[['name','nluc/fluc']].groupby('name').sem().reset_index()
    standard_error.rename(columns = {'nluc/fluc':'standard_error'}, inplace=True)
    mean_samples = pd.merge(mean, standard_error, on=['name','condition'])
    #####mean_samples = pd.merge(mean, standard_error, on='name')
    #add date of experiment
    mean_samples['date'] = date
    #create output file
    mean_samples.to_csv(output_file_means, encoding='utf-8', index=False)
    

In [45]:
date = '28.6.21'
input_fluc = f'../../data/luminescence/to_be_sorted/{date}/plate3_fluc_edited.csv'
input_nluc = f'../../data/luminescence/to_be_sorted/{date}/plate3_nluc_edited.csv'
layout = f'../../data/luminescence/to_be_sorted/{date}/layout.csv'
output = f'../../data/luminescence/to_be_sorted/{date}/plate3_output_means.csv'
output_raw = f'../../data/luminescence/to_be_sorted/{date}/plate3_output_raw.csv'


In [20]:
#find all xlsx files recursively in the 'to_be_sorted' folder
xlsx_filenames = glob.glob(f'../../data/luminescence/to_be_sorted/{date}/*.xlsx', recursive=True)

In [21]:
#run the xlsx_2_csv function across all xlsx file in to_be_sorted folder
list(map(xlsx_2_csv,xlsx_filenames))               
                 

[None, None, None, None, None, None]

use os.scandir when scanning a directory, this is the fastest way according to Matt

In [85]:
# input_fluc = '../../data/luminescence/to_be_sorted/24.11.19/nitrate_free_phytogel_fluc.csv'
# input_nluc = '../../data/luminescence/to_be_sorted/24.11.19/nitrate_free_phytogel_nluc.csv'
# layout = '../../data/luminescence/to_be_sorted/24.11.19/layout.csv'
# output = '../../data/luminescence/to_be_sorted/24.11.19/output_means.csv'
# output_raw = '../../data/luminescence/to_be_sorted/24.11.19/output_raw.csv'
# date = '24.11.19'

In [46]:
combine_csvs(input_fluc,input_nluc,layout,date,output, output_raw)

Index(['Well', 'Content', 'Luminescence'], dtype='object')


In [28]:
fluc = pd.read_csv(input_fluc, header = 0)
nluc = pd.read_csv(input_nluc, header = 0)
layout = pd.read_csv(layout, header = 0)

In [26]:
fluc.columns

Index(['Well\nRow', 'Well\nCol', 'Content', 'Raw Data (No filter)',
       'Blank corrected based on Raw Data (No filter)',
       'Average over replicates based on Blank corrected (No filter)'],
      dtype='object')

In [27]:
nluc

Unnamed: 0,Well\nRow,Well\nCol,Content,Raw Data (No filter),Blank corrected based on Raw Data (No filter),Average over replicates based on Blank corrected (No filter)
0,A,1,Sample X1,46764004,46763438.0,46763438.0
1,A,2,Sample X6,5237598,5237032.0,5237032.0
2,A,3,Sample X11,2987517,2986951.0,2986951.0
3,A,4,Sample X16,8128457,8127891.0,8127891.0
4,A,5,Sample X21,10918847,10918281.0,10918281.0
5,A,6,Sample X26,4832534,4831968.0,4831968.0
6,A,7,Sample X31,2671087,2670521.0,2670521.0
7,A,8,Sample X36,3483987,3483421.0,3483421.0
8,A,9,Sample X41,3479548,3478982.0,3478982.0
9,B,1,Sample X2,51752,51186.0,51186.0


In [54]:
combined = fluc[['Well\nRow', 'Well\nCol', 'Content','Average over replicates based on Blank corrected (No filter)']].copy()

rename columns

In [55]:
combined.rename(columns = {'Well\nRow':'well_row', 'Well\nCol':'well_col', 'Content':'content', 'Average over replicates based on Blank corrected (No filter)':'fluc_luminescence'}, inplace = True)

In [56]:
combined['nluc_luminescence'] = nluc[['Average over replicates based on Blank corrected (No filter)']].copy()

In [57]:
#make any values less than 400 NaN
#combined.nluc_luminescence = combined.mask(combined.nluc_luminescence < 400)
combined['fluc_luminescence'] = combined.fluc_luminescence.mask(combined.fluc_luminescence < 400)
combined['nluc_luminescence'] = combined.nluc_luminescence.mask(combined.nluc_luminescence < 400)

In [58]:
combined

Unnamed: 0,well_row,well_col,content,fluc_luminescence,nluc_luminescence
0,A,1,Sample X1,394917.0,46763438.0
1,A,2,Sample X6,125696.0,5237032.0
2,A,3,Sample X11,64547.0,2986951.0
3,A,4,Sample X16,95437.0,8127891.0
4,A,5,Sample X21,118307.0,10918281.0
5,A,6,Sample X26,71519.0,4831968.0
6,A,7,Sample X31,84739.0,2670521.0
7,A,8,Sample X36,114046.0,3483421.0
8,A,9,Sample X41,74478.0,3478982.0
9,B,1,Sample X2,188045.0,51186.0


In [59]:
#rename well_row in layout
layout

Unnamed: 0,well_row,well_col,name,condition
0,A,1,71 + 72,nitrate_free
1,B,1,25+72,nitrate_free
2,C,1,35+72,nitrate_free
3,D,1,36+72,nitrate_free
4,E,1,92+72,nitrate_free
...,...,...,...,...
91,D,12,0,
92,E,12,0,
93,F,12,0,
94,G,12,0,


#### merge layout with combined

In [60]:
combined_named = pd.merge(combined, layout, on=['well_row','well_col'])

In [61]:
#convert well_col column data type to string so it is excluded from the next bit
combined_named.well_col = combined_named.well_col.astype(np.str)

#### add new column, nluc/fluc

In [62]:
combined_named['nluc/fluc'] = combined_named.nluc_luminescence / combined_named.fluc_luminescence

In [63]:
#remove NaNs
combined_named_no_null = combined_named[pd.notnull(combined_named['nluc/fluc'])]
combined_named_no_null

Unnamed: 0,well_row,well_col,content,fluc_luminescence,nluc_luminescence,name,condition,nluc/fluc
0,A,1,Sample X1,394917.0,46763438.0,71 + 72,nitrate_free,118.413332
1,A,2,Sample X6,125696.0,5237032.0,71 + 72,nitrate_free,41.664269
2,A,3,Sample X11,64547.0,2986951.0,71 + 72,nitrate_free,46.275598
3,A,4,Sample X16,95437.0,8127891.0,71 + 72,nitrate_2hrs_morning,85.164988
4,A,5,Sample X21,118307.0,10918281.0,71 + 72,nitrate_2hrs_morning,92.287701
5,A,6,Sample X26,71519.0,4831968.0,71 + 72,nitrate_2hrs_morning,67.562018
6,A,7,Sample X31,84739.0,2670521.0,71 + 72,nitrate_overnight,31.514663
7,A,8,Sample X36,114046.0,3483421.0,71 + 72,nitrate_overnight,30.544
8,A,9,Sample X41,74478.0,3478982.0,71 + 72,nitrate_overnight,46.711539
9,B,1,Sample X2,188045.0,51186.0,25+72,nitrate_free,0.272201


In [68]:
mean = combined_named_no_null[['name','condition', 'nluc/fluc']].groupby(['name','condition']).mean().reset_index()
mean.rename(columns = {'nluc/fluc':'mean_luminescence'}, inplace = True)
mean

Unnamed: 0,name,condition,mean_luminescence
0,25+72,nitrate_2hrs_morning,2.790261
1,25+72,nitrate_free,0.550167
2,25+72,nitrate_overnight,1.590474
3,35+72,nitrate_2hrs_morning,52.739306
4,35+72,nitrate_free,39.059989
5,35+72,nitrate_overnight,34.324808
6,36+72,nitrate_2hrs_morning,7.455392
7,36+72,nitrate_free,2.743449
8,36+72,nitrate_overnight,14.478988
9,71 + 72,nitrate_2hrs_morning,81.671569


In [69]:
standard_error = combined_named_no_null[['name','condition', 'nluc/fluc']].groupby(['name','condition']).sem().reset_index()
standard_error.rename(columns = {'nluc/fluc':'standard_error'}, inplace=True)

In [72]:
mean_samples = pd.merge(mean, standard_error, on=['name','condition'])

In [73]:
mean_samples

Unnamed: 0,name,condition,mean_luminescence,standard_error
0,25+72,nitrate_2hrs_morning,2.790261,2.193078
1,25+72,nitrate_free,0.550167,0.222781
2,25+72,nitrate_overnight,1.590474,0.172189
3,35+72,nitrate_2hrs_morning,52.739306,11.566316
4,35+72,nitrate_free,39.059989,4.871612
5,35+72,nitrate_overnight,34.324808,5.507199
6,36+72,nitrate_2hrs_morning,7.455392,6.53336
7,36+72,nitrate_free,2.743449,0.756559
8,36+72,nitrate_overnight,14.478988,8.061074
9,71 + 72,nitrate_2hrs_morning,81.671569,7.348306


In [74]:
date_of_experiment = '30/8/19'
mean_samples['date'] = date_of_experiment

In [75]:
mean_samples

Unnamed: 0,name,condition,mean_luminescence,standard_error,date
0,25+72,nitrate_2hrs_morning,2.790261,2.193078,30/8/19
1,25+72,nitrate_free,0.550167,0.222781,30/8/19
2,25+72,nitrate_overnight,1.590474,0.172189,30/8/19
3,35+72,nitrate_2hrs_morning,52.739306,11.566316,30/8/19
4,35+72,nitrate_free,39.059989,4.871612,30/8/19
5,35+72,nitrate_overnight,34.324808,5.507199,30/8/19
6,36+72,nitrate_2hrs_morning,7.455392,6.53336,30/8/19
7,36+72,nitrate_free,2.743449,0.756559,30/8/19
8,36+72,nitrate_overnight,14.478988,8.061074,30/8/19
9,71 + 72,nitrate_2hrs_morning,81.671569,7.348306,30/8/19


## plots

In [77]:
#set style to ticks
sns.set(style="ticks", color_codes=True)

NameError: name 'sns' is not defined

## separate data by condition

In [41]:
# nitrate_free = combined_named_no_null[combined_named_no_null.condition == 'nitrate_free']
# #reset indexes so residuals can be calculated later
# nitrate_free.reset_index(inplace=True)

# nitrate_2hrs_morning = combined_named_no_null[combined_named_no_null.condition == 'nitrate_2hrs_morning']
# nitrate_2hrs_morning.reset_index(inplace=True)

# nitrate_overnight = combined_named_no_null[combined_named_no_null.condition == 'nitrate_overnight']
# nitrate_overnight.reset_index(inplace=True)

In [79]:
pd.Categorical(combined_named_no_null.condition)
names = combined_named_no_null.condition.unique()
for name in names:
    print(name)

nitrate_free
nitrate_2hrs_morning
nitrate_overnight
