In [185]:
import numpy as np
import pandas as pd
import csv 
import sys

In [186]:
# Inputs
# Location of the input CSV file
input_file_folder = "/Users/keltoumboukra/Desktop/Coding projects - Git/SamplePoolingForNGS/Files/"
input_file_name = "InputTrimTest.csv"

# Final desired volume of the pool (nL)
final_pool_vol = 10000

# Well position of the pool in the output plate
pool_well_in_output_plate = "A1"

# Smallest volume that can be pipetted (nL); this is the value used for the Echo
min_pipetting_capacity = 25.0

# Largest volume that may be taken out of one source well (nL).
# E.g. if there is 20 uL in the well, 5000 nL is what can be available, as 15 uL is
# the minimum working range in the PP-0200 Echo plate.
vol_available_in_well = 5000

# Dataframes
# Load the input file, then give its third column the fixed name "Concentration"
# (whatever header it carries in the file) so later cells can refer to it by name.
input_file_df = pd.read_csv(input_file_folder + input_file_name)
input_file_df = input_file_df.rename(columns={input_file_df.columns[2]: "Concentration"})




In [187]:
# Outputs
# Every output file is written to this one folder.
output_files_folder = "/Users/keltoumboukra/Desktop/Coding projects - Git/SamplePoolingForNGS/Files/"

# Per-sample calculation details (weights and volume in the pool)
output_file_name = "Output_trim.CSV"
output_file_df = pd.DataFrame(
    columns=[
        'Sample Well ID',
        'Sample Concentration',
        'Sample Calculated Weight',
        'Sample Calculated Weight Normalised',
        'Sample Calculated Volume In Pool',
    ]
)

# Transfer list (source well, destination well, volume)
output_echo_file_name = "Output_trim_QubitOutputEcho.csv"
output_echo_file_df = pd.DataFrame(
    columns=[
        'Source Well',
        'Destination Well',
        'Transfer Volume',
    ]
)

# Report of the samples that were rejected or could not be processed, with a comment
output_report_name = "Output_trim_report.CSV"
output_report_df = pd.DataFrame(
    columns=[
        'Sample Well ID',
        'Sample Concentration',
        'Comment',
    ]
)


In [188]:
# Trim input data frame

# Remove standards: keep only the rows whose "Content" value does not contain "Standard"
is_standard = input_file_df["Content"].str.contains("Standard")
input_file_df = input_file_df[is_standard == False]
input_file_df = input_file_df.reset_index(drop=True)

# Remove samples whose concentration is zero or negative and add them to the output report.
# The well ID is taken from the first column of the input file.
# (The report comment says "negative" although a concentration of exactly 0 is rejected too.)
is_rejected = input_file_df["Concentration"] <= 0
rejected_samples = input_file_df[is_rejected]

if not rejected_samples.empty:
    rejected_report_rows = pd.DataFrame(
        {
            'Sample Well ID': rejected_samples.iloc[:, 0].to_numpy(),
            'Sample Concentration': rejected_samples["Concentration"].to_numpy(),
            'Comment': "Sample has negative value for concentration",
        },
        columns=output_report_df.columns,
    )
    # DataFrame.append no longer exists in pandas >= 2.0, so the new rows are added in
    # one step. An empty report is simply replaced, which avoids concatenating an empty frame.
    if output_report_df.empty:
        output_report_df = rejected_report_rows
    else:
        output_report_df = pd.concat([output_report_df, rejected_report_rows], ignore_index=True)
    input_file_df = input_file_df[~is_rejected]

# Later cells use the row label as a position, so both frames are renumbered from 0
input_file_df = input_file_df.reset_index(drop=True)
output_report_df = output_report_df.reset_index(drop=True)


In [189]:
# Calculate Weights
# The weight of a sample is max_concentration / concentration: the most concentrated
# sample gets a weight of 1 and less concentrated samples get a larger weight.

max_concentration = input_file_df['Concentration'].max()
sample_weights = max_concentration / input_file_df['Concentration']

# skipna=False: a missing weight makes the total NaN instead of being silently left out
sum_weights = float(sample_weights.sum(skipna=False))

# Build the output table in one step (DataFrame.append no longer exists in pandas >= 2.0).
# The table is rebuilt rather than extended, so running this cell again does not
# duplicate rows. Columns not filled here are left empty for the following cells.
output_file_df = pd.DataFrame(
    {
        'Sample Well ID': input_file_df.iloc[:, 0].to_numpy(),
        'Sample Concentration': input_file_df['Concentration'].to_numpy(),
        'Sample Calculated Weight': sample_weights.to_numpy(),
    },
    columns=output_file_df.columns,
)
    

In [190]:
# Calculate Normalised Weights
# Each weight is divided by the sum of all weights and stored next to the raw weight.

normalised_column = 'Sample Calculated Weight Normalised'
raw_weights = output_file_df["Sample Calculated Weight"]

sum_normalised_weights = float(0)  # for QC, should be =1
for position in input_file_df.index:
    # The row label of the input frame is used both as a position in the weight
    # column and as the row label to write to in the output frame.
    normalised_weight = raw_weights.iloc[position] / sum_weights
    sum_normalised_weights += normalised_weight
    output_file_df.at[position, normalised_column] = normalised_weight


In [191]:
# Calculate Volume to pipette for each sample in the final pool
# volume = normalised weight * final_pool_vol. A sample is valid when its volume lies
# between min_pipetting_capacity and vol_available_in_well (both inclusive); any other
# sample is written to the report and marks the whole run as failed.

vol_in_pool = float()
sum_vol_in_pool = float(0)  # for QC, should be = final_pool_vol
fail_bool = False

report_records = []  # one entry per sample whose volume cannot be pipetted
echo_records = []    # one entry per valid transfer

for row in input_file_df.itertuples():

    Index = row[0]
    Well = row[1]
    Concentration = row[3]

    vol_in_pool = output_file_df["Sample Calculated Weight Normalised"].iloc[Index] * final_pool_vol
    sum_vol_in_pool += vol_in_pool

    if pd.isna(vol_in_pool):
        # A missing volume compares False with both limits, so without this check it
        # would be accepted as a valid transfer.
        problem = "Volume to be pipetted for this well could not be calculated. You MUST reprocess the file"
    elif vol_in_pool < min_pipetting_capacity:
        problem = "Volume to be pipetted for this well ({} nL) is smaller than pipetting capacity ({} nL). You MUST reprocess the file".format(vol_in_pool, min_pipetting_capacity)
    elif vol_in_pool > vol_available_in_well:
        problem = "Volume to be pipetted for this well ({} nL) is larger than the volume available ({} nL). You MUST reprocess the file".format(vol_in_pool, vol_available_in_well)
    else:
        problem = None

    if problem is None:
        output_file_df.at[Index, 'Sample Calculated Volume In Pool'] = vol_in_pool
        echo_records.append({'Source Well': Well, 'Destination Well': pool_well_in_output_plate, 'Transfer Volume': vol_in_pool})
    else:
        report_records.append({'Sample Well ID': Well, 'Sample Concentration': Concentration, 'Comment': problem})
        fail_bool = True

# The frames are built once from the collected records
# (DataFrame.append no longer exists in pandas >= 2.0).
if report_records:
    new_report_rows = pd.DataFrame(report_records, columns=output_report_df.columns)
    if output_report_df.empty:
        output_report_df = new_report_rows
    else:
        output_report_df = pd.concat([output_report_df, new_report_rows], ignore_index=True)

# The transfer list is rebuilt, so running this cell again does not duplicate transfers
output_echo_file_df = pd.DataFrame(echo_records, columns=output_echo_file_df.columns)

In [192]:
# Tell the user how the processing went and write the result files.
# The calculation details and the transfer list are only written when every sample is valid.
if fail_bool:
    print("Processing failed. The concentration of 1 sample or more is out of range. Check report file for details")
else:
    print("Processing successful! Check output file folder for processing details, report and Echo file")
    output_file_df.to_csv(output_files_folder + output_file_name, index=False)
    output_echo_file_df.to_csv(output_files_folder + output_echo_file_name, index=False)

# The report is written whether the processing succeeded or not
output_report_df.to_csv(output_files_folder + output_report_name, index=False)