In [425]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [426]:
#function to read in csv file as pandas df
def read_csv(file_name):
    df = pd.read_csv(file_name)
    #keep the Well, Sample, Target, Cq and Amp Status columns
    df = df[['Well', 'Sample', 'Target', 'Cq', 'Amp Status']]
    
    return df

In [427]:
#function to filter out certain data points
def filter_data(df, amp_status, cq):
    #filter out the data points with amp_status = Amp using .loc
    df = df.loc[df['Amp Status'] == amp_status]
    #make Cq column numerical
    dfcopy = df.copy()
    dfcopy['Cq'] = pd.to_numeric(dfcopy['Cq'])  
    #filter out the data points with cq < cq_threshold using .loc
    dfcopy = dfcopy.loc[dfcopy['Cq'] <= cq]

    return dfcopy

In [428]:
#function to make new columns and sort the data
def sort_data(df):
    #make new column called EF1a_Cq, make the value in this column for a particular sample equal to the Cq value for the EF1a Target for that sample
    ##remove if not amplified
    df = df.loc[df['Amp Status'] == 'Amp']

    #get the mean of each sample/target (take mean of technical replicates)
    df['Cq_mean'] = df.groupby(['Sample','Target'])['Cq'].transform('mean')
    #make a df containing only EF1a target (housekeeping gene)
    df_EF1a = df.loc[df['Target'] == 'EF1a'].copy()

    #rename the Cq_mean column to EF1a_Cq_mean
    df_EF1a.rename(columns={'Cq_mean': 'EF1a_Cq_mean'}, inplace=True)
    #filter other df_EF1a columns
    df_EF1a = df_EF1a[['Sample','EF1a_Cq_mean']]
    #remove duplicates from df_EF1a
    df_EF1a = df_EF1a.drop_duplicates()
    
    #merge the two dfs together
    df = pd.merge(df, df_EF1a, on=['Sample'], how='left')

    #normalise based on eEF1a gene
    df = normalise_data(df, 'Cq_mean','EF1a_Cq_mean','MeanCq_ECnormalised')
    


    
   # print(df)
    #if Sample column ends with NRT, add NRT_Cq column
    df['NRT'] = False
    df.loc[df['Sample'].str.endswith('NRT'), 'NRT'] = True
    #remove NRT string from Sample columns ending with NRT
    df['Sample'] = df['Sample'].str.replace('NRT', '')
    
    #if Sample column ends with H, add condition column with 10mM_nitrate
    df['condition'] = np.nan
    df.loc[df['Sample'].str.endswith('H'), 'condition'] = '10mM_nitrate'
    #remove H string from Sample columns ending with H
    df['Sample'] = df['Sample'].str.replace('H', '')

    #if Sample column ends with L, add condition column with 1mM_nitrate
    df.loc[df['Sample'].str.endswith('L'), 'condition'] = '1mM_nitrate'
    #remove L string from Sample columns ending with L
    df['Sample'] = df['Sample'].str.replace('L', '')
    #remove A, B or C string from Sample columns ending with A, B or C
    df.loc[:, 'Sample_old'] = df['Sample']
    df['Sample'] = df['Sample'].str.replace('A', '')
    df['Sample'] = df['Sample'].str.replace('B', '')
    df['Sample'] = df['Sample'].str.replace('C', '')
    #remove whitespace from Sample columns
    df['Sample'] = df['Sample'].str.strip()
 
    #now make a df containing only Samples with 1mM_nitrate condition
    df_1mM_nitrate = df.loc[df['condition'] == '1mM_nitrate'].copy()
    #rename MeanCq_ECnormalised column to 1mMnitrate_Cq_mean
    df_1mM_nitrate.rename(columns={'MeanCq_ECnormalised': '1mMnitrate_Cq_mean'}, inplace=True)
    #filter other columns
    df_1mM_nitrate = df_1mM_nitrate[['Sample_old','Target','1mMnitrate_Cq_mean']]
    #remove duplicates from df_1mM_nitrate
    df_1mM_nitrate = df_1mM_nitrate.drop_duplicates()
    #merge the dfs
    df = pd.merge(df, df_1mM_nitrate, on=['Sample_old','Target'], how='left')

    #remove nan
    df = df.dropna()

    #remove NRT values
    df = df.loc[df['NRT'] == False]

    return df


In [429]:
#function to normalise the data based on a column of Cq values (either to housekeeping or based on nitrate or wild type plant)
def normalise_data(df, orig_col,normalisation_col, new_column_name):
   
    #normalise Cq values to the EF1a housekeeping gene mean Cq value for each sample
    df.loc[:,new_column_name] = df[orig_col] - df[normalisation_col]
    #remove nan values in the new column
    df = df[df[new_column_name].notna()]


    return df

In [430]:
#make plots
def make_plots(df):
    """function to make barplots of relative expression of each target gene in each plant line"""
    #first do inverse log transformation
    #(fold change of GOI in treated sample if delta delta Ct value  = X then relative expression  = 2 ( to the power of X))
    df['relative_expression'] = 2**(df['MeanCq_EC_1mM_nitrate_normalised'])
    print(df)


In [431]:
# if __name__ == "__main__" function
def main():
    csv_file = '../../data/CRISPR_library/qPCR/10.8.22_platelayout_19310threshold.csv'
    #read in file
    df = read_csv(csv_file)
    #filter out the data points with amp_status = Amp and cq above 32
    df = filter_data(df, 'Amp', 40)
    #sort the data
    df = sort_data(df)
    #normalise based on 1mM_nitrate Cq values
    df = normalise_data(df, 'MeanCq_ECnormalised','1mMnitrate_Cq_mean','MeanCq_EC_1mM_nitrate_normalised')
    #now filter columns
    df = df[['Sample','Target','MeanCq_EC_1mM_nitrate_normalised','condition']]
    #make plots
    make_plots(df)

    #print(df)
    #print(df)
    #print(df[df.NRT==True])
    # normalised_housekeeping = normalise_data(df, 'EF1a')
    # print(normalised_housekeeping)
    #normalised_housekeeping = normalise_data(df, 'EF1a')




In [432]:
if __name__ == "__main__":
    main()

    Sample Target  MeanCq_EC_1mM_nitrate_normalised     condition  \
0    125-4   NLP7                         -0.309717  10mM_nitrate   
1    125-4   NLP7                         -0.309717  10mM_nitrate   
2    125-4   NLP7                         -0.309717  10mM_nitrate   
3    125-4   ARF9                          0.590813  10mM_nitrate   
4    125-4   ARF9                          0.590813  10mM_nitrate   
..     ...    ...                               ...           ...   
299  142-8   NIR1                          0.000000   1mM_nitrate   
300  142-8   NIR1                          0.000000   1mM_nitrate   
301  142-8   EF1a                          0.000000   1mM_nitrate   
302  142-8   EF1a                          0.000000   1mM_nitrate   
303  142-8   EF1a                          0.000000   1mM_nitrate   

     relative_expression  
0               0.806800  
1               0.806800  
2               0.806800  
3               1.506096  
4               1.506096  
..       