In [172]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [173]:
#function to read in csv file as pandas df
def read_csv(file_name):
    df = pd.read_csv(file_name)
    #keep the Well, Sample, Target, Cq and Amp Status columns
    df = df[['Well', 'Sample', 'Target', 'Cq', 'Amp Status']]
    
    return df

In [174]:
#function to filter out certain data points
def filter_data(df, amp_status, cq):
    #filter out the data points with amp_status = Amp using .loc
    df = df.loc[df['Amp Status'] == amp_status]
    #make Cq column numerical
    dfcopy = df.copy()
    dfcopy['Cq'] = pd.to_numeric(dfcopy['Cq'])  
    #filter out the data points with cq < cq_threshold using .loc
    dfcopy = dfcopy.loc[dfcopy['Cq'] <= cq]

    return dfcopy

In [180]:
#function to make new columns and sort the data
def sort_data(df):
    #make new column called EF1a_Cq, make the value in this column for a particular sample equal to the Cq value for the EF1a Target for that sample



    #make a df containing only EF1a target (housekeeping gene)
    df_EF1a = df.loc[df['Target'] == 'EF1a']
    #make df EF1a_Cq Column that is equal to the Cq value for the EF1a target for each sample
    df_EF1a.loc[:,'EF1a_Cq'] = df_EF1a['Cq']
    #df_EF1a['EF1a_Cq'] = df_EF1a['Cq']
    #remove Cq column from df_EF1a
    df_EF1a = df_EF1a.drop(['Cq'], axis=1)
    #make empty EF1a_Cq column in df
    df['EF1a_Cq'] = np.nan
    #merge the two dfs together
    df = pd.merge(df, df_EF1a, on=['Sample'])
    print(df)



    #df.loc[df, 'EF1a_Cq'] = df_EF1a['Cq']



    
    #if Sample column ends with NRT, add NRT_Cq column
    df['NRT_Cq'] = False
    df.loc[df['Sample'].str.endswith('NRT'), 'NRT'] = True
    #remove NRT string from Sample columns ending with NRT
    df['Sample'] = df['Sample'].str.replace('NRT', '')
    
    #if Sample column ends with H, add condition column with 10mM_nitrate
    df['condition'] = np.nan
    df.loc[df['Sample'].str.endswith('H'), 'condition'] = '10mM_nitrate'
    #remove H string from Sample columns ending with H
    df['Sample'] = df['Sample'].str.replace('H', '')

    #if Sample column ends with L, add condition column with 1mM_nitrate
    df.loc[df['Sample'].str.endswith('L'), 'condition'] = '1mM_nitrate'
    #remove L string from Sample columns ending with L
    df['Sample'] = df['Sample'].str.replace('L', '')
    #remove A, B or C string from Sample columns ending with A, B or C
    df['Sample'] = df['Sample'].str.replace('A', '')
    df['Sample'] = df['Sample'].str.replace('B', '')
    df['Sample'] = df['Sample'].str.replace('C', '')
    #remove whitespace from Sample columns
    df['Sample'] = df['Sample'].str.strip()
    
    return df


In [181]:
#function to normalise the data to the EF1a housekeeping gene Target Cq value for each sample
def normalise_data(df, target):
    #make a copy of the dataframe
    dfcopy = df.copy()
    #calculate Endogenous Control Mean for each sample
    #make a filter for the target gene
    housekeeping = dfcopy.loc[dfcopy['Target'] == target]
    #make copy of housekeeping
    housekeeping_copy = housekeeping.copy()

    housekeeping['EC Mean'] = housekeeping_copy.groupby(['Target','Sample','condition'])['Cq'].transform('mean')
    #print(housekeeping)
    
    #not housekeeping df
    not_housekeeping = dfcopy.loc[dfcopy['Target'] != target]
    # #add EC mean for each sample to the dataframe
    # not_housekeeping.groupby(['Sample'])['EC Mean'] = housekeeping.groupby(['Sample'])['EC Mean']




    #normalise the data to the EC Mean grouped by Target and sample

    # dfcopy['Normalised_Cq']

    # dfcopy['Normalised_Cq'] = dfcopy['Cq'] / housekeeping['EC Mean']
    





    # #for each Sample name, get the mean Cq for the EF1a target
    # dfcopy['Cq'] = dfcopy.groupby('Sample','EF1a')['Cq'].transform('mean')
    # #filter out the data points with target != target_threshold using .loc
    # dfcopy = dfcopy.loc[dfcopy['Target'] == target]
    # #make the Cq column numerical
    # dfcopy['Cq'] = pd.to_numeric(dfcopy['Cq'])
    # #make a new column called Normalised Cq
    # dfcopy['Normalised Cq'] = dfcopy['Cq']/dfcopy['Target']
    
    return not_housekeeping

In [182]:
# #function to normalise the data to the EF1a housekeeping gene Target Cq value
# def normalise_data(df, target):
#     #make a copy of the dataframe
#     dfcopy = df.copy()
#     #make Cq column numerical
#     dfcopy['Cq'] = pd.to_numeric(dfcopy['Cq'])
#     #filter out the data points with target = target using .loc
#     dfcopy = dfcopy.loc[dfcopy['Target'] == target]
#     #get the mean of the Cq values for the filtered data points
#     mean = dfcopy['Cq'].mean()
#     #divide the Cq values by the mean
#     dfcopy['Cq'] = dfcopy['Cq'] / mean
#     #return the dataframe
#     return dfcopy


In [183]:
# if __name__ == "__main__" function
def main():
    csv_file = '../../data/CRISPR_library/qPCR/10.8.22_platelayout_19310threshold.csv'
    #read in file
    df = read_csv(csv_file)
    #filter out the data points with amp_status = Amp and cq above 32
    df = filter_data(df, 'Amp', 32)
    #sort the data
    df = sort_data(df)
    #print(df)
    #print(df)
    #print(df[df.NRT==True])
    # normalised_housekeeping = normalise_data(df, 'EF1a')
    # print(normalised_housekeeping)
    normalised_housekeeping = normalise_data(df, 'EF1a')




In [184]:
if __name__ == "__main__":
    main()

    Well_x   Sample Target_x         Cq Amp Status_x  EF1a_Cq_x Well_y  \
0       A1  125-4AH     NLP7  27.376372          Amp        NaN     B1   
1       A1  125-4AH     NLP7  27.376372          Amp        NaN     B2   
2       A1  125-4AH     NLP7  27.376372          Amp        NaN     B3   
3       A2  125-4AH     NLP7  26.662845          Amp        NaN     B1   
4       A2  125-4AH     NLP7  26.662845          Amp        NaN     B2   
..     ...      ...      ...        ...          ...        ...    ...   
847    P14  142-8CL     EF1a  23.558375          Amp        NaN    P14   
848    P14  142-8CL     EF1a  23.558375          Amp        NaN    P15   
849    P15  142-8CL     EF1a  23.855986          Amp        NaN    P13   
850    P15  142-8CL     EF1a  23.855986          Amp        NaN    P14   
851    P15  142-8CL     EF1a  23.855986          Amp        NaN    P15   

    Target_y Amp Status_y  EF1a_Cq_y  
0       EF1a          Amp  22.163446  
1       EF1a          Amp  21.993

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_EF1a.loc[:,'EF1a_Cq'] = df_EF1a['Cq']


KeyError: 'Target'