In [66]:
import pandas as pd
import numpy as np

In [67]:
#tab-separated input files, one path per variable (plain strings, nothing is interpolated)
f1, f2, f3, f4 = (
    '../../data/luminescence/coexpression_for_manuscript/02.11.21+04.11.21.tsv',
    '../../data/luminescence/coexpression_for_manuscript/09.02.22.tsv',
    '../../data/luminescence/coexpression_for_manuscript/15.12.21.tsv',
    '../../data/luminescence/coexpression_for_manuscript/27.04.22.tsv',
)

In [68]:
#load every tab-separated file into its own dataframe; the first row holds the column names
df1, df2, df3, df4 = (
    pd.read_csv(path, sep='\t', header=0) for path in (f1, f2, f3, f4)
)

In [69]:
#function to filter TFs which aren't in the provided list of TFs
def filter_TFs(df,promoter,TFs):
    """Drop the rows of one promoter whose TF is not in an allowed list.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with at least the columns 'Promoter' and 'TF_added'.
    promoter : str
        Value of the 'Promoter' column whose rows should be filtered.
    TFs : iterable
        Values of 'TF_added' to keep for that promoter.

    Returns
    -------
    pandas.DataFrame
        A new dataframe; `df` itself is not modified. Rows belonging to
        other promoters are returned untouched, and the surviving rows
        keep their original index labels.
    """
    #a row survives if it belongs to another promoter or its TF is allowed
    other_promoter = df['Promoter'] != promoter
    allowed_TF = df['TF_added'].isin(list(TFs))
    #boolean indexing removes the unwanted rows outright; assigning a smaller
    #frame back onto the masked rows would only blank them to NaN in every
    #column (and fails when the index holds duplicate labels)
    #.copy() keeps the result independent of the input frame
    return df[other_promoter | allowed_TF].copy()

In [70]:
#function to remove string from end of promoter name (Promoter column) if it ends with '_35s:LucF_calibrator'
def remove_string_from_end(promoter):
    """Return the promoter name without a trailing '_35s:LucF_calibrator'.

    Names that do not end with that suffix are returned unchanged.
    """
    suffix = '_35s:LucF_calibrator'
    if not promoter.endswith(suffix):
        return promoter
    #keep everything in front of the suffix
    return promoter[:len(promoter) - len(suffix)]
def remove_string_from_end_promoter_col(df):
    """Strip a trailing '_35s:LucF_calibrator' from the values in df['Promoter'].

    The 'Promoter' column of `df` is overwritten in place and the same
    dataframe object is returned.
    """
    cleaned_names = df['Promoter'].apply(remove_string_from_end)
    df['Promoter'] = cleaned_names
    return df

In [71]:
#restrict every dataframe to the same set of columns, in this order
list_of_cols = [
    'Promoter',
    'TF_added',
    'fluc_luminescence',
    'nluc_luminescence',
    'nluc/fluc',
    'corrected_luminescence',
    'condition',
    'Well_calibrator',
    'date',
    'Plate_number',
    'well',
]
df1, df2, df3, df4 = (frame[list_of_cols] for frame in (df1, df2, df3, df4))

In [72]:
#filter out unwanted data points from the different dfs
#02.11.21+04.11.21
##drop the NOS and 35s rows; they are matched on their full names, so this runs before the suffix is stripped
df1 = df1[~df1['Promoter'].isin([
    'NOS',
    'NOS_calibrator_35s:LucF_calibrator',
    '35s_35s:LucF_calibrator',
])]
##strip a trailing '_35s:LucF_calibrator' from the remaining promoter names
df1 = remove_string_from_end_promoter_col(df1)

In [77]:
#ANAC032 promoter
#keep the ARF treatments, DREB26, ANR1 and ANAC032 on the ANAC032 promoter
#NOTE(review): bZIP3 is also in the keep-list although the earlier comment did not mention it - confirm this is intended
df1 = filter_TFs(
    df=df1,
    promoter='ANAC032',
    TFs=[
        'bZIP3', 'DREB26', 'ANR1', 'ANAC032',
        'ARF18', 'ARF9', 'ARF9_1μM_NAA',
        'ARF18+ARF9', 'ARF18+ARF9_1μM_NAA',
        'ARF18+IAA33_1μM_NAA', 'ARF18+IAA33', 'ARF18_1μM_NAA',
    ],
)

In [None]:
#ANR1 promoter
#keep the ARF treatments, ANR1, DREB26, bZIP3 and HHO2 on the ANR1 promoter
df1 = filter_TFs(
    df=df1,
    promoter='ANR1',
    TFs=[
        'bZIP3', 'DREB26', 'ANR1',
        'ARF18', 'ARF9', 'ARF9_1μM_NAA',
        'ARF18+ARF9', 'ARF18+ARF9_1μM_NAA',
        'ARF18+IAA33_1μM_NAA', 'ARF18+IAA33', 'ARF18_1μM_NAA',
        'HHO2',
    ],
)

In [None]:
#ARF18 promoter
#keep the ARF treatments, DREB26, ANR1, ANAC032 and bZIP3 on the ARF18 promoter
df1 = filter_TFs(
    df=df1,
    promoter='ARF18',
    TFs=[
        'bZIP3', 'DREB26', 'ANR1', 'ANAC032',
        'ARF18', 'ARF9', 'ARF9_1μM_NAA',
        'ARF18+ARF9', 'ARF18+ARF9_1μM_NAA',
        'ARF18+IAA33_1μM_NAA', 'ARF18+IAA33', 'ARF18_1μM_NAA',
    ],
)

In [None]:
#DREB26 promoter
#keep DREB26, ANR1, bZIP3, ANAC032 and HHO2 on the DREB26 promoter
df1 = filter_TFs(
    df=df1,
    promoter='DREB26',
    TFs=['bZIP3', 'DREB26', 'ANR1', 'ANAC032', 'HHO2'],
)