In [35]:
import pandas as pd
import numpy as np

In [36]:
# Input files: four tab-separated tables whose names look like experiment dates.
# The shared directory is written once so the four paths cannot drift apart.
coexpression_dir = '../../data/luminescence/coexpression_for_manuscript'
f1 = f'{coexpression_dir}/02.11.21+04.11.21.tsv'
f2 = f'{coexpression_dir}/15.12.21.tsv'
f3 = f'{coexpression_dir}/09.02.22.tsv'
f4 = f'{coexpression_dir}/27.04.22.tsv'

In [37]:
# Load each tab-separated file into its own DataFrame; row 0 of every file
# supplies the column names.
df1, df2, df3, df4 = (
    pd.read_csv(path, sep='\t', header=0) for path in (f1, f2, f3, f4)
)

In [38]:
#function to filter TFs which aren't in the provided list of TFs
def filter_TFs(df,promoter,TFs):
    """Drop rows of one promoter whose TF is neither in ``TFs`` nor a 'No TF' control.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``Promoter`` and ``TF_added``. It is not modified.
    promoter : str
        Value of ``Promoter`` whose rows are filtered. Rows of every other
        promoter are not filtered by TF.
    TFs : list of str
        ``TF_added`` values to keep for ``promoter``. The controls 'No TF' and
        'No TF_1μM_NAA' are always kept as well. The list is not modified.

    Returns
    -------
    pandas.DataFrame
        A filtered copy of ``df``. Rows holding NaN in any column are dropped
        as well, for every promoter; this is kept from the earlier
        implementation so existing results do not change.

    Notes
    -----
    Prints the TFs found for ``promoter`` and the subset of ``TFs`` present.
    Rows are selected with a boolean mask rather than by writing NaN into the
    unwanted rows, so column dtypes are preserved and frames with repeated
    index labels are handled.
    """
    #rows belonging to the promoter being filtered
    is_promoter = df.Promoter==promoter
    #get list of TFs in df
    df_TFs = df.loc[is_promoter,'TF_added'].values.tolist()
    print(f'TFs={df_TFs}')
    #get list of TFs that were present in the provided list of TFs
    TFs_in_df = [x for x in TFs if x in df_TFs]
    print(f'TFs_in_df={TFs_in_df}')
    #the "No TF" controls are always kept
    TFs_in_df.append('No TF')
    TFs_in_df.append('No TF_1μM_NAA')
    #keep every row of the other promoters, and the allowed TFs of this promoter
    keep = ~is_promoter | df.TF_added.isin(TFs_in_df)
    df_copy = df[keep].copy()
    #remove rows containing nan in any column
    df_copy = df_copy.dropna()
    return df_copy

In [39]:
#function to remove string from end of promoter name (Promoter column) if it ends with '_35s:LucF_calibrator'
def remove_string_from_end(string,to_remove='_35s:LucF_calibrator'):
    """Return ``string`` without a trailing ``to_remove``.

    Parameters
    ----------
    string : str
        Value to trim. Anything that is not a ``str`` is returned unchanged.
    to_remove : str
        Suffix to strip, e.g. '_35s:LucF_calibrator'. An empty suffix strips nothing.

    Returns
    -------
    ``string`` with one trailing ``to_remove`` removed if it ended with it,
    otherwise ``string`` as given.
    """
    #non-string values (e.g. the float NaN pandas uses for missing cells) have no .endswith
    if not isinstance(string, str):
        return string
    #guard the empty suffix: every string "ends with" '' and string[:-0] would be ''
    if to_remove and string.endswith(to_remove):
        string = string[:len(string)-len(to_remove)]
    return string
def remove_string_from_end_promoter_col(df):
    """Return a copy of ``df`` whose ``Promoter`` values have a trailing '_35s:LucF_calibrator' removed.

    Each value of the ``Promoter`` column is passed through
    ``remove_string_from_end`` with its default suffix.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``Promoter`` column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with the trimmed ``Promoter`` column; other columns are unchanged.
    """
    #assign() builds a new frame instead of writing into df, so a filtered
    #slice passed in here is neither mutated nor flagged with SettingWithCopyWarning
    return df.assign(Promoter=df['Promoter'].apply(remove_string_from_end))

In [40]:
#function to remove string from end of TF_added column if it ends with '_1μM_NAA'
def remove_string_from_end_TF_added_col(df):
    """Return a copy of ``df`` whose ``TF_added`` values have a trailing '_1μM_NAA' removed.

    Each value of the ``TF_added`` column is passed through
    ``remove_string_from_end`` with ``to_remove='_1μM_NAA'``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a ``TF_added`` column. It is not modified.

    Returns
    -------
    pandas.DataFrame
        New frame with the trimmed ``TF_added`` column; other columns are unchanged.
    """
    #assign() builds a new frame instead of writing into df, so a filtered
    #slice passed in here is neither mutated nor flagged with SettingWithCopyWarning
    trimmed = df['TF_added'].apply(lambda x: remove_string_from_end(x,to_remove='_1μM_NAA'))
    return df.assign(TF_added=trimmed)

In [41]:
# Restrict every table to the same columns, in the same order.
list_of_cols = [
    'Promoter',
    'TF_added',
    'fluc_luminescence',
    'nluc_luminescence',
    'nluc/fluc',
    'corrected_luminescence',
    'condition',
    'Well_calibrator',
    'date',
    'Plate_number',
    'well',
]
df1, df2, df3, df4 = (frame[list_of_cols] for frame in (df1, df2, df3, df4))

In [42]:
# Independent snapshot of df4 taken before df4 is filtered further down;
# it is used later for the ablated-promoter table.
df4_copy = df4.copy(deep=True)


In [43]:
# 02.11.21+04.11.21 table (df1): remove unwanted rows.
# Strip a trailing '_35s:LucF_calibrator' from the promoter names first, so the
# exclusion list below is matched against the trimmed names.
df1 = remove_string_from_end_promoter_col(df1)
# Promoters that are not wanted from this table.
df1 = df1[~df1['Promoter'].isin(['NOS', 'NOS_calibrator', '35s'])]
# Drop rows where the well calibrator is NOS_LucF and the added TF is ANAC032
# (i.e. keep a row when either of the two conditions fails).
df1 = df1.loc[(df1['Well_calibrator'] != 'NOS_LucF') | (df1['TF_added'] != 'ANAC032')]

In [44]:
# ANAC032 promoter: keep the ARF combinations, DREB26, ANR1, bZIP3 and ANAC032
# (filter_TFs additionally keeps the 'No TF' controls).
df1 = filter_TFs(
    df=df1,
    promoter='ANAC032',
    TFs=[
        'bZIP3',
        'DREB26',
        'ANR1',
        'ANAC032',
        'ARF18',
        'ARF9',
        'ARF9_1μM_NAA',
        'ARF18+ARF9',
        'ARF18+ARF9_1μM_NAA',
        'ARF9+IAA33',
        'ARF9+IAA33_1μM_NAA',
        'ARF18+ARF9+IAA33',
        'ARF18+ARF9+IAA33_1μM_NAA',
        'ARF18+IAA33_1μM_NAA',
        'ARF18+IAA33',
        'ARF18_1μM_NAA',
    ],
)

TFs=['ANR1', 'ARF9', 'ARF9', 'ARF9', 'ARF9_1μM_NAA', 'ANR1', 'ARF9_1μM_NAA', 'ARF9_1μM_NAA', 'ANR1', 'ARF18+ARF9', 'ARF18+ARF9', 'ARF18+ARF9', 'No TF nor YFP', 'ARF18+IAA33_1μM_NAA', 'NLP6', 'NLP6', 'NLP6', 'ARF18', 'ARF18+IAA33_1μM_NAA', 'ARF18', 'No TF nor YFP_1μM_NAA', 'No TF nor YFP_1μM_NAA', 'No TF nor YFP_1μM_NAA', 'bZIP3', 'HHO2', 'HHO2', 'ARF18+ARF9_1μM_NAA', 'ARF18+ARF9_1μM_NAA', 'ARF18+ARF9_1μM_NAA', 'DREB26', 'DREB26', 'DREB26', 'No TF nor YFP', 'No TF nor YFP', 'HHO2', 'bZIP3', 'ARF18', 'No TF', 'ARF18_1μM_NAA', 'No TF_1μM_NAA', 'No TF_1μM_NAA', 'No TF_1μM_NAA', 'bZIP3', 'NLP7', 'NLP7', 'TGA1', 'TGA1', 'ARF18_1μM_NAA', 'TGA1', 'ARF18+IAA33', 'ARF18+IAA33_1μM_NAA', 'ARF18+IAA33', 'ARF18+IAA33', 'No TF', 'No TF', 'NLP7', 'ARF18_1μM_NAA', 'ANAC032', 'No TF_1μM_NAA', 'NLP6', 'NLP6', 'NLP6', 'No TF nor YFP', 'No TF nor YFP', 'No TF nor YFP', 'No TF_1μM_NAA', 'NLP7', 'NLP7', 'NLP7', 'No TF nor YFP_1μM_NAA', 'No TF nor YFP_1μM_NAA', 'No TF', 'No TF', 'No TF', 'ANAC032', 'ANAC032',

In [45]:
# ANR1 promoter: keep the ARF combinations, ANR1, DREB26, bZIP3 and HHO2
# (filter_TFs additionally keeps the 'No TF' controls).
df1 = filter_TFs(
    df=df1,
    promoter='ANR1',
    TFs=[
        'bZIP3',
        'DREB26',
        'ANR1',
        'ARF18',
        'ARF9',
        'ARF9_1μM_NAA',
        'ARF18+ARF9',
        'ARF18+ARF9_1μM_NAA',
        'ARF18+IAA33_1μM_NAA',
        'ARF18+IAA33',
        'ARF18_1μM_NAA',
        'HHO2',
        'ARF9+IAA33',
        'ARF9+IAA33_1μM_NAA',
        'ARF18+ARF9+IAA33',
        'ARF18+ARF9+IAA33_1μM_NAA',
    ],
)

TFs=['NLP7', 'ANR1', 'NLP7', 'ARF18_1μM_NAA', 'NLP7', 'ARF18+IAA33_1μM_NAA', 'ARF18+IAA33_1μM_NAA', 'ARF18_1μM_NAA', 'ARF9', 'ARF9', 'ARF9', 'ARF9_1μM_NAA', 'ARF9_1μM_NAA', 'ARF9_1μM_NAA', 'ARF18+ARF9', 'ARF18+ARF9', 'ANR1', 'ANR1', 'ARF18+IAA33_1μM_NAA', 'ARF18_1μM_NAA', 'ARF18+ARF9', 'No TF_1μM_NAA', 'No TF nor YFP_1μM_NAA', 'No TF nor YFP_1μM_NAA', 'No TF nor YFP_1μM_NAA', 'HHO2', 'HHO2', 'HHO2', 'ARF18+ARF9_1μM_NAA', 'ARF18+ARF9_1μM_NAA', 'DREB26', 'DREB26', 'DREB26', 'No TF nor YFP', 'No TF nor YFP', 'No TF nor YFP', 'No TF_1μM_NAA', 'ARF18', 'ARF18', 'ARF18+ARF9_1μM_NAA', 'NLP6', 'No TF_1μM_NAA', 'TGA1', 'TGA1', 'ARF18+IAA33', 'ARF18+IAA33', 'No TF', 'ARF18+IAA33', 'TGA1', 'No TF', 'bZIP3', 'bZIP3', 'bZIP3', 'NLP6', 'NLP6', 'No TF', 'ARF18', 'No TF_1μM_NAA', 'No TF_1μM_NAA', 'No TF_1μM_NAA', 'ANAC032', 'ANAC032', 'ANAC032', 'No TF', 'NLP6', 'No TF', 'NLP6', 'No TF', 'NLP6', 'No TF nor YFP', 'No TF nor YFP', 'No TF nor YFP', 'NLP7', 'NLP7', 'No TF nor YFP_1μM_NAA', 'No TF nor YFP_

In [46]:
# ARF18 promoter: keep the ARF combinations, DREB26, ANR1, ANAC032 and bZIP3
# (filter_TFs additionally keeps the 'No TF' controls).
df1 = filter_TFs(
    df=df1,
    promoter='ARF18',
    TFs=[
        'bZIP3',
        'DREB26',
        'ANR1',
        'ANAC032',
        'ARF18',
        'ARF9',
        'ARF9_1μM_NAA',
        'ARF18+ARF9',
        'ARF18+ARF9_1μM_NAA',
        'ARF18+IAA33_1μM_NAA',
        'ARF18+IAA33',
        'ARF18_1μM_NAA',
        'ARF9+IAA33',
        'ARF9+IAA33_1μM_NAA',
        'ARF18+ARF9+IAA33',
        'ARF18+ARF9+IAA33_1μM_NAA',
    ],
)

TFs=['ARF18+IAA33_1μM_NAA', 'dCpf1_repressor2', 'ANR1', 'ANR1', 'ARF18+IAA33_1μM_NAA', 'dCpf1_repressor2', 'ARF18+IAA33_1μM_NAA', 'dCpf1_repressor1', 'ARF18_1μM_NAA', 'dCpf1_repressor1', 'ARF18_1μM_NAA', 'ARF18_1μM_NAA', 'No TF_1μM_NAA', 'No TF_1μM_NAA', 'No TF_1μM_NAA', 'dCas9_activator3', 'dCpf1_repressor2', 'dCpf1_repressor1', 'ARF18+ARF9', 'bZIP3', 'ARF18+ARF9', 'TGA1', 'TGA1', 'TGA1', 'NLP6', 'NLP6', 'NLP6', 'dCas9_activator3', 'bZIP3', 'ARF18+ARF9', 'bZIP3', 'ARF18+ARF9_1μM_NAA', 'ARF18+ARF9_1μM_NAA', 'dCpf1_repressor3', 'dCpf1_repressor3', 'dCpf1_repressor3', 'HHO2', 'HHO2', 'HHO2', 'ARF18+ARF9_1μM_NAA', 'dCas9_activator3', 'ANR1', 'ARF18', 'DREB26', 'dCas9_activator1', 'No TF nor YFP_1μM_NAA', 'No TF nor YFP_1μM_NAA', 'No TF nor YFP_1μM_NAA', 'No TF nor YFP', 'ARF18', 'ARF18', 'NLP7', 'NLP7', 'DREB26', 'DREB26', 'dCas9_activator1', 'No TF nor YFP', 'dCas9_activator2', 'No TF nor YFP', 'ARF18+IAA33', 'ARF18+IAA33', 'ARF18+IAA33', 'NLP7', 'No TF', 'No TF', 'dCas9_activator2', 'dC

In [47]:
# DREB26 promoter: keep DREB26, ANR1, bZIP3, ANAC032 and HHO2
# (filter_TFs additionally keeps the 'No TF' controls).
df1 = filter_TFs(
    df=df1,
    promoter='DREB26',
    TFs=[
        'bZIP3',
        'DREB26',
        'ANR1',
        'ANAC032',
        'HHO2',
    ],
)

TFs=['ARF18_1μM_NAA', 'ARF18_1μM_NAA', 'ARF18+ARF9_1μM_NAA', 'ARF18+ARF9_1μM_NAA', 'ARF18+ARF9_1μM_NAA', 'TGA1', 'ANR1', 'ANR1', 'No TF_1μM_NAA', 'No TF_1μM_NAA', 'No TF_1μM_NAA', 'ARF18+IAA33', 'ARF18+IAA33', 'ARF18+IAA33', 'ARF18+ARF9+IAA33', 'ARF18+ARF9+IAA33', 'ANR1', 'ARF18_1μM_NAA', 'ARF18+ARF9', 'No TF', 'ARF9+IAA33_1μM_NAA', 'DREB26', 'DREB26', 'No TF nor YFP_1μM_NAA', 'No TF nor YFP_1μM_NAA', 'No TF nor YFP_1μM_NAA', 'ARF18', 'No TF', 'ARF18', 'ARF18+ARF9', 'ARF18+ARF9+IAA33', 'ARF18+ARF9', 'DREB26', 'No TF nor YFP', 'No TF nor YFP', 'No TF', 'ARF18', 'HHO2', 'ARF9+IAA33', 'HHO2', 'TGA1', 'NLP6', 'NLP6', 'NLP6', 'ARF9', 'ARF9', 'ARF9', 'ARF9_1μM_NAA', 'bZIP3', 'bZIP3', 'ARF9_1μM_NAA', 'HHO2', 'NLP7', 'bZIP3', 'ARF9_1μM_NAA', 'ARF18+ARF9+IAA33_1μM_NAA', 'No TF nor YFP', 'TGA1', 'ARF18+IAA33_1μM_NAA', 'ARF18+IAA33_1μM_NAA', 'ARF18+IAA33_1μM_NAA', 'ARF18+ARF9+IAA33_1μM_NAA', 'ARF18+ARF9+IAA33_1μM_NAA', 'ARF9+IAA33_1μM_NAA', 'NLP7', 'ARF9+IAA33', 'ARF9+IAA33', 'ARF9+IAA33_1μM_NAA'

In [48]:
# NIR1 promoter: keep DREB26, ANAC032, ANR1, the listed ARF combinations, bZIP3 and HHO2
# (filter_TFs additionally keeps the 'No TF' controls).
df1 = filter_TFs(
    df=df1,
    promoter='NIR1',
    TFs=[
        'bZIP3',
        'DREB26',
        'ANR1',
        'ANAC032',
        'ARF18',
        'ARF9',
        'ARF9_1μM_NAA',
        'ARF18+ARF9',
        'ARF18+ARF9_1μM_NAA',
        'ARF18+IAA33_1μM_NAA',
        'ARF18+IAA33',
        'ARF18_1μM_NAA',
        'HHO2',
    ],
)

TFs=['No TF', 'ARF18_1μM_NAA', 'No TF', 'ARF18_1μM_NAA', 'ARF18+ARF9', 'ARF18+ARF9', 'No TF', 'ARF18+ARF9_1μM_NAA', 'ARF18_1μM_NAA', 'ARF9+IAA33_1μM_NAA', 'ARF18', 'DREB26', 'DREB26', 'DREB26', 'No TF nor YFP', 'No TF nor YFP', 'ARF9+IAA33_1μM_NAA', 'ARF18+ARF9', 'ARF18+ARF9_1μM_NAA', 'ARF9+IAA33_1μM_NAA', 'No TF nor YFP_1μM_NAA', 'No TF nor YFP_1μM_NAA', 'No TF nor YFP_1μM_NAA', 'ARF18', 'ARF18', 'No TF nor YFP', 'ARF18+ARF9_1μM_NAA', 'ARF18+IAA33_1μM_NAA', 'NLP6', 'ARF9', 'ARF18+IAA33', 'ARF18+IAA33', 'ARF18+ARF9+IAA33', 'bZIP3', 'bZIP3', 'bZIP3', 'ARF18+ARF9+IAA33_1μM_NAA', 'ARF18+ARF9+IAA33_1μM_NAA', 'ARF18+ARF9+IAA33_1μM_NAA', 'ARF18+IAA33_1μM_NAA', 'ARF18+IAA33_1μM_NAA', 'HHO2', 'HHO2', 'ARF18+ARF9+IAA33', 'ARF18+ARF9+IAA33', 'HHO2', 'ARF9', 'NLP6', 'ARF9', 'No TF_1μM_NAA', 'NLP7', 'NLP7', 'NLP7', 'ARF18+IAA33', 'ARF9+IAA33', 'ARF9+IAA33', 'ARF9+IAA33', 'ARF9_1μM_NAA', 'ANR1', 'ARF9_1μM_NAA', 'No TF_1μM_NAA', 'ARF9_1μM_NAA', 'TGA1', 'TGA1', 'TGA1', 'ANR1', 'NLP6', 'No TF_1μM_NAA'

In [49]:
# NLP6 promoter: keep ANR1, the listed ARF combinations, bZIP3 and HHO2
# (filter_TFs additionally keeps the 'No TF' controls).
df1 = filter_TFs(
    df=df1,
    promoter='NLP6',
    TFs=[
        'bZIP3',
        'ANR1',
        'ARF18',
        'ARF9',
        'ARF9_1μM_NAA',
        'ARF18+ARF9',
        'ARF18+ARF9_1μM_NAA',
        'ARF18+IAA33_1μM_NAA',
        'ARF18+IAA33',
        'ARF18_1μM_NAA',
        'HHO2',
    ],
)

TFs=['ARF18+ARF9_1μM_NAA', 'No TF nor YFP_1μM_NAA', 'HHO2', 'HHO2', 'HHO2', 'ARF18+ARF9_1μM_NAA', 'No TF nor YFP', 'DREB26', 'DREB26', 'DREB26', 'No TF nor YFP', 'No TF nor YFP', 'No TF nor YFP_1μM_NAA', 'ARF18+ARF9_1μM_NAA', 'No TF nor YFP_1μM_NAA', 'ARF18', 'NLP7', 'NLP7', 'ARF18+IAA33_1μM_NAA', 'ARF18+IAA33_1μM_NAA', 'ARF18+IAA33_1μM_NAA', 'ANR1', 'ANR1', 'ANR1', 'ARF18+ARF9', 'ARF18+ARF9', 'ARF18+ARF9', 'ARF9', 'ARF9', 'ARF9', 'ARF9_1μM_NAA', 'ARF9_1μM_NAA', 'ARF18', 'NLP7', 'ARF18_1μM_NAA', 'ARF9_1μM_NAA', 'ARF18_1μM_NAA', 'ARF18_1μM_NAA', 'NLP6', 'NLP6', 'ARF18', 'bZIP3', 'bZIP3', 'bZIP3', 'No TF', 'No TF', 'No TF', 'NLP6', 'ARF18+IAA33', 'No TF_1μM_NAA', 'ARF18+IAA33', 'No TF_1μM_NAA', 'No TF_1μM_NAA', 'TGA1', 'TGA1', 'TGA1', 'ARF18+IAA33', 'NLP6', 'No TF', 'NLP6', 'NLP6', 'No TF', 'No TF nor YFP', 'No TF nor YFP', 'No TF', 'ANAC032', 'ANAC032', 'No TF_1μM_NAA', 'No TF_1μM_NAA', 'No TF_1μM_NAA', 'ANAC032', 'No TF nor YFP_1μM_NAA', 'No TF nor YFP_1μM_NAA', 'NLP7', 'NLP7', 'NLP7',

In [50]:
# NLP7 promoter: keep the listed ARF combinations, ANR1, DREB26, bZIP3 and HHO2
# (filter_TFs additionally keeps the 'No TF' controls).
df1 = filter_TFs(
    df=df1,
    promoter='NLP7',
    TFs=[
        'bZIP3',
        'DREB26',
        'ANR1',
        'ARF18',
        'ARF9',
        'ARF9_1μM_NAA',
        'ARF18+ARF9',
        'ARF18+ARF9_1μM_NAA',
        'ARF18+IAA33_1μM_NAA',
        'ARF18+IAA33',
        'ARF18_1μM_NAA',
        'HHO2',
    ],
)

TFs=['ARF18_1μM_NAA', 'ARF18+ARF9+IAA33', 'ARF18+IAA33', 'ARF18+IAA33', 'ARF18+IAA33', 'No TF_1μM_NAA', 'No TF_1μM_NAA', 'No TF_1μM_NAA', 'ANR1', 'ARF18_1μM_NAA', 'ANR1', 'ARF18+ARF9_1μM_NAA', 'ARF18+ARF9_1μM_NAA', 'ARF18+ARF9_1μM_NAA', 'ARF18_1μM_NAA', 'ANR1', 'ARF18+ARF9+IAA33', 'ARF18+IAA33_1μM_NAA', 'HHO2', 'ARF9', 'No TF', 'NLP6', 'bZIP3', 'bZIP3', 'bZIP3', 'ARF18+ARF9+IAA33_1μM_NAA', 'ARF18+ARF9+IAA33', 'ARF18+ARF9+IAA33_1μM_NAA', 'ARF18+IAA33_1μM_NAA', 'ARF18+IAA33_1μM_NAA', 'NLP6', 'NLP6', 'TGA1', 'HHO2', 'HHO2', 'ARF18+ARF9+IAA33_1μM_NAA', 'No TF', 'DREB26', 'TGA1', 'ARF9', 'NLP7', 'NLP7', 'NLP7', 'ARF9+IAA33', 'ARF9+IAA33', 'No TF nor YFP', 'No TF nor YFP', 'No TF nor YFP', 'DREB26', 'No TF', 'ARF9+IAA33_1μM_NAA', 'ARF9+IAA33_1μM_NAA', 'ARF9+IAA33_1μM_NAA', 'DREB26', 'ARF9_1μM_NAA', 'TGA1', 'ARF9_1μM_NAA', 'ARF18+ARF9', 'ARF9+IAA33', 'ARF18+ARF9', 'ARF18+ARF9', 'ARF9', 'ARF18', 'ARF18', 'No TF nor YFP_1μM_NAA', 'No TF nor YFP_1μM_NAA', 'No TF nor YFP_1μM_NAA', 'ARF9_1μM_NAA',

In [51]:
# 15.12.21 table (df2): remove unwanted rows.
# Strip a trailing '_35s:LucF_calibrator' from the promoter names first. The
# exclusion list is matched against the trimmed names, so the
# '..._35s:LucF_calibrator' spellings need no entries of their own.
df2 = remove_string_from_end_promoter_col(df2)
# Promoters that are not wanted from this table.
df2 = df2[~df2['Promoter'].isin([
    'ANR1',
    'NIR1',
    'NOS',
    '4x[bZIP3-random]:NLUC:YFP',
    '4x[NRE-TATA]+spacing:NLUC:YFP',
    'NRP',
    '35s:NLUC:YFP',
    '35s:NLUC:YFP_nomarker',
    '4x[NRE-TATA]+spacing',
    '35s',
    'NOS_calibrator',
    'NRP:NLUC:YFP',
    'NIR1:NLUC:YFP',
])]
# Drop rows where the well calibrator is NOS_LucF and the added TF is ANAC032
# (i.e. keep a row when either of the two conditions fails).
df2 = df2.loc[(df2['Well_calibrator'] != 'NOS_LucF') | (df2['TF_added'] != 'ANAC032')]


In [52]:
# TFs to keep per promoter in the 15.12.21 table, applied in this order
# (filter_TFs additionally keeps the 'No TF' controls):
#   ANAC032 <- HHO2
#   DREB26  <- the ARF combinations
#   NLP6    <- DREB26 and ANAC032
#   NLP7    <- ANAC032
#   ARF18   <- HHO2
for _promoter, _kept_tfs in [
    ('ANAC032', ['HHO2']),
    ('DREB26', [
        'ARF18',
        'ARF9',
        'ARF9_1μM_NAA',
        'ARF18+ARF9',
        'ARF18+ARF9_1μM_NAA',
        'ARF18+IAA33_1μM_NAA',
        'ARF18+IAA33',
        'ARF18_1μM_NAA',
        'ARF9+IAA33',
        'ARF9+IAA33_1μM_NAA',
        'ARF18+ARF9+IAA33',
        'ARF18+ARF9+IAA33_1μM_NAA',
    ]),
    ('NLP6', ['DREB26', 'ANAC032']),
    ('NLP7', ['ANAC032']),
    ('ARF18', ['HHO2']),
]:
    df2 = filter_TFs(df2, _promoter, _kept_tfs)

TFs=['No TF', 'TGA1', 'HHO2', 'TGA1', 'HHO2', 'HHO2', 'No TF', 'No TF', 'TGA1']
TFs_in_df=['HHO2']
TFs=['TGA1', 'No TF', 'No TF', 'TGA1', 'ARF9_1μM_NAA', 'ARF9_1μM_NAA', 'ARF9_1μM_NAA', 'No TF_1μM_NAA', 'No TF_1μM_NAA', 'No TF_1μM_NAA', 'ARF9+IAA33', 'ARF9+IAA33', 'ARF9+IAA33', 'No TF', 'TGA1', 'ARF9', 'ARF18', 'ARF9', 'ARF18', 'ARF18', 'ARF18+ARF9', 'ARF18+ARF9', 'ARF18+ARF9', 'ARF18_1μM_NAA', 'ARF18_1μM_NAA', 'ARF18_1μM_NAA', 'ARF18+ARF9_1μM_NAA', 'ARF18+ARF9_1μM_NAA', 'ARF18+ARF9_1μM_NAA', 'ARF9+IAA33_1μM_NAA', 'ARF9+IAA33_1μM_NAA', 'ARF18+IAA33', 'ARF18+IAA33', 'ARF9', 'ARF18+ARF9+IAA33_1μM_NAA', 'ARF18+ARF9+IAA33_1μM_NAA', 'ARF18+ARF9+IAA33_1μM_NAA', 'ARF18+IAA33', 'ARF18+IAA33_1μM_NAA', 'ARF18+IAA33_1μM_NAA', 'ARF18+ARF9+IAA33', 'ARF18+ARF9+IAA33', 'ARF18+ARF9+IAA33', 'ARF18+IAA33_1μM_NAA', 'ARF9+IAA33_1μM_NAA']
TFs_in_df=['ARF18', 'ARF9', 'ARF9_1μM_NAA', 'ARF18+ARF9', 'ARF18+ARF9_1μM_NAA', 'ARF18+IAA33_1μM_NAA', 'ARF18+IAA33', 'ARF18_1μM_NAA', 'ARF9+IAA33', 'ARF9+IAA33_1μM_NAA',

In [53]:
# 9.2.22 table (df3): remove unwanted rows.
# Strip a trailing '_35s:LucF_calibrator' from the promoter names first. The
# exclusion list is matched against the trimmed names, so the
# '..._35s:LucF_calibrator' spellings need no entries of their own.
df3 = remove_string_from_end_promoter_col(df3)
# Promoters that are not wanted from this table.
df3 = df3[~df3['Promoter'].isin([
    'NOS',
    'NRP',
    '35s',
    'ARF18',
    'NLP7',
    '4x[NRE-TATA]+spacing',
    'NOS_calibrator',
    'NRP:NLUC:YFP',
    'NIR1:NLUC:YFP',
    '4x[bZIP3-random]:NLUC:YFP',
    '4x[NRE-TATA]+spacing:NLUC:YFP',
    '35s:NLUC:YFP',
    '35s:NLUC:YFP_nomarker',
])]
# Drop rows where the well calibrator is NOS_LucF and the added TF is ANAC032
# (i.e. keep a row when either of the two conditions fails).
df3 = df3.loc[(df3['Well_calibrator'] != 'NOS_LucF') | (df3['TF_added'] != 'ANAC032')]

In [54]:
# TFs to keep per promoter in the 9.2.22 table, applied in this order
# (filter_TFs additionally keeps the 'No TF' controls):
#   ANR1 <- ANAC032, NLP6 and NLP7
#   NIR1, DREB26, ANAC032, NLP6 <- NLP6 and NLP7
for _promoter, _kept_tfs in [
    ('ANR1', ['ANAC032', 'NLP6', 'NLP7']),
    ('NIR1', ['NLP6', 'NLP7']),
    ('DREB26', ['NLP6', 'NLP7']),
    ('ANAC032', ['NLP6', 'NLP7']),
    ('NLP6', ['NLP6', 'NLP7']),
]:
    df3 = filter_TFs(df3, _promoter, _kept_tfs)

TFs=['NLP6', 'No TF', 'TGA1', 'NLP6', 'NLP6', 'NLP6', 'NLP6', 'TGA1:GR', 'TGA1:GR', 'TGA1:GR', 'TGA1', 'NLP6', 'NLP6', 'No TF', 'TGA1', 'No TF', 'NLP6', 'NLP6', 'NLP7', 'NLP7', 'NLP7', 'No TF', 'No TF', 'No TF', 'NLP7', 'NLP7', 'NLP7', 'ANAC032', 'ANAC032', 'ANAC032', 'NLP7', 'NLP7', 'NLP7']
TFs_in_df=['ANAC032', 'NLP6', 'NLP7']
TFs=['TGA1:GR', 'NLP6', 'TGA1:GR', 'NLP6', 'TGA1:GR', 'No TF', 'No TF', 'No TF', 'NLP6', 'No TF', 'NLP7', 'NLP7', 'NLP7', 'No TF', 'No TF']
TFs_in_df=['NLP6', 'NLP7']
TFs=['No TF', 'TGA1:GR', 'No TF', 'No TF', 'TGA1', 'TGA1', 'TGA1:GR', 'NLP6', 'TGA1', 'NLP6', 'NLP6', 'TGA1:GR', 'NLP7', 'NLP7', 'No TF', 'NLP7', 'No TF', 'No TF']
TFs_in_df=['NLP6', 'NLP7']
TFs=['NLP6', 'NLP6', 'TGA1', 'TGA1', 'NLP6', 'TGA1', 'NLP6', 'NLP6', 'NLP6', 'TGA1:GR', 'NLP6', 'NLP6', 'NLP6', 'No TF', 'No TF', 'No TF', 'TGA1:GR', 'TGA1:GR', 'NLP7', 'NLP7', 'No TF', 'No TF', 'NLP7', 'NLP7', 'NLP7', 'NLP7', 'NLP7', 'NLP7', 'NLP7', 'No TF']
TFs_in_df=['NLP6', 'NLP7']
TFs=['TGA1:GR', 'TGA1', 

In [55]:
# 27.04.22 table (df4): remove unwanted rows.
# NOTE(review): unlike the other tables, the promoter names are matched BEFORE
# the '_35s:LucF_calibrator' suffix is stripped, so only the exact spellings
# listed here are removed - confirm this order is intended.
df4 = df4[~df4['Promoter'].isin([
    'NOS',
    'ANAC032_ablated_TGA1_all_sites',
    'ANAC032_ablated_ARFs',
    'ANAC032',
    'NIR1_ablated_NRE_35s:LucF_calibrator',
    'NIR1_ablated_NRE_all_sites_35s:LucF_calibrator',
    'NLP7_ablated_ANAC032_all_sites_35s:LucF_calibrator',
    'NOS_35s:LucF_calibrator',
    'NOS_calibrator',
    'NOS_calibrator_35s:LucF_calibrator',
    '35s_35s:LucF_calibrator',
])]
# Drop rows where the well calibrator is NOS_LucF and the added TF is ANAC032
# (i.e. keep a row when either of the two conditions fails).
df4 = df4.loc[(df4['Well_calibrator'] != 'NOS_LucF') | (df4['TF_added'] != 'ANAC032')]

# Only now strip a trailing '_35s:LucF_calibrator' from the remaining promoter names.
df4 = remove_string_from_end_promoter_col(df4)


In [56]:
# On the ARF18, NIR1 and NLP7 promoters of the 27.04.22 table keep only NLP6
# and NLP7 (filter_TFs additionally keeps the 'No TF' controls).
for _promoter in ('ARF18', 'NIR1', 'NLP7'):
    df4 = filter_TFs(df4, _promoter, ['NLP6', 'NLP7'])

TFs=['No TF_EtOH+DMSO', 'TAD-Suntag_guide1', 'SRDX-Suntag_123guides1,2,3', 'SRDX-Suntag_noguide', 'SRDX-Suntag_noguide', 'SRDX-Suntag_noguide', 'TGA1:GR_EtOH+DMSO', 'TGA1:GR_EtOH+DMSO', 'TGA1:GR_EtOH+DMSO', 'SRDX-dCpf1_guides1,2,4', 'SRDX-dCpf1_guides1,2,4', 'TGA1:GR_10uM_DEX+DMSO', 'TGA1:GR_10uM_DEX+DMSO', 'TGA1:GR_10uM_DEX+DMSO', 'VP64-Suntag_guide1', 'VP64-Suntag_guide1', 'VP64-Suntag_guide1', 'TGA1:GR_35uM_CHX+EtOH', 'TGA1:GR_35uM_CHX+EtOH', 'TAD-Suntag_guide1', 'TGA1:GR_10uM_DEX+35uM_CHX', 'SRDX-dCpf1_guide2', 'SRDX-dCpf1_guide2', 'TGA1:GR_10uM_DEX+35uM_CHX', 'TGA1:GR_10uM_DEX+35uM_CHX', 'SRDX-Suntag_123guides1,2,3', 'TAD-Suntag_guides1,2,3', 'TAD-Suntag_guides1,2,3', 'SRDX-dCpf1_guide2', 'SRDX-dCpf1_guide1', 'SRDX-dCpf1_guide1', 'SRDX-dCpf1_guide1', 'TGA1:GR_35uM_CHX+EtOH', 'TAD-Suntag_guides1,2,3', 'SRDX-Suntag_123guides1,2,3', 'SRDX-dCpf1_guides1,2,4', 'No TF_10uM_DEX+35uM_CHX', 'No TF_10uM_DEX+DMSO', 'No TF_10uM_DEX+DMSO', 'SRDX-dCpf1_noguide', 'SRDX-dCpf1_noguide', 'SRDX-dCpf

In [57]:
# Overwrite df3's date column with the constant '09.02.22' (dd.mm.yy), the
# spelling the date filter below compares against.
df3['date'] = '09.02.22'

# Stack the four tables; the original row labels are kept, so they may repeat.
df = pd.concat([df1, df2, df3, df4])
# Snapshot taken BEFORE the NIR1 filter below; the manuscript table is built from it.
df_copy = df.copy()
# Drop NLP6/NLP7 on the NIR1 promoter for the 09.02.22 samples.
df = df.loc[
    ~(
        df['TF_added'].isin(['NLP6', 'NLP7'])
        & (df['Promoter'] == 'NIR1')
        & (df['date'] == '09.02.22')
    )
]
# Write the table used for the circos plot (tab-separated, without the index).
df.to_csv(
    '../../data/luminescence/coexpression_for_manuscript/coexpression_data_for_circos_plot.tsv',
    sep='\t',
    index=False,
)

In [58]:

# Strip a trailing '_1μM_NAA' from the TF_added values (the promoter names are not touched).
df_copy = remove_string_from_end_TF_added_col(df_copy)
# Drop NLP6, NLP7 and the 'No TF' control on the NIR1 promoter for the 27.04.22 samples.
df_copy = df_copy.loc[
    ~(
        df_copy['TF_added'].isin(['NLP6', 'NLP7', 'No TF'])
        & (df_copy['Promoter'] == 'NIR1')
        & (df_copy['date'] == '27.04.22')
    )
]
# TFs that are left out of the manuscript table.
df_copy = df_copy[
    ~df_copy['TF_added'].isin([
        'HHO2',
        'bZIP3',
        'ARF18+ARF9+IAA33',
        'ARF9+IAA33',
        'ARF18+IAA33',
    ])
]
# Write the manuscript table (tab-separated, without the index).
df_copy.to_csv(
    '../../data/luminescence/coexpression_for_manuscript/coexpression_data_for_manuscript.tsv',
    sep='\t',
    index=False,
)

In [59]:
# Ablated-promoter table, built from the unfiltered 27.04.22 snapshot (df4_copy).
# Strip a trailing '_35s:LucF_calibrator' from the promoter names first, so the
# exclusion list below is matched against the trimmed names.
df4_copy = remove_string_from_end_promoter_col(df4_copy)

# Promoters removed here; every other promoter, including the '*_ablated_*'
# ones, is kept.
df4_copy = df4_copy[~df4_copy['Promoter'].isin(['NOS', 'ARF18', '35s', 'NOS_calibrator'])]
# Drop rows where the well calibrator is NOS_LucF and the added TF is ANAC032
# (i.e. keep a row when either of the two conditions fails).
df4_copy = df4_copy.loc[
    (df4_copy['Well_calibrator'] != 'NOS_LucF') | (df4_copy['TF_added'] != 'ANAC032')
]



In [60]:
# TFs to keep per promoter, applied in this order
# (filter_TFs additionally keeps the 'No TF' controls):
#   NIR1 <- NLP6 and NLP7
#   NLP7 <- ANAC032
for _promoter, _kept_tfs in [
    ('NIR1', ['NLP6', 'NLP7']),
    ('NLP7', ['ANAC032']),
]:
    df4_copy = filter_TFs(df4_copy, _promoter, _kept_tfs)

TFs=['TGA1:GR_EtOH+DMSO', 'No TF_35uM_CHX+EtOH', 'No TF_10uM_DEX+35uM_CHX', 'No TF_10uM_DEX+35uM_CHX', 'No TF_10uM_DEX+35uM_CHX', 'TGA1:GR_overnight_EtOH+DMSO', 'No TF_35uM_CHX+EtOH', 'TGA1:GR_EtOH+DMSO', 'TGA1:GR_overnight_EtOH+DMSO', 'TGA1:GR_overnight_10uM_DEX+DMSO', 'TGA1:GR_10uM_DEX+DMSO', 'TGA1:GR_10uM_DEX+DMSO', 'TGA1:GR_10uM_DEX+DMSO', 'TGA1:GR_overnight_10uM_DEX+DMSO', 'TGA1:GR_overnight_10uM_DEX+DMSO', 'TGA1:GR_35uM_CHX+EtOH', 'TGA1:GR_35uM_CHX+EtOH', 'TGA1:GR_35uM_CHX+EtOH', 'TGA1:GR_10uM_DEX+35uM_CHX', 'TGA1:GR_10uM_DEX+35uM_CHX', 'TGA1:GR_10uM_DEX+35uM_CHX', 'No TF_35uM_CHX+EtOH', 'TGA1:GR_overnight_EtOH+DMSO', 'No TF_overnight_10uM_DEX+DMSO', 'TGA1:GR_EtOH+DMSO', 'No TF_overnight_10uM_DEX+DMSO', 'No TF_overnight_10uM_DEX+DMSO', 'No TF_EtOH+DMSO', 'No TF_EtOH+DMSO', 'No TF_overnight_EtOH+DMSO', 'No TF_EtOH+DMSO', 'No TF_overnight_EtOH+DMSO', 'No TF_10uM_DEX+DMSO', 'No TF_10uM_DEX+DMSO', 'No TF_10uM_DEX+DMSO', 'No TF_overnight_EtOH+DMSO', 'NLP6', 'No TF', 'NLP6', 'NLP7', 'N

In [61]:
# Write the ablated-promoter table (tab-separated, without the index).
# NOTE(review): this path sits directly under data/luminescence/, not in
# coexpression_for_manuscript/ like the other outputs - confirm this is intended.
df4_copy.to_csv(
    '../../data/luminescence/coexpression_data_ablated_promoters.tsv',
    sep='\t',
    index=False,
)

In [62]:
# Fold change of each promoter/TF combination relative to its 'No TF' control.
# Step 1: mean corrected luminescence per promoter, TF, condition, well calibrator and date.
df_mean = (
    df.groupby(['Promoter','TF_added', 'condition','Well_calibrator','date'])['corrected_luminescence']
    .mean()
    .reset_index()
)
# Step 2: the control rows ('No TF' and 'No TF_1μM_NAA').
df_mean_NoTF = df_mean[df_mean['TF_added'].isin(['No TF', 'No TF_1μM_NAA'])]
# Step 3: attach the control mean that shares promoter, condition, well
# calibrator and date as 'corrected_luminescence_NoTF'. how='left' keeps rows
# that have no matching control; their control value is NaN.
# NOTE(review): if one key has more than one control row, the merge duplicates
# the matching rows - presumably 'condition' separates the two controls; confirm.
df_mean = df_mean.merge(
    df_mean_NoTF[['Promoter','condition','date','Well_calibrator','corrected_luminescence']],
    on=['Promoter','condition','Well_calibrator','date'],
    how='left',
    suffixes=('','_NoTF'),
)
# Step 4: ratio to the control, and its base-2 logarithm.
df_mean['fold_change'] = df_mean['corrected_luminescence'].div(df_mean['corrected_luminescence_NoTF'])
df_mean['log2_fc'] = np.log2(df_mean['fold_change'])
# Step 5: the control rows themselves are no longer needed.
df_mean = df_mean[~df_mean['TF_added'].isin(['No TF_1μM_NAA', 'No TF'])]



In [63]:
#TODO: add TRAM and TARSET data to df_mean (this cell contains no code yet)

In [64]:
# Remove HHO2 and bZIP3 from the fold-change table.
df_mean = df_mean[~df_mean['TF_added'].isin(['HHO2', 'bZIP3'])]

# ARF combinations that are kept for every promoter below.
_arf_tfs = [
    'ARF18',
    'ARF9',
    'ARF9_1μM_NAA',
    'ARF18+ARF9',
    'ARF18+ARF9_1μM_NAA',
    'ARF18+IAA33_1μM_NAA',
    'ARF18+IAA33',
    'ARF18_1μM_NAA',
    'ARF9+IAA33',
    'ARF9+IAA33_1μM_NAA',
    'ARF18+ARF9+IAA33',
    'ARF18+ARF9+IAA33_1μM_NAA',
]
# TFs kept on most promoters in addition to the ARF combinations.
_shared_tfs = ['ANAC032', 'ANR1', 'NLP6', 'NLP7']

# Per promoter, the TFs kept on top of the ARF combinations (applied in this order):
#   ANAC032                  <- ANAC032 and ANR1
#   ANR1, ARF18, NLP6, NLP7  <- ANAC032, ANR1, NLP6 and NLP7
#   DREB26                   <- the same four plus DREB26
#   NIR1                     <- the same four plus 'NIR1'
for _promoter, _extra_tfs in [
    ('ANAC032', ['ANAC032', 'ANR1']),
    ('ANR1', _shared_tfs),
    ('ARF18', _shared_tfs),
    ('DREB26', _shared_tfs + ['DREB26']),
    ('NIR1', _shared_tfs + ['NIR1']),
    ('NLP6', _shared_tfs),
    ('NLP7', _shared_tfs),
]:
    df_mean = filter_TFs(df_mean, _promoter, _arf_tfs + _extra_tfs)

TFs=['ANAC032', 'ANR1', 'ARF18', 'ARF18+ARF9', 'ARF18+ARF9_1μM_NAA', 'ARF18+IAA33', 'ARF18+IAA33_1μM_NAA', 'ARF18_1μM_NAA', 'ARF9', 'ARF9_1μM_NAA', 'DREB26', 'NLP6', 'NLP7']
TFs_in_df=['ARF18', 'ARF9', 'ARF9_1μM_NAA', 'ARF18+ARF9', 'ARF18+ARF9_1μM_NAA', 'ARF18+IAA33_1μM_NAA', 'ARF18+IAA33', 'ARF18_1μM_NAA', 'ANAC032', 'ANR1']
TFs=['ANAC032', 'ANR1', 'ARF18', 'ARF18+ARF9', 'ARF18+ARF9_1μM_NAA', 'ARF18+IAA33', 'ARF18+IAA33_1μM_NAA', 'ARF18_1μM_NAA', 'ARF9', 'ARF9_1μM_NAA', 'DREB26', 'NLP6', 'NLP7']
TFs_in_df=['ARF18', 'ARF9', 'ARF9_1μM_NAA', 'ARF18+ARF9', 'ARF18+ARF9_1μM_NAA', 'ARF18+IAA33_1μM_NAA', 'ARF18+IAA33', 'ARF18_1μM_NAA', 'ANAC032', 'ANR1', 'NLP6', 'NLP7']
TFs=['ANAC032', 'ANR1', 'ARF18', 'ARF18+ARF9', 'ARF18+ARF9_1μM_NAA', 'ARF18+IAA33', 'ARF18+IAA33_1μM_NAA', 'ARF18_1μM_NAA', 'DREB26', 'NLP6', 'NLP7']
TFs_in_df=['ARF18', 'ARF18+ARF9', 'ARF18+ARF9_1μM_NAA', 'ARF18+IAA33_1μM_NAA', 'ARF18+IAA33', 'ARF18_1μM_NAA', 'ANAC032', 'ANR1', 'NLP6', 'NLP7']
TFs=['ANAC032', 'ANR1', 'ARF18',

In [65]:
# #function to filter TFs which aren't in the provided list of TFs
# def filter_TFs(df,promoter,TFs):
#     """function to filter TF-promoter interactions which aren't in the provided list of TFs for a specific promoter"""
#     #first make a copy of the df
#     df_copy = df.copy()
#     #get list of TFs in df
#     df_TFs = df_copy[df_copy.Promoter==promoter].TF_added.values.tolist()
#     #get list of TFs that were present in the provided list of TFs
#     TFs_in_df = [x for x in TFs if x in df_TFs]
#     #append "No TF" to TFs_in_df
#     TFs_in_df.append('No TF')
#     TFs_in_df.append('No TF_1μM_NAA')
#     #append
#     #filter df to only include TFs in TFs
#     promoter_df = df_copy[df_copy.Promoter==promoter]
#     promoter_df = promoter_df[promoter_df.TF_added.isin(TFs_in_df)]
#     #replace subset of the original df with the filtered df
#     df_copy[df_copy.Promoter==promoter] = promoter_df
#     #remove nan
#     df_copy = df_copy.dropna()
#     return df_copy

In [66]:
# Show the filtered fold-change table; a bare last expression is rendered as the cell output.
df_mean

Unnamed: 0,Promoter,TF_added,condition,Well_calibrator,date,corrected_luminescence,corrected_luminescence_NoTF,fold_change,log2_fc
0,ANAC032,ANAC032,nitrogen,35s_LucF,02.11.21+04.11.21,76.782985,57.899469,1.326143,0.407237
1,ANAC032,ANR1,nitrogen,NOS_LucF,02.11.21+04.11.21,99.879069,206.829930,0.482904,-1.050191
2,ANAC032,ARF18,nitrogen,NOS_LucF,02.11.21+04.11.21,137.931547,206.829930,0.666884,-0.584493
3,ANAC032,ARF18+ARF9,nitrogen,NOS_LucF,02.11.21+04.11.21,170.984796,206.829930,0.826693,-0.274577
4,ANAC032,ARF18+ARF9_1μM_NAA,1uM_auxin,NOS_LucF,02.11.21+04.11.21,192.480081,229.963468,0.837003,-0.256696
...,...,...,...,...,...,...,...,...,...
143,NLP7,ARF18_1μM_NAA,1uM_auxin,NOS_LucF,02.11.21+04.11.21,23.104530,27.266601,0.847356,-0.238959
144,NLP7,ARF9,nitrogen,NOS_LucF,02.11.21+04.11.21,32.240190,21.538205,1.496884,0.581962
145,NLP7,ARF9_1μM_NAA,1uM_auxin,NOS_LucF,02.11.21+04.11.21,32.347592,27.266601,1.186345,0.246523
148,NLP7,NLP6,nitrogen,35s_LucF,27.04.22,0.941386,0.698554,1.347622,0.430416


In [67]:
# Write the fold-change table used for the circos plot (tab-separated, without the index).
df_mean.to_csv(
    '../../data/luminescence/coexpression_for_manuscript/coexpression_data_for_circos_plot_fold_change.tsv',
    sep='\t',
    index=False,
)

In [68]:
# Add a 'significance' column that is empty for every row by default.
df_mean['significance'] = ""
# Promoter / TF_added pairs that are labelled 'no' in the significance column.
for _promoter, _tf in [
    ('DREB26', 'ARF18+ARF9_1μM_NAA'),
    ('DREB26', 'ARF18+IAA33_1μM_NAA'),
    ('DREB26', 'ARF18_1μM_NAA'),
    ('DREB26', 'ARF9+IAA33_1μM_NAA'),
    ('NLP6', 'ARF18'),
]:
    df_mean.loc[(df_mean.Promoter==_promoter)&(df_mean.TF_added==_tf),'significance'] = 'no'
# Write the annotated table (tab-separated, without the index).
df_mean.to_csv(
    '../../data/luminescence/coexpression_for_manuscript/coexpression_data_for_circos_plot_fold_change_significant.tsv',
    sep='\t',
    index=False,
)

In [69]:
#make version containing only significant interactions
