In [1]:
# use qpcr conda environment
import pandas as pd
import numpy as np
import math
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib import rcParams
import seaborn as sns
from statannotations.Annotator import Annotator
import os
from scipy import stats
#cycle through alphabet
from string import ascii_uppercase as alc

In [2]:
#TODO: add the ARF plots as subplots for each promoter, laid out as a row of 3 plots

In [3]:
#function to read in data
def read_data(file,sep):
    """Load a delimited text file into a DataFrame.

    Args:
        file: path or file-like object handed straight to ``pd.read_csv``.
        sep: field delimiter used in the file.

    Returns:
        The parsed DataFrame; the first row of the file supplies the column names.
    """
    return pd.read_csv(file, header=0, sep=sep)

In [4]:
def add_promoter_column(df,name,promoter):
    """Tag every row whose ``name`` value starts with ``name`` with ``promoter``.

    The DataFrame is modified in place: the ``promoter`` column is created if
    it does not exist yet, and rows that do not match are left untouched.

    Args:
        df: DataFrame with a string ``name`` column.
        name: literal prefix to look for at the start of each ``name`` value.
        promoter: value written to the ``promoter`` column of matching rows.

    Returns:
        None.
    """
    # Literal prefix test instead of an interpolated regex, so characters such
    # as '.', '+' or '[' in `name` cannot act as regex metacharacters.
    # na=False makes missing names count as "no match".
    matches = df['name'].str.startswith(name, na=False)
    df.loc[matches, 'promoter'] = promoter
    

In [5]:
def combine_plates(plateA_loc,plateB_loc,plateC_loc,data_dir):
    """Merge the three plate CSVs into one tidy table and save it.

    Each plate is read with ``read_data``, a trailing capital letter is
    stripped from its ``name`` values, and a ``plate`` column records which
    file the row came from. The plates are then stacked, reduced to the
    columns needed for plotting, sorted, labelled with their promoter and
    written to ``combined.csv`` inside ``data_dir``.

    Args:
        plateA_loc: path of the plate A CSV file.
        plateB_loc: path of the plate B CSV file.
        plateC_loc: path of the plate C CSV file.
        data_dir: directory that receives ``combined.csv``.

    Returns:
        DataFrame with the columns ``plant_line``, ``condition``,
        ``nluc/absorbance``, ``plate`` and ``promoter``.
    """
    # promoter assigned to each plant line, keyed by the prefix of its name
    promoter_by_line = {
        '205-3': 'NRP',
        '205-7': 'NRP',
        '205-9': 'NRP',
        '206-3': 'NIR1',
        '206-4': 'NIR1',
        '206-8': 'NIR1',
        '207-1': '35s',
        '207-3': '35s',
        '207-10': '35s',
        '208-7': '4x[NRE-TATA]',
        '208-8': '4x[NRE-TATA]',
        '208-9': '4x[NRE-TATA]',
        '209-5': '4x[bZIP3-random]',
        '209-7': '4x[bZIP3-random]',
        '209-10': '4x[bZIP3-random]',
        'Col-0-1': 'NA',
    }
    plate_files = (('plateA', plateA_loc), ('plateB', plateB_loc), ('plateC', plateC_loc))

    plates = []
    for plate_label, plate_loc in plate_files:
        plate = read_data(plate_loc, ',')
        #remove last character of name column if it is a letter
        # regex=True is required: from pandas 2.0 the default is a literal
        # replacement, which would leave the trailing letter in place
        plate['name'] = plate['name'].str.replace(r'[A-Z]$', '', regex=True)
        #add plate column
        plate['plate'] = plate_label
        plates.append(plate)

    #concatenate dataframes
    df = pd.concat(plates, ignore_index=True)
    #filter columns to keep - name condition nluc/absorbance plate
    df = df[['name','condition','nluc/absorbance','plate']]
    #sort by name and condition, then reset index
    df = df.sort_values(by=['name','condition']).reset_index(drop=True)

    #add promoters to plate df
    for line_prefix, promoter in promoter_by_line.items():
        add_promoter_column(df, line_prefix, promoter)

    #rename name column value Col-0-1 to Col-0
    is_wild_type = df['name'].str.startswith('Col-0-1', na=False)
    df.loc[is_wild_type, 'name'] = 'Col-0'

    #rename name column to plant_line
    df = df.rename(columns={'name':'plant_line'})

    #save df
    df.to_csv(os.path.join(data_dir, 'combined.csv'), index=False)
    return df


In [6]:
#make a plot for one promoter on a given axis, e.g. make_plot(prom,df,fontsize,axes[axes_count])
def make_plot(prom,df, fontsize,ax,plot_title=None):
    """Draw one promoter's bar + swarm plot on ``ax`` and log the statistics.

    Bars show ``nluc/absorbance`` per plant line, split by nitrate condition,
    with the individual measurements overlaid as a swarm plot. Within every
    plant line except the Col-0 control, the three nitrate conditions are
    compared pairwise with Welch's t-test; significant comparisons are starred
    on the plot and every result is appended to ``stats.txt``.

    Args:
        prom: promoter name, written at the start of each line of the stats file.
        df: DataFrame with ``plant_line``, ``condition`` and
            ``nluc/absorbance`` columns.
        fontsize: font size of the title; the legend uses ``fontsize - 2``.
        ax: matplotlib Axes to draw on.
        plot_title: optional title for the axes; no title is set when None.

    Returns:
        None. The plot is drawn on ``ax`` and the stats file is appended to.
    """
    set_rc_params()
    conditions = ['0mM_nitrate','1mM_nitrate','10mM_nitrate']

    #sort alphabetically (case-insensitive), with Col-0 first
    # done in a single sort: the tuple key puts Col-0 ahead of every other line
    plant_lines = sorted(
        df['plant_line'].unique(),
        key=lambda line: (line != 'Col-0', line.lower()),
    )

    #compare the nitrate conditions within each plant line, skipping the Col-0 control
    # (the line name is compared here, not the loop index, so Col-0 really is excluded)
    pairs = []
    for line in plant_lines:
        if line == 'Col-0':
            continue
        pairs.append(((line, '0mM_nitrate'),(line, '1mM_nitrate')))
        pairs.append(((line, '0mM_nitrate'),(line, '10mM_nitrate')))
        pairs.append(((line, '1mM_nitrate'),(line, '10mM_nitrate')))

    fig_args = {'x':'plant_line', 'y':'nluc/absorbance','data':df, 'hue':'condition','hue_order':conditions,'order':plant_lines, 'dodge':True,}
    configuration = {'test':'t-test_welch', 'text_format':'star', 'pvalue_thresholds':[[1e-3, "***"],[1e-2, "**"],[0.05, "*"],[1, "ns"]]}
    sns.barplot(**fig_args, linewidth=2,  errcolor="black", edgecolor="black", ci=68, errwidth=1,capsize=0.2,color='lightgrey',ax=ax,palette=["white", "lightgrey",'grey'])
    sns.swarmplot(**fig_args,color='black',ax=ax, palette=["black"])

    #add stats - skipped when there is nothing to compare (e.g. only Col-0 present)
    test_results = []
    if pairs:
        annotator = Annotator(ax, pairs, **fig_args,verbose=False, show_non_significant=False)
        annotator.configure(**configuration)
        ax, test_results = annotator.apply_and_annotate()

    #write stats to file
    with open('../../../../data/plots/luminescence/root_luc/stats.txt', 'a') as f:
        for res in test_results:
            f.write(f'{str(prom)},{str(res.data.group1)},{str(res.data.group2)},{str(res.data)},t={res.data.stat_value}, P={res.data.pvalue}\n{res.structs} \n\n')

    # change axes labels
    ax.set_ylabel('LucN luminescence (a.u.)')
    ax.set_xlabel('Plant line')
    #rotate axes labels
    ax.set_xticklabels(ax.get_xticklabels(), rotation=90, ha="center")

    #add title
    if plot_title is not None:
        ax.set_title(plot_title, y=1,size=fontsize)

    #remove swarmplot points from legend: the first three handles are skipped
    #and only the remaining ones are kept, relabelled with the KNO3 concentrations
    handles, _ = ax.get_legend_handles_labels()
    legend_labels = [r'0 mM KNO$_{3}$',r'1 mM KNO$_{3}$',r'10 mM KNO$_{3}$']
    ax.legend(handles=handles[3:], labels=legend_labels, loc='upper left', fontsize=fontsize-2, frameon=False)

    #make y axis start at 0
    ax.set_ylim(bottom=0)

In [7]:
#function to make subplots and iterate over promoters
def make_subplots(df,fontsize):
    """Plot every promoter in its own panel and save the combined figure.

    One row is created per promoter found in ``df['promoter']``, excluding the
    'NA' label given to the Col-0 control and any missing values. Each panel
    shows the promoter's plant lines together with the 'NA' rows and is drawn
    by ``make_plot``. The figure is saved as PDF and SVG and then closed.

    Args:
        df: DataFrame with ``promoter``, ``plant_line``, ``condition`` and
            ``nluc/absorbance`` columns.
        fontsize: font size passed on to ``make_plot``.

    Raises:
        ValueError: if ``df`` contains no promoter to plot.
    """
    # promoters that get a panel: 'NA' marks the control rows, which are added
    # to every panel rather than plotted alone; rows with no promoter are ignored
    proms = [prom for prom in df['promoter'].dropna().unique() if prom != 'NA']
    if not proms:
        raise ValueError('no promoters to plot')

    number_of_rows = len(proms)
    height = 4.8*number_of_rows
    width = 10
    # squeeze=False always returns a 2D array of axes, so a single promoter
    # (one row) can be handled the same way as several
    fig, axes = plt.subplots(nrows=number_of_rows, ncols=1, figsize=(width, height), sharex=False, squeeze=False)
    #flatten axis array
    axes = axes.flatten()

    #iterate over promoters
    for panel_index, (prom, ax) in enumerate(zip(proms, axes)):
        #make promoter df: this promoter's lines plus the control rows
        promoter_df = df[(df['promoter']==prom)|(df['promoter']=='NA')]
        #make plot
        make_plot(prom,promoter_df,fontsize,ax,plot_title=prom)
        #panel letter; lettering starts at 'B', as in the original numbering
        # NOTE(review): presumably 'A' is reserved for a panel made elsewhere - confirm
        letter = alc[panel_index + 1]
        ax.text(-0.1, 1.2, letter, transform=ax.transAxes, fontsize=18,  va='top', ha='right')

    #save figures
    fig.tight_layout()
    fig.savefig(
        '../../../../data/plots/luminescence/root_luc/plots/combined_barplot.pdf',
        format="pdf",
        bbox_inches="tight",transparent=True)
    fig.savefig(
        '../../../../data/plots/luminescence/root_luc/plots/combined_barplot.svg',
        format="svg",
        bbox_inches="tight",transparent=True)
    #remove plot so it is not shown in the notebook
    plt.close('all')

In [8]:
#set matplotlib rc parameters
def set_rc_params():
    """Apply the figure styling shared by all plots to matplotlib's rcParams.

    Returns:
        The base font size (20) that was written to ``font.size``.
    """
    fontsize = 20
    settings = {
        #tick and line widths
        'xtick.major.width': 2,
        'ytick.major.width': 2,
        'axes.linewidth': 2,
        'lines.linewidth': 2,
        #remove top and right lines
        'axes.spines.top': False,
        'axes.spines.right': False,
        #font size
        'font.size': fontsize,
        #for getting the microsoft font Arial working, please follow this guide: https://alexanderlabwhoi.github.io/post/2021-03-missingfont/
        'font.family': 'sans-serif',
        'font.sans-serif': ['Arial'],
        #keep text editable in svg / pdf editors
        'svg.fonttype': 'none',
        'pdf.fonttype': 42,
        #align y-axis top most tick with end of axis
        'axes.autolimit_mode': 'round_numbers',
        #set margins to ensure any error bars fit
        'axes.xmargin': 0.2,
        'axes.ymargin': 0.2,
    }
    #assign one key at a time so each value goes through rcParams' item setter
    for key, value in settings.items():
        rcParams[key] = value
    return fontsize
    

In [9]:
#main function
def main():
    """Run the whole analysis: combine the plate data, then plot and save it.

    Steps:
        1. Delete the stats file of a previous run, because ``make_plot``
           appends to it.
        2. Create the output directories for the stats file and the plots.
        3. Combine the three plate CSVs into one DataFrame.
        4. Apply the matplotlib settings and draw the combined figure.
    """
    #data location
    data_dir = '../../../../data/luminescence/root_lucN_luminescence/13.10.22/'
    plateA_loc = f'{data_dir}plateA_output_raw.csv'
    plateB_loc = f'{data_dir}plateB_output_raw.csv'
    plateC_loc = f'{data_dir}plateC_output_raw.csv'

    #output location
    output_dir = '../../../../data/plots/luminescence/root_luc'

    #delete the stats file if it exists
    stats_file = f'{output_dir}/stats.txt'
    if os.path.exists(stats_file):
        os.remove(stats_file)

    #make directories for the stats file and the plots to be exported to
    for dirName in (output_dir, f'{output_dir}/plots'):
        try:
            # makedirs also creates missing parent directories, where mkdir
            # would fail with FileNotFoundError
            os.makedirs(dirName)
            print("Directory " , dirName ,  " created")
        except FileExistsError:
            print("Directory " , dirName ,  " already exists")

    df = combine_plates(plateA_loc,plateB_loc,plateC_loc,data_dir)

    #make plots
    fontsize = set_rc_params()
    make_subplots(df, fontsize)




In [10]:
#entry point: call main() only when this code is run directly, not when it is imported
if __name__ == '__main__':
    main()

Directory  ../../../../data/plots/luminescence/root_luc  already exists
Directory  ../../../../data/plots/luminescence/root_luc/plots  already exists


  stat, pval = self._func(group_data1, group_data2, *self.args,
  var *= np.divide(n, n-ddof)  # to avoid error on division by zero
  var *= np.divide(n, n-ddof)  # to avoid error on division by zero
