In [1]:
import pandas as pd
import numpy as np
import numpy.random as nrand
import matplotlib.pyplot as plt
from matplotlib.ticker import (MultipleLocator, AutoMinorLocator)

In [3]:
# Load the per-gene fitness tables and combine them into one long DataFrame.
Gene_list = ['ADA2','PRS3','ASC1','RAD6','BFR1','RPL29',
             'BUD23','RPL39','CCW12','RPS7A','EOS1','SNF6','GET1','TSR2',
             'GIM5','VMA21','IES6','VMA7','LSM1']

# Read every table first and concatenate once: calling pd.concat inside the
# loop re-copies the accumulated frame on each iteration (quadratic cost).
gene_frames = []
for Gene in Gene_list:
    # sep=r'\s+' is the documented replacement for the deprecated
    # delim_whitespace=True (any run of whitespace separates fields).
    df_gene = pd.read_csv(f'../All_fitness_values_of_genes/{Gene}_all_fitness_values.txt',
                          sep=r'\s+')
    df_gene['Gene'] = Gene  # tag each row with the gene it came from
    gene_frames.append(df_gene)
df_rep = pd.concat(gene_frames, ignore_index=True)

# Drop nonsense mutations. The explicit copy makes df_rep an independent frame,
# so the column assignments in later cells cannot raise SettingWithCopyWarning.
df_rep = df_rep[df_rep['Mutation_type'] != 'Nonsense_mutation'].copy()

In [4]:
# Per-row mean and standard deviation of fitness across the replicate columns
# of each environment (pandas defaults: sample std, missing values skipped).
env_list = ['YPD','SC_37','YPD_H2O2',"YPE"]
for env in env_list:
    # All columns whose name contains "Fitness_from_<env>_replicate_"
    replicate_cols = df_rep.filter(regex=f"Fitness_from_{env}_replicate_.*")
    df_rep[f'{env}_fitness_std'] = replicate_cols.std(axis=1)
    df_rep[f'{env}_fitness'] = replicate_cols.mean(axis=1)

In [6]:
# Standard error of the mean fitness: std / sqrt(number of replicates).
# Replicate counts are hard-coded per environment.
# NOTE(review): presumably these match the number of replicate columns in the
# input tables -- confirm against the data.
replicate_counts = {'YPD': 4, 'SC_37': 3, 'YPD_H2O2': 3, 'YPE': 3}
for env, n_rep in replicate_counts.items():
    df_rep[f'{env}_fitness_se'] = df_rep[f'{env}_fitness_std'] / np.sqrt(n_rep)

# SNF6 in YPE is the one exception: its standard error uses n = 2.
snf6_rows = df_rep['Gene'] == 'SNF6'
df_rep.loc[snf6_rows, 'YPE_fitness_se'] = \
    df_rep.loc[snf6_rows, 'YPE_fitness_std'] / np.sqrt(2)


In [7]:
# Boolean row masks selecting nonsynonymous and synonymous mutations.
mutation_type = df_rep['Mutation_type']
non_idx = mutation_type.eq('Nonsynonymous_mutation')
syn_idx = mutation_type.eq('Synonymous_mutation')

In [None]:
# Legend text for each condition key used in this cell. The mathtext label is
# a raw string so the backslash in "\mathregular" is taken literally; in a
# normal string "\m" is an invalid escape sequence that Python warns about.
legend_label = {'YPD':'YPD','H2O2':r'YPD+$\mathregular{H_2O_2}$',
                'SC':'SC+37°C',"YPE":'YPE','Lowest':'Rotating among the above conditions'}
fig,ax = plt.subplots(figsize=[6,5], facecolor='white')
# Use the regular text font for mathtext by default.
params = {'mathtext.default': 'regular' }
plt.rcParams.update(params)

# Resample fitness from mean fitness and standard error.
# For every row of df_rep, draw N_RESAMPLES values from a normal distribution
# centred on the mean fitness with the standard error as its scale. Each entry
# of fitness_list_dict is an array of shape (N_RESAMPLES, len(df_rep)).
# A seeded generator makes the figure reproducible under Restart & Run All.
N_RESAMPLES = 1000
RESAMPLE_SEED = 0
rng = np.random.default_rng(RESAMPLE_SEED)

# Plot key -> prefix of the '<prefix>_fitness' / '<prefix>_fitness_se' columns.
env_column_prefix = {'YPD': 'YPD', 'SC': 'SC_37', 'H2O2': 'YPD_H2O2', 'YPE': 'YPE'}

fitness_list_dict = {}
for env_key, prefix in env_column_prefix.items():
    fitness_list_dict[env_key] = rng.normal(
        loc=df_rep[f'{prefix}_fitness'].to_numpy(),
        scale=df_rep[f'{prefix}_fitness_se'].to_numpy(),
        size=[N_RESAMPLES, len(df_rep)])

# Calculate lowest fitness across environments (element-wise minimum over the
# four resampled arrays, i.e. per resample and per mutation).
fitness_list_dict['Lowest'] = np.min(
    [fitness_list_dict[env_key] for env_key in env_column_prefix], axis=0)

# Calculate dN/dS
# For each condition and each fitness cutoff, dN (dS) is the fraction of
# nonsynonymous (synonymous) mutations whose resampled fitness exceeds the
# cutoff; their ratio is computed separately for every resample.
# dNdS_dict[env][k] holds the per-resample ratios for cutoff_list[k].
env_order = ['YPD','SC','H2O2',"YPE",'Lowest']
cutoff_list = np.linspace(0.97,0.99,11)

# Loop-invariant masks and counts, computed once instead of per cutoff.
non_mask = (df_rep['Mutation_type'] == 'Nonsynonymous_mutation').to_numpy()
syn_mask = (df_rep['Mutation_type'] == 'Synonymous_mutation').to_numpy()
n_non = non_mask.sum()
n_syn = syn_mask.sum()

# Axis setup does not depend on the condition, so it is done once up front.
ax.set_ylim(0.82,1.08)
# Raw string: "\m" is an invalid escape sequence in a normal string literal.
ax.set_ylabel(r'Expected $\mathit{d}_{N}/\mathit{d}_{S}$',size=13)
ax.set_xlabel('Fitness cutoff',size=13)

dNdS_dict = {env:[] for env in env_order}
for i,env in enumerate(env_order):
    dNdS_mean_list = []
    dNdS_std_list = []
    for cutoff in cutoff_list:
        # Boolean (resample, mutation) matrix; the 1-D masks broadcast over rows.
        above_cutoff = fitness_list_dict[env] > cutoff
        dN_list = np.sum(above_cutoff & non_mask, axis=1) / n_non
        dS_list = np.sum(above_cutoff & syn_mask, axis=1) / n_syn
        dNdS_list = dN_list/dS_list
        dNdS_mean_list.append(dNdS_list.mean())
        dNdS_std_list.append(dNdS_list.std())
        dNdS_dict[env].append(dNdS_list)

    # Plotting: mean across resamples with error bars of 1.96 x the standard
    # deviation across resamples. Each series is shifted slightly along x
    # (a different multiple of 0.0001) so the error bars do not overlap.
    ax.errorbar(cutoff_list+((i+1)%5)*0.0001, dNdS_mean_list, yerr=np.array(dNdS_std_list)*1.96,
                 label=legend_label[env], capsize=2, fmt='o-',ms=4)

# Build the legend once, after all series have been drawn.
legend = ax.legend(title="Environments:",title_fontsize = 'medium',edgecolor='white')
# Left-align the legend title with the entries (uses a private matplotlib attribute).
legend._legend_box.align = "left"
# Fully transparent, borderless legend frame.
legend.get_frame().set_alpha(None)
legend.get_frame().set_facecolor((0, 0, 1, 0))
legend.get_frame().set_linewidth(0)

# Add annotation to the plot
# For each cutoff, compare the 'Lowest' dN/dS resamples with every single
# condition. A red star marks cutoffs where 'Lowest' exceeds at least one
# condition in more than 95% of resamples; a black star marks cutoffs where it
# exceeds every condition in no more than 5% of resamples.
single_envs = ['YPD','SC','H2O2','YPE']
for i, cutoff in enumerate(cutoff_list):
    lowest_samples = dNdS_dict['Lowest'][i]
    # Fraction of resamples in which 'Lowest' is strictly above each condition
    exceed_fracs = [
        (lowest_samples > dNdS_dict[env][i]).sum() / len(lowest_samples)
        for env in single_envs
    ]
    if any(frac > 0.95 for frac in exceed_fracs):
        star_color = 'r'
    elif all(not (frac > 0.05) for frac in exceed_fracs):
        star_color = 'k'
    else:
        continue  # neither criterion met: leave this cutoff unmarked
    ax.text(cutoff,0.82,'*',color=star_color,ha='center')

# X axis: labelled major ticks every 0.01 (two decimals), minor ticks every 0.002.
x_axis = ax.xaxis
x_axis.set_major_locator(MultipleLocator(0.01))
x_axis.set_major_formatter('{x:.2f}')
x_axis.set_minor_locator(MultipleLocator(0.002))

# Major ticks are drawn longer than minor ticks.
for tick_kind, tick_length in (('major', 7), ('minor', 4)):
    ax.tick_params(which=tick_kind, length=tick_length)

#plt.savefig('empirical_dNdS.pdf')