# Examining the Distribution of Fitness Effects Across *P. falciparum* Life Stages

In [1]:
import pandas as pd
import numpy as np
import scipy.stats as stats 
from matplotlib import pyplot as plt
import seaborn as sns
import os

Examination of DFE-alpha output: adaptive divergence and Q [the probability of fixation of a deleterious allele]

In [None]:
# load M3 genes into DF
gene_set_root = ""  # insert path to gene set
# os.chdir("") raises FileNotFoundError, so only change directory once a path
# has actually been filled in; with the placeholder left empty, the relative
# paths below resolve against the current working directory instead.
if gene_set_root:
    os.chdir(gene_set_root)

stages = ['sporozoite', 'ring', 'trophozoite', 'schizont', 'gametocyte', 'ookinete']

# Wide table: one column per stage, one gene ID per row. Stages with fewer
# genes are padded with NaN by pandas when the columns are aligned.
# ndmin=1 keeps a file holding a single gene as a 1-element array instead of
# the 0-d array np.loadtxt would otherwise return.
genes_by_stage = pd.DataFrame({
    stage: pd.Series(np.loadtxt('gene_sets_m3_final/' + stage + '.txt', dtype=str, ndmin=1))
    for stage in stages
})

# Long table: one (stage, gene) row per gene; dropna removes the padding rows,
# so the index keeps the gaps left by the removed rows.
gs = pd.melt(genes_by_stage, var_name='stage', value_name='gene').dropna()
gs

Unnamed: 0,stage,gene
0,sporozoite,PF3D7_1354700
1,sporozoite,PF3D7_1023800
2,sporozoite,PF3D7_1137800
3,sporozoite,PF3D7_0902900
4,sporozoite,PF3D7_0304700
...,...,...
295,ookinete,PF3D7_1449000
296,ookinete,PF3D7_0918700
297,ookinete,PF3D7_0102700
298,ookinete,PF3D7_1329700


In [None]:
base = 'final_output/dfe_alpha_results_' # insert path to DFE output (txt files)
tags = ['Fixation prob of deleterious mutation', 'adaptive_divergence', 'Neutral divergence', 'Selected divergence']

# Column order of the table returned by dfe().
_DFE_COLUMNS = ['country', 'stage', 'excluded_gene', 'Q', 'alpha', 'omega',
                'adaptive_divergence', 'neutral_divergence', 'selected_divergence']


def _parse_dfe_file(path):
    """Extract the numeric fields of one DFE results text file.

    Lines are matched by the substrings in the module-level ``tags`` list:

    * ``tags[0]``: Q is the number following ``'mutation '``.
    * ``tags[1]``: the line is split on single spaces and fields 1, 3 and 5
      are read as adaptive divergence, alpha and omega.
    * ``tags[2]`` / ``tags[3]``: the number following ``'divergence '`` is read
      as neutral / selected divergence.

    Returns
    -------
    dict
        Keys ``Q``, ``alpha``, ``omega``, ``adaptive_divergence``,
        ``neutral_divergence`` and ``selected_divergence``. A field whose tag
        line is absent from the file is NaN; if a tag occurs more than once,
        the last occurrence wins.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError, IndexError
        If a tagged line does not have the expected layout.
    """
    values = dict.fromkeys(_DFE_COLUMNS[3:], float('nan'))
    with open(path) as handle:
        for line in handle:
            line = line.rstrip()
            if tags[0] in line:
                values['Q'] = float(line.split('mutation ')[1])
            elif tags[1] in line:
                fields = line.split(' ')
                values['adaptive_divergence'] = float(fields[1])
                values['alpha'] = float(fields[3])
                values['omega'] = float(fields[5])
            elif tags[2] in line:
                values['neutral_divergence'] = float(line.split('divergence ')[1])
            elif tags[3] in line:
                values['selected_divergence'] = float(line.split('divergence ')[1])
    return values


def dfe(pop, gene_sets=None, base_path=None):
    """Collect the DFE results for one population into a DataFrame.

    For every (stage, gene) row of the gene-set table, the file
    ``<base_path><stage>_<pop>_exclude_<gene>.txt`` is parsed and contributes
    exactly one output row. Building the table row by row keeps every value
    attached to its own stage/gene even when a file lacks one of the tagged
    lines (that field is then NaN), and gives the numeric columns a float
    dtype rather than object.

    Parameters
    ----------
    pop : str
        Population label used in the file names and stored in ``country``.
    gene_sets : pandas.DataFrame, optional
        Table with ``stage`` and ``gene`` columns. Defaults to the notebook
        global ``gs``.
    base_path : str, optional
        Path prefix of the result files. Defaults to the notebook global
        ``base``.

    Returns
    -------
    pandas.DataFrame
        Columns ``country``, ``stage``, ``excluded_gene``, ``Q``, ``alpha``,
        ``omega``, ``adaptive_divergence``, ``neutral_divergence`` and
        ``selected_divergence``; one row per row of ``gene_sets``.

    Raises
    ------
    FileNotFoundError
        If a result file for any (stage, gene) pair is missing.
    """
    # Globals are only consulted when the caller does not override them.
    gene_sets = gs if gene_sets is None else gene_sets
    prefix = base if base_path is None else base_path

    records = []
    for stage, gene in zip(gene_sets['stage'], gene_sets['gene']):
        path = prefix + stage + '_' + pop + '_exclude_' + gene + '.txt'
        record = {'country': pop, 'stage': stage, 'excluded_gene': gene}
        record.update(_parse_dfe_file(path))
        records.append(record)

    # Passing columns= fixes the column order and keeps the schema for empty input.
    return pd.DataFrame(records, columns=_DFE_COLUMNS)

# Build the DFE results table for the 'ghana' population and display it.
a_ghana = dfe('ghana')
a_ghana
# The 'drc' and 'tanzania' runs and the combined table `a` are disabled here.
#a_drc = dfe('drc')
#a_tan = dfe('tanzania')
#a = pd.concat([a_drc, a_tan, a_ghana])

Unnamed: 0,country,stage,excluded_gene,Q,alpha,omega,adaptive_divergence,neutral_divergence,selected_divergence
0,ghana,sporozoite,PF3D7_1354700,0.000142,0.921249,0.331471,0.024751,0.074671,0.026867
1,ghana,sporozoite,PF3D7_1023800,0.000142,0.918123,0.319262,0.022702,0.071109,0.024727
2,ghana,sporozoite,PF3D7_1137800,0.000141,0.92225,0.333818,0.024369,0.073,0.026423
3,ghana,sporozoite,PF3D7_0902900,0.000141,0.921695,0.332516,0.02397,0.072087,0.026006
4,ghana,sporozoite,PF3D7_0304700,0.000142,0.921744,0.334037,0.02434,0.072865,0.026406
...,...,...,...,...,...,...,...,...,...
179,ghana,ookinete,PF3D7_1449000,0.000016,0.989798,0.307483,0.018367,0.059734,0.018557
180,ghana,ookinete,PF3D7_0918700,0.000016,0.989536,0.301338,0.018086,0.06002,0.018278
181,ghana,ookinete,PF3D7_0102700,0.000016,0.989703,0.305953,0.018311,0.05985,0.018502
182,ghana,ookinete,PF3D7_1329700,0.000016,0.990141,0.31934,0.018035,0.056476,0.018215


In [29]:
# Write the Ghana DFE table to CSV; the path is relative to the current
# working directory.
dfe_results_csv = "dfe_files/DFE_results_m3_final_reich_syn_no_demoexp_figs12.csv"
a_ghana.to_csv(dfe_results_csv)