# Single fitness effect model

In [1]:
import evolutionary_functions as ev

import numpy as np
import matplotlib.pyplot as plt
import scipy.special
import scipy.integrate as it
from scipy.stats import kde
import pandas as pd
from scipy.special import gamma
from IPython.display import clear_output
import scipy.optimize as opt
import os
import datetime

# plotting defaults
import matplotlib
# reset to the rc-file defaults first so styling changed earlier in this kernel does not carry over
matplotlib.rc_file_defaults()
matplotlib.rcParams.update({
    'pdf.fonttype': 42,       # embed fonts as TrueType (Type 42) in exported PDFs
    'font.family': 'Lato',    # NOTE: Lato must be installed for matplotlib to find it
})


## Import anonymised data

In [2]:
# number of quantiles for plotting: choose 3 or 4
number_of_quantiles = 4

# import age distribution and quantile labels
# The label column for the chosen number of quantiles ('quantile_labels_<n>') is
# exposed under the generic name 'quantile_labels', so the rest of the notebook
# does not depend on which scheme was picked.
age_column = 'Age.when.attended.assessment.centre_v0'
chosen_label_column = f'quantile_labels_{number_of_quantiles}'

biobank_bins_raw = pd.read_csv('inputs_participants/biobank_bins_quantiles.tsv', sep='\t')
biobank_bins_renamed = biobank_bins_raw.rename(columns={chosen_label_column: 'quantile_labels'})
biobank_bins_quantiles = biobank_bins_renamed[[age_column, 'age_count', 'quantile_labels']]

# cohort size = sum of the per-age counts
number_of_individuals = biobank_bins_quantiles['age_count'].sum()

In [3]:
ukb_variants = pd.read_csv('inputs_variants/ukb_healthy_curated_top20_variants_trimmed.tsv', sep='\t', low_memory=False)

# attach age quantile data
# If the variants file already carries a 'quantile_labels' column, the merge below
# would produce 'quantile_labels_x' / 'quantile_labels_y' instead of a single
# 'quantile_labels' column. Drop any such column first; errors='ignore' makes this
# a no-op when the column is absent, so both versions of the input file work.
age_column = 'Age.when.attended.assessment.centre_v0'
variants_without_labels = ukb_variants.drop(columns=['quantile_labels'], errors='ignore')
ukb_variants_quantiles = pd.merge(
    variants_without_labels,
    biobank_bins_quantiles[[age_column, 'quantile_labels']],
    on=age_column,
    how='left',  # keep every variant row, even if its age has no quantile label
)

# create useful dictionary of variant dataframes
# keys follow the order in which each varID first appears in the table;
# every per-variant frame gets a fresh 0..n-1 index
ukb_variants_dict = {
    varid: ukb_variants_quantiles[ukb_variants_quantiles['varID'] == varid].reset_index(drop=True)
    for varid in ukb_variants_quantiles.varID.unique()
}


In [10]:
# quick check: smallest VAF recorded for DNMT3A Y735C
is_dnmt3a_y735c = ukb_variants_quantiles['varID'] == 'DNMT3A Y735C'
ukb_variants_quantiles.loc[is_dnmt3a_y735c, 'VAF'].min()

0.03125

### Results: least-squares optimiser

Import results of least-squares optimiser

In [4]:
# Tab-separated optimiser output; later cells select rows by 'varID' and read
# the 'mu' and 's' columns.
filename = 'results_files/lsq_optimisation_top20_trimmed.tsv'
opt_results = pd.read_table(filename)  # read_table defaults to sep='\t'

Calculate the predicted density for each variant from its optimised parameters (takes 10-20 minutes in total).

In [5]:
N = 100000

# The predictions are slow, so results are kept across re-runs of this cell:
# after an interruption, re-running the cell continues with the variants that are
# still missing instead of starting again. (Re-creating the dictionaries on every
# run, as before, meant the "already computed" check could never skip anything.)
# On a fresh kernel all three dictionaries start empty.
if 'naive_overall_dict' not in globals():
    naive_overall_dict = {}
if 'naive_quantiles_dict' not in globals():
    naive_quantiles_dict = {}
if 'naive_params_dict' not in globals():
    # parameters each cached prediction was computed with, keyed by variant
    naive_params_dict = {}

for variant_name, variant_quantiles in ukb_variants_dict.items():
    opt_row = opt_results[opt_results.varID == variant_name]
    if len(opt_row) != 1:
        # .squeeze() only yields scalars for exactly one matching row
        raise ValueError(
            f'expected exactly one optimiser result for {variant_name!r}, found {len(opt_row)}'
        )
    mu, s = [opt_row['mu'].squeeze(), opt_row['s'].squeeze()]

    # Reuse a cached prediction only if it was computed with the same parameters.
    # NOTE: changes to the input data itself are not detected; restart the kernel
    # (or clear the dictionaries) to force a full recomputation.
    params = (mu, s, N, number_of_quantiles)
    already_computed = (
        variant_name in naive_overall_dict
        and variant_name in naive_quantiles_dict
        and naive_params_dict.get(variant_name) == params
    )
    if already_computed:
        continue

    # progress display: show only the variant currently being processed
    clear_output(wait=True)
    print(variant_name)

    overall_prediction, quantile_prediction = ev.single_fitness_predictions_calc(
        mu, s, N, variant_quantiles, biobank_bins_quantiles, number_of_quantiles)
    naive_overall_dict[variant_name] = overall_prediction
    naive_quantiles_dict[variant_name] = quantile_prediction
    naive_params_dict[variant_name] = params

DNMT3A R882H


KeyboardInterrupt: 

In [None]:
# Overall density figure: one panel per variant, annotated with its optimised mu and s.
n_cols = 5
n_variants = len(ukb_variants_dict)
# Size the grid from the number of variants (ceiling division) instead of assuming
# exactly 20: more variants no longer run past the last panel, and with 20 variants
# this is the same 4 x 5 grid on a 20 x 15 inch figure as before.
n_rows = max(1, -(-n_variants // n_cols))
fig, ax = plt.subplots(n_rows, n_cols, figsize=[20, 3.75 * n_rows], squeeze=False)
ax = ax.ravel()

for i, (varid, variant_quantiles) in enumerate(ukb_variants_dict.items()):
    opt_row = opt_results[opt_results.varID == varid]
    mu, s = [opt_row['mu'].squeeze(), opt_row['s'].squeeze()]
    s_pc = s*100  # s expressed as a percentage for the annotation
    # draws onto ax[i] using the predictions computed for this variant
    ev.overall_density_plot(ax[i], variant_quantiles, mu, naive_overall_dict[varid], number_of_individuals,
                         colour='k')
    # annotation is positioned in data coordinates (VAF = 0.4, density = 1e5)
    ax[i].text(0.4, 1e5, fr'$\mu$ = {mu:.2g}'+'\n'+f's = {s_pc:.3g}%')
    ax[i].set_title(varid, fontsize=15, loc='left')
    ax[i].set_xlabel('VAF')
    ax[i].set_ylabel('reverse cumulative density')
    ax[i].set_yscale('log')
    ax[i].set_ylim(1e2, 1e6)
    ax[i].set_xlim(0, 0.6)

# hide grid panels that have no variant to show
for unused_panel in ax[n_variants:]:
    unused_panel.set_visible(False)

plt.tight_layout()
plt.show()


In [None]:
# Per-quantile density figure: one panel per variant.
n_cols = 5
n_variants = len(ukb_variants_dict)
# Size the grid from the number of variants (ceiling division) instead of assuming
# exactly 20: more variants no longer run past the last panel, and with 20 variants
# this is the same 4 x 5 grid on a 20 x 15 inch figure as before.
n_rows = max(1, -(-n_variants // n_cols))
fig, ax = plt.subplots(n_rows, n_cols, figsize=[20, 3.75 * n_rows], squeeze=False)
ax = ax.ravel()

# colours handed to ev.quantile_density_plot (four entries, enough for 3 or 4 quantiles)
quantile_colour_list = ['tab:blue', '#e39b00', 'tab:green', 'C3']

for i, (varid, variant_quantiles) in enumerate(ukb_variants_dict.items()):
    opt_row = opt_results[opt_results.varID == varid]
    mu = opt_row['mu'].squeeze()  # only mu is passed to the plotting helper here
    ev.quantile_density_plot(ax[i], variant_quantiles, mu, naive_quantiles_dict[varid], number_of_quantiles, quantile_colour_list, biobank_bins_quantiles)
    ax[i].set_title(varid, fontsize=15, loc='left')
    ax[i].set_xlabel('VAF')
    ax[i].set_ylabel('reverse cumulative density')
    ax[i].set_yscale('log')
    ax[i].set_ylim(1e2, 1e6)
    ax[i].set_xlim(0, 0.6)

# hide grid panels that have no variant to show
for unused_panel in ax[n_variants:]:
    unused_panel.set_visible(False)

plt.tight_layout()
plt.show()


#