In [1]:
# Append root directory to system's path
import sys
sys.path.append('../ARCH_package')

import distributions, filtering

In [2]:
import sys
sys.path.append('../stochastic_package')

import bd_model

In [3]:
import dill
import os
from tqdm import tqdm
from functools import partial
from multiprocessing import Pool

import plotly.graph_objects as go
import plotly.express as px
import plotly.figure_factory as ff

import numpy as np
import pandas as pd

colors = px.colors.DEFAULT_PLOTLY_COLORS

In [4]:
# Load path to Other plots
path = '../Results/LiFT/'
if not os.path.exists(path):
    os.makedirs(path)


# Artifact fit
## Creating time-series synthetic data through a binomial distribution

In [15]:
import plotly.graph_objects as go



In [19]:
np.random.seed(1)
# Artifact length and number of artifacts
n_points = 5
init_age = 70

# Artifact follows a binomial distribution
artifact_prob = -1
while artifact_prob < 0:
    artifact_prob = np.random.normal(loc=0.01, scale=0.001)

reads = np.random.normal(loc=1_500, scale=100, size=n_points).astype(int)

# Create artifacts
# Create 4 time points spaced 3 years appart
ages = np.arange(n_points)*3

# Given a total number of reads and a probability,
# randomly select a number of alternative reads using the binomial distribution
artifact_reads = np.random.binomial(reads, artifact_prob, size=n_points)
artifacts_AF = artifact_reads/reads
artifact_df = pd.DataFrame({'age':ages + init_age,
                            'AF':artifacts_AF, 
                            'DP': reads,
                            'AO': artifact_reads})

# Plot artifact
fig = go.Figure()
fig.add_trace(
    go.Scatter(x=artifact_df.age, y=artifact_df.AF))
fig.update_layout(title='Synthetic artifact',
                  xaxis_title='Age',
                  yaxis_title='AF')
fig.show()

fig.write_image(path + 'synthetic_artifact.svg')
print(f'Artifact probability: {artifact_prob}')

fig = go.Figure(data=[go.Table(header=dict(values=artifact_df.columns),
                 cells=dict(values=[artifact_df.age, round(artifact_df.AF,4), artifact_df.DP, artifact_df.AO]))
                     ])
fig.write_image(path + 'synthetic_artifact_dataframe.svg')
fig.show()

Artifact probability: 0.011624345363663241


## Maximum likelihood model comparison

In [20]:
model_list, optimal_model = filtering.model_comparison(artifact_df,
                                                      lbc=False)
print(f'Optimal model: {optimal_model.model_type}')

print(f'IC relative weights: {optimal_model.IC_weights}')

print(f'artifact nll: {round(model_list[0].nll,3)}, \n'
      f'bd_process nll: {round(model_list[1].nll,3)}')

Optimal model: binomial_artifact
IC relative weights: {'binomial_artifact': 0.02515028722299787, 'bd_process': 0.9748497127770022}
artifact nll: 11.843, 
bd_process nll: 15.5


In [7]:
optimal_model.plots()
if optimal_model.model_type == 'binomial_artifact':
    optimal_model.posterior_plot.show()
    optimal_model.prior_plot.show()
else:
    for conditional in optimal_model.conditional_dist:
        conditional.show()

AttributeError: 'trajectory_model' object has no attribute 'prior'

# Birth and death time series

In [21]:
np.random.seed(1)
# Create a birth and death process
init_size = 1
init_time = np.random.uniform(0, 50)
N_w_bd = np.random.randint(10_000, 50_000)
s_bd = 0.05
delta_t = 3


#ages = np.arange(0,time_points*delta_t, delta_t)
end_AO = 0 
while end_AO <10 :
    trajectory = bd_model.bd(t=init_time, 
                             init_size=init_size,
                             lamb=1.3 + s_bd,
                             tmax=82,
                             from_zero=True)
    
    end_size = trajectory[1,-1]
    if end_size == 0:
        next
    
    # Simulate VAF data using limited read depth
    ages = [70, 73, 76, 79, 82]
    # trajectory = trajectory[:, ages].astype('int')

    # Extract allele propotion
    clone_sizes = trajectory[1, ages]. astype('int')
    total_alleles = 2*(N_w_bd + clone_sizes)
    allele_proportion = clone_sizes / total_alleles

    # Simulate binomial draw
    DP = np.random.normal(loc=1_500, scale=100, size=len(ages)).astype(int)
    AO = np.random.binomial(n=DP, p = allele_proportion)

    # Create dataframe
    bd_df = pd.DataFrame({'age': ages,
                          'AF': AO/DP,
                          'DP': DP,
                          'AO': AO,
                          'clone_size': clone_sizes
                         })
    end_AO = bd_df.iloc[-1].AO
fig = px.line(bd_df, x='age', y='AF', title='Birth and death process').show()
print(f'Mutation acquisition: {round(init_time,2)}, \n'
      f'Fitness: {s_bd}, \n'
      f'Stem cells: {N_w_bd}')

fig.write_image(path + 'synthetic_artifact.svg')

fig = go.Figure(data=[go.Table(header=dict(values=artifact_df.columns),
                 cells=dict(values=[artifact_df.age, round(artifact_df.AF,4), artifact_df.DP, artifact_df.AO]))
                     ])
fig.write_image(path + 'synthetic_artifact_dataframe.svg')
fig.show()

Mutation acquisition: 20.85, 
Fitness: 0.05, 
Stem cells: 22172


AttributeError: 'NoneType' object has no attribute 'write_image'

## Maximum likelihood model comparison

In [22]:
model_list, optimal_model = filtering.model_comparison(bd_df,
                                                      lbc=False)
print(f'Optimal model: {optimal_model.model_type}')

print(f'IC relative weights: {optimal_model.IC_weights}')

print(f'artifact nll: {round(model_list[0].nll,3)}, \n'
      f'bd_process nll: {round(model_list[1].nll,3)}')

Optimal model: binomial_artifact
IC relative weights: {'binomial_artifact': 0.4814027369104351, 'bd_process': 0.5185972630895649}
artifact nll: 14.401, 
bd_process nll: 14.475


In [25]:
optimal_model.plots()

if optimal_model.model_type == 'binomial_artifact':
    optimal_model.posterior_plot.show()
    optimal_model.prior_plot.show()
else:
    for conditional in optimal_model.posterior_dist:
        conditional.show()
    optimal_model.heatmap

AttributeError: 'trajectory_model' object has no attribute 'prior'

# LBC data

In [67]:
# Import non-synonymoous trajectories as exported through with basic.load module
with open('../Exports/LBC_non-synonymous.dill', 'rb') as infile:
    cohort = dill.load(infile)
    
# Find private mutations
mutation_list = []
for part in cohort:
    for traj in part.trajectories:
        mutation_list.append(traj.mutation)

# Count mutations in mutation_list
from collections import Counter
mutation_counter = Counter(mutation_list)

In [68]:
x=[]
y=[]
trajectories = []

for part in cohort:
    outlier_trajectories = []
    for traj in part.trajectories:
        if traj.AF_outlier_pvalue is not None:
            if max(traj.data.AF)<0.55:
                outlier_trajectories.append(traj)
                trajectories.append(traj)
                x.append(traj.AF_outlier_pvalue)
                y.append(traj.data.AF.mean())   
    part.trajectories = outlier_trajectories
fig = go.Figure()
fig.add_trace(
    go.Scatter(x=x, y=y, mode='markers'))

fig.update_layout(xaxis_title='AF_Outlier_pvalue',
                  yaxis_title='mean AF')
fig.write_image(path + 'representation')

In [69]:
from functools import partial
model_list = [filtering.artifact_binom_init,
          filtering.bd_fitness_init]
models = [partial(model, mp=False) for model in model_list] 

model_comparison = partial(filtering.model_comparison, models=models)
with Pool(8) as p:
    trajectories = list(tqdm(p.imap(model_comparison,
                                       trajectories), total=len(trajectories)))

  0%|                                                   | 0/100 [00:03<?, ?it/s]Process ForkPoolWorker-111:
Process ForkPoolWorker-108:
Process ForkPoolWorker-112:

overflow encountered in exp

Process ForkPoolWorker-107:
Process ForkPoolWorker-110:
Process ForkPoolWorker-106:
Traceback (most recent call last):
  File "/home/elatorre/anaconda3/envs/ARCH/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
Process ForkPoolWorker-105:
Traceback (most recent call last):
Process ForkPoolWorker-109:
  File "/home/elatorre/anaconda3/envs/ARCH/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/home/elatorre/anaconda3/envs/ARCH/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
Traceback (most recent call last):
  File "/home/elatorre/anaconda3/envs/ARCH/lib/python3.7/multiprocessing/pool.py", line 121, in worker
    result = (True, func(*args, **kwds))
Traceback (most rec

  File "../ARCH_package/filtering.py", line 519, in artifact_binom_nll
    nll = -binom.logpmf(trajectory.iloc[1: , :].AO, trajectory.iloc[1: , :].DP, p=p).sum()
  File "/home/elatorre/anaconda3/envs/ARCH/lib/python3.7/site-packages/scipy/optimize/optimize.py", line 464, in function_wrapper
    return function(np.copy(x), *(wrapper_args + args))
  File "../ARCH_package/filtering.py", line 1795, in <listcomp>
    model_list = [model(trajectory=trajectory.data) for model in models]
    file.write(text)
  File "../ARCH_package/filtering.py", line 1098, in AO_prob_value
    sequencing_prob = binom.pmf(AO, DP, p=allele_proportion_range)
  File "../ARCH_package/filtering.py", line 1584, in bd_marginal_N_nll
    N_w=N_w)
  File "/home/elatorre/anaconda3/envs/ARCH/lib/python3.7/site-packages/scipy/stats/_distn_infrastructure.py", line 3175, in logpmf
    k, loc = map(asarray, (k, loc))
  File "/home/elatorre/anaconda3/envs/ARCH/lib/python3.7/site-packages/scipy/stats/_distn_infrastructure.py",

  File "../ARCH_package/filtering.py", line 1562, in bd_conditional_N_nll
    N_w=N_w)
  File "../ARCH_package/filtering.py", line 1079, in AO_prob_value
    random_state=123)
  File "/home/elatorre/anaconda3/envs/ARCH/lib/python3.7/site-packages/pandas/core/internals/blocks.py", line 1942, in new_block
    values = maybe_coerce_values(values)
  File "/home/elatorre/anaconda3/envs/ARCH/lib/python3.7/site-packages/scipy/stats/_distn_infrastructure.py", line 3117, in rvs
    return super().rvs(*args, **kwargs)
  File "/home/elatorre/anaconda3/envs/ARCH/lib/python3.7/site-packages/pandas/core/internals/blocks.py", line 1878, in maybe_coerce_values
    values = ensure_wrapped_if_datetimelike(values)
  File "/home/elatorre/anaconda3/envs/ARCH/lib/python3.7/site-packages/scipy/stats/_distn_infrastructure.py", line 920, in _argcheck_rvs
    size_ = tuple(np.atleast_1d(size))
  File "/home/elatorre/anaconda3/envs/ARCH/lib/python3.7/site-packages/scipy/stats/_distn_infrastructure.py", line 1064

KeyboardInterrupt: 

In [None]:
color_dict = {'binomial_artifact': 0, 'bd_process':1}
opacity_dict = {'binomial_artifact': 0.3, 'bd_process':1}

fig = go.Figure()
for traj in trajectories:
    if traj.model.IC_weights['bd_process'] > 0.5:
        traj.model.model_type='binomial_artifact'
    else:
        traj.model.model_type='bd_process'

    fig.add_trace(
        go.Scatter(x=traj.data.age, y=traj.data.AF,
                   opacity=opacity_dict[traj.model.model_type],
                   legendgroup=traj.model.model_type,
                   legendgrouptitle_text=traj.model.model_type,
                   name=traj.mutation,
                   hovertext=(f'{traj.model.model_type} <br>'
                              f' {traj.AF_outlier_pvalue} <br>'),
                   marker_color=colors[color_dict[traj.model.model_type]]))
    
fig.update_layout(title='LiFT classification',
                  xaxis_title='Age',
                  yaxis_title='VAF')

fig.show()

In [None]:
fig = go.Figure()
outlier = []
AF = []
bd_prob = []

df = pd.DataFrame(columns=['AF', 'outlier_p_value', 'artifact_prob'])
for traj in trajectories:
    df = df.append({'AF': traj.data.AF.max(),
                    'outlier_p_value': traj.AF_outlier_pvalue,
                    'artifact_prob': traj.model.IC_weights['bd_process'],
                    'mutation': traj.mutation,
                    'prediction': traj.model.model_type},
                  ignore_index=True)
fig = px.scatter(df, x='outlier_p_value', y='AF',
                 color='artifact_prob', hover_name='mutation',
                 hover_data=['prediction'])
fig.show()
fig.write_image(path + 'Model comparison filter.png', scale=10)

In [None]:
mutation_set = set()
for traj in trajectories:
    mutation_set.add(traj.mutation)
    
mutation_dict = dict()
for mutation in mutation_set:
    artifact_prob = []
    for traj in trajectories:
        if traj.mutation == mutation:
            artifact_prob.append(traj.model.IC_weights['bd_process'])

    mutation_dict[mutation] = artifact_prob
    
for key in mutation_dict.keys():
    mutation_dict[key]= np.mean(mutation_dict[key])
    
mutation_dict = dict(sorted(mutation_dict.items() , reverse=True, key=lambda x: x[1]))
mutation_dict

for traj in trajectories:
    traj.artifact_prob = mutation_dict[traj.mutation]
    if traj.artifact_prob < 0.5:
        traj.model.model_type = 'bd_process'
    else:
        traj.model.model_type='binomial_artifact'

In [None]:
fig = go.Figure()
outlier = []
AF = []
bd_prob = []

df = pd.DataFrame(columns=['AF', 'outlier_p_value', 'artifact_prob'])
for traj in trajectories:
    df = df.append({'AF': traj.data.AF.max(),
                    'outlier_p_value': traj.AF_outlier_pvalue,
                    'artifact_prob': traj.artifact_prob,
                    'mutation': traj.mutation,
                    'prediction': traj.model.model_type},
                  ignore_index=True)
fig = px.scatter(df, x='outlier_p_value', y='AF',
                 color='artifact_prob', hover_name='mutation',
                 hover_data=['prediction'])
fig

In [None]:
color_dict = {'binomial_artifact': 0, 'bd_process':1}
opacity_dict = {'binomial_artifact': 0.3, 'bd_process':1,}

fig = go.Figure()
for traj in trajectories:
    if traj.model.model_type == 'binomial_artifact':
        fig.add_trace(
            go.Scatter(x=traj.data.age, y=traj.data.AF,
                       opacity=opacity_dict[traj.model.model_type],
                       legendgroup=traj.model.model_type,
                       legendgrouptitle_text=traj.model.model_type,
                       showlegend=False,
                       name=traj.mutation,
                       hovertext=(f'{traj.model.model_type} <br>'
                                  f' {traj.AF_outlier_pvalue} <br>'),
                       marker_color=colors[color_dict[traj.model.model_type]]))
        
for traj in trajectories:
    if traj.model.model_type == 'bd_process':
        fig.add_trace(
            go.Scatter(x=traj.data.age, y=traj.data.AF,
                       opacity=opacity_dict[traj.model.model_type],
                       legendgroup=traj.model.model_type,
                       legendgrouptitle_text=traj.model.model_type,
                       name=traj.mutation,
                       hovertext=(f'{traj.model.model_type} <br>'
                                  f' {traj.AF_outlier_pvalue} <br>'),
                       marker_color=colors[color_dict[traj.model.model_type]]))
    
fig.update_layout(title='LiFT classification',
                  xaxis_title='Age',
                  yaxis_title='VAF')

fig.show()

# Export results

In [None]:
# Export cohort
with open('../Exports/model_comparison_outliers.dill', 'wb') as outfile:
    dill.dump(trajectories, outfile)

# Import results

In [None]:
# # Export cohort
# with open('../Exports/model_comparison.dill', 'rb') as infile:
#     trajectories = dill.load(infile)