In [2]:
import sys
sys.path.append("../ARCH_package")

import basic

import numpy as np
import pandas as pd
import dill
import os

import plotly.graph_objects as go

In [3]:
# Create path for exporting
# Every figure in this notebook is written to `path + <file name>`, so the
# trailing slash is required.
path = '../Results/Clinical/'
# exist_ok=True keeps the cell safe to re-run and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(path, exist_ok=True)

# Clinical management
## Next appointment

In [3]:
# Create functions and variables

from tqdm import tqdm
# Grid of starting VAF values scanned in the "next appointment" analysis.
vaf_range = np.linspace(0.001, 0.02, num=1000)
# Time grid (plotted as years); the first sample, t = 0, is dropped.
time_range = np.linspace(0, 5, num=10_000)[1:]

def fitness_evolution_low_std(t, fitness = 0.1, vaf = 0.02):
    """Return the expected VAF at time ``t`` minus two standard deviations.

    The expected VAF is ``vaf * exp(fitness * t)`` and the variance is
    ``vaf * (2*1.3 + fitness) * exp(fitness*t) * (exp(fitness*t) - 1)
    / (fitness * 2 * 100_000)``.

    Parameters
    ----------
    t : float or numpy array
        Elapsed time (plotted in years elsewhere in this notebook).
    fitness : float or numpy array
        Growth rate in the exponent. Must be non-zero because the variance
        divides by it.
    vaf : float
        Starting variant allele frequency.

    Returns
    -------
    float or numpy array
        ``mean - 2 * sqrt(variance)``, broadcast over ``t`` and ``fitness``.
    """
    # Exponential growth factor shared by the mean and the variance.
    growth = np.exp(fitness*t)

    expected_vaf = vaf*growth
    # NOTE(review): 1.3 and 100_000 are hard-coded model constants
    # (presumably a rate and a population size) -- TODO confirm and name them.
    variance = vaf*(2*1.3+fitness)*growth*(growth-1)/(fitness*2*100_000)

    lower_bound = expected_vaf - 2*np.sqrt(variance)
    return lower_bound

def sequencing_high_std(vaf = 0.02, resolution = 500):
    """Return the measured VAF plus two standard deviations of sampling noise.

    The variance is that of a sampled proportion,
    ``vaf * (1 - vaf) / resolution``. The upper bound adds twice its square
    root, mirroring the ``mean - 2*sqrt(var)`` form used by
    ``fitness_evolution_low_std``. The previous version added twice the
    variance itself, which understated the bound.

    Parameters
    ----------
    vaf : float or numpy array
        Observed variant allele frequency.
    resolution : float
        Divisor of the variance -- presumably the sequencing depth;
        TODO confirm.

    Returns
    -------
    float or numpy array
        ``vaf + 2 * sqrt(vaf * (1 - vaf) / resolution)``.
    """
    mean_term = vaf
    var_term = vaf*(1-vaf)/resolution

    # Two standard deviations, not two variances.
    return mean_term + 2*np.sqrt(var_term)


In [4]:
# For each fitness value, plot the earliest time at which the lower
# two-standard-deviation bound of the VAF trajectory rises above the
# starting VAF, as a function of that starting VAF.
fig = go.Figure()
fitness_labels = ['5%', '10%', '15%', '20%']
fitness_range = [0.05, 0.1, 0.15, 0.2]

for fitness, fitness_label in zip(fitness_range, fitness_labels):
    time_to_next_observation = []
    allowed_vaf = []
    for vaf in vaf_range:
        fluctuations = fitness_evolution_low_std(time_range,
                                                 fitness=fitness,
                                                 vaf=vaf)
        # NOTE: the threshold is the starting VAF itself; sequencing noise
        # (see sequencing_high_std) is not accounted for here.
        difference = fluctuations - vaf

        # Vectorised search for the samples where the bound exceeds the
        # starting VAF (replaces a Python-level scan of ~10k samples).
        index = np.flatnonzero(difference > 0)
        if index.size:
            time_to_next_observation.append(time_range[index[0]])
            allowed_vaf.append(vaf)
        # VAFs whose bound never exceeds the start within time_range are
        # left out of the curve.
    fig.add_trace(
            go.Scatter(x=time_to_next_observation,
                        y=allowed_vaf,
                        name= fitness_label))

fig.update_layout(title='Time to next clinical observation',
                  xaxis_range=[0, 5],
                  xaxis_title='Time in years',
                  yaxis_title='VAF',
                  margin=dict(pad=4))

fig.show()
fig.write_image(path + 'next_observation.png', scale=10)
fig.write_image(path + 'next_observation.svg')

## Minimum fitness

In [7]:
# For each starting VAF, plot the smallest fitness whose lower
# two-standard-deviation bound exceeds the starting VAF after time t.
#
# NOTE: this cell rebinds `time_range`, `vaf_range` and `fitness_range`,
# which the "Next appointment" cells also use. Re-run that section's setup
# cell before re-running its plot.
time_range = np.linspace(0.1, 5, 100)
vaf_range = [0.01, 0.05, 0.1 ]
# Candidate fitness values in increasing order; built once because it does
# not depend on the loop variables.
fitness_range = np.linspace(0.01,0.5, 10_000)

fig = go.Figure()

for vaf in vaf_range:
    min_fitness = []
    for t in time_range:
        fluctuations = fitness_evolution_low_std(t,
                                                fitness=fitness_range,
                                                vaf=vaf)
        difference = fluctuations - vaf
        index = np.flatnonzero(difference > 0)

        # If no candidate fitness qualifies, record NaN (drawn as a gap)
        # instead of failing with IndexError on index[0].
        min_fitness.append(fitness_range[index[0]] if index.size else np.nan)

    fig.add_trace(
        go.Scatter(x=time_range, y=min_fitness, name=str(vaf)))

fig.update_layout(title='Minimum detectable fitness',
                  xaxis_title='Time in years',
                  yaxis_title='Fitness',
                  legend=dict(title='VAF of mutation'),
                  margin=dict(pad=4))
fig.show()
fig.write_image(path + 'minimum_detectable fitness.png', scale=10)
fig.write_image(path + 'minimum_detectable fitness.svg' )

# 2 Data points

Comparison of the inferred fitness distributions for trajectories with exactly 2 observations versus trajectories with more than 2 observations.

In [1]:
# Import non-synonymous mutations as exported in LiFT.py
# Needs the import cell at the top of the notebook to have run first: the
# recorded output below is the NameError raised when it had not.
# NOTE: dill, like pickle, can execute arbitrary code while loading; only
# open files produced by a trusted pipeline.
lbc_file = '../Exports/LBC_non-synonymous_LiFT_fitted.dill'
with open(lbc_file, 'rb') as infile:
    lbc = dill.load(infile)

NameError: name 'dill' is not defined

In [4]:
# Per-trajectory summary table (gene, fitness, fitness quantile width,
# observation-count label); populated from `lbc` later in this cell.
df = pd.DataFrame()

def observations_label (obs):
    """Bucket a trajectory by its number of observations.

    Returns the string ``"<2"`` when ``obs`` is below 3 and ``">2"``
    otherwise. Despite its spelling, the ``"<2"`` bucket therefore contains
    trajectories with exactly 2 observations; the plot legend names it '2'.
    These strings are used as lookup keys by the plotting code, so they must
    not be changed here alone.
    """
    return "<2" if obs < 3 else ">2"

# Collect one record per trajectory and build the frame in a single call.
# DataFrame.append was removed in pandas 2.0, and growing a frame row by row
# copies it on every iteration.
records = [
    {"gene": traj.gene,
     "fitness": traj.fitness,
     # Width of the interval between the two stored fitness quantiles.
     "fitness_quantile_length": traj.fitness_quantiles[1] - traj.fitness_quantiles[0],
     "observations": observations_label(len(traj.data))}
    for part in lbc
    for traj in part.trajectories
]
# Explicit columns keep the schema intact even when there are no trajectories.
df = pd.DataFrame(records,
                  columns=["gene", "fitness", "fitness_quantile_length", "observations"])

# Fitness distribution plot
#
# Split violins of inferred fitness: the left half holds trajectories
# labelled "<2" (legend name '2'), the right half those labelled ">2".
# One violin is drawn for DNMT3A, one for TET2 and one for all genes pooled.

# Styling per observation group; keys are the labels stored in
# df["observations"].
violin_style = {
    "<2": dict(name='2', side='negative', line_color='lightseagreen'),
    ">2": dict(name='>2', side='positive', line_color='mediumpurple'),
}

# (x-axis category, rows to plot, point offset for "<2", point offset for
# ">2", show legend entry). Only the first category contributes legend
# entries, so each group is listed once.
categories = [
    ('DNMT3A', df[df.gene == 'DNMT3A'], -0.5, 0.5, True),
    ('TET2', df[df.gene == 'TET2'], -0.5, 0.4, False),
    ('All genes', df, -1.2, 0.9, False),
]

fig = go.Figure()

for category, rows, pos_two, pos_more, showlegend in categories:
    for label, pointpos in (("<2", pos_two), (">2", pos_more)):
        fitness = rows.loc[rows["observations"] == label, "fitness"]
        fig.add_trace(
            go.Violin(
                # One x label per plotted point. The previous code used the
                # length of the boolean mask (every row) for 'All genes',
                # so x and y lengths disagreed.
                x=[category] * len(fitness),
                y=fitness,
                legendgroup=label, scalegroup=label,
                pointpos=pointpos,  # where to position points
                showlegend=showlegend,
                **violin_style[label]))

# update characteristics shared by all traces
fig.update_traces(meanline_visible=True,
                  points='all', # show all points
                  jitter=0.05,  # add some jitter on points for better visibility
                  scalemode='count') #scale violin plot area with total count

fig.update_layout(
    title_text="Fitness distribution by observations",
    yaxis_title='Fitness',
    yaxis_range=[-0.02,0.8],
    violingap=0, violingroupgap=0.2, violinmode='overlay',
    legend=dict(title='Number of observations'),
    margin=dict(pad=4))

fig.write_image(path + "Fitness_distribution_observations.png", scale=10)
fig.write_image(path + "Fitness_distribution_observations.svg")

fig

NameError: name 'lbc' is not defined