# BDSky Serial's Output Figures & Stats

**A note on Birth Death Skyline Models**
The results in this notebook are from a phylodynamics pipeline using Birth Death Skyline Models. Reading material on Birth Death Skyline Models can be found at:
* [Taming the BEAST Tutorial: Skylineplots](https://taming-the-beast.org/tutorials/Skyline-plots/) 
* [Stadler et al. 2012 PNAS](https://www.pnas.org/doi/full/10.1073/pnas.1207965110)


In [None]:
# Notebook parameters.
# save_dir: directory holding the merged trace (merged.log or merged_log.csv). When left as None,
#           a later cell replaces it with the current working directory.
# metadata_path: metadata table read with pandas; must end in '.tsv' or '.csv' and contain a
#                'date' column.
# NOTE(review): presumably these are overridden by the pipeline when the notebook is executed — confirm.
save_dir = None
metadata_path = '../../../example_data/COVID-19_BA.2.86/metadata.tsv'

In [None]:
from beast_pype.outputs import (read_log_file, plot_skyline, plot_hist_kde, plot_origin_or_tmrca, hdi_columns_starting_with)
from beast_pype.date_utilities import date_to_decimal
import os
import pandas as pd

In [None]:
# Fall back to the current working directory when no directory was supplied.
save_dir = os.getcwd() if save_dir is None else save_dir

Date pipeline was run:

In [None]:
# Show the leading '_'-separated token of the run directory's name.
# NOTE(review): presumably the directory name starts with the run date — confirm the naming scheme.
# os.path is used instead of splitting on '/' so that Windows-style paths and a trailing
# separator on save_dir still resolve to the final directory name.
run_dir_name = os.path.basename(os.path.normpath(save_dir))
display(run_dir_name.split('_')[0])

In [None]:
# Load Trace:
# Prefer merged.log; fall back to merged_log.csv. Fail early with a clear message when
# neither file exists rather than letting the reader fail on a missing path.
log_file = f'{save_dir}/merged.log'
csv_file = f'{save_dir}/merged_log.csv'
if os.path.isfile(log_file):
    trace_file = log_file
elif os.path.isfile(csv_file):
    trace_file = csv_file
else:
    raise FileNotFoundError(
        f'No trace file found in {save_dir}: expected either {log_file} or {csv_file}.')
trace_df = read_log_file(trace_file)

In [None]:
# Choose the delimiter from the file extension. The comparison is case-insensitive so that
# e.g. 'metadata.TSV' is accepted as well as 'metadata.tsv'.
metadata_path_lower = metadata_path.lower()
if metadata_path_lower.endswith('.tsv'):
    delimiter = '\t'
elif metadata_path_lower.endswith('.csv'):
    delimiter = ','
else:
    raise TypeError(
        f"metadata_path must be a csv or tsv file, ending with the apporpraite file extension. Value given is {metadata_path}")
metadata_df = pd.read_csv(metadata_path, parse_dates=['date'], sep=delimiter)

# The latest sampling date is the reference point later cells subtract trace values from.
youngest_tip_date = metadata_df['date'].max()
if pd.isna(youngest_tip_date):
    # max() skips missing values, so this only happens when no usable date exists at all.
    raise ValueError(f"No usable values found in the 'date' column of {metadata_path}.")
youngest_tip_year_decimal = date_to_decimal(youngest_tip_date)

## Infection Period 

BD Skyline models estimate the rate of becoming uninfectious (whose inverse is the average infection period). 

In [None]:
# Derive the infection period from the become-uninfectious rate and plot it.
# The logged rate is divided by 365 to express it per day (i.e. the trace is treated as per year);
# the infection period is the inverse of that daily rate, so it is measured in days.
# Column names keep their original spelling for compatibility; only the axis labels say "days".
uninfectious_cols = [col for col in trace_df.columns if col.startswith('becomeUninfectiousRate_BDSKY_Serial')]
if len(uninfectious_cols) > 1:
    # Several columns: one derived column per logged column, then a skyline plot over them.
    for col in uninfectious_cols:
        part = col.split('.')[1]  # suffix after the '.', reused to name the derived columns
        trace_df[f'Rate of Becoming Uninfectious (per day).{part}'] = trace_df[col] / 365
        trace_df[f'Infection period (per day).{part}'] = 1 / trace_df[f'Rate of Becoming Uninfectious (per day).{part}']
    infectious_fig, ax = plot_skyline(trace_df,
                                      youngest_tip_year_decimal,
                                      parameter_start='Infection period (per day)',
                                      ylabel='Infection period (days)',
                                      grid_size=100,
                                      include_grid=True)
    infectious_hdi = hdi_columns_starting_with(trace_df, 'Infection period (per day)')
elif len(uninfectious_cols) == 1:
    # Single column: read the column that was actually matched, which may carry a suffix,
    # instead of assuming it is named exactly like the prefix.
    rate_col = uninfectious_cols[0]
    trace_df['Rate of Becoming Uninfectious (per day)'] = trace_df[rate_col] / 365
    trace_df['Infection period (per day)'] = 1 / trace_df['Rate of Becoming Uninfectious (per day)']
    infectious_fig, ax, infectious_hdi = plot_hist_kde(trace_df=trace_df,
                                                       parameter='Infection period (per day)',
                                                       hdi_prob=0.95,
                                                       xlabel='Infection period (days)')
else:
    raise ValueError('Trace file does not contain any columns starting with "becomeUninfectiousRate_BDSKY_Serial".')
display(infectious_hdi)

# Sampling Proportion

In [None]:
def outputs_for_possible_skyline(trace_df, starting_with, axis_label, grid_size=100, include_grid=True):
    """Plot a parameter that may be logged as one column or as several columns.

    Columns of ``trace_df`` whose names start with ``starting_with`` are collected.
    With more than one match a skyline plot is drawn via ``plot_skyline`` and the HDIs come
    from ``hdi_columns_starting_with``. With exactly one match that column is plotted via
    ``plot_hist_kde`` with ``hdi_prob=0.95``.

    The skyline branch reads the notebook-level variable ``youngest_tip_year_decimal``.

    Parameters
    ----------
    trace_df : object exposing ``columns`` (the loaded trace)
        Trace whose column names are searched.
    starting_with : str
        Prefix identifying the parameter's column(s).
    axis_label : str
        Label passed as ``ylabel`` (skyline) or ``xlabel`` (single column).
    grid_size : int, default 100
        Forwarded to ``plot_skyline``.
    include_grid : bool, default True
        Forwarded to ``plot_skyline``.

    Returns
    -------
    tuple
        ``(fig, ax, hdi)`` as produced by the plotting helpers.

    Raises
    ------
    ValueError
        If no column name starts with ``starting_with``.
    """
    cols_starting_with = [col for col in trace_df.columns if col.startswith(starting_with)]
    if not cols_starting_with:
        raise ValueError(f'Trace file does not contain any columns starting with "{starting_with}".')

    if len(cols_starting_with) > 1:
        fig, ax = plot_skyline(trace_df,
                               youngest_tip_year_decimal,
                               parameter_start=starting_with,
                               ylabel=axis_label,
                               grid_size=grid_size,
                               include_grid=include_grid)
        hdi = hdi_columns_starting_with(trace_df, starting_with)
    else:
        # Pass the matched column name, not the prefix: the lone column may carry a suffix,
        # in which case the prefix itself is not a column of trace_df.
        fig, ax, hdi = plot_hist_kde(trace_df=trace_df,
                                     parameter=cols_starting_with[0],
                                     hdi_prob=0.95,
                                     xlabel=axis_label)
    return fig, ax, hdi

# Sampling proportion: figure plus HDI summary for the matching trace column(s).
sampling_fig, ax, sampling_hdi = outputs_for_possible_skyline(
    trace_df=trace_df,
    starting_with='samplingProportion_BDSKY_Serial',
    axis_label='Sampling Proportion (p)',
)
display(sampling_hdi)

# Origin

The origin is the time at which the index case (the first case) became infected, which is slightly earlier than the time-to-the-most-recent-common-ancestor (TMRCA). 

In [None]:
# Step back from the youngest tip's decimal date by the logged origin value.
trace_df['Origin'] = youngest_tip_year_decimal - trace_df['origin_BDSKY_Serial']
fig, ax, hdi_est = plot_origin_or_tmrca(
    trace_df,
    parameter='Origin',
    x_tick_freq='weekly',
    hdi_prob=0.95,
)
# Render each HDI entry as a YYYY-MM-DD string for display.
display(dict((key, value.strftime('%Y-%m-%d')) for key, value in hdi_est.items()))

# TMRCA

In [None]:
# Step back from the youngest tip's decimal date by the logged tree height.
trace_df['TMRCA'] = youngest_tip_year_decimal - trace_df['TreeHeight']
fig, ax, hdi_est = plot_origin_or_tmrca(
    trace_df,
    parameter='TMRCA',
    x_tick_freq='weekly',
    hdi_prob=0.95,
)
# Render each HDI entry as a YYYY-MM-DD string for display.
display(dict((key, value.strftime('%Y-%m-%d')) for key, value in hdi_est.items()))

# $R_T$

In [None]:
# Reproductive number: figure plus HDI summary for the matching trace column(s).
rt_fig, ax, rt_hdi = outputs_for_possible_skyline(
    trace_df=trace_df,
    starting_with='reproductiveNumber_BDSKY_Serial',
    axis_label='Reproductive Number ($R_t$)',
)
display(rt_hdi)