# Phase 3: Setting up the BEAST xmls

<details>
    <summary>Click To See A Description of Parameters</summary>
        <pre>
            <code>
save_dir: str  
    Path to directory for saving outputs in.

template_xml_path: str
    Path to template BEAST xml.

use_initial_tree: bool, default True
    Whether an initial tree should be used. If not, the initial tree will not be used in generating the BEAST 2 xml
    and BEAST 2 will generate its own.

rt_dims: int, optional
    Number of Rt dimensions (time periods over which Rt is estimated).

rt_changes: list, tuple, pd.Series or pd.DatetimeIndex of datetimes
    Internal partitions of Rt estimation periods.

sampling_prop_dims: int, optional
    Number of sampling proportion dimensions (time periods over which sampling proportion is estimated).

sampling_prop_changes: list, tuple, pd.Series or pd.DatetimeIndex of dates
    Internal partitions of sampling proportion estimation periods.

metadata_path: str
       Path to csv or tsv containing metadata.

collection_date_field: str, default='date'
    Name of field in the metadata (see metadata_path) containing collection dates of sequences. Should be in the format YYYY-MM-DD.

sample_id_field: str, default 'strain'
    Name of field in the metadata (see metadata_path) containing ids corresponding to those used in fasta_path.

initial_tree_path: str
    Path to the initial tree. Should be a .nwk file.

fasta_path: str
    Path to fasta file containing sequences.

origin_start_addition: float
    This + initial temporal tree height is used as the starting value of the origin.
    We recommend using an estimate of the infection period for the pathogen being studied. **Value should be in years.**
    The origin prior will be uniform:
        Lower value: time in years from oldest to youngest sequence in fasta_path
        Start value: origin_start_addition + initial temporal tree height
        Upper value: initial temporal tree height + origin_upper_addition.

origin_upper_addition: float/int
    This + initial temporal tree height is used as the upper value of the origin prior. **Value should be in years.**
    The origin prior will be uniform:
        Lower value: time in years from oldest to youngest sequence in fasta_path
        Start value: origin_start_addition + initial temporal tree height
        Upper value: initial temporal tree height + origin_upper_addition.

origin_prior: dict {'lower': float, 'upper': float, 'start': float}, optional
    Details of the origin prior. Assumed to be uniformly distributed.

log_file_basename: str, optional
    If provided, the .tree, .log and .state files from running BEAST 2 will have this name, prefixed by
    'run-with-seed-{seed}-', the number being that of the chain.

chain_length: int
    Length of the MCMC chain (number of steps) to use for BEAST runs.

trace_log_every: int
    How often to save a log file during BEAST runs.

tree_log_every: int
    How often to save a tree file during BEAST runs.

screen_log_every: int
    How often to output to screen during BEAST runs.

store_state_every: int 
    How often to store MCMC state during BEAST runs.
  </code>
</pre>
</details>


In [None]:
# Parameter defaults for this notebook, grouped by theme.
# See the parameter descriptions above for the meaning of each name.

# --- Output directory and input files ---
save_dir = None
template_xml_path = None
fasta_path = None
metadata_path = None

# --- Metadata column names ---
sample_id_field = 'strain'
collection_date_field = 'date'

# --- Initial tree ---
use_initial_tree = True
initial_tree_path = None

# --- Rt and sampling proportion partitions ---
rt_dims = None
rt_changes = None
sampling_prop_dims = None
sampling_prop_changes = None
# Defined here, but the matching keyword is currently commented out in the
# gen_bdsky_serial_xml call further down, so it is not forwarded yet.
zero_sampling_before_first_sample = True

# --- Origin prior ---
origin_prior = None
origin_start_addition = None
origin_upper_addition = None

# --- BEAST run and logging settings ---
log_file_basename = None
chain_length = None
trace_log_every = None
tree_log_every = None
screen_log_every = None
store_state_every = None

Import packages. 

In [None]:
import os
from beast_pype.beast_xml_gen import gen_bdsky_serial_xml

## Generating the BEAST2 xmls.

In [None]:
# Honour the `use_initial_tree` flag: when it is False no starting tree is handed
# to the xml generator, even if `initial_tree_path` happens to be set.
starting_tree_path = initial_tree_path if use_initial_tree else None

# Generate the BEAST 2 xml from the template, using the notebook parameters.
# The output path is fixed to "beast.xml" inside `save_dir`.
gen_bdsky_serial_xml(
    template_path=template_xml_path,
    sequences_path=fasta_path,
    metadata_path=metadata_path,
    initial_tree_path=starting_tree_path,
    origin_prior=origin_prior,
    collection_date_field=collection_date_field,
    sample_id_field=sample_id_field,
    # NOTE: the callee's keyword is `origin_upper_height_addition`; the notebook
    # parameter is named `origin_upper_addition`.
    origin_upper_height_addition=origin_upper_addition,
    origin_start_addition=origin_start_addition,
    output_path=f"{save_dir}/beast.xml",
    rt_dims=rt_dims,
    # The notebook parameters are `rt_changes` / `sampling_prop_changes`; they are
    # passed on under the callee's `*_change_dates` keywords.
    rt_change_dates=rt_changes,
    sampling_prop_dims=sampling_prop_dims,
    sampling_prop_change_dates=sampling_prop_changes,
    #  For when we can get this working in beast2xml see https://github.com/acorg/beast2-xml/issues/21#issuecomment-3553733538.
    # zero_sampling_before_first_sample=zero_sampling_before_first_sample,
    log_file_basename=log_file_basename,
    chain_length=chain_length,
    trace_log_every=trace_log_every,
    tree_log_every=tree_log_every,
    screen_log_every=screen_log_every,
    store_state_every=store_state_every,
)