# Phase 3: Setting up the BEAST xmls

<details>
    <summary>Click To See A Description of Parameters</summary>
        <pre>
            <code>
save_dir: str  
    Path to directory for saving outputs in.

template_xml_path: str
    Path to template BEAST xml.

use_initial_tree:  bool, default True
    Whether an initial tree is to be used. If not, no initial tree is used in generating the BEAST 2 xml
    and BEAST 2 will generate its own.

metadata_path: str
    Path to csv or tsv containing metadata.

collection_date_field: str, default='date'
    Name of field in the metadata (metadata_path) containing collection dates of sequences. Dates should be in the format YYYY-MM-DD.

sample_id_field: str, default 'strain'
    Name of field in the metadata (metadata_path) containing ids corresponding to those used in the fasta file (fasta_path).

initial_tree_path: str
    Path to the initial tree. Should be a .nwk file.

fasta_path: str
    Path to fasta file containing sequences.


log_file_basename: str, optional
    If provided, the .tree, .log and .state files from running BEAST 2 will have this name, prefixed by 'run-with-seed-{seed}-',
    where {seed} is the seed number of the chain.

chain_length: int
    Length of the MCMC chain (number of steps) to use for BEAST runs.

trace_log_every: int
    How often to save a log file during BEAST runs.

tree_log_every: int
    How often to save a tree file during BEAST runs.

screen_log_every: int
    How often to output to screen during BEAST runs.

store_state_every: int 
    How often to store MCMC state during BEAST runs.
  </code>
</pre>
</details>


In [None]:
# Notebook parameters. Each value is a default that is expected to be overridden
# before the notebook is run.

# --- Output directory and template ---
# Directory outputs are saved in; also the place searched for inputs left as None.
save_dir = None
# Template BEAST xml the generated xml is built from.
template_xml_path = None

# --- Sequence data and metadata ---
# None means the files are looked up in save_dir by the search cell further down.
fasta_path = None
metadata_path = None
# Metadata column names for the sample ids and the collection dates.
sample_id_field = 'strain'
collection_date_field = 'date'

# --- Initial tree ---
# When use_initial_tree is True and no path is given, the tree is looked up
# under save_dir/initial_trees.
use_initial_tree = True
initial_tree_path = None

# --- BEAST run / logging settings (handed straight to gen_xml_from_any_template) ---
log_file_basename = None
chain_length = None
trace_log_every = None
tree_log_every = None
screen_log_every = None
store_state_every = None

Import packages. 

In [None]:
import os
from beast_pype.beast_xml_gen import gen_xml_from_any_template
from Bio import SeqIO

### Search for files in save_dir if not provided

In [None]:
def _first_existing_path(candidate_paths):
    """Return the first path in candidate_paths that exists on disk, or None if none do."""
    return next((path for path in candidate_paths if os.path.exists(path)), None)


# For every input path that was not supplied, fall back to the conventional
# file names under save_dir. Candidates are listed in order of preference and
# the first one found on disk wins.
if use_initial_tree and initial_tree_path is None:
    initial_trees_dir = f'{save_dir}/initial_trees'
    initial_tree_path = _first_existing_path([
        f'{initial_trees_dir}/down_sampled_time.nwk',
        f'{initial_trees_dir}/full_time.nwk',
        f'{initial_trees_dir}/iqtree.nwk',
    ])
    if initial_tree_path is None:
        # The message names the files and the directory that were actually searched.
        raise FileNotFoundError(
            'Initial tree file not found. initial_tree_path has not been provided and '
            'none of the files down_sampled_time.nwk, full_time.nwk or iqtree.nwk '
            f'can be found in {initial_trees_dir}.'
        )

if metadata_path is None:
    metadata_path = _first_existing_path([
        f'{save_dir}/down_sampled_metadata.csv',
        f'{save_dir}/metadata.csv',
    ])
    if metadata_path is None:
        raise FileNotFoundError('Metadata file not found. \n' +
                                f'A metadata_path has not been provided and neither the file down_sampled_metadata.csv or metadata.csv can be found in save_dir ({save_dir}).')

if fasta_path is None:
    fasta_path = _first_existing_path([
        f'{save_dir}/down_sampled_sequences.fasta',
        f'{save_dir}/sequences.fasta',
    ])
    if fasta_path is None:
        raise FileNotFoundError('Fasta file not found.  \n' +
                                f'fasta_path has not been provided and neither the file down_sampled_sequences.fasta or sequences.fasta can be found in save_dir ({save_dir}).')


# An explicit initial tree combined with use_initial_tree=False is contradictory:
# refuse to continue rather than silently ignoring the supplied tree.
if not use_initial_tree and initial_tree_path is not None:
    raise AssertionError('use_initial_tree is False but you have provided an initial_tree_path?')


## Actually Generating the BEAST2 xmls.

In [None]:
# Keyword arguments common to both cases (with and without an initial tree).
xml_gen_kwargs = dict(
    template_path=template_xml_path,
    sequences_path=fasta_path,
    metadata_path=metadata_path,
    sample_id_field=sample_id_field,
    collection_date_field=collection_date_field,
    output_path=f"{save_dir}/beast.xml",
    log_file_basename=log_file_basename,
    chain_length=chain_length,
    trace_log_every=trace_log_every,
    tree_log_every=tree_log_every,
    screen_log_every=screen_log_every,
    store_state_every=store_state_every,
)

# The initial tree is only handed over when it is meant to be used; otherwise
# the argument is omitted entirely so the generator falls back to its default.
if use_initial_tree:
    xml_gen_kwargs['initial_tree_path'] = initial_tree_path

# Write the BEAST 2 xml to save_dir/beast.xml.
gen_xml_from_any_template(**xml_gen_kwargs)