In [1]:
from cloudpathlib import CloudPath, AnyPath
import treeViz as tv
import pandas as pd


In [2]:
output_dir = AnyPath("../data/generated/prune_tree/")
output_dir.mkdir(parents=True, exist_ok=True)

formatted_names_file = AnyPath("../data/imported/display_names.csv")

processed_genomes = AnyPath(
    "/Users/sunitjain/Github/HuangLab/TransposonLibrary_Voodgt_Tripathi_etal/data/generated/process_genomes"
)
gtdb_tk_s3_basepath = CloudPath(
    "s3://genomics-workflow-core/Results/BinQC/TransposonLibrary/20210331/04_GTDBtk/gtdbtk-results/full_tree_manually_generated/"
)
gtdb_tk_s3_summary_file = (
    gtdb_tk_s3_basepath / "gtdb.TransposonLibrary_20210331.bac120.summary.tsv"
)
gtdb_tk_s3_tree_file = (
    gtdb_tk_s3_basepath / "gtdb.TransposonLibrary_20210331.bac120.classify.tree"
)

summary_file = processed_genomes / gtdb_tk_s3_summary_file.name
tree_file = processed_genomes / gtdb_tk_s3_tree_file.name


In [3]:
# download file to processed_genomes
gtdb_tk_s3_summary_file.download_to(summary_file)
gtdb_tk_s3_tree_file.download_to(tree_file)


PosixPath('/Users/sunitjain/Github/HuangLab/TransposonLibrary_Voodgt_Tripathi_etal/data/generated/process_genomes/gtdb.TransposonLibrary_20210331.bac120.classify.tree')

In [4]:
summary_prefix = output_dir / "TransposonLibrary_20210331"
summary_prefix = summary_prefix.as_posix()
summary_file = summary_file.as_posix()
tree_file = tree_file.as_posix()
formatted_names_file = formatted_names_file.as_posix()


In [5]:
genomes, color_dict, common_ancestor = tv.parse_summary_file(
    summary_file, out_prefix=summary_prefix, color=True, taxa_rank="phylum"
)


In [6]:
formatted_name_df = pd.read_csv(
    formatted_names_file,
    header=0,
    usecols=["node_name", "display_name"],
)
formatted_name_dict = dict(
    zip(formatted_name_df.node_name, formatted_name_df.display_name)
)


### Circular tree w/ Phylum color background

In [7]:
tree_prefix = output_dir / "TransposonLibrary_20210331.circular_w_bgcolor"
tv.generate_tree(
    tree_file,
    genomes,
    out_prefix=tree_prefix,
    circular=True,
    color_dict=color_dict,
    common_ancestor=common_ancestor,
    formatted_name_dict=formatted_name_dict,
)

### Rectangular tree w/ Phylum color background

In [8]:
tree_prefix = output_dir / "TransposonLibrary_20210331.rect_w_bgcolor"
tv.generate_tree(
    tree_file,
    genomes,
    out_prefix=tree_prefix,
    circular=False,
    color_dict=color_dict,
    common_ancestor=common_ancestor,
    formatted_name_dict=formatted_name_dict,
)

### Rectangular tree w/o color

In [9]:
tree_prefix = output_dir / "TransposonLibrary_20210331.rect_no_color"
tv.generate_tree(
    tree_file,
    genomes,
    out_prefix=tree_prefix,
    circular=False,
    color_dict=None,
    common_ancestor=None,
    formatted_name_dict=formatted_name_dict,
)