# Compile Results

Each inference program writes its output in a different format. This notebook compiles those outputs into a common format for easier comparison.

In [1]:
# to suppress warning from ete3 because it's not up to date with python3.12
import warnings
warnings.filterwarnings("ignore", category=SyntaxWarning)

In [2]:
import ete3
import pandas as pd
import os
import sys

# compile_results.py (which holds all the functions used in this notebook) lives in ../code,
# i.e. the `code` directory that sits beside the current working directory.
# The location is derived from os.getcwd(), so the kernel has to be running from the
# notebook's own directory for the import below to succeed.
parent_dir = os.path.dirname(os.getcwd())
code_dir = os.path.join(parent_dir, 'code')
sys.path.append(code_dir)
import compile_results

In [3]:
# Genome tree used throughout this notebook (rooted and labeled, going by the file name).
# Both the path and the parsed tree are kept: the path is what gets handed to the
# compile_results functions, the ete3.Tree object is available for direct inspection.
# NOTE(review): format=1 should be ete3's newick flavor that preserves internal node names
# -- confirm against the ete3 documentation.
genome_tree_labeled_path = "../data/genome_tree/genome_tree.iqtree.treefile.rooted.labeled"
genome_tree_labeled = ete3.Tree(genome_tree_labeled_path, format=1)

## Count

In [5]:
# Compile the Count results for every kind of dynamics (genes, ecosystem types,
# ecosystem subtypes) into one shared output directory.
# The three runs differ only in the Count directory they read and in the variable labels
# passed to compile_results.compile_count_changes, so they are driven from a single table
# instead of three copy-pasted calls.
COMPILED_RESULTS_DIR = "../data/compiled_results/"
COUNT_RUNS = [
    # (label shown in the header, Count output directory, var_str, var_name_str)
    ("gene", "../data/inferences/gene_dynamics/Count/", "gene", "nog_id"),
    ("ecosystem type", "../data/inferences/ecotype_dynamics/Count/", "ecotype", "ecotype"),
    ("ecosystem subtype", "../data/inferences/ecosubtype_dynamics/Count/", "ecosubtype", "ecosubtype"),
]

# `count_output_dir` is kept as the loop variable name so that, after this cell has run,
# it still holds the last (ecosystem subtype) directory, exactly as before.
for count_label, count_output_dir, count_var_str, count_var_name_str in COUNT_RUNS:
    # "\033[1m" / "\033[0m" are the ANSI escape codes for bold on / reset
    print("\033[1m" + f"Compiling count changes in {count_label} dynamics" + "\033[0m")
    compile_results.compile_count_changes(count_dir=count_output_dir,
                                          input_tree_filepath=genome_tree_labeled_path,
                                          compiled_results_dir=COMPILED_RESULTS_DIR,
                                          var_str=count_var_str,
                                          var_name_str=count_var_name_str)
    print("-----------------------------------")

print(f"All results compiled and saved in {COMPILED_RESULTS_DIR}")


[1mCompiling count changes in gene dynamics[0m
Count branchwise transfers:
     branch  transfers
0    109790         24
1      1587         60
2      1604         53
3     33959         29
4      1582         66
..      ...        ...
311     N27        130
312     N17          0
313     N11          0
314      N7          0
315      N3          0

[316 rows x 2 columns]
Count varwise transfers:
       nog_id  transfers
1747  COG0001          0
1745  COG0002          0
1746  COG0003          2
1750  COG0004          0
1751  COG0005          0
...       ...        ...
8192    DA68R          2
8193    DA697          1
8194    DA6A4          0
8195    DA6AQ          3
8196    DA6FD          1

[8197 rows x 2 columns]
Count varwise, branchwise transfers:
          nog_id  branch  transfers  losses  expansions  reductions
48       COG0001     N44          0       2           0           0
57       COG0001   35841          0       0           0           1
64       COG0001  260554        