# Compile results

The code in this notebook compiles the output of each program into a common format for further analysis.

In [1]:
# to suppress warning from ete3 because it's not up to date with py3.12
import warnings
# ignore SyntaxWarning
warnings.filterwarnings("ignore", category=SyntaxWarning)

In [2]:
import os

import numpy as np
import pandas as pd
from tqdm import tqdm

# Turn off pandas' SettingWithCopyWarning by disabling the chained-assignment check.
pd.set_option("mode.chained_assignment", None)

In [3]:
from lib.output_compilation_functions import *

In [4]:
# Directory that receives every compiled result table written by this notebook.
# exist_ok=True keeps the cell idempotent: re-running it when the directory
# already exists is a no-op rather than an error.
res_dir = '../data/compiled_results/'
os.makedirs(res_dir, exist_ok=True)

# Input locations shared by the cells below.
data_dir = '../data'
program_runs_dir = '../data/program_runs/'
taxonomic_id = '1236'

# Species tree handed to the compile functions. The f-string is kept on a single
# line because a replacement field that spans lines only parses on Python 3.12+.
input_tree_filepath = f"{data_dir}/{taxonomic_id}_wol_tree_pruned_with_internal_labels.nwk"

## AnGST

In [5]:
# Compile the AnGST run; the branchwise table is returned first, the per-NOG table second.
angst_output_dir = f"{program_runs_dir}/AnGST/Results/"
nogwise_branchwise_angst_df, nogwise_angst_df = compile_angst_results(
    angst_output_dir, input_tree_filepath
)

# Show both tables in the notebook.
print("AnGST results compiled.\nNOGwise DF:")
display(nogwise_angst_df)
print("NOGwise branchwise DF:")
display(nogwise_branchwise_angst_df)

# Write both tables as tab-separated files with a header row and no index column.
angst_tsv_targets = (
    (nogwise_angst_df, f"{res_dir}/compiled_transfers.nogwise.angst.tsv"),
    (nogwise_branchwise_angst_df, f"{res_dir}/compiled_transfers.nogwise.branchwise.angst.tsv"),
)
for angst_table, angst_tsv_path in angst_tsv_targets:
    angst_table.to_csv(angst_tsv_path, sep='\t', header=True, index=False)

Original transfers DF looks like:


Unnamed: 0,nog_id,source_branch,recipient_branch
0,ER9VY,1461581,216142-243924-1028989-223283-205918-157783-384...
1,ER9VY,157783-384676-160488,1548547
2,ER9VY,1548547,1300345-913325-1199154-1122185-1736549-1385517...
3,ER9VY,1548547,697282-1116472-1091494-857087-1538553
4,ER9VY,1548547,167879-58049
...,...,...,...
47440,ERT8F,207954-619304,1897630
47441,ERT8F,207954-619304,1543721
47442,ERT8F,207954-619304,62101
47443,ERT8F,1897630,1859457


Processing transfer thresholds:   0%|          | 0/1 [00:00<?, ?it/s]

AnGST results compiled.
NOGwise DF:


Unnamed: 0,nog_id,transfers,transfer_threshold
0,EQRBG,33,1
1,EQRFZ,46,1
2,EQRG2,28,1
3,EQRGC,39,1
4,EQRGG,42,1
...,...,...,...
1295,ETCI9,30,1
1296,ETCIB,24,1
1297,ETCIZ,46,1
1298,ETCJF,50,1


NOGwise branchwise DF:


Unnamed: 0,nog_id,source_branch,recipient_branch,transfers
0,ER9VY,1461581,N166,1
1,ER9VY,N165,1548547,1
2,ER9VY,1548547,N28,1
3,ER9VY,1548547,N72,1
4,ER9VY,1548547,N229,1
...,...,...,...,...
47440,ERT8F,N203,1897630,1
47441,ERT8F,N203,1543721,1
47442,ERT8F,N203,62101,1
47443,ERT8F,1897630,1859457,1


## ALE

In [6]:
# Background: line 3 of each uml_rec file holds the ALE tree. Its newick string is
# extracted, the internal node names are read from it, and those nodes are mapped
# onto the input species tree loaded here.
# NOTE(review): format=1 presumably selects the newick flavour that carries
# internal node names, as the tree file name suggests - confirm.
input_tree = ete3.Tree(input_tree_filepath, format=1)

# Every entry of the program-runs directory whose name ends with 'ALE' is one ALE run.
ale_dirs = list(
    filter(lambda entry: entry.endswith('ALE'), os.listdir(program_runs_dir))
)
print(ale_dirs)

for ale_dir in ale_dirs:
    print(f"Processing {ale_dir}...")
    ale_results_dir = os.path.join(program_runs_dir, ale_dir, 'Results')
    # Note the return order: the per-NOG table comes first, the branchwise table second.
    nogwise_ale_df, nogwise_branchwise_ale_df = compile_ale_outputs(
        ale_results_dir, input_tree
    )

    # Write both tables; the file names carry the lower-cased run directory name.
    ale_tag = ale_dir.lower()
    nogwise_branchwise_ale_df.to_csv(
        f"{res_dir}/compiled_transfers.nogwise.branchwise.{ale_tag}.tsv",
        sep="\t",
        header=True,
        index=False,
    )
    nogwise_ale_df.to_csv(
        f"{res_dir}/compiled_transfers.nogwise.{ale_tag}.tsv",
        sep="\t",
        header=True,
        index=False,
    )

    # Show what was written.
    print("Nogwise, branchwise transfers:")
    display(nogwise_branchwise_ale_df)
    print("Nogwise transfers:")
    display(nogwise_ale_df)

    print("-----------------------------------")

['ALE']
Processing ALE...
Files to be written with ale:


Processing transfer thresholds:   0%|          | 0/100 [00:00<?, ?it/s]

Nogwise, branchwise transfers:


Unnamed: 0,nog_id,source_branch,recipient_branch,transfers
0,ERA5M,1006000,1177154,0.01
1,ERA5M,1006000,N119,0.01
2,ERA5M,1006000,N211,0.01
3,ERA5M,1009858,1513271,0.01
4,ERA5M,1009858,N125,0.01
...,...,...,...,...
2357745,ERZXU,N349,N228,0.01
2357746,ERZXU,N349,N207,0.02
2357747,ERZXU,N349,N209,0.01
2357748,ERZXU,N350,N212,0.01


Nogwise transfers:


Unnamed: 0,nog_id,transfers,transfer_threshold
0,EQRBG,33.56,0.010000
1,EQRFZ,49.61,0.010000
2,EQRG2,30.19,0.010000
3,EQRGC,43.15,0.010000
4,EQRGG,43.22,0.010000
...,...,...,...
0,EREPP,2.23,2.140303
0,EREPP,2.23,2.162727
0,EREPP,2.23,2.185152
0,EREPP,2.23,2.207576


-----------------------------------


## Ranger

In [14]:
# Every entry of the program-runs directory whose name starts with 'RANGER' is one
# Ranger run; its results live in the 'Results/' subdirectory of that entry.
ranger_run_names = [d for d in os.listdir(program_runs_dir) if d.startswith('RANGER')]
ranger_output_dirs = [
    f"{program_runs_dir}/{run_name}/Results/" for run_name in ranger_run_names
]

for ranger_dir, ranger_output_dir in zip(ranger_run_names, ranger_output_dirs):
    print(f"Processing {ranger_output_dir}...")
    # ranger_dir is the run directory name (two levels above the trailing slash of
    # ranger_output_dir); it is used to tag the output files.
    print(f"Ranger dir: {ranger_dir}")

    nogwise_branchwise_ranger_df, nogwise_ranger_df = compile_ranger_results(
        ranger_output_dir, input_tree_filepath
    )

    print("NOGwise DF:")
    display(nogwise_ranger_df)
    print("NOGwise branchwise DF:")
    display(nogwise_branchwise_ranger_df)

    # Write both tables; the file names carry the lower-cased run directory name.
    ranger_tag = ranger_dir.lower()
    nogwise_ranger_df.to_csv(
        f"{res_dir}/compiled_transfers.nogwise.{ranger_tag}.tsv",
        sep='\t', header=True, index=False,
    )
    nogwise_branchwise_ranger_df.to_csv(
        f"{res_dir}/compiled_transfers.nogwise.branchwise.{ranger_tag}.tsv",
        sep='\t', header=True, index=False,
    )

Processing ../data/program_runs//RANGER/Results/...
Ranger dir: RANGER


Processing NOGs:   0%|          | 0/1300 [00:00<?, ?it/s]

Processing transfer thresholds:   0%|          | 0/100 [00:00<?, ?it/s]

NOGwise DF:


Unnamed: 0,nog_id,transfers,transfer_threshold
0,EQRBG,33.21,0.010000
1,EQRFZ,49.00,0.010000
2,EQRG2,29.02,0.010000
3,EQRGC,41.48,0.010000
4,EQRGG,43.42,0.010000
...,...,...,...
0,ET4RU,3.68,3.531717
0,ET4RU,3.68,3.568788
0,ET4RU,3.68,3.605859
0,ET4RU,3.68,3.642929


NOGwise branchwise DF:


Unnamed: 0,nog_id,source_branch,recipient_branch,transfers
0,ERCE7,380703,N297,0.88
1,ERCE7,N254,1799789,0.01
2,ERCE7,1879031,247633,0.10
3,ERCE7,N231,N54,0.03
4,ERCE7,N296,910964,0.48
...,...,...,...,...
185391,ERVXT,N13,1797696,0.02
185392,ERVXT,N114,448,0.24
185393,ERVXT,1895767,N155,0.09
185394,ERVXT,N341,550540,1.00


Processing ../data/program_runs//RANGER-Fast/Results/...
Ranger dir: RANGER-Fast


Processing NOGs:   0%|          | 0/1300 [00:00<?, ?it/s]

Processing transfer thresholds:   0%|          | 0/100 [00:00<?, ?it/s]

NOGwise DF:


Unnamed: 0,nog_id,transfers,transfer_threshold
0,EQRBG,32.0,0.030000
1,EQRFZ,49.0,0.030000
2,EQRG2,27.0,0.030000
3,EQRGC,41.0,0.030000
4,EQRGG,42.0,0.030000
...,...,...,...
0,ET4FF,4.0,3.839596
0,ET4FF,4.0,3.879697
0,ET4FF,4.0,3.919798
0,ET4FF,4.0,3.959899


NOGwise branchwise DF:


Unnamed: 0,nog_id,source_branch,recipient_branch,transfers
0,ERCE7,1736225,1484157,1.0
1,ERCE7,N109,247634,1.0
2,ERCE7,523791,1897630,1.0
3,ERCE7,1331007,N343,1.0
4,ERCE7,N296,910964,1.0
...,...,...,...,...
49241,ERVXT,N230,N338,1.0
49242,ERVXT,N13,1301098,1.0
49243,ERVXT,N114,448,1.0
49244,ERVXT,N12,1300345,1.0


## GLOOME

GLOOME infers only gains and losses on branches; it does not infer the source of the gene transfer behind a gain. We therefore have no information for the source column (it is set to `unknown`), and only the recipient column is populated.

In [8]:
# Inputs shared by both GLOOME runs.
pa_matrix_tsv_filepath = f"{data_dir}/1236_pa_matrix.tsv"
gloome_output_dir_wt = f"{program_runs_dir}GLOOME_with_tree/Results_GLOOME_with_tree/"
gloome_output_dir_wot = f"{program_runs_dir}GLOOME_without_tree/Results_GLOOME_without_tree/"
input_tree = ete3.Tree(input_tree_filepath, format=1)

# --- GLOOME run that was given the species tree ---
gloome_wt_results_dict = read_and_compile_gloome_results(
    gloome_output_dir=gloome_output_dir_wt,
    input_tree=input_tree,
    species_tree_bool=True,
    pa_matrix_tsv_filepath=pa_matrix_tsv_filepath,
)
print("Results compiled for GLOOME run with species tree.")
# Each key of the results dict doubles as the base name of the TSV file written for it.
for key, df in gloome_wt_results_dict.items():
    print(f"{key} df:")
    display(df)
    gloome_wt_tsv_path = f"{res_dir}/{key}.tsv"
    df.to_csv(gloome_wt_tsv_path, sep='\t', header=True, index=False)


# --- GLOOME run without the species tree ---
gloome_wot_results_dict = read_and_compile_gloome_results(
    gloome_output_dir=gloome_output_dir_wot,
    input_tree=input_tree,
    species_tree_bool=False,
    pa_matrix_tsv_filepath=pa_matrix_tsv_filepath,
)
print("Results compiled for GLOOME run without species tree.")
# Same keys as above, so these files get a '.without_tree' suffix to keep them apart.
for key, df in gloome_wot_results_dict.items():
    print(f"{key} df:")
    display(df)
    gloome_wot_tsv_path = f"{res_dir}/{key}.without_tree.tsv"
    df.to_csv(gloome_wot_tsv_path, sep='\t', header=True, index=False)

Processing transfer thresholds:   0%|          | 0/100 [00:00<?, ?it/s]

Processing transfer thresholds:   0%|          | 0/1 [00:00<?, ?it/s]

Results compiled for GLOOME run with species tree.
compiled_transfers.nogwise.gloome.ml df:


Unnamed: 0,nog_id,transfers,transfer_threshold
0,EQRBG,16.81498,0.050000
1,EQRFZ,11.32676,0.050000
2,EQRG2,1.44199,0.050000
3,EQRGC,38.71439,0.050000
4,EQRGG,6.08199,0.050000
...,...,...,...
209,ETCDP,1.99490,0.990107
210,ETCUH,0.99240,0.990107
0,EQYJP,1.00000,0.999700
1,ES59Z,1.00000,0.999700


compiled_transfers.nogwise.branchwise.gloome.ml df:


Unnamed: 0,nog_id,source_branch,recipient_branch,transfers,transfer_threshold,gloome_branch_name
1,ET9HP,unknown,N210,0.05797,0.05797,N165
2,ET9HP,unknown,N209,0.92970,0.92970,N185
6,ET9HP,unknown,1178482,0.95380,0.95370,1178482
8,ET9HP,unknown,N186,0.19190,0.19190,N203
12,ET9HP,unknown,314283,0.05668,0.05662,314283
...,...,...,...,...,...,...
223911,ETAWV,unknown,N276,0.05297,0.05296,N297
223913,ETAWV,unknown,343509,0.08645,0.08644,343509
223923,ETAWV,unknown,515618,0.05607,0.05505,515618
223945,ET5PB,unknown,N350,0.10260,0.10260,N222


compiled_losses.nogwise.branchwise.gloome.ml df:


Unnamed: 0,source_branch,branch,losses,transfer_threshold,gloome_branch_name
0,unknown,1896966,0.05675,0.05611,1896966
3,unknown,N182,0.91170,0.91150,N188
4,unknown,N173,0.05437,0.05436,N189
5,unknown,N181,0.07854,0.07854,N190
7,unknown,N191,0.19540,0.19530,N201
...,...,...,...,...,...
223954,unknown,595494,0.99480,0.99430,595494
223955,unknown,N332,0.79170,0.79170,N276
223956,unknown,N268,0.20180,0.20160,N277
223957,unknown,N331,0.18220,0.18220,N285


compiled_transfers.nogwise.gloome.mp df:


Unnamed: 0,nog_id,transfers,transfer_threshold
0,EQRBG,18,1
1,EQRFZ,33,1
2,EQRG2,15,1
3,EQRGC,40,1
4,EQRGG,31,1
...,...,...,...
1281,ETCI9,27,1
1282,ETCIB,17,1
1283,ETCIZ,28,1
1284,ETCJF,41,1


compiled_transfers.nogwise.branchwise.gloome.mp df:


Unnamed: 0,nog_id,source_branch,recipient_branch,transfers,transfer_threshold,gloome_branch_name
0,ET9HP,unknown,1178482,1,1,1178482
1,ET9HP,unknown,N183,1,1,N198
2,ET9HP,unknown,966,1,1,966
3,ET9HP,unknown,349521,1,1,349521
4,ET9HP,unknown,N186,1,1,N203
...,...,...,...,...,...,...
40526,ETAWV,unknown,N343,1,1,N348
40527,ET5PB,unknown,1298881,1,1,1298881
40528,ET5PB,unknown,N216,1,1,N229
40529,ET5PB,unknown,N241,1,1,N230


compiled_losses.nogwise.branchwise.gloome.mp df:


Unnamed: 0,source_branch,branch,losses,transfer_threshold,gloome_branch_name
7,unknown,1232683,1,1,1232683
8,unknown,207954,1,1,207954
10,unknown,1331007,1,1,1331007
11,unknown,N295,1,1,N293
15,unknown,634500,1,1,634500
...,...,...,...,...,...
40530,unknown,56804,1,1,56804
40531,unknown,1535422,1,1,1535422
40533,unknown,357804,1,1,357804
40534,unknown,595494,1,1,595494


Processing transfer thresholds:   0%|          | 0/100 [00:00<?, ?it/s]

Processing transfer thresholds:   0%|          | 0/1 [00:00<?, ?it/s]

Results compiled for GLOOME run without species tree.
compiled_transfers.nogwise.gloome.ml df:


Unnamed: 0,nog_id,transfers,transfer_threshold
0,EQRBG,11.23777,0.05
1,EQRFZ,17.30847,0.05
2,EQRG2,2.19190,0.05
3,EQRGC,26.10110,0.05
4,EQRGG,9.88948,0.05
...,...,...,...
582,ETCDP,2.04400,1.00
583,ETCEN,1.01900,1.00
584,ETCI9,2.05600,1.00
585,ETCIB,1.01100,1.00


compiled_transfers.nogwise.branchwise.gloome.ml df:


Unnamed: 0,nog_id,source_branch,recipient_branch,transfers,transfer_threshold
6,ET9HP,unknown,515618,0.99810,0.9981
7,ET9HP,unknown,1410383,0.99480,0.9948
19,ET9HP,unknown,1178482,0.91850,0.9181
22,ET9HP,unknown,1288826,0.65520,0.6551
28,ET9HP,unknown,349521,0.84130,0.8385
...,...,...,...,...,...
275564,ETAWV,unknown,1122209,0.05587,0.0550
275571,ETAWV,unknown,1656094,0.91420,0.9130
275574,ETAWV,unknown,314608,0.93170,0.9314
275577,ETAWV,unknown,1859457,0.95030,0.9481


compiled_losses.nogwise.branchwise.gloome.ml df:


Unnamed: 0,source_branch,branch,losses,transfer_threshold
3,unknown,634500,0.9976,0.9975
4,unknown,1115515,0.9938,0.9936
5,unknown,1681196,0.9976,0.9974
10,unknown,207954,0.3386,0.3376
12,unknown,1118153,0.4660,0.4642
...,...,...,...,...
275586,unknown,380703,1.0000,0.9961
275589,unknown,56804,0.7820,0.7819
275591,unknown,745411,1.0000,0.9980
275592,unknown,357804,0.9990,0.9974


compiled_transfers.nogwise.gloome.mp df:


Unnamed: 0,nog_id,transfers,transfer_threshold
0,EQRBG,17,1
1,EQRFZ,28,1
2,EQRG2,15,1
3,EQRGC,28,1
4,EQRGG,19,1
...,...,...,...
1269,ETCI9,22,1
1270,ETCIB,12,1
1271,ETCIZ,17,1
1272,ETCJF,29,1


compiled_transfers.nogwise.branchwise.gloome.mp df:


Unnamed: 0,nog_id,source_branch,recipient_branch,transfers,transfer_threshold
4,ET9HP,unknown,515618,1,1
5,ET9HP,unknown,1410383,1,1
10,ET9HP,unknown,314283,1,1
11,ET9HP,unknown,1178482,1,1
12,ET9HP,unknown,1288826,1,1
...,...,...,...,...,...
43633,ETAWV,unknown,214092,1,1
43636,ETAWV,unknown,1656094,1,1
43637,ETAWV,unknown,314608,1,1
43638,ETAWV,unknown,1859457,1,1


compiled_losses.nogwise.branchwise.gloome.mp df:


Unnamed: 0,source_branch,branch,losses,transfer_threshold
1,unknown,634500,1,1
2,unknown,1115515,1,1
3,unknown,1681196,1,1
7,unknown,1232683,1,1
17,unknown,1331007,1,1
...,...,...,...,...
43641,unknown,380703,1,1
43643,unknown,56804,1,1
43644,unknown,745411,1,1
43645,unknown,357804,1,1


## Count

### Asymmetric Wagner Parsimony

In [9]:
# Compile the per-NOG transfers from the Count MP output directory.
count_MP_output_dir = f"{program_runs_dir}/Count/Count_MP/"
count_MP_nogwise_transfers_df = compile_count_mp_nogwise_transfers(
    count_MP_output_dir, taxonomic_id, res_dir
)

print("Count MP nogwise transfers:")
display(count_MP_nogwise_transfers_df)

# Write the table as a tab-separated file with a header row and no index column.
count_mp_nogwise_tsv_path = f"{res_dir}/compiled_transfers.nogwise.count.mp.tsv"
count_MP_nogwise_transfers_df.to_csv(
    count_mp_nogwise_tsv_path, sep='\t', header=True, index=False
)

Processing transfer thresholds:   0%|          | 0/10 [00:00<?, ?it/s]

Count MP nogwise transfers:


Unnamed: 0,nog_id,transfers,transfer_threshold
10,EQRZ4,1,8
13,EQSA4,1,8
14,EQSAX,1,8
22,EQSTQ,2,8
23,EQSTU,1,8
...,...,...,...
1281,ETCI9,138,0.33
1282,ETCIB,90,0.33
1283,ETCIZ,157,0.33
1284,ETCJF,180,0.33


In [10]:
# Run the helper shell script for the Count MP nogwise-branchwise compilation.
# IPython substitutes the four $variables from the notebook namespace before the
# command is handed to the shell (program runs dir, taxonomic id, tree file, results dir).
# NOTE(review): presumably this produces the per-gain-ratio files combined in the next
# cell - confirm. The recorded output reports an output dir of <res_dir>/count/, while
# the next cell reads from <res_dir>/count_mp/; verify that the two agree.
!bash lib/compile_count_mp_nw_bw_parallel.sh $program_runs_dir $taxonomic_id $input_tree_filepath $res_dir

Input dir is ../data/program_runs//Count/Count_MP/
Tree filepath is ../data/1236_wol_tree_pruned_with_internal_labels.nwk and output dir is ../data/compiled_results//count/
Files are ../data/program_runs//Count/Count_MP//1236_Count_output_gain_0.33_families.tsv ../data/program_runs//Count/Count_MP//1236_Count_output_gain_0.5_families.tsv ../data/program_runs//Count/Count_MP//1236_Count_output_gain_1_families.tsv ../data/program_runs//Count/Count_MP//1236_Count_output_gain_2_families.tsv ../data/program_runs//Count/Count_MP//1236_Count_output_gain_3_families.tsv ../data/program_runs//Count/Count_MP//1236_Count_output_gain_4_families.tsv ../data/program_runs//Count/Count_MP//1236_Count_output_gain_5_families.tsv ../data/program_runs//Count/Count_MP//1236_Count_output_gain_6_families.tsv ../data/program_runs//Count/Count_MP//1236_Count_output_gain_7_families.tsv ../data/program_runs//Count/Count_MP//1236_Count_output_gain_8_families.tsv
Number of files is 10
Running: python3 lib/compile_c

In [11]:
# Collect the per-run nogwise-branchwise files from <res_dir>/count_mp/.
# The combined table is written directly into res_dir (one level up), so a combined
# file left over from a previous run is never picked up as an input here.
# os.listdir returns entries in arbitrary order, so the listing is sorted to make
# the input order, and anything downstream that depends on it, reproducible.
count_mp_files = [
    f"{res_dir}/count_mp/{f}"
    for f in sorted(os.listdir(f"{res_dir}/count_mp/"))
    if f.startswith("compiled_transfers.nogwise.branchwise.count.mp")
]

# Merge the per-run files into a single table, show it, and write it out.
count_mp_nogwise_branchwise_df = combine_count_mp_nw_bw_transfers(count_mp_files)
print("Count MP nogwise branchwise transfers:")
display(count_mp_nogwise_branchwise_df)
count_mp_nogwise_branchwise_df.to_csv(
    f"{res_dir}/compiled_transfers.nogwise.branchwise.count.mp.tsv",
    index=False, header=True, sep='\t',
)

Gain penalty ratios:


['6', '1', '3', '0.5', '5', '7', '4', '8', '2', '0.33']

Count MP nogwise branchwise transfers:


Unnamed: 0,nog_id,recipient_branch,transfers,source_branch
0,EQRBG,N30,0.33,unknown
1,EQRBG,1304275,0.33,unknown
2,EQRBG,N116,0.33,unknown
3,EQRBG,N215,0.33,unknown
4,EQRBG,745411,0.33,unknown
...,...,...,...,...
702974,ETCUH,69222,8,unknown
702975,ETCEW,N87,8,unknown
702976,ETCEW,N41,8,unknown
702977,ETCEW,1548547,8,unknown


### Maximum Likelihood

In [12]:
# Process the single Count ML output file into three tables:
# branchwise gains, per-NOG gains, and branchwise losses (in that order).
count_ML_output_dir = f"{program_runs_dir}/Count/Count_ML/"
count_ML_output_file = os.path.join(count_ML_output_dir, "Count_output.tsv")
(
    count_ml_nw_bw_gains_df,
    count_ml_nogwise_gains_df,
    count_ml_nw_bw_losses_df,
) = process_count_ml_output(count_ML_output_file)

# For each table: print its heading, display it, then write it to its TSV file.
count_ml_reports = (
    (
        "Count_ML NOGwise branchwise transfers:",
        count_ml_nw_bw_gains_df,
        f"{res_dir}/compiled_transfers.nogwise.branchwise.count.ml.tsv",
    ),
    (
        "Count ML NOGwise gains:",
        count_ml_nogwise_gains_df,
        f"{res_dir}/compiled_transfers.nogwise.count.ml.tsv",
    ),
    (
        "Count_ML NOGwise branchwise losses:",
        count_ml_nw_bw_losses_df,
        f"{res_dir}/compiled_losses.nogwise.branchwise.count.ml.tsv",
    ),
)
for count_ml_heading, count_ml_table, count_ml_tsv_path in count_ml_reports:
    print(count_ml_heading)
    display(count_ml_table)
    count_ml_table.to_csv(count_ml_tsv_path, sep='\t', header=True, index=False)

Processing transfer thresholds:   0%|          | 0/100 [00:00<?, ?it/s]

Count_ML NOGwise branchwise transfers:


Unnamed: 0,nog_id,recipient_branch,transfers,source_branch
46,ERTPC,1896966,0.997253,unknown
51,ET8I8,1896966,0.996793,unknown
155,ERUVU,1896966,0.997317,unknown
158,ERQWV,1896966,0.996674,unknown
195,ERJ56,1896966,0.997302,unknown
...,...,...,...,...
920771,ER3QB,N357,0.000005,unknown
920772,ER3P3,N357,0.000004,unknown
920773,ERDWS,N357,0.000004,unknown
920774,ETAWV,N357,0.000004,unknown


Count ML NOGwise gains:


Unnamed: 0,nog_id,transfers,transfer_threshold
0,EQRBG,15.669436,1.372267e-15
1,EQRFZ,19.318403,1.372267e-15
2,EQRG2,6.139847,1.372267e-15
3,EQRGC,32.660359,1.372267e-15
4,EQRGG,17.762175,1.372267e-15
...,...,...,...
993,ETCIB,0.999963,9.898990e-01
994,ETCIZ,0.999233,9.898990e-01
995,ETCJF,1.982774,9.898990e-01
996,ETCUH,1.993069,9.898990e-01


Count_ML NOGwise branchwise losses:


Unnamed: 0,nog_id,branch,losses
0,ET9HP,1896966,0.000041
1,ERPXQ,1896966,0.000055
2,ET34D,1896966,0.000041
3,ESD2G,1896966,0.086637
4,ERNQY,1896966,0.022795
...,...,...,...
920771,ER3QB,N357,0.000041
920772,ER3P3,N357,0.000041
920773,ERDWS,N357,0.000041
920774,ETAWV,N357,0.000041


## Wn

In [5]:
# Inputs: the Wn HGT gene list and the NOG membership table for this taxonomic id.
wn_hgt_genes_filepath = f"{program_runs_dir}/Wn/Results/HGT_genes.tsv"
members_filepath = f"{data_dir}/{taxonomic_id}_nog_members.tsv"

# The per-NOG table is returned first, the branchwise table second.
wn_nogwise_hgt_df, wn_nogwise_branchwise_hgt_df = compile_wn_results(
    wn_hgt_genes_filepath, members_filepath
)

print("Wn NOGwise HGTs:")
display(wn_nogwise_hgt_df)
print("Wn NOGwise branchwise HGTs:")
display(wn_nogwise_branchwise_hgt_df)

# Write both tables as tab-separated files with a header row and no index column.
wn_tsv_targets = (
    (wn_nogwise_hgt_df, f"{res_dir}/compiled_transfers.nogwise.wn.tsv"),
    (wn_nogwise_branchwise_hgt_df, f"{res_dir}/compiled_transfers.nogwise.branchwise.wn.tsv"),
)
for wn_table, wn_tsv_path in wn_tsv_targets:
    wn_table.to_csv(wn_tsv_path, sep='\t', header=True, index=False)

Gene to NOG map for some genes:
1005058.UMN179_02117: ET9HP
1006000.GKAS_03119: ET9HP
1051646.IX91_13535: ET9HP
1122206.SAMN02745753_03616: ET9HP
1122207.MUS1_13180: ET9HP


Processing transfer thresholds:   0%|          | 0/10 [00:00<?, ?it/s]

Wn NOGwise HGTs:


Unnamed: 0,nog_id,transfers,transfer_threshold
0,EQRVP,1,13.0
1,EQSTU,1,13.0
2,EQTSP,1,13.0
3,EQUN1,1,13.0
4,EQUY3,2,13.0
...,...,...,...
957,ETC37,1,4.0
958,ETCDP,33,4.0
959,ETCEN,43,4.0
960,ETCI9,5,4.0


Wn NOGwise branchwise HGTs:


Unnamed: 0,nog_id,source_branch,recipient_branch,transfers
0,EQRFZ,unknown,966,6.0
1,EQRFZ,unknown,28173,5.0
2,EQRFZ,unknown,43263,5.0
3,EQRFZ,unknown,87626,5.0
4,EQRFZ,unknown,105559,5.0
...,...,...,...,...
16844,ETCIZ,unknown,1232683,4.0
16845,ETCIZ,unknown,1245471,4.0
16846,ETCIZ,unknown,1354303,4.0
16847,ETCIZ,unknown,1392540,4.0
