In [1]:
import pandas as pd
import re
import itertools
import numpy as np
from matplotlib import pyplot as plt
import seaborn as sns
import igraph as ig
from IPython.display import HTML
import multiprocessing
import pickle as pkl
import random
from collections import Counter
import os
import subprocess
from scipy.stats import mannwhitneyu
import ete3
from copy import deepcopy
from math import ceil

# Handle to the NCBI taxonomy, used in the next cell to resolve lineages and ranks of taxIDs.
ncbi = ete3.NCBITaxa()
# Runs the companion notebook in this namespace; presumably it provides the helpers called
#   further down (cd, match_rooting, tree_to_dag, get_leaf_names, ...) -- TODO confirm.
%run assess_connections-functions.ipynb

# Every relative path used by the following cells is resolved from this directory.
%cd ~/work/eggNOG/

/nobackup1b/users/thiberio/eggNOG


In [2]:
sampled_genomes = pd.read_csv('../kelsey/genomes.tab',
                              sep='\t',
                              index_col=0)

#
# gather one {rank: taxid} record per sampled species and build the table in a single call;
#   DataFrame.append was removed in pandas 2.0 and copied the whole frame at every iteration
lineage_records = []
for taxid in sampled_genomes.species_taxid.unique():
    if pd.isna(taxid):
        continue
    #
    # get_rank maps taxid -> rank, we store it inverted so that ranks become columns
    lineage_records.append({tax_rank: tmp_taxid
                            for tmp_taxid, tax_rank in ncbi.get_rank(ncbi.get_lineage(taxid)).items()})

lineages = pd.DataFrame(lineage_records)
#
# keep only the ranks used downstream, ranks absent from every lineage become NaN columns
lineages = lineages.reindex(columns=['class', 'family',  'genus', 'phylum',
                                     'order', 'species', 'superkingdom']).copy()
#
# restrict to lineages whose superkingdom taxid is 2
lineages = lineages.query('superkingdom == 2').copy()

In [3]:
#
# phylum taxIDs present among the retained lineages, in order of first appearance;
#   lineages without a phylum are dropped before the cast because NaN has no integer
#   representation (the original relied on the garbage cast value being filtered by "> 0")
sampled_phyla = [phylum for phylum in lineages.phylum.dropna().unique().astype(int) if phylum > 0]

In [4]:
#
# load the three parquet tables used by the following cells, all through fastparquet
working_groups, working_trees, eggNOG_taxonomy = (
    pd.read_parquet(parquet_path, engine='fastparquet')
    for parquet_path in ('working_eggNOG_groups.parquet',
                         'working_eggNOG_trees.parquet',
                         'eggNOG_taxonomy.parquet')
)

In [None]:
#
# NOTE(review): unpickling can execute arbitrary code, only load a file produced by this project.
# the handle gets an explicit name because `_` is IPython's last-output variable and
#   binding it here would shadow that variable for the rest of the session
with open('all_results.pkl', 'rb') as results_handle:
    phylum_connections = pkl.load(results_handle)

In [6]:
#
# column layout shared by both edge tables
edge_columns = ['source', 'target', 'distance', 'group']

significant_connections = []
regular_connections     = []

for group_id, group_clusters in phylum_connections:
    #
    # groups without clusters have nothing to report
    if not group_clusters:
        continue

    for cluster_num, cluster_data in group_clusters.items():
        for ref_phylum, phyla_dists in cluster_data.items():

            distance_table = phyla_dists['df']
            if not distance_table.shape[0]:
                continue

            #
            # the first row holds the closest phylum, it must be one of the sampled ones
            closest_phylum = distance_table.iloc[0, 0]
            if closest_phylum not in sampled_phyla:
                continue

            if phyla_dists['significant']:
                #
                # first row is the significant connection, the rest are regular ones
                significant_connections.append( (ref_phylum,
                                                 closest_phylum,
                                                 distance_table.iloc[0, 1],
                                                 '%s#%i' % (group_id, cluster_num)) )
                remaining_rows = distance_table.iloc[1:, :]
            else:
                #
                # nothing significant here, every row is a regular connection
                remaining_rows = distance_table

            for index, tmp_series in remaining_rows.iterrows():
                if tmp_series.phylum not in sampled_phyla:
                    continue
                regular_connections.append( (ref_phylum,
                                             tmp_series.phylum,
                                             tmp_series['median'],
                                             '%s#%i' % (group_id, cluster_num)) )

#
# rows with any missing value are discarded from both tables
significant_df = pd.DataFrame(data=significant_connections, columns=edge_columns).dropna(how='any')
regular_df     = pd.DataFrame(data=regular_connections,     columns=edge_columns).dropna(how='any')

In [150]:
#
# fastTree
#

def _describe_transfer(candidate, donor_node, donor_phylum, recipient_node, recipient_phylum):
    """Build the entry stored for a candidate whose recipient clade is nested within the donor clade.

    Returns the bare candidate name when both clades resolve to the assessed phyla
    themselves, otherwise "<candidate> from <donor taxon> to <recipient taxon>".
    """
    donor_taxon     = fine_tune_recipient(donor_node,     donor_phylum)
    recipient_taxon = fine_tune_recipient(recipient_node, recipient_phylum)

    #
    # when no finer taxon could be resolved we fall back to the phylum itself,
    #   '%i' cannot format None and used to abort the whole assessment
    if donor_taxon is None:
        donor_taxon = donor_phylum
    if recipient_taxon is None:
        recipient_taxon = recipient_phylum

    if (donor_taxon, recipient_taxon) != (donor_phylum, recipient_phylum):
        return '%s from %i to %i' % (candidate, donor_taxon, recipient_taxon)
    return candidate


def assess_transfers_between_phyla(related_phyla):
    """Sort the significant candidates connecting two phyla by the relation seen in their gene trees.

    Parameters
    ----------
    related_phyla : tuple
        Pair of phylum taxIDs, e.g. (1090, 1117).

    Returns
    -------
    dict
        Four sets keyed by 'from <p1> to <p2>', 'from <p2> to <p1>', 'sisters' and
        'no clear relationship between phyla'. Entries are "<group>#<cluster>" names,
        optionally followed by " from <donor taxon> to <recipient taxon>" when a taxon
        other than the phyla themselves could be resolved.

    Candidates are the `group` values of `significant_df` rows linking both phyla in
    either direction. Candidates lacking a rooted tree in candidates/trees/, or whose
    trees cannot be parsed, are skipped and appear in no category.
    """
    phylum1, phylum2 = related_phyla

    candidates    = set(significant_df.query('source=="%i" and target=="%i"' % related_phyla).group.values).union(
                        significant_df.query('target=="%i" and source=="%i"' % related_phyla).group.values
                    )
    phylum1_to_phylum2 = f'from {phylum1} to {phylum2}'
    phylum2_to_phylum1 = f'from {phylum2} to {phylum1}'
    descriptions  = {phylum1_to_phylum2:                    set(),
                     phylum2_to_phylum1:                    set(),
                     'sisters':                             set(),
                     'no clear relationship between phyla': set()}

    for candidate in candidates:

        group_id, cluster_num = candidate.split('#')

        with cd('candidates/trees/'):
            #
            # test if there is rooted version of the tree (mad output)
            if not os.path.isfile('%s-cluster%s.fastTree.rooted' % (group_id, cluster_num)):
                continue

            #
            # we need the original tree cause mad removes support values, so we just transfer root positions
            try:
                tmp_tree   = match_rooting(ete3.Tree('%s-cluster%s.fastTree.rooted' % (group_id, cluster_num)),
                                           ete3.Tree('%s-cluster%s.fastTree'        % (group_id, cluster_num)))
            except ete3.parser.newick.NewickError:
                continue

        #
        # extract taxIDs to subsample taxonomy tables
        taxids = {}
        for leaf in tmp_tree.get_leaf_names():
            #
            # leaf names are composed by <taxid>.<locus_tag>
            #   ps: locus_tag may also have "." within it
            tmp_taxid = int(leaf.split('.')[0])
            taxids.setdefault(tmp_taxid, []).append(leaf)

        #
        # select taxIDs from each assessed phylum...
        #   the keys are handed over as a list, pandas 2 rejects dicts as .loc indexers
        tree_taxonomy    = eggNOG_taxonomy.loc[list(taxids)]
        phylum1_taxonomy = tree_taxonomy.query('phylum==@phylum1')
        phylum2_taxonomy = tree_taxonomy.query('phylum==@phylum2')

        #
        # ... and their respective leaves
        phylum1_leaves = {leaf for taxid in phylum1_taxonomy.index for leaf in taxids[taxid]}
        phylum2_leaves = {leaf for taxid in phylum2_taxonomy.index for leaf in taxids[taxid]}
        all_leaves     = phylum1_leaves.union(phylum2_leaves)

        #
        # as tree traversing through ete3 isn't very efficient, and doesn't scale very well,
        #   we create an iGraph dag for more efficient traversing
        dag  = tree_to_dag(tmp_tree)          # we need a directed version...
        udag = dag.as_undirected(mode='each') # ... and an undirected one for different processes

        #
        # placeholders where we will add monophyletic clades for each phylum
        phylum1_clades = set()
        phylum2_clades = set()

        ignored_nodes  = set() # descendants of monophyletic nodes should be ignored once their
                               #   ancestors have been stored (a set keeps the lookup cheap).

        #
        # traverse through the nodes of the tree
        for node in dag.vs:
            if node.index in ignored_nodes:
                continue

            node_leaves = get_leaf_names(node)

            #
            # no leaf from either phylum below this node, nothing to look for underneath
            if all_leaves.isdisjoint(node_leaves):
                ignored_nodes.update(get_descendant_indices(node, leaves=True))
                continue

            #
            # if there aren't leaves from other phyla within this node it is monophyletic
            if phylum1_leaves.intersection(node_leaves) and phylum1_leaves.issuperset(node_leaves):
                phylum1_clades.add(node.index)
                ignored_nodes.update(get_descendant_indices(node, leaves=True))

            elif phylum2_leaves.intersection(node_leaves) and phylum2_leaves.issuperset(node_leaves):
                phylum2_clades.add(node.index)
                ignored_nodes.update(get_descendant_indices(node, leaves=True))

        #
        # now we add some flexibility to the monophyly of nodes for three reasons:
        #   1) donor nodes within gene trees will never be monophyletic since the recipient
        #      must be nested within it.
        #   2) if there are other transfers from the donor and/or recipient phyla to a 3rd one
        #      we still want to capture it.
        #   3) good ol' phylogenetic uncertainty
        phylum1_clades = merge_polyphyletic_clades(phylum1_clades, udag)
        phylum2_clades = merge_polyphyletic_clades(phylum2_clades, udag)

        #
        # ancestors of a clade are the nodes between itself and the root node (vertex 0);
        #   each path is computed once per clade instead of once per pair of clades
        ancestor_cache = {}
        def ancestors_of(clade):
            if clade not in ancestor_cache:
                ancestor_cache[clade] = udag.vs[clade].get_shortest_paths(udag.vs[0])[0][1:]
            return ancestor_cache[clade]

        #
        # flag if we can identify relations between phyla:
        #   1 nested within 2
        #   2 nested within 1
        #   1 and 2 are sisters
        #
        # if no relation is identified, leave as False
        relationship_found = False

        for clade1, clade2 in itertools.product(phylum1_clades, phylum2_clades):

            clade1_ancestors = ancestors_of(clade1)
            #
            # if clade2 within clade1 ancestors it means that clade1 is nested within clade2
            #   evidence of transfer from clade2 -> clade1
            if clade2 in clade1_ancestors:
                descriptions[phylum2_to_phylum1].add(
                    _describe_transfer(candidate,
                                       donor_node=dag.vs[clade2],     donor_phylum=phylum2,
                                       recipient_node=dag.vs[clade1], recipient_phylum=phylum1)
                )
                relationship_found = True
                continue

            #
            # if clade1 within clade2 ancestors it means that clade2 is nested within clade1
            #   evidence of transfer from clade1 -> clade2
            clade2_ancestors = ancestors_of(clade2)
            if clade1 in clade2_ancestors:
                descriptions[phylum1_to_phylum2].add(
                    _describe_transfer(candidate,
                                       donor_node=dag.vs[clade1],     donor_phylum=phylum1,
                                       recipient_node=dag.vs[clade2], recipient_phylum=phylum2)
                )
                relationship_found = True
                continue

            #
            # if clade1 and clade2 have the same parent node it means they are sisters
            #   evidence of hgt, but no information about directionality
            #   (a clade sitting at the root has no parent, hence the emptiness guards)
            if clade1_ancestors and clade2_ancestors and clade1_ancestors[0] == clade2_ancestors[0]:
                descriptions['sisters'].add(candidate)
                relationship_found = True
                continue

        #
        # if the flag still is False, we couldn't identify a relationship between phyla
        if not relationship_found:
            descriptions['no clear relationship between phyla'].add(candidate)

    return descriptions

In [161]:
#
# assess every unordered pair of sampled phyla, each pair is printed as its
#   assessment starts so that progress can be followed
descriptions = dict()
for phylum_pair in itertools.combinations(sampled_phyla, 2):
    print(phylum_pair)
    descriptions.update({phylum_pair: assess_transfers_between_phyla(phylum_pair)})

(1090, 1117)
(1090, 1224)
(1090, 200795)
(1090, 976)
(1090, 1134404)
(1090, 1798710)
(1117, 1224)
(1117, 200795)
(1117, 976)
(1117, 1134404)
(1117, 1798710)
(1224, 200795)
(1224, 976)
(1224, 1134404)
(1224, 1798710)
(200795, 976)
(200795, 1134404)
(200795, 1798710)
(976, 1134404)
(976, 1798710)
(1134404, 1798710)


In [174]:
candidates_to_improve = set()
for pair, description in descriptions.items():
    #
    # pairs involving phylum 1224 are left out of this summary
    if 1224 in pair:
        continue

    print('%i-%i' % pair)
    for key, value in description.items():
        #
        # only directional categories ("from X to Y") feed the set of candidates to improve;
        #   the pattern is a raw string so that "\d" is not read as an invalid string escape
        if re.match(r'from \d+ to \d+$', key):
            #
            # entries may carry a " from A to B" suffix, we keep only "<group>#<cluster>"
            candidates_to_improve.update(entry.split()[0] for entry in value)

        print('  ', key+':', len(value))

    print()

1090-1117
   from 1090 to 1117: 1
   from 1117 to 1090: 6
   sisters: 5
   no clear relationship between phyla: 11

1090-200795
   from 1090 to 200795: 1
   from 200795 to 1090: 2
   sisters: 5
   no clear relationship between phyla: 4

1090-976
   from 1090 to 976: 0
   from 976 to 1090: 171
   sisters: 48
   no clear relationship between phyla: 78

1090-1134404
   from 1090 to 1134404: 0
   from 1134404 to 1090: 0
   sisters: 0
   no clear relationship between phyla: 0

1090-1798710
   from 1090 to 1798710: 0
   from 1798710 to 1090: 0
   sisters: 0
   no clear relationship between phyla: 0

1117-200795
   from 1117 to 200795: 24
   from 200795 to 1117: 18
   sisters: 14
   no clear relationship between phyla: 21

1117-976
   from 1117 to 976: 10
   from 976 to 1117: 45
   sisters: 17
   no clear relationship between phyla: 30

1117-1134404
   from 1117 to 1134404: 0
   from 1134404 to 1117: 0
   sisters: 0
   no clear relationship between phyla: 0

1117-1798710
   from 1117 to 17987

In [180]:
#
# print every candidate for which no alignment file can be found
for candidate in candidates_to_improve:
    group_id, cluster_num = candidate.split('#')
    alignment_path        = f'candidates/alignments/{group_id}-cluster{cluster_num}.aln'

    if os.path.isfile(alignment_path):
        continue
    print(candidate)

In [182]:
#
# one "<group>-cluster<n>" name per line; the set is sorted because its iteration
#   order changes between sessions, which made the file differ from run to run
with open('candidates/candidates_to_run_iqtree', 'w') as out:

    for candidate in sorted(candidates_to_improve):
        group_id, cluster_num = candidate.split('#')
        out.write(f'{group_id}-cluster{cluster_num}\n')

In [165]:
# one entry per line for the transfers described from phylum 1224 to phylum 1117
print(*descriptions[(1117,1224)]['from 1224 to 1117'], sep='\n')

COG4279#4
32RWS#1
COG5654#0 from 1224 to 33071
COG1932#1
COG5622#0
COG4329#1
COG4453#0
COG2939#5
COG3626#1
COG0229#2
COG1393#0
COG0483#2
COG2059#11
COG3950#5
COG1322#6
COG4603#5
COG0783#0
COG1875#0
COG3607#3
COG0006#0 from 1224 to 33071
COG5350#0 from 28211 to 1117
31YEN#1
COG2906#0
COG1791#1
32XNF#0 from 1236 to 1117
COG4778#0
COG0857#1
COG2957#2
COG4392#0
331H5#1
COG0229#2 from 1224 to 33071
COG0792#0
COG0354#1 from 28211 to 1117
COG3861#0 from 1224 to 1416614
COG4325#2
32VVI#0
COG5463#0
COG0031#0 from 1224 to 1416614
COG4633#3
COG2243#3
COG3454#4
COG4328#3
32TNJ#0 from 28211 to 1117
COG0557#1
COG1115#0
32YVY#2
COG0303#3
COG4338#0
COG1392#0
COG0008#0
COG0511#3
COG4222#0
COG2371#1 from 28211 to 1117
COG1260#1
COG5309#5
COG0727#0
COG0361#2
COG4222#0 from 1224 to 33072
COG0144#0
COG3379#3
COG1117#4
COG1262#6
COG1204#5
COG0317#9
COG3591#1
COG3450#0
COG0540#1
COG0002#0
COG3251#0
COG3614#0
COG1682#0
COG3636#3
COG3657#2
COG5554#1
COG5662#0 from 1224 to 33072
COG3576#5
COG0166#1
COG0402#4
CO

In [134]:
#
# scratch version of the body of assess_transfers_between_phyla for a single pair of phyla,
#   run at top level so that its loop variables stay available for inspection afterwards.
#   NOTE: it rebinds the notebook-level `descriptions` to the result of this single pair,
#         and it only records nested relationships (sisters are not assessed here).
related_phyla = (200795, 1117)
descriptions  = {f'from {related_phyla[0]} to {related_phyla[1]}': set(),
                 f'from {related_phyla[1]} to {related_phyla[0]}': set(),
                  'sisters':                                       set(),
                  'no clear relationship between phyla':           set()}
candidates    = set(significant_df.query('source=="%i" and target=="%i"' % related_phyla).group.values).union(
                    significant_df.query('target=="%i" and source=="%i"' % related_phyla).group.values
                )

for candidate in candidates:

    group_id, cluster_num = candidate.split('#')

    with cd('candidates/trees/'):
        #
        # test if there is rooted version of the tree (mad output)
        if not os.path.isfile('%s-cluster%s.fastTree.rooted' % (group_id, cluster_num)):
            continue

        #
        # we need the original tree cause mad removes support values, so we just transfer root positions
        try:
            tmp_tree   = match_rooting(ete3.Tree('%s-cluster%s.fastTree.rooted' % (group_id, cluster_num)),
                                       ete3.Tree('%s-cluster%s.fastTree'        % (group_id, cluster_num)))
        except ete3.parser.newick.NewickError:
            continue

    #
    # extract taxIDs to subsample taxonomy tables
    taxids = {}
    for leaf in tmp_tree.get_leaf_names():
        #
        # leaf names are composed by <taxid>.<locus_tag>
        #   ps: locus_tag may also have "." within it
        tmp_taxid = int(leaf.split('.')[0])

        if tmp_taxid not in taxids:
            taxids[tmp_taxid] = []
        taxids[tmp_taxid].append(leaf)

    #
    # select taxIDs from each assessed phylum...
    #   the keys are handed over as a list, pandas 2 rejects dicts as .loc indexers
    tree_taxonomy    = eggNOG_taxonomy.loc[list(taxids)]
    phylum1_taxonomy = tree_taxonomy.query('phylum==@related_phyla[0]')
    phylum2_taxonomy = tree_taxonomy.query('phylum==@related_phyla[1]')

    #
    # ... and their respective leaves
    phylum1_leaves = set()
    for taxid in phylum1_taxonomy.index:
        phylum1_leaves.update(taxids[taxid])

    phylum2_leaves = set()
    for taxid in phylum2_taxonomy.index:
        phylum2_leaves.update(taxids[taxid])

    all_leaves = phylum1_leaves.union(phylum2_leaves)

    #
    # as tree traversing through ete3 isn't very efficient, and doesn't scale very well,
    #   we create an iGraph dag for more efficient traversing
    dag  = tree_to_dag(tmp_tree)          # we need a directed version...
    udag = dag.as_undirected(mode='each') # ... and an undirected one for different processes

    #
    # placeholders where we will add monophyletic clades for each phylum
    phylum1_clades = set()
    phylum2_clades = set()

    ignored_nodes  = set() # descendants of monophyletic nodes should be ignored once their
                           #   ancestors have been stored (a set keeps the lookup cheap).

    #
    # traverse through the nodes of the tree
    for node in dag.vs:
        if node.index in ignored_nodes:
            continue

        node_leaves = get_leaf_names(node)

        if all_leaves.isdisjoint(node_leaves):
            ignored_nodes.update(get_descendant_indices(node, leaves=True))

        #
        # if there aren't leaves from other phyla within this node it is monophyletic
        if phylum1_leaves.intersection(node_leaves) and phylum1_leaves.issuperset(node_leaves):
            phylum1_clades.add(node.index)
            ignored_nodes.update(get_descendant_indices(node, leaves=True))

        elif phylum2_leaves.intersection(node_leaves) and phylum2_leaves.issuperset(node_leaves):
            phylum2_clades.add(node.index)
            ignored_nodes.update(get_descendant_indices(node, leaves=True))

    #
    # now we add some flexibility to the monophyly of nodes for three reasons:
    #   1) donor nodes within gene trees will never be monophyletic since the recipient
    #      must be nested within it.
    #   2) if there are other transfers from the donor and/or recipient phyla to a 3rd one
    #      we still want to capture it.
    #   3) good ol' phylogenetic uncertainty
    phylum1_clades = merge_polyphyletic_clades(phylum1_clades, udag)
    phylum2_clades = merge_polyphyletic_clades(phylum2_clades, udag)

    #
    # flag if we can identify relations between phyla:
    #   1 nested within 2
    #   2 nested within 1
    #
    # if no relation is identified, leave as false
    phyla_relationship_flag = 0

    for clade1, clade2 in itertools.product(phylum1_clades, phylum2_clades):

        #
        # ancestors of clade1 are the nodes between itself and the root node (vertex 0);
        #   clade2 among them means clade1 is nested within clade2: transfer clade2 -> clade1
        clade1_ancestors = udag.vs[clade1].get_shortest_paths(udag.vs[0])[0][1:]
        if clade2 in clade1_ancestors:
            recipient_taxon         = fine_tune_recipient(dag.vs[clade1],
                                                          related_phyla[0])
            donor_taxon             = fine_tune_recipient(dag.vs[clade2],
                                                          related_phyla[1])
            phyla_relationship_flag = 1

            #
            # fall back to the phylum when no finer taxon is resolved, '%i' cannot format None
            if recipient_taxon is None:
                recipient_taxon = related_phyla[0]
            if donor_taxon is None:
                donor_taxon = related_phyla[1]

            if (recipient_taxon, donor_taxon) != related_phyla:
                descriptions[f'from {related_phyla[1]} to {related_phyla[0]}'].add('%s from %i to %i' % \
                                                                                   (candidate, donor_taxon,
                                                                                    recipient_taxon))
            else:
                descriptions[f'from {related_phyla[1]} to {related_phyla[0]}'].add(candidate)
            continue

        #
        # clade1 among clade2 ancestors means clade2 is nested within clade1: transfer clade1 -> clade2
        clade2_ancestors = udag.vs[clade2].get_shortest_paths(udag.vs[0])[0][1:]
        if clade1 in clade2_ancestors:
            recipient_taxon         = fine_tune_recipient(dag.vs[clade2],
                                                          related_phyla[1])
            donor_taxon             = fine_tune_recipient(dag.vs[clade1],
                                                          related_phyla[0])
            phyla_relationship_flag = 1

            #
            # fall back to the phylum when no finer taxon is resolved, '%i' cannot format None
            if recipient_taxon is None:
                recipient_taxon = related_phyla[1]
            if donor_taxon is None:
                donor_taxon = related_phyla[0]

            if (donor_taxon, recipient_taxon) !=  related_phyla:
                descriptions[f'from {related_phyla[0]} to {related_phyla[1]}'].add('%s from %i to %i' % \
                                                                                   (candidate, donor_taxon,
                                                                                    recipient_taxon))
            else:
                descriptions[f'from {related_phyla[0]} to {related_phyla[1]}'].add(candidate)
            continue

descriptions

{'from 200795 to 1117': {'COG0001#4',
  'COG0049#2',
  'COG0345#1',
  'COG0354#0',
  'COG0499#0',
  'COG0554#3',
  'COG0637#4',
  'COG1075#1 from 32064 to 1117',
  'COG1086#8',
  'COG1177#0',
  'COG1304#5',
  'COG1336#1 from 32064 to 1117',
  'COG1351#7 from 32061 to 1117',
  'COG1554#0',
  'COG1704#1',
  'COG2896#1',
  'COG3259#2',
  'COG3379#4 from 32064 to 1117'},
 'from 1117 to 200795': {'COG0115#2 from 1117 to 189775',
  'COG0828#0',
  'COG1517#2 from 1117 to 32061',
  'COG1666#2',
  'COG1894#0 from 1117 to 32064',
  'COG1905#1 from 1117 to 133453',
  'COG1905#1 from 1117 to 32064',
  'COG1941#3 from 1117 to 120961',
  'COG1941#3 from 1117 to 32064',
  'COG2343#0 from 1117 to 104176',
  'COG2343#0 from 1117 to 120961',
  'COG2452#1',
  'COG2981#4 from 1117 to 32064',
  'COG4118#7 from 1117 to 32064',
  'COG4467#0 from 1117 to 32064',
  'COG4663#1',
  'COG4978#0',
  'COG4978#0 from 1117 to 363277',
  'COG5000#0 from 1117 to 133453',
  'COG5000#0 from 1117 to 2057',
  'COG5000#0 fro

In [129]:
# scratch check: taxon resolved for the clade1 vertex within the first phylum of the pair;
#   dag, clade1 and related_phyla are not defined here, they must be left over from a loop cell
fine_tune_recipient(dag.vs[clade1], related_phyla[0])

32064.0

In [116]:
# scratch inspection of the values currently bound to these loop variables (none is defined in this cell)
candidate, donor_taxon, recipient_taxon

('COG4118#7', 1117.0, None)

In [117]:
# scratch inspection of the last pair of clade vertex indices bound by a loop cell
clade1, clade2

(94, 2)

In [125]:
def fine_tune_recipient(node, phylum):
    """Return the most specific taxon shared by the leaves of `node` that belong to `phylum`.

    Parameters
    ----------
    node
        Tree vertex handed to `get_leaf_names`; leaf names must start with "<taxid>.".
    phylum
        Phylum taxID, only leaves whose taxonomy row has this phylum are considered.

    Returns
    -------
    The taxID (as stored in `eggNOG_taxonomy`) of the last rank, walking from phylum
    down to species, at which all considered leaves share a single non-missing taxon.
    None when no leaf of `node` belongs to `phylum`.
    """
    node_taxids   = [int(leaf.split('.')[0]) for leaf in get_leaf_names(node)]
    node_taxonomy = eggNOG_taxonomy.loc[node_taxids,
                                        ['phylum', 'class', 'order',
                                         'family', 'genus', 'species']
                                       ].query(f'phylum=={phylum}')

    most_specific_taxon = None
    #
    # columns are visited from the broadest to the narrowest rank and we stop at the first
    #   rank where leaves disagree or lack a taxon.
    #   ps: DataFrame.items() replaces iteritems(), which was removed in pandas 2.0
    for rank, column in node_taxonomy.items():
        unique_taxa = column.unique()

        if pd.notna(unique_taxa).all() and len(unique_taxa) == 1:
            most_specific_taxon = unique_taxa[0]
        else:
            break

    return most_specific_taxon

In [120]:
# scratch inspection of the graph vertex behind clade1 (dag and clade1 come from an earlier loop cell)
dag.vs[clade1]

igraph.Vertex(<igraph.Graph object at 0x2aab6b271250>, 94, {'name': 'node_94', 'is_leaf': False})

In [122]:
# taxIDs of the leaves under the clade1 vertex, taken from the "<taxid>." prefix of each leaf name
node_taxids   = [int(leaf.split('.')[0]) for leaf in get_leaf_names(dag.vs[clade1])]

In [123]:
# display the taxIDs collected in the previous scratch cell
node_taxids

[1521187, 383372, 326427, 324602, 357808]

In [124]:
# taxonomy rows of the taxIDs collected above, one row per listed taxID
eggNOG_taxonomy.loc[node_taxids]

Unnamed: 0_level_0,class,family,genus,phylum,order,species,superkingdom
index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
1521187,32061.0,1106.0,1107.0,200795.0,32064.0,1521187.0,2.0
383372,32061.0,1508635.0,120961.0,200795.0,32064.0,120962.0,2.0
326427,32061.0,1106.0,1107.0,200795.0,32064.0,152260.0,2.0
324602,32061.0,1106.0,1107.0,200795.0,32064.0,1108.0,2.0
357808,32061.0,1508635.0,120961.0,200795.0,32064.0,357808.0,2.0


In [127]:
# scratch check of fine_tune_recipient on the clade1 vertex with phylum 200795 passed explicitly
fine_tune_recipient(dag.vs[clade1], 200795)

32064.0