In [1]:
import glob
import os
import json
import dendropy
from dendropy.calculate import treecompare

In [2]:
def compare_trees(ref_tree, mutated_tree, is_bipartitions_updated=False):
    """Return the symmetric difference between two trees.

    Parameters
    ----------
    ref_tree, mutated_tree
        The two trees handed to ``treecompare.symmetric_difference``.
        NOTE(review): presumably ``dendropy.Tree`` objects built on the same
        ``TaxonNamespace`` -- confirm against callers.
    is_bipartitions_updated : bool, optional
        Forwarded unchanged to ``treecompare.symmetric_difference``. With the
        default ``False`` DendroPy re-encodes the bipartitions of both trees
        before comparing; pass ``True`` only when the encodings are already
        current and the recomputation should be skipped.

    Returns
    -------
    The value returned by ``treecompare.symmetric_difference``.
    """
    # No explicit encode_bipartitions() here: symmetric_difference performs
    # the encoding itself according to the flag, so encoding up front only
    # repeated the work and left the flag without any effect.
    return treecompare.symmetric_difference(
        ref_tree,
        mutated_tree,
        is_bipartitions_updated=is_bipartitions_updated,
    )
   

In [3]:
def compute_distance_between_trees(tree_file_path, reference_tree):
    """Load a Newick tree from disk and compare it with a reference tree.

    Parameters
    ----------
    tree_file_path : str
        Path of the Newick file to load.
    reference_tree
        Tree the loaded tree is compared against. Its ``taxon_namespace`` is
        reused for the loaded tree so that both trees share one namespace.

    Returns
    -------
    The result of ``compare_trees(reference_tree, <loaded tree>)``.
    """
    # Take the namespace from the reference tree instead of a notebook-level
    # global: the function then works regardless of cell execution order and
    # the two trees are guaranteed to be comparable.
    inferred_tree = dendropy.Tree.get(
        path=tree_file_path,
        schema="newick",
        taxon_namespace=reference_tree.taxon_namespace,
    )
    return compare_trees(reference_tree, inferred_tree)

In [6]:
# NOTE(review): `reference_tree_path` is only assigned inside the
# per-directory loop of the processing cell further down, so this cell raises
# NameError on a fresh kernel (Restart & Run All). What it displays is the
# path left over from the last directory that loop processed.
reference_tree_path

'/Users/user/Desktop/MSM/03-Applied_Bioinformatics/Labs/03_Project/protein_multial_noise_reduction/data/test_data/test_2/asymmetric_1.0.tree'

In [4]:
# NOTE(review): absolute, machine-specific location -- adjust `project_dir`
# (or make it configurable) before running this notebook anywhere else.
project_dir = '/Users/user/Desktop/MSM/03-Applied_Bioinformatics/Labs/03_Project/protein_multial_noise_reduction'
original_dir = os.path.join(project_dir, 'data', 'test_data')
reduced_dir = os.path.join(project_dir, 'data', 'reduced_test_data')
result_dir = os.path.join(project_dir, 'results')

# One namespace shared by every tree loaded below, so that trees can be
# compared with each other.
tns = dendropy.TaxonNamespace()


def _stem(path):
    """Return the file name of `path` without its final extension."""
    return os.path.splitext(os.path.basename(path))[0]


def _find_reference_tree(test_dir, alignment_paths):
    """Return the `.tree` file in `test_dir` that is not an inferred tree.

    Trees inferred from the alignments are stored next to them as
    `<alignment stem>.tree`, so a plain `*.tree` glob also matches them and
    glob gives no ordering guarantee. Those files are filtered out here.
    """
    inferred_stems = {_stem(path) for path in alignment_paths}
    candidates = sorted(
        path
        for path in glob.glob(os.path.join(test_dir, '*.tree'))
        if _stem(path) not in inferred_stems
    )
    if not candidates:
        raise IOError('no reference tree (*.tree) found in {}'.format(test_dir))
    # If several candidates remain, the alphabetically first one is used so
    # that the choice is at least deterministic.
    return candidates[0]


# Every sub-folder of `original_dir` is one test set; plain files are skipped.
directories = sorted(
    os.path.basename(folder)
    for folder in glob.glob(os.path.join(original_dir, '*'))
    if os.path.isdir(folder)
)

# test folder -> alignment key -> (original_distance, noise_reduced_distance)
compare_trees_dictionary = dict()

for directory in directories:
    original_dir_path = os.path.join(original_dir, directory)
    new_folder_path = os.path.join(reduced_dir, directory)
    alignment_paths = sorted(glob.glob(os.path.join(original_dir_path, '*.msl')))

    reference_tree_path = _find_reference_tree(original_dir_path, alignment_paths)
    reference_tree = dendropy.Tree.get(
        path=reference_tree_path,
        schema="newick",
        taxon_namespace=tns,
    )

    folder_distances = dict()
    compare_trees_dictionary[directory] = folder_distances

    for alignment_path in alignment_paths:
        # computing distance between ref tree and inferred trees
        alignment_name = os.path.basename(alignment_path)
        # 'name.msl' -> 'name.tree'; the same file name is used in the
        # original folder and in the noise-reduced folder.
        tree_name = alignment_name[:-3] + 'tree'
        tree_outfile = os.path.join(original_dir_path, tree_name)
        tree_outfile_reduced = os.path.join(new_folder_path, tree_name)

        noise_reduced_distance = compute_distance_between_trees(tree_outfile_reduced, reference_tree)
        original_distance = compute_distance_between_trees(tree_outfile, reference_tree)

        # The key deliberately keeps the trailing dot ('s001.align.1.') so
        # that existing readers of the result file keep working.
        alignment_key_name = alignment_name[:-3]
        folder_distances[alignment_key_name] = (
            original_distance, noise_reduced_distance
        )

# Make sure the output folder exists before writing the results.
if not os.path.isdir(result_dir):
    os.makedirs(result_dir)

distance_results_path = os.path.join(result_dir, 'distance_result_dict')
with open(distance_results_path, 'w') as result_dir_file:
    # The (original, reduced) tuples are written as two-element JSON arrays.
    json.dump(compare_trees_dictionary, result_dir_file)




In [5]:
# Show the collected distances: test folder -> alignment key ->
# (original_distance, noise_reduced_distance), each measured against the
# reference tree of that folder.
compare_trees_dictionary

{'test': {'s001.align.1.': (4, 4),
  's002.align.1.': (6, 6),
  's003.align.1.': (8, 6),
  's004.align.1.': (6, 4),
  's005.align.1.': (6, 6),
  's006.align.1.': (12, 12),
  's007.align.1.': (6, 10),
  's008.align.1.': (8, 10),
  's009.align.1.': (6, 6),
  's010.align.1.': (8, 8)},
 'test_2': {'s001.align.1.': (6, 4),
  's002.align.1.': (8, 8),
  's003.align.1.': (12, 12),
  's004.align.1.': (10, 12),
  's005.align.1.': (6, 8),
  's006.align.1.': (14, 8),
  's007.align.1.': (14, 14),
  's008.align.1.': (12, 12),
  's009.align.1.': (10, 10),
  's010.align.1.': (10, 12)}}