In [20]:
#! /usr/bin/env python
# -*- coding: utf-8 -*-

'''
Parse the ancestral state reconstruction output from IQ-TREE, together with the topology,
to create an ete3 tree with the ancestral sequences.
'''

from __future__ import division, print_function
import sys
import os
import re
from warnings import warn
from Bio import SeqIO
from ete3 import Tree, TreeNode, NodeStyle, TreeStyle, TextFace, add_face_to_node, CircleFace, faces, AttrFace
#sys.path.append(os.path.dirname(os.path.realpath(__file__)) + '/../../bin')
#sys.path.append(os.path.dirname(os.path.realpath(__file__)) + '/../../tools')
from Bio import AlignIO


In [21]:
# Value of the `name` column that identifies a clone's naive sequence.
naiveID = "naive"
# Validation input: a CSV with one record per line and the columns
#   cloneID, name, abundance, isotype set, chain, sequence
f = "iso_test_rename1.csv"

In [23]:
# Build clone_dict: cloneID -> {sequence name -> record} from the CSV file `f`.
# Each line holds: cloneID, name, abundance, isotype set (':'-separated), chain, sequence.
# Each record is a dict with the keys 'abundance' (int), 'iso_set' (set of str),
# 'chain' (str) and 'seq' (str).
clone_dict = dict()
with open(f) as fh:
    for line in fh:
        line = line.strip()
        if not line:
            # Tolerate blank lines (e.g. a trailing newline at the end of the file)
            # instead of failing on the tuple unpacking below.
            continue
        cloneID, name, abundance, iso_set, chain, seq = line.split(',')
        # Conversions:
        # Every name except the naive one is suffixed with its chain,
        # e.g. name '0' on chain 'heavy' becomes '0_heavy'.
        if name != naiveID:
            name = name + '_' + chain
        abundance = int(abundance)
        # An empty isotype field must become an empty *set*. The literal `{}` is an
        # empty dict, and `some_set & {}` raises TypeError when sets are intersected.
        iso_set = set(iso_set.split(':')) if iso_set != '' else set()
        clone_records = clone_dict.setdefault(cloneID, dict())
        if name in clone_records:
            raise Exception('Found name and cloneID twice: {}-{}'.format(cloneID, name))
        clone_records[name] = {'abundance':abundance, 'iso_set':iso_set, 'chain':chain, 'seq':seq}

# Check that all naive sequences are there:
for cloneID, clone in clone_dict.items():
    assert naiveID in clone, 'Clone {} has no "{}" sequence'.format(cloneID, naiveID)

In [24]:
# Display the parsed structure: cloneID -> sequence name -> record
# (record keys: 'abundance', 'chain', 'iso_set', 'seq').
clone_dict

{'0': {'0_heavy': {'abundance': 1,
   'chain': 'heavy',
   'iso_set': {'IgA'},
   'seq': 'CAGGTCCAACTGCAGCAGCCTGGGGCTGAACTGGTAAAGCCTGGGGCTTCAGTGAAATTGTCCTGCAAGGCTTCTGGTTATATTTTCACCAGTTACTGGAAGCAATGGGTGAAGCAGAGGCCTGGACAAGGCCTTGAGTGGATTGGAATGATTCAGCCTAATAGTGGTAGTCTTAACTACAATGAGAAGTTCAAGAACAAGGCCACACTGACTGTAGACAAATCCTCCAGCACAGCCTACATGCAATTAAACACCCTGACATCTGAGGACTCTGCGGTCTATTTCTGTGCAAGAGGGGTCTACGGCAGTTATTACTATGCAATGGACTACTGGGGTCAAGGAGCCTCAGTCACCGTCTCCTCA'},
  '10_heavy': {'abundance': 1,
   'chain': 'heavy',
   'iso_set': {'IgA'},
   'seq': 'CAGGTCCAACTACAACAGCCTGGGGCTGAGCTGGTAAAGCCTGGGGCTTCAGTGAAGTTGTCCTGCAAGGCTTCTGGCTACATTTTCACCAGCTACTGGATGCACTGGGGGAAGCAGAGGCCTGGACAAGGCCTTGAGTGGATTGGAATGATTCAACCTAATAGTGGTAGTCTTACCTACAATGAGAAGTTCAAGAGCAAGGCCACACTGACTGTAGACAAATCCTCCAGCACAGCCTACATGCAACTCAGCAGCCTGACATCTGAGGACTCTGCGGTCTATTACTGTGCAAGAGGGGACTACGGTACTTATTACTATGCTATGGACTACTGGGGTCAAGGAACCTCAGTCACCGTCTCCTCA'},
  '11_heavy': {'abundance': 1,
   'chain': 'heavy',
   'iso_set': {'IgA'},
   'seq': 'CAGG

In [26]:
# Isotype set stored for sequence '0_heavy' of clone '0'.
clone_dict['0']['0_heavy']['iso_set']

{'IgA'}

In [27]:
# Isotype set stored for sequence '10_heavy' of clone '0'.
clone_dict['0']['10_heavy']['iso_set']

{'IgA'}

In [28]:
# Isotype set stored for sequence '93_heavy' of clone '0'.
clone_dict['0']['93_heavy']['iso_set']

{'IgGb'}

In [29]:
# Get the intersection (set `&`): the isotypes that sequences '0_heavy' and
# '10_heavy' of clone '0' have in common.
clone_dict['0']['0_heavy']['iso_set'] & clone_dict['0']['10_heavy']['iso_set']

{'IgA'}

In [30]:
# Intersection of the isotype sets of '0_heavy' and '93_heavy' of clone '0';
# an empty set means the two sequences share no isotype.
clone_dict['0']['0_heavy']['iso_set'] & clone_dict['0']['93_heavy']['iso_set']

set()