In [1]:
import Bio
import os
from Bio.Seq import Seq
from Bio.Alphabet import IUPAC
from Bio.Alphabet import generic_dna
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO
from Bio.Seq import MutableSeq
from Bio import AlignIO
from Bio.Align.Applications import ClustalwCommandline

In [2]:
# Load every record from the ortholog FASTA file into memory.
# The parse is forced to completion with list() inside the `with` block so
# the file can be closed immediately instead of staying open for the whole
# kernel session.
with open('ciliate_ortholog.fasta', 'r') as handle:
    seq_list = list(SeqIO.parse(handle, 'fasta'))

def remove_set(sequence):
    """Return a mutable copy of ``sequence.seq`` cut off at the first 's'.

    The record's sequence is converted with ``tomutable()``. If it contains
    a lowercase 's', that character and everything after it are removed;
    otherwise the mutable copy is returned unchanged.

    NOTE(review): presumably 's' marks trailing non-nucleotide text in the
    input FASTA -- confirm against the data file.
    """
    trimmed = sequence.seq.tomutable()
    if 's' not in trimmed:
        return trimmed
    cut_at = trimmed.index('s')
    # Assigning an empty string to the tail slice deletes it in place.
    trimmed[cut_at:] = ''
    return trimmed

# Write two FASTA files from the trimmed input records:
#   ciliate_rna.fasta - each sequence transcribed to RNA
#   ciliate_aa.fasta  - each sequence translated to protein
# Both files are opened in a single `with` so they are flushed and closed
# even if transcription, translation, or a write raises part-way through.
with open('ciliate_rna.fasta', 'w') as output_rna, open('ciliate_aa.fasta', 'w') as output_aa:
    for record in seq_list:
        trimmed_seq = remove_set(record)
        # The mutable copy is tagged as unambiguous DNA before being turned
        # back into an immutable Seq for transcribe()/translate().
        trimmed_seq.alphabet = IUPAC.unambiguous_dna
        dna_seq = trimmed_seq.toseq()
        rna_seq = dna_seq.transcribe()
        # table=6 is the NCBI ciliate nuclear genetic code; to_stop=True ends
        # the protein at the first in-frame stop codon.
        aa_seq = dna_seq.translate(table=6, to_stop=True)
        record_name = record.id
        output_rna.write('>' + str(record_name) + '\n')
        output_rna.write(str(rna_seq) + '\n')
        output_aa.write('>' + str(record_name) + '\n')
        output_aa.write(str(aa_seq) + '\n')

In [3]:
# Build and run the ClustalW alignments for the RNA and protein FASTA files.
#
# The protein input is 'ciliate_aa.fasta', the name this notebook writes the
# translated sequences under. `newtree` pins the guide-tree file names so
# they do not depend on the input file name; later cells read
# 'ciliate_rna.dnd' and 'ciliate_protein.dnd'.
clustalw_rna = ClustalwCommandline(
    "clustalw2",
    infile="ciliate_rna.fasta",
    outfile="ciliate_rna.aln",
    newtree="ciliate_rna.dnd",
)
clustalw_aa = ClustalwCommandline(
    "clustalw2",
    infile="ciliate_aa.fasta",
    outfile="ciliate_protein.aln",
    newtree="ciliate_protein.dnd",
)

# Constructing the command objects does not run anything; calling them
# executes clustalw2 (which must be on PATH) and returns (stdout, stderr).
clustalw_rna_stdout, clustalw_rna_stderr = clustalw_rna()
clustalw_aa_stdout, clustalw_aa_stderr = clustalw_aa()

In [5]:
# Read each Clustal alignment and keep its rows as a plain list of records.
align_rna = [record for record in AlignIO.read("ciliate_rna.aln", "clustal")]
align_aa = [record for record in AlignIO.read("ciliate_protein.aln", "clustal")]

In [7]:
# Destination for the combined RNA/protein listing; opened for writing, so
# any existing Q3.fasta is truncated.
output_file = open('Q3.fasta', mode='w')

In [8]:
# For every aligned RNA record write three lines to Q3.fasta:
#   >id, the aligned RNA sequence, the aligned protein sequence.
#
# The protein sequence is looked up by record id. Scanning align_aa and
# keeping whatever the loop variable held at the end would attach the LAST
# protein sequence to every RNA record; positional pairing is also avoided
# because the two alignments are not guaranteed to list records in the
# same order.
protein_seq_by_id = {aa_record.id: aa_record.seq for aa_record in align_aa}

# `with output_file:` closes the already-open handle when the block exits,
# including when an error is raised part-way through.
with output_file:
    for rna_record in align_rna:
        record_id = rna_record.id
        if record_id not in protein_seq_by_id:
            raise KeyError(
                'no protein alignment record with id ' + repr(record_id)
            )
        output_file.write('>' + str(record_id) + '\n')
        output_file.write(str(rna_record.seq) + '\n')
        output_file.write(str(protein_seq_by_id[record_id]) + '\n')

In [9]:
from Bio import Phylo

In [14]:
# Parse the two Newick-format .dnd tree files.
tree_rna = Phylo.read("ciliate_rna.dnd", format="newick")
tree_protein = Phylo.read("ciliate_protein.dnd", format="newick")

In [15]:
# Print both trees as ASCII art, RNA tree first, then the protein tree.
for tree in (tree_rna, tree_protein):
    Phylo.draw_ascii(tree)

                             _____________ PCAU.43c3d.1.T00500060
        ____________________|
  _____|                    |____________ PTET.51.1.T1450049
 |     |
 |     |______________________________ TTHERM_00463010glutathione
 |
 |                               ____ PCAU.43c3d.1.T00020306
 |          ____________________|
_|_________|                    |____ PTET.51.1.T1600001
 |         |
 |         |______________________ TTHERM_00339620ubiquitin/ribosomal
 |
 |                              _______ PCAU.43c3d.1.T00520076
 |                       ______|
 |______________________|      |_______ PTET.51.1.T0390188
                        |
                        |______________ TTHERM_00089240intraflagellar

                             _____________ PCAU.43c3d.1.T00500060
            ________________|
  _________|                |____________ PTET.51.1.T1450049
 |         |
 |         |_________________________ TTHERM_00463010glutathione
 |
 |                                   