In [11]:
!pip install biopython==1.85



In [12]:
import subprocess

from Bio import AlignIO
from Bio import Phylo
from Bio.Phylo.TreeConstruction import DistanceTreeConstructor, DistanceCalculator


In [13]:
# Paths used by the alignment step: raw sequences in, MAFFT alignment,
# PHYLIP export, and a legend mapping shortened IDs back to the original IDs.
input_fasta = "best_hits_with_query.fasta"
msa_fasta = "best_hits_with_query.msa.fasta"
phylip_out = "best_hits_with_query.phy"
legend_out = "legend.txt"

# Run MAFFT and send its stdout (the alignment) straight into msa_fasta.
# The `with` block closes the handle before the file is read back below.
# check=True raises CalledProcessError if MAFFT exits with a non-zero status;
# note that MAFFT's stderr is discarded, so its messages are not shown.
with open(msa_fasta, "w") as msa_handle:
    subprocess.run(
        ["mafft", "--auto", input_fasta],
        stdout=msa_handle,
        stderr=subprocess.DEVNULL,
        check=True,
    )

alignment = AlignIO.read(msa_fasta, "fasta")

# Number of records seen so far for each 8-character ID prefix.
used = {}

# Replace every record ID by "<first 8 chars><2-digit counter>" and log the
# mapping to the legend file. The counter keeps IDs that share a prefix distinct.
# Presumably this is done to fit the 10-character name field of the PHYLIP
# format -- confirm against the Biopython PHYLIP writer if IDs change.
with open(legend_out, "w") as leg:
    for rec in alignment:
        base = rec.id[:8]
        count = used.get(base, 0) + 1
        used[base] = count
        short = f"{base}{count:02d}"
        leg.write(f"{short} = {rec.id}\n")
        rec.id = short
        rec.name = short
        rec.description = ""

# Last expression of the cell: its return value is shown as the cell output.
AlignIO.write(alignment, phylip_out, "phylip")


1

In [14]:
# Load the renamed alignment from the PHYLIP file.
aln = AlignIO.read("best_hits_with_query.phy", "phylip")

# Keep only the first 15 rows of the alignment. The original note calls these
# the 15 best sequences -- presumably the file is ordered by hit quality; confirm.
aln15 = aln[:15]

# Set up the distance model and the tree builder.
calculator = DistanceCalculator("identity")
constructor = DistanceTreeConstructor()

# Pairwise distance matrix under the "identity" model, then UPGMA clustering.
dm = calculator.get_distance(aln15)
upgmatree = constructor.upgma(dm)

# Print the tree as an indented clade listing.
print(upgmatree)

Tree(rooted=True)
    Clade(branch_length=0, name='Inner10')
        Clade(branch_length=0.0752840909090909, name='XLQ2939201')
        Clade(branch_length=0.003056129476584027, name='Inner9')
            Clade(branch_length=0.018508953168044073, name='Inner8')
                Clade(branch_length=0.0537190082644628, name='WMX1867701')
                Clade(branch_length=0.03581267217630851, name='Inner3')
                    Clade(branch_length=0.017906336088154284, name='UYE9549001')
                    Clade(branch_length=0.017906336088154284, name='YP_0098201')
            Clade(branch_length=0.019972451790633602, name='Inner7')
                Clade(branch_length=0.02952823691460056, name='Inner5')
                    Clade(branch_length=0.022727272727272707, name='YP_0092901')
                    Clade(branch_length=0.006198347107437996, name='Inner2')
                        Clade(branch_length=0.01652892561983471, name='YP_0097901')
                        Clade(branch_length=0.

In [16]:
# Render the UPGMA tree as an ASCII-art dendrogram in the cell output.
# `Phylo` is imported in the notebook's import cell together with the other
# Biopython modules, so this cell no longer carries its own import.
Phylo.draw_ascii(upgmatree)

  ________________________________________________________________ XLQ2939201
 |
 |                  ______________________________________________ WMX1867701
_|  _______________|
 | |               |                               _______________ UYE9549001
 | |               |______________________________|
 | |                                              |_______________ YP_0098201
 |_|
   |                                           ___________________ YP_0092901
   |                 _________________________|
   |                |                         |     ______________ YP_0097901
   |                |                         |____|
   |________________|                              |______________ QLF8570901
                    |
                    |                 ____________________________ XUE0382201
                    |                |
                    |________________|                  __________ URY1070001
                                     |            _____

In [17]:
# Load the renamed alignment from the PHYLIP file.
aln = AlignIO.read("best_hits_with_query.phy", "phylip")

# Restrict the analysis to the first 15 rows of the alignment.
aln15 = aln[:15]

# Set up the distance model and the tree builder.
calculator = DistanceCalculator("identity")
constructor = DistanceTreeConstructor()

# Pairwise distance matrix under the "identity" model, then build the tree
# with the constructor's nj() method (neighbor joining).
dm = calculator.get_distance(aln15)
njtree = constructor.nj(dm)

# Print the tree as an indented clade listing.
print(njtree)

Tree(rooted=False)
    Clade(branch_length=0, name='Inner9')
        Clade(branch_length=0.006241391184572974, name='Inner8')
            Clade(branch_length=0.04928546831955924, name='Inner3')
                Clade(branch_length=0.021153089334907497, name='YP_0097901')
                Clade(branch_length=0.002262888626525024, name='Inner2')
                    Clade(branch_length=0.027031680440771318, name='YP_0092901')
                    Clade(branch_length=0.011535812672176286, name='QLF8570901')
            Clade(branch_length=0.011664944903581276, name='Inner7')
                Clade(branch_length=0.012741046831955913, name='NP_0419601')
                Clade(branch_length=0.012052341597796126, name='Inner6')
                    Clade(branch_length=0.012741046831955927, name='URY1070001')
                    Clade(branch_length=0.012052341597796168, name='URY1065701')
        Clade(branch_length=0.0287103994490358, name='XUE0382201')
        Clade(branch_length=0.0201876721763085

In [18]:
# Render the NJ tree as an ASCII-art dendrogram in the cell output.
Phylo.draw_ascii(njtree)

                                      ____________ YP_0097901
      _______________________________|
     |                               | _________________ YP_0092901
     |                               ||
  ___|                                |_______ QLF8570901
 |   |
 |   |       _______ NP_0419601
 |   |______|
 |          |        _______ URY1070001
_|          |_______|
 |                  |_______ URY1065701
 |
 |_________________ XUE0382201
 |
 |             ___________________________________________________ XLQ2939201
 |____________|
              |       ____________________________________ WMX1867701
              |______|
                     |                      ________________ UYE9549001
                     |_____________________|
                                           |_____ YP_0098201

