In [1]:
!pip install biopython==1.85

Collecting biopython==1.85
  Downloading biopython-1.85-cp313-cp313-macosx_11_0_arm64.whl.metadata (13 kB)
Downloading biopython-1.85-cp313-cp313-macosx_11_0_arm64.whl (2.8 MB)
   ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━ 2.8/2.8 MB 2.5 MB/s eta 0:00:00
Installing collected packages: biopython
  Attempting uninstall: biopython
    Found existing installation: biopython 1.86
    Uninstalling biopython-1.86:
      Successfully uninstalled biopython-1.86
Successfully installed biopython-1.85

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m24.3.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m


In [27]:
import subprocess

from Bio import AlignIO
from Bio import Phylo
from Bio.Phylo.TreeConstruction import DistanceTreeConstructor, DistanceCalculator


In [28]:
# File names for each stage: raw FASTA -> MAFFT alignment -> PHYLIP, plus a
# legend mapping the shortened sequence names back to the original IDs.
input_fasta = "best_hits_with_query.fasta"
msa_fasta = "best_hits_with_query.msa.fasta"
phylip_out = "best_hits_with_query.phy"
legend_out = "legend.txt"

# Run MAFFT and capture its stdout (the alignment) into msa_fasta.
# The `with` block guarantees the handle is flushed and closed before the file
# is read back below; a bare open() passed as stdout was never closed.
# check=True raises CalledProcessError if mafft exits non-zero; its stderr is
# discarded, so no diagnostic text is shown on failure.
with open(msa_fasta, "w") as msa_handle:
    subprocess.run(
        ["mafft", "--auto", input_fasta],
        stdout=msa_handle,
        stderr=subprocess.DEVNULL,
        check=True,
    )

alignment = AlignIO.read(msa_fasta, "fasta")

# Rename every record to the first 8 characters of its ID plus a two-digit
# running counter per prefix (e.g. "URY10671" -> "URY1067101"), so names stay
# unique at 10 characters. Each mapping is recorded in the legend file.
# NOTE(review): a prefix seen more than 99 times yields an 11-character name —
# confirm that cannot happen for this dataset.
used = {}
with open(legend_out, "w") as leg:
    for rec in alignment:
        base = rec.id[:8]
        count = used.get(base, 0) + 1
        used[base] = count
        short = f"{base}{count:02d}"
        leg.write(f"{short} = {rec.id}\n")
        rec.id = short
        rec.name = short
        rec.description = ""

# Last expression of the cell: its return value is shown as the cell output.
AlignIO.write(alignment, phylip_out, "phylip")


1

In [29]:
# Build a UPGMA tree from the PHYLIP alignment.
constructor = DistanceTreeConstructor()
calculator = DistanceCalculator("identity")

aln = AlignIO.read("best_hits_with_query.phy", "phylip")

# use only the 15 best sequences (the first 15 rows of the alignment)
aln15 = aln[:15]

# Pairwise distance matrix under the "identity" model, then UPGMA clustering.
dm = calculator.get_distance(aln15)
upgmatree = constructor.upgma(dm)

print(upgmatree)

Tree(rooted=True)
    Clade(branch_length=0, name='Inner10')
        Clade(branch_length=0.004427823153409086, name='URY1067101')
        Clade(branch_length=0.0005104758522727236, name='Inner9')
            Clade(branch_length=0.0006769353693181854, name='Inner8')
                Clade(branch_length=0.0032404119318181768, name='YP_0092901')
                Clade(branch_length=0.0008433948863636187, name='Inner5')
                    Clade(branch_length=0.002397017045454558, name='WP_1139901')
                    Clade(branch_length=0.00044389204545455807, name='Inner4')
                        Clade(branch_length=0.001953125, name='URY1071401')
                        Clade(branch_length=0.0008877840909090884, name='Inner3')
                            Clade(branch_length=0.0010653409090909116, name='XPK4255501')
                            Clade(branch_length=0.0003551136363636187, name='Inner1')
                                Clade(branch_length=0.0007102272727272929, name='YP_0020

In [30]:
# Render the UPGMA tree as an ASCII dendrogram in the cell output.
Phylo.draw_ascii(upgmatree)

  ________________________________________________________________ URY1067101
 |
 |                 _______________________________________________ YP_0092901
 |       _________|
_|      |         |            ___________________________________ WP_1139901
 |      |         |___________|
 |      |                     |       ____________________________ URY1071401
 |      |                     |______|
 |      |                            |             _______________ XPK4255501
 |______|                            |____________|
        |                                         |     __________ YP_0020001
        |                                         |____|
        |                                              |__________ YP_0098101
        |
        |             ____________________________________________ YP_0097902
        |____________|
                     |        ____________________________________ UXY9250301
                     |_______|
                             | 

In [31]:
# Build a neighbor-joining (NJ) tree from the same PHYLIP alignment.
aln = AlignIO.read("best_hits_with_query.phy", "phylip")
aln15 = aln[:15]  # keep only the first 15 rows of the alignment

# Distance matrix under the "identity" model.
calculator = DistanceCalculator("identity")
dm = calculator.get_distance(aln15)

# NJ on the distance matrix; the printed tree reports rooted=False.
constructor = DistanceTreeConstructor()
njtree = constructor.nj(dm)

print(njtree)

Tree(rooted=False)
    Clade(branch_length=0, name='Inner9')
        Clade(branch_length=4.438920454548356e-05, name='Inner8')
            Clade(branch_length=0.0013760653409091023, name='Inner2')
                Clade(branch_length=0.0027521306818181594, name='YP_0092901')
                Clade(branch_length=8.87784090909012e-05, name='YP_0020001')
            Clade(branch_length=0.00022194602272727557, name='Inner7')
                Clade(branch_length=0.0011837121212121304, name='XPK4255501')
                Clade(branch_length=0.00023674242424242078, name='Inner6')
                    Clade(branch_length=0.0010653409090908821, name='Inner4')
                        Clade(branch_length=0.003906250000000024, name='URY1067101')
                        Clade(branch_length=0.001775568181818208, name='URY1071401')
                    Clade(branch_length=0.0007102272727272669, name='Inner5')
                        Clade(branch_length=0.0014914772727272678, name='Inner1')
                

In [32]:
# Render the neighbor-joining tree as an ASCII dendrogram in the cell output.
Phylo.draw_ascii(njtree)

                  ________________________________ YP_0092901
  _______________|
 |               | YP_0020001
 |
 |   _____________ XPK4255501
 |  |
 |  |               _____________________________________________ URY1067101
 |__|  ____________|
 |  | |            |____________________ URY1071401
 |  | |
 |  |_|                          ______________ NP_0419801
 |    |         ________________|
_|    |        |                |_ YP_0097901
 |    |________|
 |             |       ___________________________________________ YP_0097902
 |             |______|
 |                    |_______________________ UXY9250301
 |
 | YP_0098101
 |
 |________________________________ WP_1139901

