In [6]:
from Bio import SeqIO
from Bio.Align.Applications import MafftCommandline
from Bio import AlignIO
from Bio.Phylo.TreeConstruction import DistanceCalculator
from Bio.Phylo.TreeConstruction import DistanceTreeConstructor
from Bio import Phylo

# Pipeline: parse a protein FASTA file, align it with MAFFT, compute an
# identity-based distance matrix, build a neighbor-joining tree, and write
# the tree in Newick format next to the input file.

# Step 1: Read the protein sequences from the FASTA file
fasta_file = "Ptetraurelia_CenH3_1_hits_default_blastp.fa.extractFromTree.CenH3"
sequences = list(SeqIO.parse(fasta_file, "fasta"))
# Fail early with a clear message: without this check an empty input only
# surfaces later as an opaque error from the aligner or the alignment reader.
if not sequences:
    raise ValueError(f"No FASTA records found in {fasta_file!r}")

# Step 2: Perform multiple sequence alignment using MAFFT
# NOTE(review): the Bio.Align.Applications command-line wrappers are marked
# deprecated in recent Biopython releases; consider calling the `mafft`
# executable through `subprocess` when this notebook is next updated.
alignment_file = fasta_file + ".aligned"
mafft_cline = MafftCommandline(input=fasta_file)
# Running the wrapper returns the captured (stdout, stderr) pair; the aligned
# FASTA is taken from stdout.
stdout, stderr = mafft_cline()
with open(alignment_file, "w") as handle:
    handle.write(stdout)

# Step 3: Calculate the distance matrix
alignment = AlignIO.read(alignment_file, "fasta")
calculator = DistanceCalculator("identity")
distance_matrix = calculator.get_distance(alignment)

# Step 4: Construct the phylogenetic tree (neighbor joining)
constructor = DistanceTreeConstructor()
tree = constructor.nj(distance_matrix)

# Step 5: Save the tree to a file
# Derived from alignment_file so the two output names cannot drift apart;
# the resulting path is the same "<fasta_file>.aligned.nwk" as before.
tree_file = alignment_file + ".nwk"
Phylo.write(tree, tree_file, "newick")

print("Phylogenetic tree created and saved to", tree_file)





Phylogenetic tree created and saved to Ptetraurelia_CenH3_1_hits_default_blastp.fa.extractFromTree.CenH3.aligned.nwk
