In [None]:
from Bio import SeqIO
from Bio import Seq
from Bio.Align import PairwiseAligner
import pandas as pd
from Bio.Seq import Seq
from Bio import Phylo
from Bio.Phylo.TreeConstruction import DistanceCalculator, DistanceTreeConstructor
from Bio import AlignIO
from Bio.Align import MultipleSeqAlignment
from Bio.SeqRecord import SeqRecord

In [None]:
"""
Step 1: Import your practice sequences from the .fasta file
"""
# Location of the FASTA file that holds the practice sequences
fasta_file = "practice_sequences.fasta"

# Read every record into a list so later cells can iterate over them more than once
sequences = [*SeqIO.parse(fasta_file, "fasta")]

# Publish each sequence as a notebook-level variable named after its FASTA id
# (later cells refer to them by name, e.g. seq1 and seq2) and echo it.
for seq_record in sequences:
    globals().update({seq_record.id: str(seq_record.seq)})
    print(f"{seq_record.id}: {seq_record.seq}")

In [None]:
"""
Step 2: Align your sequences and count their differences
a) Pick any two sequences from the list seq1, seq2...seq6, and input them where indicated. 
b) Run the cell and record their Alignment Score.
"""
def align_sequences(seqA, seqB):
    """Align two sequences pairwise and return the first alignment found.

    A fresh ``PairwiseAligner`` with its default settings is created on
    every call.

    Args:
        seqA: First sequence to align.
        seqB: Second sequence to align.

    Returns:
        The first alignment in the aligner's result; callers in this
        notebook read its ``.score`` attribute.
    """
    return PairwiseAligner().align(seqA, seqB)[0]

# INPUT THE SEQUENCES YOU WANT TO ALIGN HERE; replace "seq1, seq2" with the two you want to compare
alignment = align_sequences(seq1, seq2)

# Show the alignment itself, then its score on the following line
print(alignment, f"Alignment Score: {alignment.score}", sep="\n")

# Echo the raw first sequence for reference
print(seq1)

In [None]:
"""
Step 3: Create an alignment chart for all the sequence pairs

"""
# Row/column labels of the chart are the record ids read from the FASTA file
seqs_list = [seq_record.id for seq_record in sequences]

# Start from an all-blank table; only the diagonal and upper triangle are filled,
# since each unordered pair only needs to be scored once.
df = pd.DataFrame("", index=seqs_list, columns=seqs_list)

# Walk the loaded records directly instead of rebuilding names such as "seq3"
# and looking them up in globals(): that only worked when the FASTA ids were
# exactly seq1..seqN, and otherwise raised KeyError or added stray rows/columns.
for i, record_a in enumerate(sequences):
    for record_b in sequences[i:]:
        score = align_sequences(str(record_a.seq), str(record_b.seq)).score
        df.loc[record_a.id, record_b.id] = int(score)

# Last expression of the cell: display the chart
df

In [None]:
"""
Step 4: Make the phylogenetic tree for the practice sequences!
"""

# Wrap the records in an alignment object.
# NOTE: MultipleSeqAlignment does not perform any alignment itself; it expects
# the sequences to already have the same length.
alignment = MultipleSeqAlignment(sequences)

# Calculate pairwise distances between the sequences using the 'identity' model
calculator = DistanceCalculator('identity')
distance_matrix = calculator.get_distance(alignment)

# Make the tree from the distance matrix with UPGMA
constructor = DistanceTreeConstructor()
tree = constructor.upgma(distance_matrix)

# Remove inner clade labels. Only non-terminal clades are considered, so a
# sequence whose id happens to contain "Inner" keeps its label, and clades
# that have no name (None) are skipped instead of raising TypeError.
for clade in tree.get_nonterminals():
    if clade.name and "Inner" in clade.name:
        clade.name = None

# Draw the phylogenetic tree
Phylo.draw(tree)
