In [None]:
# Import the required libraries
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from Bio import SeqIO
from Bio.Align import MultipleSeqAlignment
from Bio.Phylo.TreeConstruction import DistanceCalculator
from Bio.SeqRecord import SeqRecord
from scipy.cluster.hierarchy import dendrogram, linkage
from scipy.spatial.distance import squareform
from tabulate import tabulate

In [None]:
#membaca file fasta yang telah di download
apoeAcipenser = list(SeqIO.parse("APOE-Acipenser.fasta", "fasta"))
apoeBos = list(SeqIO.parse("APOE-Bos.fasta", "fasta"))
apoeCercopithecus = list(SeqIO.parse("APOE-Cercopithecus.fasta", "fasta"))
apoeOryctolagus = list(SeqIO.parse("APOE-Oryctolagus.fasta", "fasta"))
apoePan = list(SeqIO.parse("APOE-Pan.fasta", "fasta"))
apoeSiniperca = list(SeqIO.parse("APOE-Siniperca.fasta", "fasta"))

In [None]:
#potong sequence dengan panjang paling APOE paling pendek
#hal ini dilakukan agar dapat dilakukan multiple sequence alignment
seqapoeAcipenser = apoeAcipenser[0].seq[:138]
seqapoeBos = apoeBos[0].seq[:138]
seqapoECercopithecus = apoeCercopithecus[0].seq[:138]
seqapoeOryctolagus= apoeOryctolagus[0].seq[:138]
seqapoePan = apoePan[0].seq[:138]
seqapoESiniperca = apoeSiniperca[0].seq[:138]


In [None]:
#membuat multiple sequence alignment
sequences = [
    SeqRecord(seqapoeAcipenser,id = 'Acipenser'),
    SeqRecord(seqapoeBos,id = 'Bos Taurus'),
    SeqRecord(seqapoECercopithecus,id = 'Cercoptithecus'),
    SeqRecord(seqapoeOryctolagus,id = 'Oryctolagus'),
    SeqRecord(seqapoePan,id = 'Pan troglodytes'),
    SeqRecord(seqapoESiniperca,id = 'Siniperca')  
]

In [None]:
# Membuat fungsi hamming distance
def hamming_distance(seq1, seq2):
    return sum(c1 != c2 for c1, c2 in zip(seq1, seq2))

In [None]:
#membuat matriks hamming distance
num_sequences = len(sequences)
hamming_matrix = np.zeros((num_sequences, num_sequences), dtype=int)

In [None]:
#mengisi matriks hamming distance
for i in range(num_sequences):
    for j in range(num_sequences):
        hamming_matrix[i, j] = hamming_distance(sequences[i].seq, sequences[j].seq)

In [None]:
# Mengonversi matriks menjadi DataFrame agar lebih rapi
sequence_ids = [seq.id for seq in sequences]
hamming_df = pd.DataFrame(hamming_matrix, index=sequence_ids, columns=sequence_ids)
# Menggunakan tabulate untuk mem print DataFrame dalam format tabel
table = tabulate(hamming_df, headers='keys', tablefmt='grid')

In [None]:
# Mencetak matriks Hamming distance
print("Matriks Hamming Distance:")
print(table)

In [None]:
# Buat dendrogram
align = MultipleSeqAlignment(sequences)
calculator = DistanceCalculator('identity')
dm = calculator.get_distance(align)


In [None]:

# Membuat matriks Hamming
num_sequences = len(sequences)
hamming_matrix = np.zeros((num_sequences, num_sequences), dtype=int)
sequence_ids = [seq.id for seq in sequences]
hamming_df = pd.DataFrame(hamming_matrix, index=sequence_ids, columns=sequence_ids)

# Menghitung matriks jarak
dm_matrix = np.array(dm)
linkage_matrix = linkage(dm_matrix, method='average')

# Membuat plot dendrogram
fig, ax = plt.subplots(figsize=(10, 5))
dendrogram(linkage_matrix, labels=sequence_ids, orientation='right', ax=ax)

# Menyesuaikan plot
plt.xlabel('Jarak')
plt.ylabel('DNA sequence')
plt.show()
