In [None]:
# Fetch reviewed cytosolic Hsp70/Hsc70 orthologs
!pip -q install requests
import requests, time
from pathlib import Path

# Display label -> UniProtKB accession for every sequence to download.
# The label is only used in the progress messages; the accession is what gets
# substituted into the UniProt REST URL. The dict is iterated in insertion
# order when the FASTA file is written, so the order here is also the record
# order in msa_input_euk.fasta.
ACCESSIONS = {
    "Human HSPA8":        "P11142",
    "Mouse Hspa8":        "P63017",
    "Cow Hsc70":          "P19120",  # bovine anchor for numbering
    "Chicken HSPA8":      "O73885",
    "Zebrafish Hsc70":    "Q90473",
    "Xenopus Hsp70":      "P02827",
    "Drosophila Hsc70-4": "P11147",
    "C. elegans HSP-1":   "P09446",
    "S. cerevisiae SSA1": "P10591",
    "S. pombe Ssa1":      "Q10265",
    "Arabidopsis HSC70-1":"P22953",
}

def fetch_fasta(acc):
    """Download one UniProtKB entry as FASTA text.

    Parameters
    ----------
    acc : str
        UniProtKB accession, e.g. "P11142". It is inserted verbatim into the
        REST URL.

    Returns
    -------
    str
        The FASTA record with leading/trailing whitespace removed and exactly
        one trailing newline, so that records can be concatenated directly
        into a multi-FASTA file.

    Raises
    ------
    requests.HTTPError
        If the server responds with an error status (via raise_for_status).
    ValueError
        If the response body does not start with a ">" header line, or if it
        consists of a header line with no sequence after it.
    """
    url = f"https://rest.uniprot.org/uniprotkb/{acc}.fasta"
    r = requests.get(url, timeout=60)
    r.raise_for_status()
    txt = r.text.strip()
    if not txt.startswith(">"):
        raise ValueError(f"Unexpected FASTA for {acc}")
    # After strip() the text ends in a non-whitespace character, so the
    # presence of any newline means there is real content below the header.
    # A header-only body would otherwise be written out as an empty record.
    if "\n" not in txt:
        raise ValueError(f"Unexpected FASTA for {acc}: header without sequence")
    # strip() already removed any trailing newline, so always add exactly one.
    return txt + "\n"

# Build the multi-FASTA input for the alignment step.
#
# All records are downloaded BEFORE the output file is opened. If any fetch
# raises, no file is created or overwritten, so a truncated
# msa_input_euk.fasta can never be picked up by a later cell.
out = Path("msa_input_euk.fasta")

records = []
for label, acc in ACCESSIONS.items():
    fa = fetch_fasta(acc)
    records.append(fa)
    print(f"Added: {label} ({acc})")
    # Short pause between requests (presumably to go easy on the UniProt API).
    time.sleep(0.1)

# Each record already ends with a newline, so they can be written back to back
# in the same order as ACCESSIONS.
with out.open("w") as w:
    w.writelines(records)

print("\nWrote:", out.resolve())


Added: Human HSPA8 (P11142)
Added: Mouse Hspa8 (P63017)
Added: Cow Hsc70 (P19120)
Added: Chicken HSPA8 (O73885)
Added: Zebrafish Hsc70 (Q90473)
Added: Xenopus Hsp70 (P02827)
Added: Drosophila Hsc70-4 (P11147)
Added: C. elegans HSP-1 (P09446)
Added: S. cerevisiae SSA1 (P10591)
Added: S. pombe Ssa1 (Q10265)
Added: Arabidopsis HSC70-1 (P22953)

Wrote: /content/msa_input_euk.fasta


In [None]:
# Align the eukaryotic sequences with MAFFT and export in CLUSTAL format
!pip install biopython
!apt-get -qq update
!apt-get -qq install -y mafft
import subprocess

from Bio import AlignIO
from google.colab import files

# File names and preview geometry for this cell, named once instead of being
# repeated as literals.
MSA_INPUT = "msa_input_euk.fasta"     # multi-FASTA written by the fetch cell
ALIGNED_FASTA = "aligned_euk.fasta"   # MAFFT output (aligned FASTA)
ALIGNED_CLUSTAL = "aligned_euk.aln"   # CLUSTAL copy of the same alignment
PREVIEW_COLUMNS = 320                 # how many leading columns to print
PREVIEW_BLOCK = 80                    # columns per printed block

# Run MAFFT
# A `!mafft ... > file` shell line does not stop the cell when MAFFT fails; it
# just leaves an empty or partial output file behind. Running it through
# subprocess lets us check the exit status, and capturing stderr keeps MAFFT's
# long progress log out of the cell output unless something goes wrong.
with open(ALIGNED_FASTA, "w") as aligned_fh:
    mafft_run = subprocess.run(
        ["mafft", "--auto", MSA_INPUT],
        stdout=aligned_fh,
        stderr=subprocess.PIPE,
        text=True,
    )
if mafft_run.returncode != 0:
    raise RuntimeError(
        f"MAFFT failed with exit code {mafft_run.returncode}:\n{mafft_run.stderr}"
    )

# Write CLUSTAL format for Chimera/ChimeraX Multalign Viewer
aln = AlignIO.read(ALIGNED_FASTA, "fasta")
AlignIO.write(aln, ALIGNED_CLUSTAL, "clustal")
print(f"Wrote: {ALIGNED_CLUSTAL}")

# Optional: quick text preview (first PREVIEW_COLUMNS columns), printed in
# blocks of PREVIEW_BLOCK columns with each record id cut/padded to 20 chars.
aln_len = aln.get_alignment_length()
print(f"Alignment length: {aln_len}  |  Sequences: {len(aln)}")
for start in range(0, min(aln_len, PREVIEW_COLUMNS), PREVIEW_BLOCK):
    end = min(start + PREVIEW_BLOCK, aln_len)
    print(f"\nColumns {start+1}-{end}")
    for rec in aln:
        print(f"{rec.id[:20]:20s} {str(rec.seq[start:end])}")

# Download the CLUSTAL file (google.colab helper; triggers a browser download)
files.download(ALIGNED_CLUSTAL)


Collecting biopython
  Downloading biopython-1.86-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl.metadata (13 kB)
Downloading biopython-1.86-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl (3.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.2/3.2 MB[0m [31m26.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: biopython
Successfully installed biopython-1.86
W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
outputhat23=16
treein = 0
compacttree = 0
stacksize: 8192 kb
rescale = 1
All-to-all alignment.
tbfast-pair (aa) Version 7.490
alg=L, model=BLOSUM62, 2.00, -0.10, +0.10, noshift, amax=0.0
0 thread(s)

outputhat23=16
Loading 'hat3.seed' ... 
done.
Writing hat3 for iterative refinement
rescale = 1
Gap Penalty = -1.53, +0.00, +0.00
tbutree = 1, c

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>