# 1. Retrieve Protein Sequences from UniProt (FASTA)

In [1]:
# Human TP53
!curl -s "https://rest.uniprot.org/uniprotkb/P04637.fasta" -o TP53_HUMAN.fa

# Mouse TP53
!curl -s "https://rest.uniprot.org/uniprotkb/P02340.fasta" -o TP53_MOUSE.fa

# Rat TP53
!curl -s "https://rest.uniprot.org/uniprotkb/P10361.fasta" -o TP53_RAT.fa

# Zebrafish TP53
!curl -s "https://rest.uniprot.org/uniprotkb/P79734.fasta" -o TP53_DANRE.fa

# 2. Combine FASTA Sequences into One File for Multiple Sequence Alignment (MSA)

In [2]:
# Concatenate the four per-species FASTA files (human, mouse, rat, zebrafish)
# into a single multi-sequence FASTA file; the redirect overwrites any
# existing tp53_multispecies.fa.
!cat TP53_HUMAN.fa TP53_MOUSE.fa TP53_RAT.fa TP53_DANRE.fa > tp53_multispecies.fa

# Print the combined file to confirm that every sequence is present
!cat tp53_multispecies.fa

>sp|P04637|P53_HUMAN Cellular tumor antigen p53 OS=Homo sapiens OX=9606 GN=TP53 PE=1 SV=4
MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLPSQAMDDLMLSPDDIEQWFTEDPGP
DEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAK
SVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHE
RCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNS
SCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELP
PGSTKRALPNNTSSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPG
GSRAHSSHLKSKKGQSTSRHKKLMFKTEGPDSD
>sp|P02340|P53_MOUSE Cellular tumor antigen p53 OS=Mus musculus OX=10090 GN=Tp53 PE=1 SV=4
MTAMEESQSDISLELPLSQETFSGLWKLLPPEDILPSPHCMDDLLLPQDVEEFFEGPSEA
LRVSGAPAAQDPVTETPGPVAPAPATPWPLSSFVPSQKTYQGNYGFHLGFLQSGTAKSVM
CTYSPPLNKLFCQLAKTCPVQLWVSATPPAGSRVRAMAIYKKSQHMTEVVRRCPHHERCS
DGDGLAPPQHLIRVEGNLYPEYLEDRQTFRHSVVVPYEPPEAGSEYTTIHYKYMCNSSCM
GGMNRRPILTIITLEDSSGNLLGRDSFEVRVCACPGRDRRTEEENFRKKEVLCPELPPGS
AKRALPTCTSASPPQKKKPLDGEYFTLKIRGRKRFEMFRELNEALELKDAHATEESGDSR
AHSSYLKTKKGQSTSRHKKTMVKKVGPDSD
>sp|P10361|P53_RAT Cel

# 3. Run Multiple Sequence Alignment (MSA)

In [3]:
!curl -s -X POST "https://www.ebi.ac.uk/Tools/services/rest/clustalo/run" \
--data-urlencode sequence@tp53_multispecies.fa \
--data-urlencode stype=protein \
--data-urlencode email=thaliasingh25@gmail.com

clustalo-R20251120-201521-0831-10216712-p1m

In [4]:
# Check the job status once; this cell does not loop, so re-run it by hand
# until the service reports FINISHED.
# NOTE(review): the job ID in the URL is pasted in by hand, so a fresh
# submission requires updating it.
!curl -s "https://www.ebi.ac.uk/Tools/services/rest/clustalo/status/clustalo-R20251120-201521-0831-10216712-p1m"

FINISHED

In [5]:
!clustalo -i tp53_multispecies.fa -o tp53_multispecies_aligned.fasta -v

Using 22 threads
Read 4 sequences (type: Protein) from tp53_multispecies.fa
not more sequences (4) than cluster-size (100), turn off mBed
Calculating pairwise ktuple-distances...
Ktuple-distance calculation progress done. CPU time: 0.46u 0.30s 00:00:00.76 Elapsed: 00:00:00
Guide-tree computation done.
Progressive alignment progress done. CPU time: 29.22u 0.08s 00:00:29.29 Elapsed: 00:00:01
Alignment written to tp53_multispecies_aligned.fasta


In [6]:
# Print the aligned FASTA written by the local clustalo run ('-' marks a gap)
!cat tp53_multispecies_aligned.fasta

>sp|P04637|P53_HUMAN Cellular tumor antigen p53 OS=Homo sapiens OX=9606 GN=TP53 PE=1 SV=4
---MEEPQSDPSVEPPLSQETFSDLWKLLPENNVLSPLP---SQAMDDL----ML----S
PDDIEQWFTEDPGPDEAPRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGS
YGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQS
QHMTEVVRRCPHHERCSDSDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVG
SDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEE
ENLRKKGEPHHELPP-GSTKRALPNNT-------SSSPQPKKKPLDGEYFTLQIRGRERF
EMFRELNEALELKDAQAGKEPGGSRAHSSHLKSK---KGQSTSRHKKLMFKTEGPDSD-
>sp|P02340|P53_MOUSE Cellular tumor antigen p53 OS=Mus musculus OX=10090 GN=Tp53 PE=1 SV=4
MTAMEESQSDISLELPLSQETFSGLWKLLPPEDILPS-----PHCMDDL----LL-----
PQDVEEFFE---GPSEALRVSGAPAAQDPVTETPGPVAPAPATPWPLSSFVPSQKTYQGN
YGFHLGFLQSGTAKSVMCTYSPPLNKLFCQLAKTCPVQLWVSATPPAGSRVRAMAIYKKS
QHMTEVVRRCPHHERCSDGDGLAPPQHLIRVEGNLYPEYLEDRQTFRHSVVVPYEPPEAG
SEYTTIHYKYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRDSFEVRVCACPGRDRRTEE
ENFRKKEVLCPELPP-GSAKRALPTCT-------SASPPQKKKPLDGEYFTLKIRGRKRF
EMFRELNEALELKDAHATEESGDSRAH

# 4. Identify the Position of Arg248 in the Alignment

In [7]:
from Bio import AlignIO
from Bio.Seq import Seq

alignment = AlignIO.read("tp53_multispecies_aligned.fasta", "fasta")

# Residue of interest, numbered 1-based along the ungapped human sequence.
# Defined once so the lookup, the mapping and the printed labels cannot drift.
target_residue_index = 248

# 1. Find the human sequence row (using UniProt ID P04637).
#    next() with a default lets us raise a clear error instead of a bare
#    IndexError when the accession is missing from the alignment.
human = next((rec for rec in alignment if "P04637" in rec.id), None)
if human is None:
    raise ValueError("No record with UniProt ID P04637 found in the alignment")

# 2. Remove gaps to recover the ungapped human sequence
human_aligned = str(human.seq)
human_nogaps = Seq(human_aligned.replace("-", ""))

# Fail early with a readable message if the requested residue does not exist,
# rather than hitting an index error further down.
if not 1 <= target_residue_index <= len(human_nogaps):
    raise ValueError(
        f"Residue {target_residue_index} is outside the human sequence "
        f"(length {len(human_nogaps)})"
    )

print("Length of gapless human TP53:", len(human_nogaps))
print(
    f"AA at position {target_residue_index} (1-based):",
    human_nogaps[target_residue_index - 1],
)

# 3. Map the residue back to its alignment column.
#    residue_columns[k] is the 0-based alignment column holding the (k+1)-th
#    non-gap residue of the human row.
residue_columns = [col for col, aa in enumerate(human_aligned) if aa != "-"]
aligned_index = residue_columns[target_residue_index - 1]

print(f"Aligned index for AA{target_residue_index}:", aligned_index)
print(f"Aligned AA{target_residue_index} in human row:", human.seq[aligned_index])

Length of gapless human TP53: 393
AA at position 248 (1-based): R
Aligned index for AA248: 261
Aligned AA248 in human row: R


# 5. See the Conservation Column Across All Species

In [8]:
# Show the residue each species carries in the alignment column that holds
# human Arg248 (aligned_index is computed in the previous cell).
print("Arg248 column across species:")
for record in alignment:
    residue_in_column = record.seq[aligned_index]
    print(record.id, residue_in_column)

Arg248 column across species:
sp|P04637|P53_HUMAN R
sp|P02340|P53_MOUSE R
sp|P10361|P53_RAT R
sp|P79734|P53_DANRE R
