In [3]:
import urllib.request
import glob, os
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Align.Applications import ClustalOmegaCommandline
from Bio.Align import substitution_matrices
from termcolor import colored

def _select_uniprot_ids(gene_list, table_path="uniprotkb.csv"):
    '''
    Return the accessions (first CSV column) of every row whose third column
    contains one of the names in gene_list, in file order and without repeats.
    '''
    accessions = []
    with open(table_path) as uniprot:
        for line in uniprot:
            fields = line.strip().split(",")
            if len(fields) < 3:
                # Blank or truncated row: there is no third column to test.
                continue
            if fields[0] in accessions:
                # Already selected; a second copy would duplicate the record.
                continue
            # NOTE(review): this is a substring test on a naively comma-split
            # row, so "TP53" also matches e.g. "TP53BP1" -- confirm against the
            # real layout of uniprotkb.csv before tightening it.
            if any(gene in fields[2] for gene in gene_list):
                accessions.append(fields[0])
    return accessions


def _download_fastas(accessions, combined_path="Seq.fa"):
    '''
    Fetch one FASTA record per accession from the UniProt REST endpoint and
    concatenate them into combined_path.
    '''
    with open(combined_path, 'w') as outfile:
        for uni_id in accessions:
            url = f'https://rest.uniprot.org/uniprotkb/{uni_id}.fasta'
            fasta_path = f'{uni_id}.fasta'
            urllib.request.urlretrieve(url, fasta_path)
            try:
                with open(fasta_path) as infile:
                    text = infile.read()
            finally:
                # Remove only the file this function created, never other
                # *.fasta files that happen to live in the working directory.
                os.remove(fasta_path)
            outfile.write(text)
            if text and not text.endswith("\n"):
                # Keep the next '>' header on its own line.
                outfile.write("\n")


def _motif_columns(aligned_seq, motif, gap_chars="-."):
    '''
    Return the set of alignment columns occupied by the first occurrence of
    motif in aligned_seq, ignoring gap characters when searching.
    '''
    # Column index of every real residue, in order; position k in this list is
    # residue k of the ungapped sequence.
    residue_columns = [col for col, char in enumerate(aligned_seq)
                       if char not in gap_chars]
    ungapped = "".join(aligned_seq[col] for col in residue_columns)
    start = ungapped.find(motif)
    if start < 0:
        return set()
    return set(residue_columns[start:start + len(motif)])


def _print_highlighted(record_id, aligned_seq, motif):
    '''
    Print a FASTA-style header and the sequence, with the motif residues
    wrapped in red by termcolor.
    '''
    print(f">{record_id}")
    highlighted = _motif_columns(aligned_seq, motif)
    for col, char in enumerate(aligned_seq):
        if col in highlighted:
            print(colored(char, 'red'), end="")
        else:
            print(char, end="")
    print()


def motif_finder(gene_list, motif):
    '''
    Protein motif finder. By @biomedical_informatics Edris Sharif Rahamni March 13, 2023

    Looks up the requested genes in "uniprotkb.csv", downloads their FASTA
    records from UniProt, keeps the sequences that contain `motif`, aligns
    them with Clustal Omega and prints the alignment with the motif in red.

    Parameters:
        gene_list: iterable of gene names (a single string is treated as one
            gene name) matched against the third column of "uniprotkb.csv".
        motif: exact amino-acid substring to search for.

    Returns:
        None. Results are printed.

    Side effects:
        Writes "Seq.fa" (all downloaded records) and "Match_Seq.fa" (records
        containing the motif) in the working directory. When two or more
        sequences match, runs Clustal Omega through ClustalOmegaCommandline,
        which writes "protein_alignment.fasta". Requires network access.
    '''
    if isinstance(gene_list, str):
        # Iterating a bare string would test single characters, not the gene.
        gene_list = [gene_list]

    accessions = _select_uniprot_ids(gene_list)
    _download_fastas(accessions)

    protein_sequences = list(SeqIO.parse("Seq.fa", "fasta"))
    matches = [record for record in protein_sequences
               if motif in str(record.seq)]
    with open("Match_Seq.fa", "w") as match:
        for seq_record in matches:
            # No space after '>' so the identifier starts the header line.
            print(f'>{seq_record.id}', file=match)
            print(str(seq_record.seq), file=match)

    if not matches:
        print(f"No sequence contains the motif {motif!r}; nothing to align.")
        return

    if len(matches) == 1:
        # A lone sequence has nothing to be aligned against, so the aligner is
        # skipped and the sequence is shown as-is.
        aligned = [(matches[0].id, str(matches[0].seq))]
    else:
        clustalomega_cline = ClustalOmegaCommandline(infile="Match_Seq.fa",
                                                     outfile="protein_alignment.fasta",
                                                     verbose=True,
                                                     auto=True,
                                                     force=True)
        clustalomega_cline()
        aligned = [(record.id, str(record.seq))
                   for record in SeqIO.parse("protein_alignment.fasta", "fasta")]

    for record_id, aligned_seq in aligned:
        # The motif is located in the gapped sequence itself, so gaps inserted
        # before it by the aligner do not shift the highlight.
        _print_highlighted(record_id, aligned_seq, motif)

In [4]:
# Search the TP53 entries for the ETFSDLWKL motif and show it in the alignment.
motif_finder(gene_list=["TP53"], motif="ETFSDLWKL")

>sp|O09185|P53_CRIGR
MEEPQSDLSIELPLSQ[31mE[0m[31mT[0m[31mF[0m[31mS[0m[31mD[0m[31mL[0m[31mW[0m[31mK[0m[31mL[0mLPPNNVLSTLPSSDSIEELFLS-ENVTGWLEDSGGALQGVAAAA---ASTAEDPVTETPAPVASAPATPWPLSSSVPSYKTYQGDYGFRLGFLHSGTAKSVTCTYSPSLNKLFCQLAKTCPVQLWVNSTPPPGTRVRAMAIYKKLQYMTEVVRRCPHHERSSE-GDSLAPPQHLIRVEGNLHAEYLDDKQTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDPSGNLLGRNSFEVRICACPGRDRRTEEKNFQKKGEPCPELPPKSAKRALPTNT-SSSPPPKKKTLDGEYFTLKIRGHERFKMFQELNEALELKDAQASKGSEDNGAHSSYLKSKKGQSASRLKKLMIKREGPDSD
>sp|P04637|P53_HUMAN
MEEPQSDPSVEPPLSQ[31mE[0m[31mT[0m[31mF[0m[31mS[0m[31mD[0m[31mL[0m[31mW[0m[31mK[0m[31mL[0mLPENNVLSPLPSQ-AMDDLMLSPDDIEQWFTEDPGPDEA---PRMPEAAPPVAPAPAAPTPAAPAPAPSWPLSSSVPSQKTYQGSYGFRLGFLHSGTAKSVTCTYSPALNKMFCQLAKTCPVQLWVDSTPPPGTRVRAMAIYKQSQHMTEVVRRCPHHERCSD-SDGLAPPQHLIRVEGNLRVEYLDDRNTFRHSVVVPYEPPEVGSDCTTIHYNYMCNSSCMGGMNRRPILTIITLEDSSGNLLGRNSFEVRVCACPGRDRRTEEENLRKKGEPHHELPPGSTKRALPNNT-SSSPQPKKKPLDGEYFTLQIRGRERFEMFRELNEALELKDAQAGKEPGGSRAHSSHLKSKKGQSTSRHKKLMFKTEGP