# Uniprot

In [16]:
from Bio import SeqIO

def analyze_uniprot(file_path):

    with open(file_path, "r") as handle:
        for record in SeqIO.parse(handle, "swiss"):
            print(f"ID: {record.id}")
            print(f"Nome: {record.name}")
            print(f"Descrição: {record.description}")
            print(f"Organismo: {record.annotations.get('organism')}")
            print(f"Função: {record.annotations.get('comment_function')}")
            print(f"Domínios: {record.annotations.get('keywords')}")
            
analyze_uniprot("uniprotkb_WP_003546577_1_2025_01_02.txt")

ID: Q5FL64
Nome: Q5FL64_LACAC
Descrição: RecName: Full=Alpha-1,4 glucan phosphorylase {ECO:0000256|RuleBase:RU000587}; EC=2.4.1.1 {ECO:0000256|RuleBase:RU000587};
Organismo: Lactobacillus acidophilus (strain ATCC 700396 / NCK56 / N2 / NCFM)
Função: None
Domínios: ['Carbohydrate metabolism {ECO:0000256|ARBA:ARBA00023277,', 'ECO:0000256|RuleBase:RU000587}', 'Glycosyltransferase {ECO:0000256|ARBA:ARBA00022676,', 'ECO:0000256|RuleBase:RU000587}', 'Pyridoxal phosphate {ECO:0000256|ARBA:ARBA00022898,', 'ECO:0000256|PIRSR:PIRSR000460-1}', 'Reference proteome', 'Transferase']


# PDB

In [29]:
from Bio.Blast import NCBIWWW, NCBIXML

def search_pdb_for_protein(sequence):
    result_handle = NCBIWWW.qblast("blastp", "pdb", sequence)
    blast_record = NCBIXML.read(result_handle)
    for alignment in blast_record.alignments:
        print(f"Alignment with PDB entry: {alignment.title}")
        print(f"PDB ID: {alignment.title.split()[0]}")
        print(f"E-value: {alignment.hsps[0].expect:.3e}")
        print("-" * 50)

# WP_003546577.1 protein sequence
protein_seq = "MQLTKDEFKKKLKNKVNIYFEEELDEASNSELYTALSGVVRDGYAPQWRRTRISEADQGQKQVYYFSIEFLPGTLLKTNLLNLGWLDTVRAALSDLGLDLDKIAAAEPDMALGNGGLGRLAAAFMDSLASTGYTGNGNGIRYKYGLFKQKFVNGYQKELPNDWLKKDDHWEVRRESKSVLVRFGGKVRMVDDNGWMTPQYEGGDVVRAVPYDTAIVGYRDGVTNTLRLWDAEIPPEEELSYPTISDRRRIEDLTSILYPDDSNYEGRLLRLKQEYFFVSAGLQSILDYYVKKLGNKDFTKLPDYVAVHINDTHPAMAIAELMRLLVDEHRVDWETAWDITLKVMSYTNHTIMSEAMEKWDTNMLSQLLPRIMQIITEIDRRYCAYLNGQVSNDVIERTRIIKNGQVQMAHLAIIGSHSINGVAALHTQLLETKVLKDFYNLYPDRFNNKTNGITLRRWLQIANPELSDLLDQTIGKDWRKNSDKMLNFEKYYNDTLVLERINQIKLDNKKKLAEFIKEQMGVEVDPNAIFDVQVKRLHEYKRQTLKLLHILKLYQDLKAGIDHPKRVVIFGAKAAPSYVFAKQVIKVINETANMINSDPDINGKLKVIFLENYDVSLAEKIIPAADVSEQISTTTKEASGTSNMKLMANGALTVATMDGANIEIADAVGEDNIITFGLNKDQVYKYYAEHSYHPREMYESDPVMKKTVDALTDGTIPNCFSEGQALANKFLSDNEQFFVLADFADYLKAQEKVEQEWKDKHSWAQMSLVNIAHSERFDVDKTIERYAQDIWHLKKLKVEKVDH"  # Replace with the actual sequence for WP_003546577.1
search_pdb_for_protein(protein_seq)

Alignment with PDB entry: pdb|2C4M|A Chain A, GLYCOGEN PHOSPHORYLASE [Corynebacterium callunae] >pdb|2C4M|B Chain B, GLYCOGEN PHOSPHORYLASE [Corynebacterium callunae] >pdb|2C4M|C Chain C, GLYCOGEN PHOSPHORYLASE [Corynebacterium callunae] >pdb|2C4M|D Chain D, GLYCOGEN PHOSPHORYLASE [Corynebacterium callunae]
PDB ID: pdb|2C4M|A
E-value: 0.000e+00
--------------------------------------------------
Alignment with PDB entry: pdb|7TM7|A Chain A, Alpha-1,4 glucan phosphorylase [Klebsiella pneumoniae subsp. pneumoniae HS11286] >pdb|7TM7|B Chain B, Alpha-1,4 glucan phosphorylase [Klebsiella pneumoniae subsp. pneumoniae HS11286]
PDB ID: pdb|7TM7|A
E-value: 0.000e+00
--------------------------------------------------
Alignment with PDB entry: pdb|2GM9|A Chain A, Glycogen phosphorylase, muscle form [Oryctolagus cuniculus] >pdb|5MCB|A Chain A, Glycogen phosphorylase, muscle form [Oryctolagus cuniculus] >pdb|7ONF|A Chain A, Glycogen phosphorylase, muscle form [Oryctolagus cuniculus]
PDB ID: pdb|2GM9

In [32]:
#Retirar foto?

from Bio.PDB import *
import pymol
def PDB(ID):
    
    pdbl = PDBList()
    file = pdbl.retrieve_pdb_file(ID, pdir='.', file_format='pdb')
    pymol.finish_launching()

    pymol.cmd.load(file, ID)
    pymol.cmd.disable("all")
    pymol.cmd.enable(ID)
    pymol.cmd.orient()
    pymol.cmd.zoom()
    pymol.cmd.png("ORF1_2C4M.png", 3000, 3000, dpi=500, ray=1)

    pymol.cmd.quit()

PDB("2C4M")

ModuleNotFoundError: No module named 'pymol'

In [34]:
#Retirar coordenadas e fazer de todos?

from Bio.PDB import PDBList, PDBParser

def download_pdb(pdb_id):
    pdbl = PDBList()
    pdb_file = pdbl.retrieve_pdb_file(pdb_id, file_format="pdb", overwrite=True)
    return pdb_file

def analyze_pdb_structure(pdb_file):
    
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure("protein", pdb_file)

    print(f"Número de modelos na estrutura: {len(structure)}")
    for model in structure:
        print(f"ID Modelo: {model.get_id()}")
        for chain in model:
            print(f"ID Cadeia: {chain.get_id()}")
            for residue in chain:
                print(f"    Residuo: {residue.get_resname()} {residue.get_id()[1]}")
                for atom in residue:
                    print(f"      Atomo: {atom.get_name()}, Coordenadas: {atom.get_coord()}")

pdb_id = "2C4M"
pdb_file = download_pdb(pdb_id)
analyze_pdb_structure(pdb_file)

Downloading PDB structure '2c4m'...
Número de modelos na estrutura: 1
ID Modelo: 0
ID Cadeia: A
    Residuo: GLN 5
      Atomo: N, Coordenadas: [-82.717 126.072  85.898]
      Atomo: CA, Coordenadas: [-82.683 125.823  87.332]
      Atomo: C, Coordenadas: [-82.501 124.331  87.641]
      Atomo: O, Coordenadas: [-83.148 123.465  87.062]
      Atomo: CB, Coordenadas: [-83.997 126.333  87.934]
      Atomo: CG, Coordenadas: [-83.952 127.826  88.265]
      Atomo: CD, Coordenadas: [-82.568 128.191  88.754]
      Atomo: OE1, Coordenadas: [-81.553 127.904  88.14 ]
      Atomo: NE2, Coordenadas: [-82.562 128.864  89.92 ]
    Residuo: PRO 6
      Atomo: N, Coordenadas: [-81.637 124.048  88.644]
      Atomo: CA, Coordenadas: [-81.372 122.672  89.057]
      Atomo: C, Coordenadas: [-82.648 121.967  89.536]
      Atomo: O, Coordenadas: [-83.222 122.282  90.572]
      Atomo: CB, Coordenadas: [-80.327 122.716  90.179]
      Atomo: CG, Coordenadas: [-80.022 124.182  90.498]
      Atomo: CD, Coordenadas: 

# Previsão de Propriedades Baseadas na Sequência

## Previsão de Regiões Transmembranares

In [1]:
from Bio.SeqUtils import ProtParam
from Bio import SeqIO

def analyze_protein_sequence(file_path):

    with open(file_path, "r") as handle:
        for record in SeqIO.parse(handle, "fasta"):
            protein_sequence = str(record.seq)

            protein_analysis = ProtParam.ProteinAnalysis(protein_sequence)
            print(f"Peso molecular: {protein_analysis.molecular_weight()} Da")
            print(f"Índice de instabilidade: {protein_analysis.instability_index()}")
            print(f"Ponto isoelétrico: {protein_analysis.isoelectric_point()}")
            print(f"Aminoácidos transmembranares: {protein_analysis.flexibility()}")
protein_seq_file = "WP_003546577_1.faa"

analyze_protein_sequence(protein_seq_file)

Peso molecular: 92036.69709999999 Da
Índice de instabilidade: 29.79777085927774
Ponto isoelétrico: 5.717911720275879
Aminoácidos transmembranares: [1.0250357142857145, 1.0474642857142857, 1.0154761904761904, 1.0837619047619047, 1.0553571428571429, 1.0550000000000002, 1.0238809523809524, 1.0785238095238097, 1.0406904761904763, 1.0489285714285717, 0.9914404761904763, 1.0248214285714288, 0.9740595238095238, 0.9876785714285715, 0.9876785714285715, 1.0294166666666669, 1.0160595238095238, 1.0298928571428572, 1.008, 1.065047619047619, 1.050107142857143, 1.0212380952380953, 1.0516071428571427, 1.0380714285714288, 1.034559523809524, 1.034702380952381, 0.9840000000000001, 0.9969166666666668, 1.0010833333333333, 0.9833452380952381, 0.9724761904761907, 0.9986904761904764, 0.9815238095238095, 0.9697738095238095, 0.9883690476190478, 1.003809523809524, 1.0063690476190477, 0.9919999999999999, 0.9818690476190477, 1.009095238095238, 1.0124523809523809, 0.9972857142857142, 0.9695714285714286, 1.010690476

## Análise de Domínios Conservados com NCBI CDD

In [9]:
from Bio.Blast import NCBIWWW, NCBIXML
from Bio import SeqIO

def search_cdd(sequence):
    result_handle = NCBIWWW.qblast("blastp", "cdd", sequence)
    blast_record = NCBIXML.read(result_handle)

    for alignment in blast_record.alignments:
         for hsp in alignment.hsps:
            print(f"Domínio: {alignment.title}")
            print(f"E-value: {hsp.expect:.3e}")
            print("-" * 50)

def load_protein_sequence(file_path):
    with open(file_path, "r") as handle:
        for record in SeqIO.parse(handle, "fasta"):
            return str(record.seq) 

protein_seq = load_protein_sequence("WP_003546577_1.faa") 
search_cdd(protein_seq)

Domínio: gnl|CDD|340853 cd04300, GT35_Glycogen_Phosphorylase, glycogen phosphorylase and similar proteins.  This is a family of oligosaccharide phosphorylases. It includes yeast and mammalian glycogen phosphorylases, plant starch/glucan phosphorylase, as well as the maltodextrin phosphorylases of bacteria. The members of this family catalyze the breakdown of oligosaccharides into glucose-1-phosphate units. They are important allosteric enzymes in carbohydrate metabolism. The allosteric control mechanisms of yeast and mammalian members of this family are different from that of bacterial members. The members of this family belong to the GT-B structural superfamily of glycoslytransferases, which have characteristic N- and C-terminal domains each containing a typical Rossmann fold. The two domains have high structural homology despite minimal sequence homology.  The large cleft that separates the two domains includes the catalytic center and permits a high degree of flexibility.
E-value: 0