# Análise da proteína do gene ARHGAP24


In [1]:
from Bio import SeqIO
from Bio.Blast import NCBIWWW, NCBIXML
import requests
from io import StringIO     
from Bio import ExPASy
import Bio.SwissProt as sp

Aceder à UniProt e guardar o ficheiro da proteína

In [2]:
def fetch_protein_sequence(uniprot_id, timeout=30):
    """Download the UniProt text entry for ``uniprot_id`` and save it to disk.

    The full body of the HTTP response (the complete flat-file entry, not
    only the amino-acid sequence) is written to ``{uniprot_id}_sequence.txt``
    in the current working directory. A short status message is printed in
    both the success and the failure case.

    Args:
        uniprot_id: UniProt accession to download, e.g. ``"Q8N264"``.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block the notebook
            indefinitely.

    Returns:
        The name of the file that was written, or ``None`` when the server
        did not answer with HTTP status 200 (no file is written then).

    Raises:
        requests.RequestException: Propagated from ``requests.get`` on
            connection problems or when ``timeout`` expires.
    """
    # NOTE(review): this looks like the legacy website URL scheme; it
    # presumably still works only through a redirect to the UniProt REST
    # API -- confirm and consider switching to the documented REST endpoint.
    url = f"https://www.uniprot.org/uniprot/{uniprot_id}.txt"
    response = requests.get(url, timeout=timeout)

    # Anything other than 200 is reported and treated as "nothing saved".
    if response.status_code != 200:
        print(f"Error: Unable to fetch protein sequence for UniProt ID {uniprot_id}")
        return None

    # The output file is named after the accession so later cells can find it.
    file_name = f"{uniprot_id}_sequence.txt"
    with open(file_name, "w") as file:
        file.write(response.text)

    print(f"Protein sequence saved to {file_name}")
    return file_name


In [3]:
# UniProt accession analysed in this notebook; the same variable is reused
# by the next code cell to build the name of the saved file.
uniprot_id = "Q8N264"
fetch_protein_sequence(uniprot_id)

Protein sequence saved to Q8N264_sequence.txt


Abrir e ler o ficheiro

In [4]:
# Parse the saved UniProt entry into a Bio.SwissProt record. The `with`
# block guarantees the file handle is closed once parsing is done instead of
# staying open for the lifetime of the kernel.
with open(f"{uniprot_id}_sequence.txt") as f:
    info = sp.read(f)

# FASTA-like header line followed by the amino-acid sequence.
print(f">{info.entry_name} | {info.organism}")
print(info.sequence)

>RHG24_HUMAN | Homo sapiens (Human).
MEENNDSTENPQQGQGRQNAIKCGWLRKQGGFVKTWHTRWFVLKGDQLYYFKDEDETKPLGTIFLPGNKVSEHPCNEENPGKFLFEVVPGGDRDRMTANHESYLLMASTQNDMEDWVKSIRRVIWGPFGGGIFGQKLEDTVRYEKRYGNRLAPMLVEQCVDFIRQRGLKEEGLFRLPGQANLVKELQDAFDCGEKPSFDSNTDVHTVASLLKLYLRELPEPVIPYAKYEDFLSCAKLLSKEEEAGVKELAKQVKSLPVVNYNLLKYICRFLDEVQSYSGVNKMSVQNLATVFGPNILRPKVEDPLTIMEGTVVVQQLMSVMISKHDCLFPKDAELQSKPQDGVSNNNEIQKKATMGQLQNKENNNTKDSPSRQCSWDKSESPQRSSMNNGSPTALSGSKTNSPKNSVHKLDVSRSPPLMVKKNPAFNKGSGIVTNGSFSSSNAEGLEKTQTTPNGSLQARRSSSLKVSGTKMGTHSVQNGTVRMGILNSDTLGNPTNVRNMSWLPNGYVTLRDNKQKEQAGELGQHNRLSTYDNVHQQFSMMNLDDKQSIDSATWSTSSCEISLPENSNSCRSSTTTCPEQDFFGGNFEDPVLDGPPQDDLSHPRDYESKSDHRSVGGRSSRATSSSDNSETFVGNSSSNHSALHSLVSSLKQEMTKQKIEYESRIKSLEQRNLTLETEMMSLHDELDQERKKFTMIEIKMRNAERAKEDAEKRNDMLQKEMEQFFSTFGELTVEPRRTERGNTIWIQ


Análise da proteína

In [5]:
# Comment topics to report, in no particular order: each comment is printed
# in the order it appears in the record. Add further topics
# (e.g. "DEVELOPMENTAL STAGE") to this tuple to include them.
COMMENT_TOPICS = (
    "FUNCTION",
    "SIMILARITY",
    "SUBCELLULAR LOCATION",
    "DOMAIN",
    "PTM",
    "TISSUE SPECIFICITY",
    "SUBUNIT",
)

# str.join puts the separator only *between* items, so no trailing " | " or
# stray space has to be trimmed afterwards.
taxonomia = " | ".join(info.organism_classification)
print(f"\nTAXONOMY: {taxonomia}")
print(f"SEQUENCE LENGTH: {info.sequence_length}")

for member in info.comments:
    # Each comment begins with its topic label (e.g. "FUNCTION: ..."), so the
    # match is anchored at the start. Searching the whole text would also
    # select comments that merely mention one of the topic words and could
    # print the same comment more than once.
    if member.startswith(COMMENT_TOPICS):
        # Keep only the text before the first "{", i.e. drop the trailing
        # curly-brace annotation block.
        print(member.split("{")[0])

keywords = " | ".join(info.keywords)
print(f"KEYWORDS: {keywords}")


TAXONOMY: Eukaryota | Metazoa | Chordata | Craniata | Vertebrata | Euteleostomi | Mammalia | Eutheria | Euarchontoglires | Primates | Haplorrhini | Catarrhini | Hominidae | Homo 
SEQUENCE LENGHT: 748
FUNCTION: Rho GTPase-activating protein involved in cell polarity, cell morphology and cytoskeletal organization. Acts as a GTPase activator for the Rac-type GTPase by converting it to an inactive GDP-bound state. Controls actin remodeling by inactivating Rac downstream of Rho leading to suppress leading edge protrusion and promotes cell retraction to achieve cellular polarity. Able to suppress RAC1 and CDC42 activity in vitro. Overexpression induces cell rounding with partial or complete disruption of actin stress fibers and formation of membrane ruffles, lamellipodia, and filopodia. Isoform 2 is a vascular cell-specific GAP involved in modulation of angiogenesis. 
SUBUNIT: Interacts with FLNA. 
SUBCELLULAR LOCATION: Cytoplasm, cytoskeleton. Cell junction, adherens junction. Cell junctio