<a href="https://colab.research.google.com/github/lalgudisethu/corona/blob/master/sample_results.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import requests
import pandas as pd
import json
import re

# Define gene of interest
gene_symbol = "TP53"

# 1. Fetch Type and Ensembl Details (using Ensembl REST API)
ensembl_url = f"http://rest.ensembl.org/lookup/symbol/homo_sapiens/{gene_symbol}?expand=1"
headers = {"Content-Type": "application/json"}
response = requests.get(ensembl_url, headers=headers)
if response.status_code == 200:
    ensembl_data = response.json()
    print("\n1. Type and Ensembl Details:")
    print(f"Gene Symbol: {ensembl_data['display_name']}")
    print(f"Type: {ensembl_data['biotype']}")
    print(f"Ensembl Gene ID: {ensembl_data['id']}")
    print(f"Chromosome: {ensembl_data['seq_region_name']}")
    print(f"Start: {ensembl_data['start']}")
    print(f"End: {ensembl_data['end']}")
else:
    print(f"Error fetching Ensembl data: {response.status_code}")

# 2. Fetch Associated Genes (from STRING API)
string_api_url = "https://string-db.org/api/json/network"
params = {
    "identifiers": gene_symbol,
    "species": 9606,  # Human
    "limit": 10
}
response = requests.get(string_api_url, params=params)
if response.status_code == 200:
    string_data = response.json()
    associated_genes = set()
    for interaction in string_data:
        gene_a = interaction['preferredName_A']
        gene_b = interaction['preferredName_B']
        if gene_a != gene_symbol:
            associated_genes.add(gene_a)
        if gene_b != gene_symbol:
            associated_genes.add(gene_b)
    print("\n2. Associated Genes (from STRING):")
    print(list(associated_genes))
else:
    print(f"Error fetching STRING data: {response.status_code}")

# 3. Fetch Associated Proteins (from UniProt)
uniprot_url = f"https://rest.uniprot.org/uniprotkb/search?query=gene:{gene_symbol}+organism_id:9606"
response = requests.get(uniprot_url)
if response.status_code == 200:
    uniprot_data = response.json()
    uniprot_id = uniprot_data['results'][0]['primaryAccession']
    print("\n3. Associated Proteins (UniProt ID for TP53):")
    print(uniprot_id)

    # Fetch protein interactions (reuse STRING for simplicity)
    print("Protein interactions (reusing STRING genes as proxies):")
    print(list(associated_genes))
else:
    print(f"Error fetching UniProt data: {response.status_code}")

# 4. Fetch Pathway Involvement (from KEGG API)
# Note: KEGG requires Ensembl ID or NCBI ID; we'll use NCBI ID for simplicity
ncbi_url = f"http://rest.ensembl.org/xrefs/symbol/homo_sapiens/{gene_symbol}?external_db=EntrezGene"
response = requests.get(ncbi_url, headers=headers)
if response.status_code == 200:
    ncbi_data = response.json()
    ncbi_id = ncbi_data[0]['id'] if ncbi_data else None
    kegg_url = f"http://rest.kegg.jp/get/hsa:{ncbi_id}"
    response = requests.get(kegg_url)
    if response.status_code == 200:
        kegg_data = response.text
        pathways = re.findall(r"PATHWAY\s+(.+)", kegg_data)
        print("\n4. Pathway Involvement (from KEGG):")
        if pathways:
            for pathway in pathways:
                print(pathway)
        else:
            print("No KEGG pathways found")
    else:
        print(f"Error fetching KEGG data: {response.status_code}")
else:
    print(f"Error fetching NCBI ID: {response.status_code}")

# 5. Pathways Affected (Prostate Cancer, KEGG hsa05215)
kegg_prostate_url = "http://rest.kegg.jp/get/hsa05215"
response = requests.get(kegg_prostate_url)
if response.status_code == 200:
    kegg_prostate_data = response.text
    print("\n5. Pathways Affected (Prostate Cancer, KEGG hsa05215):")
    if gene_symbol in kegg_prostate_data:
        print("TP53 is involved in prostate cancer pathway")
        pathways = re.findall(r"PATHWAY\s+(.+)", kegg_prostate_data)
        for pathway in pathways:
            print(pathway)
    else:
        print("TP53 not found in prostate cancer pathway")
else:
    print(f"Error fetching KEGG prostate cancer data: {response.status_code}")

# 6. Fetch IDs
print("\n6. IDs:")
print(f"Ensembl Gene ID: {ensembl_data['id']}")
print(f"UniProt ID: {uniprot_id}")
print(f"NCBI Gene ID: {ncbi_id}")


1. Type and Ensembl Details:
Gene Symbol: TP53
Type: protein_coding
Ensembl Gene ID: ENSG00000141510
Chromosome: 17
Start: 7661779
End: 7687546

2. Associated Genes (from STRING):
['DAXX', 'CREBBP', 'MDM2', 'HSP90AA1', 'SIRT1', 'SFN', 'ATM', 'RPA1', 'EP300', 'TP53BP2']

3. Associated Proteins (UniProt ID for TP53):
P04637
Protein interactions (reusing STRING genes as proxies):
['DAXX', 'CREBBP', 'MDM2', 'HSP90AA1', 'SIRT1', 'SFN', 'ATM', 'RPA1', 'EP300', 'TP53BP2']
Error fetching KEGG data: 404

5. Pathways Affected (Prostate Cancer, KEGG hsa05215):
TP53 is involved in prostate cancer pathway
hsa00140  Steroid hormone biosynthesis
ko05215

6. IDs:
Ensembl Gene ID: ENSG00000141510
UniProt ID: P04637
NCBI Gene ID: ENSG00000141510
