# Fetch Data From [Guide to Pharmacology](https://www.guidetopharmacology.org)

In [1]:
import pandas as pd
import re
from drug_nme.fetch import PharmacologyDataFetcher

In [2]:
# Download the Guide to Pharmacology data through the project's fetcher.
# NOTE(review): get_data() appears to return a pandas DataFrame (later cells
# index it with df['approvalSource']) — confirm against drug_nme.fetch.
extract = PharmacologyDataFetcher()

df = extract.get_data()

Downloading Data From Guide To Pharmacology: 912KB [00:09, 95.94KB/s] 


In [3]:
# Preview the fetched table (bare expression, rendered as the cell output).
df

Unnamed: 0,ligandId,name,abbreviation,inn,type,species,radioactive,labelled,approved,withdrawn,whoEssential,immuno,malaria,antibacterial,approvalSource,subunitIds,complexIds,prodrugIds,activeDrugIds
0,2779,"1,25-dihydroxyvitamin D3",,calcitriol,Metabolite,,False,False,True,False,False,False,False,False,FDA (1978),[],[],[],[]
1,1013,17&beta;-estradiol,E2,estradiol,Metabolite,,False,False,True,False,False,False,False,False,"FDA (1954, prior history unavailable)",[],[],[7655],[]
2,4108,5&alpha;-pregnan-3&alpha;-ol-20-one,,brexanolone,Metabolite,,False,False,True,False,False,False,False,False,FDA (2019),[],[],[],[]
3,4784,5-aminolevulinic acid,,,Metabolite,,False,False,True,False,False,False,False,False,FDA (1999)m EMA (2007),[],[],[],[]
4,4789,5-fluorouracil,,fluorouracil,Synthetic organic,,False,False,True,False,True,False,False,False,FDA (1962),[],[],"[6799, 4801]",[]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1990,7430,zopiclone,,zopiclone,Synthetic organic,,False,False,True,False,False,False,False,False,,[],[],[],[]
1991,103,zotepine,,zotepine,Synthetic organic,,False,False,True,False,False,False,False,False,,[],[],[],[]
1992,11576,zucapsaicin,,zucapsaicin,Synthetic organic,,False,False,True,False,False,False,False,False,Canada (2010),[],[],[],[]
1993,7559,zuclopenthixol,,zuclopenthixol,Synthetic organic,,False,False,True,False,False,False,False,False,,[],[],[],[]


In [4]:
# Exploratory pass: split each approvalSource string on '(' or ')' and spread
# the pieces across columns (expand=True). `working` is reassigned by a later
# cell that applies process_string instead.
working = df['approvalSource'].str.split(r'\(|\)', expand=True)

In [None]:
def process_string(text):
    """Parse an ``approvalSource`` string into per-agency label/year fields.

    The input is expected to look like ``"FDA (1999), EMA (2007)"``: an
    agency label followed by a parenthesised note, repeated. The string is
    split on parentheses and the pieces are consumed in (label, note) pairs.
    A pair is recorded for every known agency whose name occurs in the label.

    Args:
        text: Raw approval-source value. Anything that is not a string
            (``None``, or the float NaN pandas uses for missing cells)
            yields an all-``None`` result instead of raising.

    Returns:
        pd.Series with the fixed index ``FDA, FDA_year, UK, UK_year, EMA,
        EMA_year, Japan, Japan_year, China, China_year``. ``<agency>`` holds
        the label text that mentioned the agency and ``<agency>_year`` holds
        the parenthesised text verbatim (it may contain more than a year,
        e.g. ``"1954, prior history unavailable"``). Unmatched agencies stay
        ``None``.
    """
    agencies = ('FDA', 'UK', 'EMA', 'Japan', 'China')

    # Pre-populate every key so each call returns the same index; otherwise
    # the columns produced by Series.apply depend on which rows matched.
    result = {}
    for agency in agencies:
        result[agency] = None
        result[f'{agency}_year'] = None

    # Missing cells arrive as NaN/None; re.split would raise TypeError on them.
    if not isinstance(text, str):
        return pd.Series(result)

    # Split by parentheses, drop empty pieces, then trim separators.
    parts = [part.strip(', ') for part in re.split(r'\(|\)', text) if part]

    # Even positions are labels, odd positions are the parenthesised notes;
    # zip drops a trailing label that has no note after it.
    for label, year in zip(parts[::2], parts[1::2]):
        for agency in agencies:
            if agency in label:
                result[agency] = label
                result[f'{agency}_year'] = year

    return pd.Series(result)

In [None]:
# Apply the function to the dataframe
working = df['approvalSource'].apply(process_string)

working

In [None]:
# Place the parsed approval columns next to the original columns
# (axis=1 concatenates column-wise, aligning rows on the index).
new = pd.concat([df, working], axis=1)
new

In [None]:
from drug_nme.fetch import FDADataFetcher

# Rebinds `extract`: from here on it is the FDA fetcher, not the
# Guide to Pharmacology one created at the top of the notebook.
extract = FDADataFetcher()

In [None]:
# Rebinds `df` to whatever the FDA fetcher returns; the Guide to Pharmacology
# table is no longer reachable through `df` after this cell runs.
# NOTE(review): return type of FDADataFetcher.get_data() is not visible here.
df = extract.get_data()
df

In [None]:
import requests
from tqdm import tqdm
import zipfile
import io
import json

# URL of the zip file
url = "https://download.open.fda.gov/drug/drugsfda/drug-drugsfda-0001-of-0001.json.zip"

# The archive is buffered in memory rather than written to disk.
file_bytes = io.BytesIO()

# Stream the download and show a progress bar.
# - timeout=(connect, read) keeps a stalled server from hanging the notebook.
# - The `with` blocks release the connection and close the bar even when the
#   request or the transfer fails part-way.
with requests.get(url, stream=True, timeout=(10, 60)) as response:
    response.raise_for_status()  # Check if the request was successful

    total_size = int(response.headers.get('content-length', 0))
    block_size = 1024  # 1 Kibibyte

    # total=None (unknown size) when the server sends no Content-Length;
    # unit_divisor=1024 makes the scaled "iB" figures true binary units.
    with tqdm(total=total_size or None, unit='iB', unit_scale=True,
              unit_divisor=1024) as progress_bar:
        # `chunk`, not `data`: `data` is reserved for the parsed JSON below.
        for chunk in response.iter_content(block_size):
            progress_bar.update(len(chunk))
            file_bytes.write(chunk)

# Move to the start of the BytesIO buffer
file_bytes.seek(0)

# Create a ZipFile object from the downloaded content
with zipfile.ZipFile(file_bytes) as z:
    # Extract the JSON file
    json_filename = z.namelist()[0]  # Assuming there's only one file in the zip
    with z.open(json_filename) as json_file:
        data = json.load(json_file)

# Now 'data' contains the contents of the JSON file

In [None]:
import requests
from tqdm import tqdm
import zipfile
import io
import json
import pandas as pd

# URL of the zip file
url = "https://download.open.fda.gov/drug/drugsfda/drug-drugsfda-0001-of-0001.json.zip"

# The archive is buffered in memory rather than written to disk.
file_bytes = io.BytesIO()

# Stream the download and show a progress bar.
# - timeout=(connect, read) keeps a stalled server from hanging the notebook.
# - The `with` blocks release the connection and close the bar even when the
#   request or the transfer fails part-way.
with requests.get(url, stream=True, timeout=(10, 60)) as response:
    response.raise_for_status()  # Check if the request was successful

    total_size = int(response.headers.get('content-length', 0))
    block_size = 1024  # 1 Kibibyte

    # total=None (unknown size) when the server sends no Content-Length;
    # unit_divisor=1024 makes the scaled "iB" figures true binary units.
    with tqdm(total=total_size or None, unit='iB', unit_scale=True,
              unit_divisor=1024) as progress_bar:
        # `chunk`, not `data`: `data` is reserved for the parsed JSON below.
        for chunk in response.iter_content(block_size):
            progress_bar.update(len(chunk))
            file_bytes.write(chunk)

# Move to the start of the BytesIO buffer
file_bytes.seek(0)

# Create a ZipFile object from the downloaded content
with zipfile.ZipFile(file_bytes) as z:
    # Extract the JSON file
    json_filename = z.namelist()[0]  # Assuming there's only one file in the zip
    with z.open(json_filename) as json_file:
        data = json.load(json_file)

In [None]:
# Echo the parsed JSON object as the cell output.
# NOTE(review): this prints the whole document, which may be very large.
data

In [None]:
# Build a DataFrame from the 'results' entry of the parsed JSON.
# NOTE(review): presumably data['results'] is a list of record dicts, giving
# one row per record — confirm against the openFDA download schema.
# Alternative tried earlier (normalises the whole top-level document instead):
# df = pd.json_normalize(data)
df = pd.DataFrame(data['results'])
df