In [1]:
import os
import requests
import pandas as pd
import json
import time
import random

# API Request for Historical Text Documents

In [5]:
# OpenFDA API endpoint for fetching historical documents
BASE_URL = 'https://api.fda.gov/other/historicaldocument.json'

# Parameters for the API request
params = {
    'limit': 1000,  # Adjust the limit as needed
    'skip': 0      # Starting point for fetching records
}

# Function to pause execution to avoid overwhelming the server
def pause_execution():
    time.sleep(5 + 10 * random.random())

# Function to fetch data from the OpenFDA API
def fetch_data():
    documents = []
    while True:
        response = requests.get(BASE_URL, params=params)
        print(f"Requesting data with params: {params}")  # Debugging statement
        print(f"Response status code: {response.status_code}")  # Debugging statement

        if response.status_code != 200:
            print(f"Failed to fetch data: {response.status_code}")  # Debugging statement
            break
        
        data = response.json()
        results = data.get('results', [])
        if not results:
            print("No more results found.")  # Debugging statement
            break

        for item in results:
            doc_type = item.get('doc_type', 'N/A')
            year = item.get('year', 'N/A')
            text = item.get('text', 'N/A')
            document = {'doc_type': doc_type, 'year': year, 'text': text}
            if document not in documents:
                documents.append(document)
        
        params['skip'] += params['limit']
        pause_execution()

    return documents

# Fetch the data
documents = fetch_data()

# Save the data to a CSV file
documents_df = pd.DataFrame(documents)
documents_df.head()

Requesting data with params: {'limit': 1000, 'skip': 0}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 1000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 2000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 3000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 4000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 5000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 6000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 7000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 8000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 9000}
Response status code: 404
Failed to fetch data: 404


Unnamed: 0,doc_type,year,text
0,pr,2006,FDA NEWS RELEASE\nFOR IMMEDIATE RELEASE\n\nFeb...
1,pr,2006,FDA NEWS RELEASE\nFOR IMMEDIATE RELEASE\n\nFeb...
2,talk,1991,I FOOD _-AND DRUG ADMINISTRATION ‘ 'i\nU. S. D...
3,pr,1934,iNFORMATION FOR THE PRESS\n\nu 8. DEPARTMENT O...
4,pr,1975,\n\nU. S. DEPARTMENT OF HEALTH. EDUCATION. AN...


In [7]:
# Save csv file to data library
#csv_file_path = 'DataLibrary/raw_documents.csv'
#documents_df.to_csv(csv_file_path, index=False)

# Save the data to a JSON file
#json_file_path = 'DataLibrary/raw_documents.json'
#with open(json_file_path, 'w') as json_file:
#    json.dump(documents, json_file, indent=4)

#print(f"Data saved to {csv_file_path} and {json_file_path}")

Data saved to DataLibrary/raw_documents.csv and DataLibrary/raw_documents.json


# API Request for Adverse Events Data

In [8]:
# OpenFDA API endpoint for fetching drug event data
BASE_URL = 'https://api.fda.gov/drug/event.json'

# Parameters for the API request
params = {
    'limit': 1000,  # Maximum limit per request
    'skip': 0      # Starting point for fetching records
}

# Function to pause execution to avoid overwhelming the server
def pause_execution():
    time.sleep(5 + 10 * random.random())

# Function to fetch data from the OpenFDA API
def fetch_data():
    events = []
    while True:
        response = requests.get(BASE_URL, params=params)
        print(f"Requesting data with params: {params}")  # Debugging statement
        print(f"Response status code: {response.status_code}")  # Debugging statement

        if response.status_code != 200:
            print(f"Failed to fetch data: {response.status_code}")  # Debugging statement
            break
        
        data = response.json()
        results = data.get('results', [])
        if not results:
            print("No more results found.")  # Debugging statement
            break

        for item in results:
            events.append(item)
        
        params['skip'] += params['limit']
        pause_execution()

    return events

# Fetch the data
events = fetch_data()

# Convert the data to a pandas DataFrame
events_df = pd.json_normalize(events)

events_df.head()

Requesting data with params: {'limit': 1000, 'skip': 0}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 1000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 2000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 3000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 4000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 5000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 6000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 7000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 8000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 9000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 10000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 11000}
Respons

Unnamed: 0,safetyreportid,transmissiondateformat,transmissiondate,serious,seriousnessdeath,receivedateformat,receivedate,receiptdateformat,receiptdate,fulfillexpeditecriteria,...,occurcountry,patient.patientagegroup,seriousnesshospitalization,patient.summary.narrativeincludeclinical,seriousnesslifethreatening,patient.patientweight,primarysource.literaturereference,seriousnesscongenitalanomali,authoritynumb,reportduplicate
0,5801206-7,102,20090109,1,1.0,102,20080707,102,20080625,1,...,,,,,,,,,,
1,10003300,102,20141002,1,,102,20140306,102,20140306,2,...,,,,,,,,,,
2,10003301,102,20141002,1,,102,20140228,102,20140228,2,...,,,,,,,,,,
3,10003302,102,20141002,2,,102,20140312,102,20140312,2,...,US,,,,,,,,,
4,10003304,102,20141212,2,,102,20140312,102,20140424,2,...,US,,,,,,,,,


In [9]:
# Save the data to a JSON file
#json_file_path = 'DataLibrary/raw_events.json'
#with open(json_file_path, 'w') as json_file:
#    json.dump(events, json_file, indent=4)

# Save the data to a CSV file
#csv_file_path = 'DataLibrary/raw_events.csv'
#events_df.to_csv(csv_file_path, index=False)

#print(f"Data saved to {csv_file_path} and {json_file_path}")

Data saved to DataLibrary/raw_events.csv and DataLibrary/raw_events.json


# API Request for Drug Labels

In [10]:
# OpenFDA API endpoint for fetching drug label data
BASE_URL = 'https://api.fda.gov/drug/label.json'

# Parameters for the API request
params = {
    'limit': 1000,  # Maximum limit per request
    'skip': 0      # Starting point for fetching records
}

# Function to pause execution to avoid overwhelming the server
def pause_execution():
    time.sleep(5 + 10 * random.random())

# Function to fetch data from the OpenFDA API
def fetch_data():
    labels = []
    while True:
        response = requests.get(BASE_URL, params=params)
        print(f"Requesting data with params: {params}")  # Debugging statement
        print(f"Response status code: {response.status_code}")  # Debugging statement

        if response.status_code != 200:
            print(f"Failed to fetch data: {response.status_code}")  # Debugging statement
            break
        
        data = response.json()
        results = data.get('results', [])
        if not results:
            print("No more results found.")  # Debugging statement
            break

        for item in results:
            documents.append(item)
        
        params['skip'] += params['limit']
        pause_execution()

    return labels

# Fetch the data
labels = fetch_data()

# Convert the data to a pandas DataFrame
labels_df = pd.json_normalize(labels)

labels_df.head()

Requesting data with params: {'limit': 1000, 'skip': 0}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 1000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 2000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 3000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 4000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 5000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 6000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 7000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 8000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 9000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 10000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 11000}
Respons

In [11]:
# Save the data to a JSON file
#json_file_path = 'DataLibrary/raw_labels.json'
#with open(json_file_path, 'w') as json_file:
#    json.dump(labels, json_file, indent=4)

# Save the data to a CSV file
#csv_file_path = 'DataLibrary/raw_labels.csv'
#labels_df.to_csv(csv_file_path, index=False)

#print(f"Data saved to {csv_file_path} and {json_file_path}")

Data saved to DataLibrary/raw_labels.csv and DataLibrary/raw_labels.json


# API Request for RxNorm