In [2]:
import os
import requests
import pandas as pd
import json
import time
import random

import pymysql as mysql
import mysql.connector
import warnings
import matplotlib.pyplot as plt
import csv
import pyodbc
import sqlite3
import sqlalchemy
from sqlalchemy import create_engine
from sqlalchemy import event
from tabulate import tabulate
import plotly.express as px
import seaborn as sns


# MySQL password: taken from the MYSQL_PASSWORD environment variable so the real
# credential never has to be saved inside the notebook. The original placeholder
# value is kept as the fallback so existing local setups keep working unchanged.
PASSWORD = os.environ.get("MYSQL_PASSWORD", "password")

# FDA Data

### API Request for Historical Text Documents

In [3]:
# openFDA endpoint that serves the historical documents collection
BASE_URL = 'https://api.fda.gov/other/historicaldocument.json'

# Query-string values sent with each request:
#   limit - number of records per page (adjust as needed)
#   skip  - offset of the first record to return
params = dict(limit=1000, skip=0)

# Function to pause execution to avoid overwhelming the server
def pause_execution(base_seconds=5.0, jitter_seconds=10.0):
    """Sleep for a randomized interval between successive API requests.

    The delay is ``base_seconds`` plus a random fraction of ``jitter_seconds``.
    With the defaults this is between 5 and 15 seconds, the same range that
    was previously hard-coded.

    Args:
        base_seconds: Minimum time to sleep, in seconds.
        jitter_seconds: Width of the random extra delay added on top, in seconds.
    """
    time.sleep(base_seconds + jitter_seconds * random.random())

# Function to fetch data from the OpenFDA API
def fetch_data():
    """Download every page of historical documents from the openFDA API.

    Pages are requested from ``BASE_URL``, starting at the ``limit``/``skip``
    values held in the module-level ``params`` dict, until a request returns a
    non-200 status or an empty ``results`` list.

    Returns:
        list[dict]: One ``{'doc_type', 'year', 'text'}`` dict per distinct
        document, in the order received. A field missing from a record is
        filled with ``'N/A'``.

    Raises:
        requests.exceptions.RequestException: If a request fails at the
            network level or does not answer within the timeout.
    """
    # Page with a private copy so the shared ``params`` dict is left untouched;
    # otherwise a second call would resume from the last offset of the previous
    # run and come back empty.
    query = dict(params)
    documents = []
    while True:
        # The timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(BASE_URL, params=query, timeout=60)
        print(f"Requesting data with params: {query}")
        print(f"Response status code: {response.status_code}")

        if response.status_code != 200:
            # Any non-200 answer ends the download with whatever was collected.
            print(f"Failed to fetch data: {response.status_code}")
            break

        data = response.json()
        results = data.get('results', [])
        if not results:
            print("No more results found.")
            break

        for item in results:
            document = {
                'doc_type': item.get('doc_type', 'N/A'),
                'year': item.get('year', 'N/A'),
                'text': item.get('text', 'N/A'),
            }
            # Drop exact duplicates (all three kept fields equal).
            if document not in documents:
                documents.append(document)

        query['skip'] += query['limit']
        pause_execution()

    return documents

# Download the complete set of historical documents
documents = fetch_data()

# Load the records into a DataFrame and preview the first rows
documents_df = pd.DataFrame(data=documents)
documents_df.head()

Requesting data with params: {'limit': 1000, 'skip': 0}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 1000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 2000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 3000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 4000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 5000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 6000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 7000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 8000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 9000}
Response status code: 404
Failed to fetch data: 404


Unnamed: 0,doc_type,year,text
0,pr,2006,FDA NEWS RELEASE\nFOR IMMEDIATE RELEASE\n\nFeb...
1,pr,2006,FDA NEWS RELEASE\nFOR IMMEDIATE RELEASE\n\nFeb...
2,talk,1991,I FOOD _-AND DRUG ADMINISTRATION ‘ 'i\nU. S. D...
3,pr,1934,iNFORMATION FOR THE PRESS\n\nu 8. DEPARTMENT O...
4,pr,1975,\n\nU. S. DEPARTMENT OF HEALTH. EDUCATION. AN...


In [4]:
# Save csv file to data library
#csv_file_path = 'DataLibrary/raw_documents.csv'
#documents_df.to_csv(csv_file_path, index=False)

# Save the data to a JSON file
#json_file_path = 'DataLibrary/raw_documents.json'
#with open(json_file_path, 'w') as json_file:
#    json.dump(documents, json_file, indent=4)

#print(f"Data saved to {csv_file_path} and {json_file_path}")

### API Request for Adverse Events Data

In [5]:
# openFDA endpoint that serves drug adverse event reports
BASE_URL = 'https://api.fda.gov/drug/event.json'

# Query-string values sent with each request
params = {}
params['limit'] = 1000  # maximum page size per request
params['skip'] = 0      # offset of the first record to return

# Function to pause execution to avoid overwhelming the server
def pause_execution(base_seconds=5.0, jitter_seconds=10.0):
    """Sleep for a randomized interval between successive API requests.

    The delay is ``base_seconds`` plus a random fraction of ``jitter_seconds``.
    With the defaults this is between 5 and 15 seconds, the same range that
    was previously hard-coded.

    Args:
        base_seconds: Minimum time to sleep, in seconds.
        jitter_seconds: Width of the random extra delay added on top, in seconds.
    """
    time.sleep(base_seconds + jitter_seconds * random.random())

# Function to fetch data from the OpenFDA API
def fetch_data():
    """Download every page of drug adverse event reports from the openFDA API.

    Pages are requested from ``BASE_URL``, starting at the ``limit``/``skip``
    values held in the module-level ``params`` dict, until a request returns a
    non-200 status or an empty ``results`` list.

    Returns:
        list: The raw ``results`` entries of all pages, in the order received.

    Raises:
        requests.exceptions.RequestException: If a request fails at the
            network level or does not answer within the timeout.
    """
    # Page with a private copy so the shared ``params`` dict is left untouched;
    # otherwise a second call would resume from the last offset of the previous
    # run instead of starting over.
    query = dict(params)
    events = []
    while True:
        # The timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(BASE_URL, params=query, timeout=60)
        print(f"Requesting data with params: {query}")
        print(f"Response status code: {response.status_code}")

        if response.status_code != 200:
            # Any non-200 answer ends the download with whatever was collected.
            print(f"Failed to fetch data: {response.status_code}")
            break

        data = response.json()
        results = data.get('results', [])
        if not results:
            print("No more results found.")
            break

        events.extend(results)

        query['skip'] += query['limit']
        pause_execution()

    return events

# Download all adverse event reports
events = fetch_data()

# Flatten the nested JSON records into a tabular DataFrame
events_df = pd.json_normalize(data=events)

# Preview the first rows
events_df.head()

Requesting data with params: {'limit': 1000, 'skip': 0}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 1000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 2000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 3000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 4000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 5000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 6000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 7000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 8000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 9000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 10000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 11000}
Respons

Unnamed: 0,safetyreportid,transmissiondateformat,transmissiondate,serious,seriousnessdeath,receivedateformat,receivedate,receiptdateformat,receiptdate,fulfillexpeditecriteria,...,occurcountry,patient.patientagegroup,seriousnesshospitalization,patient.summary.narrativeincludeclinical,seriousnesslifethreatening,patient.patientweight,primarysource.literaturereference,seriousnesscongenitalanomali,authoritynumb,reportduplicate
0,5801206-7,102,20090109,1,1.0,102,20080707,102,20080625,1,...,,,,,,,,,,
1,10003300,102,20141002,1,,102,20140306,102,20140306,2,...,,,,,,,,,,
2,10003301,102,20141002,1,,102,20140228,102,20140228,2,...,,,,,,,,,,
3,10003302,102,20141002,2,,102,20140312,102,20140312,2,...,US,,,,,,,,,
4,10003304,102,20141212,2,,102,20140312,102,20140424,2,...,US,,,,,,,,,


In [9]:
# Save the data to a JSON file
#json_file_path = 'DataLibrary/raw_events.json'
#with open(json_file_path, 'w') as json_file:
#    json.dump(events, json_file, indent=4)

# Save the data to a CSV file
#csv_file_path = 'DataLibrary/raw_events.csv'
#events_df.to_csv(csv_file_path, index=False)

#print(f"Data saved to {csv_file_path} and {json_file_path}")

Data saved to DataLibrary/raw_events.csv and DataLibrary/raw_events.json


### API Request for Drug Labels

In [7]:
# openFDA endpoint that serves drug label records
BASE_URL = 'https://api.fda.gov/drug/label.json'

# Query-string values sent with each request: (name, value) pairs for the
# maximum page size and the offset of the first record to return
params = dict([('limit', 1000), ('skip', 0)])

# Function to pause execution to avoid overwhelming the server
def pause_execution(base_seconds=5.0, jitter_seconds=10.0):
    """Sleep for a randomized interval between successive API requests.

    The delay is ``base_seconds`` plus a random fraction of ``jitter_seconds``.
    With the defaults this is between 5 and 15 seconds, the same range that
    was previously hard-coded.

    Args:
        base_seconds: Minimum time to sleep, in seconds.
        jitter_seconds: Width of the random extra delay added on top, in seconds.
    """
    time.sleep(base_seconds + jitter_seconds * random.random())

# Function to fetch data from the OpenFDA API
def fetch_data():
    """Download every page of drug label records from the openFDA API.

    Pages are requested from ``BASE_URL``, starting at the ``limit``/``skip``
    values held in the module-level ``params`` dict, until a request returns a
    non-200 status or an empty ``results`` list.

    Returns:
        list: The raw ``results`` entries of all pages, in the order received.

    Raises:
        requests.exceptions.RequestException: If a request fails at the
            network level or does not answer within the timeout.
    """
    # Page with a private copy so the shared ``params`` dict is left untouched;
    # otherwise a second call would resume from the last offset of the previous
    # run instead of starting over.
    query = dict(params)
    labels = []
    while True:
        # The timeout keeps a stalled connection from hanging the notebook.
        response = requests.get(BASE_URL, params=query, timeout=60)
        print(f"Requesting data with params: {query}")
        print(f"Response status code: {response.status_code}")

        if response.status_code != 200:
            # Any non-200 answer ends the download with whatever was collected.
            print(f"Failed to fetch data: {response.status_code}")
            break

        data = response.json()
        results = data.get('results', [])
        if not results:
            print("No more results found.")
            break

        # Collect into the local ``labels`` list that is returned. Appending to
        # the global ``documents`` list here would leave the return value empty
        # and mix label records into the historical documents.
        labels.extend(results)

        query['skip'] += query['limit']
        pause_execution()

    return labels

# Download all drug label records
labels = fetch_data()

# Flatten the nested JSON records into a tabular DataFrame
labels_df = pd.json_normalize(data=labels)

# Preview the first rows
labels_df.head()

Requesting data with params: {'limit': 1000, 'skip': 0}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 1000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 2000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 3000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 4000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 5000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 6000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 7000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 8000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 9000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 10000}
Response status code: 200
Requesting data with params: {'limit': 1000, 'skip': 11000}
Respons

In [8]:
# Save the data to a JSON file
#json_file_path = 'DataLibrary/raw_labels.json'
#with open(json_file_path, 'w') as json_file:
#    json.dump(labels, json_file, indent=4)

# Save the data to a CSV file
#csv_file_path = 'DataLibrary/raw_labels.csv'
#labels_df.to_csv(csv_file_path, index=False)

#print(f"Data saved to {csv_file_path} and {json_file_path}")

# National Library of Medicine - Standardized Drug Names and Information

### API Request for RxNorm

In [None]:
# RxTerms endpoint (on the NLM RxNav REST service) queried for drug properties
BASE_URL = (
    'https://rxnav.nlm.nih.gov/REST'
    '/RxTerms/rxcui/properties'
)

# Function to pause execution to avoid overwhelming the server
def pause_execution(base_seconds=1.0, jitter_seconds=2.0):
    """Sleep for a randomized interval between successive API requests.

    The delay is ``base_seconds`` plus a random fraction of ``jitter_seconds``.
    With the defaults this is between 1 and 3 seconds, the same range that
    was previously hard-coded.

    Args:
        base_seconds: Minimum time to sleep, in seconds.
        jitter_seconds: Width of the random extra delay added on top, in seconds.
    """
    time.sleep(base_seconds + jitter_seconds * random.random())

# Function to fetch data from the RxTerms API
def fetch_data():
    """Collect drug concept records from the RxTerms API.

    Starting at RxCUI 1 and advancing by ``step`` after every request, one
    request per RxCUI is sent to ``BASE_URL``. Collection stops at the first
    non-200 response or the first response body without an
    ``'rxtermDrugConcept'`` key.

    Returns:
        list: Every item found under ``'rxtermDrugConcept'`` across all
        responses, in the order received.

    Raises:
        requests.exceptions.RequestException: If a request fails at the
            network level or does not answer within the timeout.
    """
    drugs = []
    start_rxcui = 1  # First RxCUI to query
    step = 1000      # Distance between consecutively queried RxCUIs

    # NOTE(review): each request asks for the single RxCUI ``start_rxcui``, so
    # only 1, 1001, 2001, ... are ever queried and the identifiers in between
    # are never requested. Confirm that this stride is intended.
    # NOTE(review): confirm the endpoint path and the 'rxtermDrugConcept'
    # response key against the RxNav RxTerms API documentation.
    while True:
        # Let requests build the query string; the timeout keeps a stalled
        # connection from hanging the notebook.
        response = requests.get(BASE_URL, params={'rxcui': start_rxcui}, timeout=60)
        print(f"Requesting data with RxCUI: {start_rxcui}")
        print(f"Response status code: {response.status_code}")

        if response.status_code != 200:
            print(f"Failed to fetch data: {response.status_code}")
            break

        data = response.json()
        if 'rxtermDrugConcept' not in data:
            print("No more results found.")
            break

        # presumably a list of concept records; verify against a real response
        drugs.extend(data['rxtermDrugConcept'])

        start_rxcui += step
        pause_execution()

    return drugs

# Download the RxTerms drug records
drugs = fetch_data()

# Flatten the nested JSON records into a tabular DataFrame
drugs_df = pd.json_normalize(data=drugs)

In [None]:
# Save the data to a JSON file
#json_file_path = 'DataLibrary/rxterms_drugs.json'
#with open(json_file_path, 'w') as json_file:
#    json.dump(drugs, json_file, indent=4)

# Save the data to a CSV file
#csv_file_path = 'DataLibrary/rxterms_drugs.csv'
#drugs_df.to_csv(csv_file_path, index=False)

#print(f"Data saved to {csv_file_path} and {json_file_path}")

# Medicaid Drug Prices API 

In [10]:
# Request historical and current drug prices for the dashboard

# Select Variables of Interest from Datasets

# Standardize Variable Names Across Data Sources

In [None]:
# package_ndc = NDC

# Combining Data into SQL Database Based on Keys

In [6]:
# Keys in FDA data = rxcui OR package_ndc
# Key in medicaid data = NDC
# Key in RxNorm data = rxcui

### Create new database to store data

In [None]:
# Connect to the MySQL server (no default database is selected at this point)
connection = mysql.connector.connect(host="localhost", user="root", password=PASSWORD)

# try/finally guarantees the cursor and the connection are released even when
# one of the statements below raises.
try:
    # Create a cursor object
    cursor = connection.cursor()
    try:
        # Drop the database if it exists - this is so we can start fresh
        # (at least while developing). WARNING: this deletes all existing data.
        cursor.execute("DROP DATABASE IF EXISTS pharma_db")

        # Create the 'pharma_db' database
        cursor.execute("CREATE DATABASE pharma_db")

        # Switch to the 'pharma_db' database
        cursor.execute("USE pharma_db")

        # Commit the changes
        connection.commit()
    finally:
        cursor.close()
finally:
    connection.close()

### Create tables for the API request dataframes

In [None]:
# Table definitions.
# NOTE(review): all four statements still use the placeholder table name
# `table_name` and the placeholder column list `vars`. TODO: replace them with
# the real table names and column definitions before running this cell.
create_table1_query = """
CREATE TABLE IF NOT EXISTS `table_name` (
  vars
) ENGINE=InnoDB;
"""

create_table2_query = """
CREATE TABLE IF NOT EXISTS `table_name` (
  vars
) ENGINE=InnoDB;
"""

create_table3_query = """
CREATE TABLE IF NOT EXISTS `table_name` (
  vars
) ENGINE=InnoDB;
"""

create_table4_query = """
CREATE TABLE IF NOT EXISTS `table_name` (
  vars
) ENGINE=InnoDB;
"""

# Connect to the MySQL server, using the pharma_db database
connection = mysql.connector.connect(
    host="localhost", user="root", password=PASSWORD, database="pharma_db"
)

# try/finally guarantees the cursor and the connection are released even when
# one of the statements below raises.
try:
    # Create a cursor object
    cursor = connection.cursor()
    try:
        # Create the tables, in the same order as before
        for create_table_query in (
            create_table1_query,
            create_table2_query,
            create_table3_query,
            create_table4_query,
        ):
            cursor.execute(create_table_query)

        # List the tables that now exist in the database
        cursor.execute("SHOW TABLES")

        # Fetch all the rows
        tables = cursor.fetchall()

        # Print the list of tables (each row's first column is printed)
        for table in tables:
            print(table[0])

        # Commit the changes
        connection.commit()
    finally:
        cursor.close()
finally:
    connection.close()