In [1]:
from dotenv import load_dotenv
from rich import print
import pandas as pd
import httpx
import os

# Load environment variables via python-dotenv; the FDA API key is read
# from the environment when the request URLs are built.
load_dotenv()

# Lift pandas' column display limit so wide frames are shown in full
pd.options.display.max_columns = None

In [2]:
# Request URLs for the two openFDA queries used in this notebook.
# The API key comes from the FDA_API_KEY environment variable; if it is
# unset, os.getenv returns None and the literal text "None" ends up in the URL.
_fda_api_key = os.getenv('FDA_API_KEY')
_openfda_drug_root = "https://api.fda.gov/drug"

# Drug labels with an effective_time in the given range (up to 90 records)
endpoint_effective_time = (
    f"{_openfda_drug_root}/label.json?api_key={_fda_api_key}"
    "&search=effective_time:[20110601+TO+20231231]&limit=90"
)

# NDC directory entries flagged as finished (up to 300 records)
endpoint_finished = (
    f"{_openfda_drug_root}/ndc.json?api_key={_fda_api_key}"
    "&search=finished:true&limit=300"
)

In [3]:
# Issue one GET request per endpoint with httpx
response_1 = httpx.get(endpoint_effective_time)
response_2 = httpx.get(endpoint_finished)

# Print the status code of each response (label query first, then NDC query)
print(f"Status code: {response_1.status_code}")
print(f"Status code: {response_2.status_code}")

# Fail fast on 4xx/5xx responses. Without this, an error payload would be
# parsed as JSON and only surface later as a confusing KeyError on 'meta'.
# Done after the prints so the status codes are still visible on failure.
response_1.raise_for_status()
response_2.raise_for_status()

# Decode the JSON response bodies
data_eff_time = response_1.json()
data_finished = response_2.json()

# Print the total number of matching records reported by the API metadata
# (this is the overall match count, not the number of records returned)
print(f"Number of results: {data_eff_time['meta']['results']['total']}")
print(f"Number of results: {data_finished['meta']['results']['total']}")

In [None]:
# Inspect a single label record to see which fields the payload exposes
first_label_record = data_eff_time['results'][0]
print(first_label_record)

In [4]:
# Inspect a single NDC record to see which fields the payload exposes
first_ndc_record = data_finished['results'][0]
print(first_ndc_record)

In [None]:
# Show the effective_time field of the first ten label records
first_ten_label_records = data_eff_time['results'][:10]
for result in first_ten_label_records:
    print(result['effective_time'])

In [None]:
# Show the generic_name field of the first ten NDC records
first_ten_ndc_records = data_finished['results'][:10]
for result in first_ten_ndc_records:
    print(result['generic_name'])

In [None]:
# Build one DataFrame per endpoint from the full list of returned records
# (every entry under 'results', not just the first one)
label_records = data_eff_time['results']
ndc_records = data_finished['results']

df_1 = pd.DataFrame(label_records)
df_2 = pd.DataFrame(ndc_records)

In [None]:
# Drop the references column from df_1.
# Reassigning (instead of inplace=True) together with errors='ignore' keeps
# this cell re-runnable: running it twice, or running it on a payload where
# no record carries a 'references' field, no longer raises KeyError.
df_1 = df_1.drop(columns='references', errors='ignore')

In [None]:
# Summarize df_1: column names, non-null counts and dtypes
df_1.info()

In [None]:
# Preview the first rows of df_2 (displayed as the cell's output)
df_2.head()

In [None]:
# Summarize df_2: column names, non-null counts and dtypes
df_2.info()

In [None]:
# Save the label dataframe (df_1) to a CSV file, without the index column.
# to_csv does not create missing parent directories, so make sure the
# target folder exists first; this keeps the cell working on a fresh checkout.
os.makedirs('../data/raw', exist_ok=True)
df_1.to_csv('../data/raw/drug_effective_time.csv', index=False)

In [None]:
# Store the 'finished' flag as text rather than its original dtype
# (presumably boolean, given the finished:true query; confirm with df_2.info())
finished_as_text = df_2['finished'].astype(str)
df_2['finished'] = finished_as_text