In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px
from api_utils import fetch_pharmacy_data  # Import helper

# Dynamic load
df = fetch_pharmacy_data(limit=300)  # Pull fresh data
if df.empty:
    print("Fallback to sample data if API fails.")
    # Optional: Load static CSV as backup

# Rest of EDA (adapt columns to API fields, e.g., 'action_date' for trends)
df['action_date'] = pd.to_datetime(df['action_date'], errors='coerce')  # Ensure datetime
daily_trends = df.groupby(df['action_date'].dt.date)['total_amount'].sum().reset_index()
fig1 = px.line(daily_trends, x='action_date', y='total_amount', title='Dynamic Recall Trends (API-Fetched)')
fig1.show()

# Top products (recalls by product)
top_products = df.groupby('product_name')['quantity_involved'].sum().sort_values(ascending=False).head(10)
plt.figure(figsize=(10, 6))
sns.barplot(x=top_products.values, y=top_products.index, palette='viridis')
plt.title('Top Products by Quantity Involved (Dynamic Data)')
plt.show()

# Correlation heatmap (adapt to numerical cols like quantity, mock total_amount)
numerical_df = df.select_dtypes(include=[np.number])
corr = numerical_df.corr()
plt.figure(figsize=(8, 6))
sns.heatmap(corr, annot=True, cmap='coolwarm')
plt.title('Dynamic Data Correlations')
plt.show()

print(f"Average quantity involved: {df['quantity_involved'].mean():.2f}")
print(f"Most common reason: {df['reason'].mode()[0] if not df['reason'].mode().empty else 'N/A'}")

API error: 500 Server Error: Internal Server Error for url: https://api.fda.gov/drug/enforcement.json?search=report_date%3A%5B2020-10-07%2BTO%2B2025-10-06%5D&limit=300&skip=0
Fallback to sample data if API fails.


KeyError: 'action_date'