In [None]:
import pandas as pd
import requests
from io import StringIO
import matplotlib.pyplot as plt
import seaborn as sns
import ipywidgets as widgets
from IPython.display import display, clear_output

%matplotlib inline


In [None]:
# CSV export endpoint for fuel samples on fems.fs2c.usda.gov. The query string
# asks for every site (siteId=All), only samples with status "Submitted", all
# categories / sub-categories / methods, sorted by fuel_type ascending.
# NOTE: the date window is hard-coded (2005-01-01 through 2025-03-25T23:00Z),
# so samples recorded after the end date are never downloaded.
USDA_URL = "https://fems.fs2c.usda.gov/fuelmodel/sample/download?returnAll=&responseFormat=csv&siteId=All&sampleId=&startDate=2005-01-01T00:00:00.000Z&endDate=2025-03-25T23:00:00.000Z&filterByFuelId=&filterByStatus=Submitted&filterByCategory=All&filterBySubCategory=All&filterByMethod=All&sortBy=fuel_type&sortOrder=asc"

# (connect, read) timeouts in seconds. Without a timeout `requests` waits
# indefinitely, which would hang the kernel if the server stalls. The read
# value is deliberately generous because the export covers many years of
# samples -- tune it if the server proves faster or slower.
REQUEST_TIMEOUT = (10, 300)

print("Fetching data from USDA...")
response = requests.get(USDA_URL, timeout=REQUEST_TIMEOUT)
# Fail loudly on HTTP 4xx/5xx instead of trying to parse an error page as CSV.
response.raise_for_status()
# Parse the response body as CSV straight from memory (no temporary file).
df = pd.read_csv(StringIO(response.text))

# Replace the CSV header positionally with the labels used below. This
# requires the download to contain exactly these ten columns in this order;
# pandas raises a length-mismatch error otherwise.
df.columns = ["Sample Id", "Date-Time", "Site Name", "SiteId", "Fuel Type", "Category", "Sub-Category", "Method", "Sample Avg Value", "Sample Status"]

# Parse timestamps; values that cannot be parsed become NaT and their rows
# are dropped.
parsed_times = pd.to_datetime(df["Date-Time"], errors="coerce")
valid_rows = parsed_times.notna()

# Take an explicit copy so the column assignment below operates on an
# independent frame rather than on the result of a boolean filter.
df = df.loc[valid_rows].copy()
parsed_times = parsed_times.loc[valid_rows]

# Make every timestamp timezone-naive while keeping its wall-clock value, so
# the column can be combined with naive timestamps later on.
if isinstance(parsed_times.dtype, pd.DatetimeTZDtype):
    # Uniformly tz-aware column: strip the zone in one vectorized step. Unlike
    # a row-wise apply, this also yields a naive dtype when no rows remain.
    parsed_times = parsed_times.dt.tz_localize(None)
elif parsed_times.dtype == object:
    # Object dtype means the parser returned individual datetime objects
    # (e.g. mixed UTC offsets); fall back to stripping tzinfo per element.
    parsed_times = parsed_times.apply(lambda ts: ts.replace(tzinfo=None) if ts.tzinfo else ts)

df["Date-Time"] = parsed_times

# Categories that must each be represented by at least one row of `df`.
desired_categories = ["Grass", "Forbs", "Shrubs", "Trees", "Dead"]

# Distinct non-null categories actually present in the loaded data.
existing_categories = df["Category"].dropna().unique().tolist()
missing_categories = [
    wanted for wanted in desired_categories if wanted not in existing_categories
]

# Every placeholder row shares these values; only "Category" differs per row.
# The placeholder timestamp is 2005-01-01.
default_date = pd.Timestamp("2005-01-01")
placeholder_template = {
    "Sample Id": None,
    "Date-Time": default_date,
    "Site Name": "Placeholder Site",
    "SiteId": None,
    "Fuel Type": "Placeholder",
    "Category": None,
    "Sub-Category": None,
    "Method": None,
    "Sample Avg Value": None,
    "Sample Status": None,
}

# One synthetic row per missing category (overriding a key keeps its position,
# so the column order of each row matches the template).
placeholder_rows = [
    {**placeholder_template, "Category": wanted} for wanted in missing_categories
]

# Append the synthetic rows only when something is actually missing; the
# combined frame gets a fresh 0..n-1 index.
if len(placeholder_rows) > 0:
    placeholder_df = pd.DataFrame(placeholder_rows)
    df = pd.concat((df, placeholder_df), ignore_index=True)

print(f"Data loaded. Rows: {len(df)}")
