# 02 — Exploratory Visualizations (Static) — Integrated

In [None]:
# Ensure deps in active kernel
import sys, subprocess, importlib
def ensure(pkg):
    """Import *pkg*, installing it with pip into the active kernel if missing.

    Args:
        pkg: Name used both as the importable module name and as the pip
            requirement string.

    Raises:
        subprocess.CalledProcessError: If the pip installation fails.
    """
    try:
        importlib.import_module(pkg)
    except ImportError:
        # Only a missing module warrants an install. Any other exception
        # means the package is present but failing at import time, so it is
        # allowed to propagate instead of being hidden behind a pip call.
        subprocess.check_call([sys.executable, "-m", "pip", "install", pkg])
        # Let the import system notice files written after interpreter start.
        importlib.invalidate_caches()
# Make sure the plotting stack is importable before the cells below run.
ensure("pandas")
ensure("matplotlib")

In [None]:
import pandas as pd, matplotlib.pyplot as plt
from pathlib import Path

# Locate the processed dataset: try the parent directory first, then the
# current working directory, and load the first file that exists.
ROOT = Path.cwd()
candidates = [
    base / "data/processed/fpds_cleaned.csv"
    for base in (ROOT.parent, ROOT)
]
csv_path = next(filter(Path.exists, candidates), None)
if csv_path is None:
    raise FileNotFoundError(
        "Run 01_cleaning_fpds.ipynb first to create the processed file."
    )
df = pd.read_csv(csv_path, parse_dates=["action_date"])
print("[INFO] Loaded", csv_path)

### Chart 1 — Monthly Contract Volume

In [None]:
# Contracts per month. Guarded so that a missing column or an empty result
# prints a hint instead of raising, matching the later chart cells.
if "year_month" in df.columns:
    cnt = df["year_month"].value_counts().sort_index()
    if not cnt.empty:
        plt.figure(figsize=(9, 4))
        cnt.plot(kind="line")
        plt.title("Monthly DoD Contract Volume")
        plt.xlabel("Year-Month")
        plt.ylabel("Count")
        plt.tight_layout()
        plt.show()
    else:
        # Plotting an empty series fails inside pandas, so skip it.
        print("[INFO] No year_month values present.")
else:
    print("[INFO] year_month column not found; run 01_cleaning_fpds.ipynb.")

### Chart 2 — Top Vendors by Count

In [None]:
# Ten most frequent vendor groups, sorted ascending so the largest bar is
# drawn at the top of the horizontal chart. Guarded so that a missing column
# or an empty result prints a hint instead of raising.
if "vendor_group" in df.columns:
    top = df["vendor_group"].value_counts().head(10).sort_values()
    if not top.empty:
        plt.figure(figsize=(7, 4))
        top.plot(kind="barh")
        plt.title("Top 10 Vendors by Contract Count")
        plt.xlabel("Contracts")
        plt.ylabel("Vendor")
        plt.tight_layout()
        plt.show()
    else:
        # Plotting an empty series fails inside pandas, so skip it.
        print("[INFO] No vendor_group values present.")
else:
    print("[INFO] vendor_group column not found; run 01_cleaning_fpds.ipynb.")

### Chart 3 — Average Turnaround Time (BTS) by Month (Mean over States)

In [None]:
# Mean turnaround time per month, drawn only when both required columns
# exist and at least one non-null monthly mean is available.
bts_ready = all(
    col in df.columns for col in ("average_turnaround_time", "year_month")
)
if not bts_ready:
    print("[INFO] BTS columns not found; run 01_cleaning_fpds.ipynb.")
else:
    monthly_mean = df.groupby("year_month")["average_turnaround_time"].mean()
    agg = monthly_mean.dropna()
    if agg.empty:
        print("[INFO] No BTS turnaround values present.")
    else:
        plt.figure(figsize=(9, 4))
        ax = agg.plot(kind="line")
        ax.set_title("Average Turnaround Time by Month (BTS)")
        ax.set_xlabel("Year-Month")
        ax.set_ylabel("Hours")
        plt.tight_layout()
        plt.show()

### Chart 4 — Contracts by Sanction Flag

In [None]:
# Contract counts per sanction flag value. The column guard is unchanged;
# the empty guard mirrors the turnaround chart so that a column holding only
# missing values prints a hint instead of failing inside the plot call.
if "sanctioned_vendor" in df.columns:
    vc = df["sanctioned_vendor"].value_counts().sort_index()
    if not vc.empty:
        plt.figure(figsize=(6, 4))
        vc.plot(kind="bar")
        plt.title("Contracts by Sanctioned Vendor Flag")
        plt.xlabel("Sanctioned Vendor (False/True)")
        plt.ylabel("Count")
        plt.tight_layout()
        plt.show()
    else:
        print("[INFO] No sanction flag values present.")
else:
    print("[INFO] Sanction columns not found; run 01_cleaning_fpds.ipynb.")