In [1]:
!pip install prophet



In [2]:
pip install tqdm


Note: you may need to restart the kernel to use updated packages.


In [4]:
import pandas as pd
from prophet import Prophet
import os
from tqdm import tqdm

# --- Configuration -----------------------------------------------------------
INPUT_PATH = "../data/processed/crop_production_extended.csv"
output_path = "../data/processed/all_crop_forecasts.csv"
MIN_OBSERVATIONS = 5  # crop/state series with fewer rows than this are skipped
FORECAST_YEARS = 3    # number of yearly steps to predict past the last observation

# --- Load extended dataset ---------------------------------------------------
df = pd.read_csv(INPUT_PATH)

# Prophet expects a datetime column named "ds" and a target column named "y".
# Parsing with format="%Y" anchors every crop year on 1 January of that year.
df["Crop_Year"] = pd.to_datetime(df["Crop_Year"], format="%Y")
df = df.rename(columns={"Crop_Year": "ds", "Production": "y"})

# Store forecasts (one DataFrame per successfully fitted crop/state series)
all_forecasts = []

# Get all crop-state combinations
combinations = df.groupby(["Crop", "State"])

print(f"📊 Generating forecasts for {len(combinations)} crop-state combinations...\n")

for (crop, state), group in tqdm(combinations):
    # Skip series that are too short to fit a meaningful trend.
    if len(group) < MIN_OBSERVATIONS:
        continue

    group = group[["ds", "y"]].sort_values("ds")

    # The series has one observation per year, all on the same calendar day,
    # so a within-year seasonal curve cannot be identified from it. Leaving
    # yearly seasonality on would fit an arbitrary curve that distorts any
    # prediction made on a different day of the year, so it is disabled.
    model = Prophet(yearly_seasonality=False)
    try:
        model.fit(group)
    except Exception as e:
        # Best effort: report the failing series and carry on with the rest.
        print(f"❌ Could not train model for {crop} in {state}: {e}")
        continue

    # Forecast FORECAST_YEARS yearly steps past the last observed crop year.
    # "YS" (year start) keeps future dates on 1 January, aligned with the
    # history. The year-end alias "Y" would place them on 31 December, making
    # the first "future" row fall in the last observed year; it is also
    # deprecated in recent pandas releases.
    future = model.make_future_dataframe(periods=FORECAST_YEARS, freq="YS")
    forecast = model.predict(future)

    # Annotate with crop/state
    forecast["Crop"] = crop
    forecast["State"] = state

    # Keep necessary columns
    result = forecast[["ds", "Crop", "State", "yhat", "yhat_lower", "yhat_upper"]]
    all_forecasts.append(result)

# pd.concat raises an opaque "No objects to concatenate" on an empty list;
# fail with an actionable message instead (same exception type), and do so
# before touching the output file.
if not all_forecasts:
    raise ValueError(
        "No forecasts were produced: every crop-state series had fewer than "
        f"{MIN_OBSERVATIONS} rows or failed to fit."
    )

# Combine all forecasts
forecast_df = pd.concat(all_forecasts, ignore_index=True)

# Save to CSV
os.makedirs(os.path.dirname(output_path), exist_ok=True)
forecast_df.to_csv(output_path, index=False)

print(f"\n✅ Forecasts saved to: {output_path}")


📊 Generating forecasts for 1112 crop-state combinations...



  0%|          | 0/1112 [00:00<?, ?it/s]17:52:35 - cmdstanpy - INFO - Chain [1] start processing
17:52:35 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
  0%|          | 1/1112 [00:00<10:46,  1.72it/s]17:52:35 - cmdstanpy - INFO - Chain [1] start processing
17:52:36 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
  0%|          | 2/1112 [00:00<06:22,  2.90it/s]17:52:36 - cmdstanpy - INFO - Chain [1] start processing
17:52:36 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
  0%|          | 3/1112 [00:00<05:14,  3.53it/s]17:52:36 - cmdstanpy - INFO - Chain [1] start processing
17:52:36 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
  0%|          | 4/1112 [00:01<04:25,  4.17it/s]17:52:36 - cmdstanpy - INFO - Chain [1] start processing
17:52:36 - cmdstanpy - INFO - Chain [1] done processing
  dates = pd.date_range(
  0%|          | 5/1112 [00:01<04:14,  4.35it/s]17:52:36 - cmdstanpy - INFO - 


✅ Forecasts saved to: ../data/processed/all_crop_forecasts.csv
