In [16]:
import pandas as pd
import numpy as np
import os
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, r2_score
import joblib

# Seed NumPy's global RNG so every run regenerates the same synthetic dataset.
np.random.seed(42)

# Dimensions of the synthetic panel. range() excludes its upper bound, so the
# years covered are 2000 through 2028 inclusive.
years = list(range(2000, 2029))
states = ["Punjab", "Haryana", "Uttar Pradesh"]
crops = ["Wheat", "Rice", "Maize", "Sugarcane", "Cotton", "Barley", "Gram", "Soybean"]

# Generate one row per (state, crop, year). The order of the np.random calls
# below determines the generated values for a given seed, so keep it stable.
data = []
for state in states:
    for crop in crops:
        # Drawn once per (state, crop): the starting inventory used for the
        # first year, and a storage capacity that is constant across years.
        last_inventory = np.random.randint(500, 5000)
        avg_storage = np.random.randint(2000, 10000)
        for year in years:
            production = np.random.uniform(1000, 10000)
            # Demand is production scaled by a random factor in [0.8, 1.2).
            forecast_demand = production * np.random.uniform(0.8, 1.2)
            # Target = demand plus noise in [-1000, 1000). Demand can be as
            # low as 800, so the raw sum can dip below zero; a negative
            # inventory requirement is meaningless, hence the clamp at 0.
            inventory_required = max(
                0.0, forecast_demand + np.random.uniform(-1000, 1000)
            )
            data.append({
                "Crop": crop,
                "State": state,
                "Year": year,
                "Forecasted_Demand": round(forecast_demand, 2),
                "Last_Year_Inventory": round(last_inventory, 2),
                "Avg_Storage_Capacity": avg_storage,
                "Inventory_Required": round(inventory_required, 2)
            })
            # This year's production is what the next year's row records as
            # "Last_Year_Inventory".
            last_inventory = production

# Collect the generated rows into a DataFrame (one column per dict key).
inventory_df = pd.DataFrame(data)

# Save to CSV; the path is relative to the current working directory, and the
# target folder is created if it does not exist yet.
os.makedirs("../data/processed", exist_ok=True)
inventory_df.to_csv("../data/processed/inventory_dataset.csv", index=False)

print("✅ inventory_dataset.csv has been updated and saved in the 'data/processed' folder.")

# Read the dataset back from disk so training uses exactly what was persisted.
df = pd.read_csv("../data/processed/inventory_dataset.csv")

# Expand the two categorical columns into indicator columns; the remaining
# columns are passed through unchanged.
df_encoded = pd.get_dummies(df, columns=["Crop", "State"])

# Target is Inventory_Required; every other encoded column is a feature.
y = df_encoded["Inventory_Required"]
X = df_encoded.drop(columns=["Inventory_Required"])

# Hold out 20% of the rows for evaluation, with a fixed seed for a
# repeatable split.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Fit a 100-tree random forest (fit() returns the fitted estimator itself).
model = RandomForestRegressor(n_estimators=100, random_state=42).fit(X_train, y_train)

# Score the held-out rows.
y_pred = model.predict(X_test)
mae, r2 = mean_absolute_error(y_test, y_pred), r2_score(y_test, y_pred)

print(f"📉 MAE: {mae:.2f}")
print(f"📈 R² Score: {r2:.2f}")

# Example prediction: a one-row frame (first held-out sample) keeps the 2-D
# shape that predict() expects.
new_input = X_test.iloc[:1]
predicted_inventory = model.predict(new_input)
print("Predicted Inventory Requirement:", predicted_inventory[0])

# Persist the trained model, creating the output folder when it is missing.
os.makedirs("../models", exist_ok=True)
joblib.dump(model, "../models/inventory_predictor.pkl")


✅ inventory_dataset.csv has been updated and saved in the 'data/processed' folder.
📉 MAE: 558.20
📈 R² Score: 0.95
Predicted Inventory Requirement: 6773.547099999999


['../models/inventory_predictor.pkl']