In [1]:
# ============================================================================
# MODIFIED OPEN_INV NOTEBOOK FOR DEMO1.CSV DATA
# Agricultural Market Price Prediction & Intervention Analysis System
# ============================================================================

import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

# ============================================================================
# STEP 1: DATA LOADING AND PREPROCESSING FOR DEMO1.CSV
# ============================================================================

def load_and_clean_demo_data():
    """Load and clean demo1.csv data"""
    print("Loading demo1.csv data...")
    
    # Load the demo data
    df = pd.read_csv('demo1.csv')
    
    # Display basic info
    print(f"Original dataset shape: {df.shape}")
    print("Original columns:", df.columns.tolist())
    
    # Clean column names for easier handling
    df.columns = ['Sl_no', 'District_Name', 'Market_Name', 'Commodity', 
                  'Variety', 'Grade', 'Min_Price', 'Max_Price', 'Price_Date', 
                  'Modal_Price', 'Min_Price_dup', 'Max_Price_dup']
    
    # Drop duplicate columns
    df = df.drop(columns=['Min_Price_dup', 'Max_Price_dup'])
    
    # Convert data types
    numeric_columns = ['Min_Price', 'Max_Price', 'Modal_Price']
    for col in numeric_columns:
        df[col] = pd.to_numeric(df[col], errors='coerce')
    
    # Convert Price_Date to datetime (format: dd-MMM-yy)
    df['Price_Date'] = pd.to_datetime(df['Price_Date'], format='%d-%b-%y', errors='coerce')
    
    # Remove rows with invalid data
    df = df.dropna(subset=['Price_Date', 'Modal_Price']).reset_index(drop=True)
    
    # Sort data for consistency
    df = df.sort_values(['Commodity', 'District_Name', 'Price_Date']).reset_index(drop=True)
    
    print(f"Cleaned dataset shape: {df.shape}")
    print(f"Date range: {df['Price_Date'].min()} to {df['Price_Date'].max()}")
    print(f"Unique commodities: {df['Commodity'].nunique()}")
    print(f"Unique districts: {df['District_Name'].nunique()}")
    print(f"Unique markets: {df['Market_Name'].nunique()}")
    
    return df

# Load the data
df = load_and_clean_demo_data()

Loading demo1.csv data...
Original dataset shape: (11990, 12)
Original columns: ['Sl no.', 'District Name', 'Market Name', 'Commodity', 'Variety', 'Grade', 'Min Price (Rs./Quintal)', 'Max Price (Rs./Quintal)', 'Price Date', 'Modal Price (Rs./kg)', 'Min Price ', 'Max price']
Cleaned dataset shape: (11990, 10)
Date range: 2024-07-27 00:00:00 to 2025-07-27 00:00:00
Unique commodities: 10
Unique districts: 5
Unique markets: 9


In [2]:
# ============================================================================
# STEP 2: FEATURE ENGINEERING
# ============================================================================

def create_comprehensive_features(df):
    """Create comprehensive features for price prediction"""
    print("Creating comprehensive features...")
    
    df_features = df.copy()
    
    # Time-based features
    df_features['Year'] = df_features['Price_Date'].dt.year
    df_features['Month'] = df_features['Price_Date'].dt.month
    df_features['Day'] = df_features['Price_Date'].dt.day
    df_features['DayOfWeek'] = df_features['Price_Date'].dt.dayofweek
    df_features['Quarter'] = df_features['Price_Date'].dt.quarter
    df_features['Week'] = df_features['Price_Date'].dt.isocalendar().week
    
    # Price-related features
    df_features['Price_Range'] = df_features['Max_Price'] - df_features['Min_Price']
    df_features['Price_Volatility'] = np.where(
        df_features['Modal_Price'] != 0,
        df_features['Price_Range'] / df_features['Modal_Price'],
        0
    )
    df_features['Price_Mid'] = (df_features['Max_Price'] + df_features['Min_Price']) / 2
    df_features['Modal_vs_Mid'] = df_features['Modal_Price'] - df_features['Price_Mid']
    
    # Seasonal indicators
    df_features['Is_Summer'] = df_features['Month'].isin([3, 4, 5]).astype(int)
    df_features['Is_Monsoon'] = df_features['Month'].isin([6, 7, 8, 9]).astype(int)
    df_features['Is_Winter'] = df_features['Month'].isin([10, 11, 12, 1, 2]).astype(int)
    df_features['Is_Peak_Season'] = df_features['Month'].isin([10, 11, 12, 1]).astype(int)
    
    # Create lag features by commodity and district
    print("Creating lag features...")
    grouped = df_features.groupby(['Commodity', 'District_Name'])
    
    # Initialize lag columns
    lag_periods = [1, 3, 7, 15, 30]
    ma_windows = [3, 7, 15, 30]
    
    for lag in lag_periods:
        df_features[f'Modal_Price_lag_{lag}'] = np.nan
    for window in ma_windows:
        df_features[f'Modal_Price_MA_{window}'] = np.nan
        df_features[f'Price_Range_MA_{window}'] = np.nan
    
    # Create features group by group
    lag_data = []
    for name, group in grouped:
        group_sorted = group.sort_values('Price_Date').copy()
        
        # Lag features
        for lag in lag_periods:
            group_sorted[f'Modal_Price_lag_{lag}'] = group_sorted['Modal_Price'].shift(lag)
        
        # Moving averages
        for window in ma_windows:
            group_sorted[f'Modal_Price_MA_{window}'] = (
                group_sorted['Modal_Price'].rolling(window=window, min_periods=1).mean()
            )
            group_sorted[f'Price_Range_MA_{window}'] = (
                group_sorted['Price_Range'].rolling(window=window, min_periods=1).mean()
            )
        
        # Price trends
        group_sorted['Price_Trend_7'] = (
            group_sorted['Modal_Price'] - group_sorted['Modal_Price'].shift(7)
        )
        group_sorted['Price_Trend_30'] = (
            group_sorted['Modal_Price'] - group_sorted['Modal_Price'].shift(30)
        )
        
        lag_data.append(group_sorted)
    
    # Concatenate all groups
    df_final = pd.concat(lag_data, ignore_index=True)
    df_final = df_final.sort_values(['Commodity', 'District_Name', 'Price_Date']).reset_index(drop=True)
    
    print("Feature engineering completed!")
    return df_final

# Create features
df = create_comprehensive_features(df)

Creating comprehensive features...
Creating lag features...
Feature engineering completed!


In [3]:
# ============================================================================
# STEP 3: CATEGORICAL ENCODING
# ============================================================================

print("Encoding categorical variables...")

# Label encoders
le_commodity = LabelEncoder()
le_district = LabelEncoder()
le_market = LabelEncoder()
le_variety = LabelEncoder()
le_grade = LabelEncoder()

df['Commodity_encoded'] = le_commodity.fit_transform(df['Commodity'].astype(str))
df['District_encoded'] = le_district.fit_transform(df['District_Name'].astype(str))
df['Market_encoded'] = le_market.fit_transform(df['Market_Name'].astype(str))
df['Variety_encoded'] = le_variety.fit_transform(df['Variety'].astype(str))
df['Grade_encoded'] = le_grade.fit_transform(df['Grade'].astype(str))

print("Categorical encoding completed!")

# Check for missing values
print("\nMissing values in key columns:")
missing_cols = df.isnull().sum()
print(missing_cols[missing_cols > 0])

Encoding categorical variables...
Categorical encoding completed!

Missing values in key columns:
Modal_Price_lag_1       49
Modal_Price_lag_3      145
Modal_Price_lag_7      337
Modal_Price_lag_15     721
Modal_Price_lag_30    1433
Price_Trend_7          337
Price_Trend_30        1433
dtype: int64


In [4]:
# ============================================================================
# STEP 4: MACHINE LEARNING MODEL TRAINING
# ============================================================================

def train_prediction_models():
    """Train multiple ML models for price prediction"""
    print("Training machine learning models...")
    
    # Define feature columns
    feature_columns = [
        'Commodity_encoded', 'District_encoded', 'Market_encoded', 
        'Variety_encoded', 'Grade_encoded',
        'Year', 'Month', 'Day', 'DayOfWeek', 'Quarter', 'Week',
        'Min_Price', 'Max_Price', 'Price_Range', 'Price_Mid', 'Modal_vs_Mid',
        'Price_Volatility', 'Is_Summer', 'Is_Monsoon', 'Is_Winter', 'Is_Peak_Season'
    ]
    
    # Add available lag features
    lag_features = [col for col in df.columns if 'lag' in col or 'MA' in col or 'Trend' in col]
    feature_columns.extend(lag_features)
    
    print(f"Total features: {len(feature_columns)}")
    
    # Prepare ML dataset
    ml_df = df[feature_columns + ['Modal_Price']].copy()
    ml_df = ml_df.dropna().reset_index(drop=True)
    
    print(f"ML training data shape: {ml_df.shape}")
    
    if len(ml_df) == 0:
        print("❌ No data available for ML training!")
        return None, None
    
    # Split features and target
    X = ml_df[feature_columns]
    y = ml_df['Modal_Price']
    
    # Train-test split
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42, stratify=ml_df.iloc[:, 0] if len(ml_df) > 100 else None
    )
    
    # Scale features for linear models
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Define models
    models = {
        'Random Forest': RandomForestRegressor(
            n_estimators=100, max_depth=15, random_state=42, n_jobs=-1
        ),
        'Gradient Boosting': GradientBoostingRegressor(
            n_estimators=100, max_depth=6, learning_rate=0.1, random_state=42
        ),
        'Linear Regression': LinearRegression()
    }
    
    # Add XGBoost if available
    try:
        import xgboost as xgb
        models['XGBoost'] = xgb.XGBRegressor(
            n_estimators=100, max_depth=6, learning_rate=0.1, random_state=42
        )
    except ImportError:
        print("XGBoost not available, skipping...")
    
    # Train and evaluate models
    model_results = {}
    
    for name, model in models.items():
        try:
            print(f"Training {name}...")
            
            if name == 'Linear Regression':
                model.fit(X_train_scaled, y_train)
                y_pred = model.predict(X_test_scaled)
                X_for_pred = X_test_scaled
            else:
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)
                X_for_pred = X_test
            
            # Calculate metrics
            mae = mean_absolute_error(y_test, y_pred)
            mse = mean_squared_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            
            model_results[name] = {
                'model': model,
                'MAE': mae,
                'MSE': mse,
                'R2': r2,
                'scaler': scaler if name == 'Linear Regression' else None
            }
            
            print(f"✓ {name} - MAE: {mae:.2f}, RMSE: {np.sqrt(mse):.2f}, R²: {r2:.3f}")
            
        except Exception as e:
            print(f"❌ {name} failed: {str(e)}")
    
    # Find best model
    if model_results:
        best_model_name = min(model_results.keys(), key=lambda x: model_results[x]['MAE'])
        print(f"\n🏆 Best model: {best_model_name} (MAE: {model_results[best_model_name]['MAE']:.2f})")
        return model_results, feature_columns
    
    return None, None

# Train models
model_results, feature_columns = train_prediction_models()

Training machine learning models...
Total features: 36
ML training data shape: (10557, 37)
Training Random Forest...
✓ Random Forest - MAE: 0.34, RMSE: 1.19, R²: 0.998
Training Gradient Boosting...
✓ Gradient Boosting - MAE: 0.39, RMSE: 1.18, R²: 0.998
Training Linear Regression...
✓ Linear Regression - MAE: 0.00, RMSE: 0.00, R²: 1.000
Training XGBoost...
✓ XGBoost - MAE: 0.42, RMSE: 1.12, R²: 0.998

🏆 Best model: Linear Regression (MAE: 0.00)


In [5]:
# ============================================================================
# STEP 5: TIME SERIES ANALYSIS (ARIMA)
# ============================================================================

def build_arima_models():
    """Build ARIMA models for time series forecasting"""
    print("Building ARIMA models...")
    
    try:
        from statsmodels.tsa.arima.model import ARIMA
    except ImportError:
        print("❌ statsmodels not available. Install with: pip install statsmodels")
        return {}
    
    arima_models = {}
    commodities = df['Commodity'].unique()
    
    for commodity in commodities:
        try:
            # Get commodity data
            commodity_data = df[df['Commodity'] == commodity].copy()
            
            if len(commodity_data) < 50:
                continue
            
            # Create time series
            ts_data = (commodity_data.groupby('Price_Date')['Modal_Price']
                      .mean()
                      .resample('D')
                      .mean()
                      .fillna(method='ffill')
                      .dropna())
            
            if len(ts_data) < 30:
                continue
            
            # Fit ARIMA model
            model = ARIMA(ts_data, order=(1, 1, 1))
            fitted_model = model.fit()
            
            arima_models[commodity] = {
                'model': fitted_model,
                'data': ts_data,
                'last_date': ts_data.index[-1],
                'last_price': ts_data.iloc[-1]
            }
            
            print(f"✓ ARIMA model built for {commodity}")
            
        except Exception as e:
            print(f"❌ ARIMA failed for {commodity}: {str(e)}")
    
    print(f"ARIMA models built for {len(arima_models)} commodities")
    return arima_models

# Build ARIMA models
arima_models = build_arima_models()

Building ARIMA models...
✓ ARIMA model built for Apple
✓ ARIMA model built for Banana
✓ ARIMA model built for Brinjal
✓ ARIMA model built for Cabbage
✓ ARIMA model built for Grapes
✓ ARIMA model built for Guava
✓ ARIMA model built for Onion
✓ ARIMA model built for Papaya
✓ ARIMA model built for Potato
✓ ARIMA model built for Tomato
ARIMA models built for 10 commodities


In [6]:
# ============================================================================
# STEP 6: PRICE PREDICTION FUNCTION
# ============================================================================

def predict_future_prices(commodity, district=None, days_ahead=30):
    """Predict future prices for a specific commodity"""
    print(f"Predicting prices for {commodity}" + (f" in {district}" if district else ""))
    
    # Filter data
    if district:
        filtered_data = df[(df['Commodity'] == commodity) & 
                          (df['District_Name'] == district)].copy()
    else:
        filtered_data = df[df['Commodity'] == commodity].copy()
    
    if len(filtered_data) == 0:
        return f"No data available for {commodity}" + (f" in {district}" if district else "")
    
    # Try ARIMA first
    if commodity in arima_models:
        try:
            arima_model = arima_models[commodity]['model']
            forecast = arima_model.forecast(steps=days_ahead)
            
            last_date = arima_models[commodity]['last_date']
            forecast_dates = pd.date_range(
                start=last_date + pd.Timedelta(days=1),
                periods=days_ahead,
                freq='D'
            )
            
            return pd.DataFrame({
                'Date': forecast_dates,
                'Predicted_Price': forecast.values,
                'Method': 'ARIMA',
                'Commodity': commodity,
                'District': district if district else 'All Districts'
            })
            
        except Exception as e:
            print(f"ARIMA prediction failed: {e}")
    
    # Fallback to ML model
    if model_results is None:
        return "No trained models available"
    
    try:
        best_model_name = min(model_results.keys(), key=lambda x: model_results[x]['MAE'])
        best_model = model_results[best_model_name]['model']
        best_scaler = model_results[best_model_name]['scaler']
        
        # Get latest data point
        latest_data = filtered_data.sort_values('Price_Date').tail(1).copy()
        
        if len(latest_data) == 0:
            return "No recent data available"
        
        # Simple prediction (can be enhanced)
        predictions = []
        for i in range(days_ahead):
            try:
                X_pred = latest_data[feature_columns].values
                
                # Handle missing values
                if np.isnan(X_pred).any():
                    X_pred = np.nan_to_num(X_pred, nan=0)
                
                # Scale if needed
                if best_model_name == 'Linear Regression' and best_scaler:
                    X_pred = best_scaler.transform(X_pred)
                
                # Predict
                prediction = best_model.predict(X_pred)[0]
                prediction = max(prediction, 0)  # Ensure positive
                predictions.append(prediction)
                
            except Exception as e:
                print(f"Error in prediction step {i}: {e}")
                break
        
        # Create forecast dates
        last_date = latest_data['Price_Date'].iloc[0]
        forecast_dates = pd.date_range(
            start=last_date + pd.Timedelta(days=1),
            periods=len(predictions),
            freq='D'
        )
        
        return pd.DataFrame({
            'Date': forecast_dates,
            'Predicted_Price': predictions,
            'Method': f'ML ({best_model_name})',
            'Commodity': commodity,
            'District': district if district else 'All Districts'
        })
        
    except Exception as e:
        return f"Prediction failed: {str(e)}"

In [7]:
# ============================================================================
# STEP 7: MARKET INTERVENTION ANALYSIS
# ============================================================================

def market_intervention_analysis(commodity, price_increase_threshold=20):
    """Analyze if market intervention is needed"""
    print(f"\n=== Market Intervention Analysis for {commodity} ===")
    
    # Get recent price data
    recent_data = df[df['Commodity'] == commodity].tail(30)
    if len(recent_data) == 0:
        return f"No data available for {commodity}"
    
    current_avg_price = recent_data['Modal_Price'].mean()
    
    # Get price predictions
    future_prices = predict_future_prices(commodity, days_ahead=30)
    
    if isinstance(future_prices, str):
        return f"Cannot analyze {commodity}: {future_prices}"
    
    # Calculate trends
    max_predicted_price = future_prices['Predicted_Price'].max()
    avg_predicted_price = future_prices['Predicted_Price'].mean()
    
    # Calculate percentage changes
    max_increase_pct = ((max_predicted_price - current_avg_price) / current_avg_price) * 100
    avg_increase_pct = ((avg_predicted_price - current_avg_price) / current_avg_price) * 100
    
    # Determine intervention need
    intervention_needed = max_increase_pct > price_increase_threshold
    
    # Determine urgency
    if max_increase_pct > 30:
        urgency = "HIGH"
    elif max_increase_pct > 20:
        urgency = "MEDIUM"
    else:
        urgency = "LOW"
    
    return {
        'Commodity': commodity,
        'Current_Avg_Price': round(current_avg_price, 2),
        'Max_Predicted_Price': round(max_predicted_price, 2),
        'Avg_Predicted_Price': round(avg_predicted_price, 2),
        'Max_Price_Increase_Pct': round(max_increase_pct, 2),
        'Avg_Price_Increase_Pct': round(avg_increase_pct, 2),
        'Intervention_Needed': intervention_needed,
        'Urgency_Level': urgency,
        'Recommendation': 'RELEASE BUFFER STOCK' if intervention_needed else 'MONITOR PRICES',
        'Districts_Affected': df[df['Commodity'] == commodity]['District_Name'].nunique()
    }

def comprehensive_market_analysis():
    """Analyze all commodities for intervention needs"""
    print("\n" + "="*80)
    print("🚨 COMPREHENSIVE MARKET INTERVENTION ANALYSIS")
    print("="*80)
    
    commodities = df['Commodity'].unique()
    interventions_needed = []
    stable_commodities = []
    
    for commodity in commodities:
        try:
            analysis = market_intervention_analysis(commodity)
            if isinstance(analysis, dict):
                if analysis['Intervention_Needed']:
                    interventions_needed.append(analysis)
                else:
                    stable_commodities.append(analysis)
        except Exception as e:
            print(f"Analysis failed for {commodity}: {e}")
    
    # Sort by urgency
    interventions_needed.sort(key=lambda x: (
        0 if x['Urgency_Level'] == 'HIGH' else 1 if x['Urgency_Level'] == 'MEDIUM' else 2,
        -x['Max_Price_Increase_Pct']
    ))
    
    return interventions_needed, stable_commodities

# Run comprehensive analysis
interventions_needed, stable_commodities = comprehensive_market_analysis()


🚨 COMPREHENSIVE MARKET INTERVENTION ANALYSIS

=== Market Intervention Analysis for Apple ===
Predicting prices for Apple

=== Market Intervention Analysis for Banana ===
Predicting prices for Banana

=== Market Intervention Analysis for Brinjal ===
Predicting prices for Brinjal

=== Market Intervention Analysis for Cabbage ===
Predicting prices for Cabbage

=== Market Intervention Analysis for Grapes ===
Predicting prices for Grapes

=== Market Intervention Analysis for Guava ===
Predicting prices for Guava

=== Market Intervention Analysis for Onion ===
Predicting prices for Onion

=== Market Intervention Analysis for Papaya ===
Predicting prices for Papaya

=== Market Intervention Analysis for Potato ===
Predicting prices for Potato

=== Market Intervention Analysis for Tomato ===
Predicting prices for Tomato


In [8]:
# ============================================================================
# STEP 8: GENERATE FINAL REPORT
# ============================================================================

def generate_final_report():
    """Generate comprehensive intervention report"""
    print("\n" + "="*100)
    print("📊 AGRICULTURAL MARKET PRICE PREDICTION & INTERVENTION REPORT")
    print("="*100)
    print(f"Report Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    print(f"Data Source: demo1.csv")
    print(f"Analysis Period: {df['Price_Date'].min().date()} to {df['Price_Date'].max().date()}")
    
    if interventions_needed:
        print(f"\n🔴 IMMEDIATE INTERVENTIONS REQUIRED: {len(interventions_needed)} commodities")
        print("-" * 100)
        
        # High priority
        high_priority = [x for x in interventions_needed if x['Urgency_Level'] == 'HIGH']
        if high_priority:
            print("\n🚨 HIGH PRIORITY (>30% price increase expected):")
            for item in high_priority:
                print(f"   • {item['Commodity']}")
                print(f"     Current Price: ₹{item['Current_Avg_Price']}")
                print(f"     Expected Max Price: ₹{item['Max_Predicted_Price']}")
                print(f"     Price Increase: {item['Max_Price_Increase_Pct']:.1f}%")
                print(f"     Districts Affected: {item['Districts_Affected']}")
                print(f"     Action: {item['Recommendation']}")
                print()
        
        # Medium priority
        medium_priority = [x for x in interventions_needed if x['Urgency_Level'] == 'MEDIUM']
        if medium_priority:
            print("🟡 MEDIUM PRIORITY (20-30% price increase expected):")
            for item in medium_priority:
                print(f"   • {item['Commodity']}: {item['Max_Price_Increase_Pct']:.1f}% increase expected")
    
    else:
        print("\n✅ NO IMMEDIATE INTERVENTIONS REQUIRED")
        print("All commodity prices are within acceptable ranges.")
    
    # Market summary
    print(f"\n📊 MARKET SUMMARY:")
    print(f"   • Total commodities monitored: {df['Commodity'].nunique()}")
    print(f"   • Total districts covered: {df['District_Name'].nunique()}")
    print(f"   • Total markets: {df['Market_Name'].nunique()}")
    print(f"   • Commodities requiring intervention: {len(interventions_needed)}")
    print(f"   • Stable commodities: {len(stable_commodities)}")
    print(f"   • Data records analyzed: {len(df):,}")
    
    # Model performance summary
    if model_results:
        print(f"\n🤖 MODEL PERFORMANCE:")
        for name, results in model_results.items():
            print(f"   • {name}: MAE={results['MAE']:.2f}, R²={results['R2']:.3f}")
    
    # Detailed intervention table
    if interventions_needed:
        print(f"\n📋 DETAILED INTERVENTION RECOMMENDATIONS:")
        intervention_df = pd.DataFrame(interventions_needed)
        display_cols = ['Commodity', 'Current_Avg_Price', 'Max_Predicted_Price', 
                       'Max_Price_Increase_Pct', 'Urgency_Level', 'Recommendation']
        print(intervention_df[display_cols].to_string(index=False))
    
    return interventions_needed

# Generate final report
final_interventions = generate_final_report()


📊 AGRICULTURAL MARKET PRICE PREDICTION & INTERVENTION REPORT
Report Generated: 2025-07-29 23:46:04
Data Source: demo1.csv
Analysis Period: 2024-07-27 to 2025-07-27

🔴 IMMEDIATE INTERVENTIONS REQUIRED: 3 commodities
----------------------------------------------------------------------------------------------------

🚨 HIGH PRIORITY (>30% price increase expected):
   • Tomato
     Current Price: ₹14.12
     Expected Max Price: ₹24.7
     Price Increase: 75.0%
     Districts Affected: 5
     Action: RELEASE BUFFER STOCK

🟡 MEDIUM PRIORITY (20-30% price increase expected):
   • Guava: 23.7% increase expected
   • Apple: 22.1% increase expected

📊 MARKET SUMMARY:
   • Total commodities monitored: 10
   • Total districts covered: 5
   • Total markets: 9
   • Commodities requiring intervention: 3
   • Stable commodities: 7
   • Data records analyzed: 11,990

🤖 MODEL PERFORMANCE:
   • Random Forest: MAE=0.34, R²=0.998
   • Gradient Boosting: MAE=0.39, R²=0.998
   • Linear Regression: MAE=0.00

In [9]:
# ============================================================================
# STEP 9: INTERACTIVE PREDICTION SYSTEM
# ============================================================================

def interactive_predictions():
    """Generate predictions for all commodities"""
    print("\n" + "="*80)
    print("🔮 COMMODITY PRICE FORECASTS")
    print("="*80)
    
    commodities = df['Commodity'].unique()
    
    for commodity in commodities[:5]:  # Show first 5 commodities
        print(f"\n--- {commodity.upper()} PRICE FORECAST ---")
        
        # Get 15-day prediction
        prediction = predict_future_prices(commodity, days_ahead=15)
        
        if isinstance(prediction, pd.DataFrame):
            print("Next 7 days forecast:")
            display_pred = prediction.head(7)[['Date', 'Predicted_Price', 'Method']].copy()
            display_pred['Date'] = display_pred['Date'].dt.strftime('%Y-%m-%d')
            display_pred['Predicted_Price'] = display_pred['Predicted_Price'].round(2)
            print(display_pred.to_string(index=False))
            
            # Show trend
            current_price = df[df['Commodity'] == commodity]['Modal_Price'].tail(10).mean()
            avg_predicted = prediction['Predicted_Price'].head(7).mean()
            trend = "📈 INCREASING" if avg_predicted > current_price else "📉 DECREASING"
            change = ((avg_predicted - current_price) / current_price) * 100
            
            print(f"Current avg: ₹{current_price:.2f}")
            print(f"7-day avg predicted: ₹{avg_predicted:.2f}")
            print(f"Trend: {trend} ({change:+.1f}%)")
        else:
            print(prediction)

# Run interactive predictions
interactive_predictions()


🔮 COMMODITY PRICE FORECASTS

--- APPLE PRICE FORECAST ---
Predicting prices for Apple
Next 7 days forecast:
      Date  Predicted_Price Method
2025-07-28            111.5  ARIMA
2025-07-29            111.5  ARIMA
2025-07-30            111.5  ARIMA
2025-07-31            111.5  ARIMA
2025-08-01            111.5  ARIMA
2025-08-02            111.5  ARIMA
2025-08-03            111.5  ARIMA
Current avg: ₹93.00
7-day avg predicted: ₹111.50
Trend: 📈 INCREASING (+19.9%)

--- BANANA PRICE FORECAST ---
Predicting prices for Banana
Next 7 days forecast:
      Date  Predicted_Price Method
2025-07-28            12.88  ARIMA
2025-07-29            12.93  ARIMA
2025-07-30            12.93  ARIMA
2025-07-31            12.93  ARIMA
2025-08-01            12.93  ARIMA
2025-08-02            12.93  ARIMA
2025-08-03            12.93  ARIMA
Current avg: ₹14.40
7-day avg predicted: ₹12.92
Trend: 📉 DECREASING (-10.2%)

--- BRINJAL PRICE FORECAST ---
Predicting prices for Brinjal
Next 7 days forecast:
      Date

In [10]:
# ============================================================================
# STEP 10: SAVE RESULTS
# ============================================================================

def save_results():
    """Save models and results"""
    import pickle
    
    try:
        # Save ML models
        if model_results:
            with open('demo1_ml_models.pkl', 'wb') as f:
                pickle.dump(model_results, f)
        
        # Save ARIMA models
        if arima_models:
            with open('demo1_arima_models.pkl', 'wb') as f:
                pickle.dump(arima_models, f)
        
        # Save intervention results
        if final_interventions:
            intervention_df = pd.DataFrame(final_interventions)
            intervention_df.to_csv('demo1_intervention_recommendations.csv', index=False)
        
        # Save processed data
        df.to_csv('demo1_processed_data.csv', index=False)
        
        print("\n✅ Results saved successfully!")
        print("Files created:")
        print("   • demo1_ml_models.pkl")
        print("   • demo1_arima_models.pkl") 
        print("   • demo1_intervention_recommendations.csv")
        print("   • demo1_processed_data.csv")
        
    except Exception as e:
        print(f"❌ Error saving results: {e}")

# Save all results
save_results()

print("\n" + "="*100)
print("🎉 ANALYSIS COMPLETE!")
print("="*100)


✅ Results saved successfully!
Files created:
   • demo1_ml_models.pkl
   • demo1_arima_models.pkl
   • demo1_intervention_recommendations.csv
   • demo1_processed_data.csv

🎉 ANALYSIS COMPLETE!


In [None]:
sns.hetmap()