In [21]:
import pandas as pd
import numpy as np
from datetime import datetime
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import warnings
warnings.filterwarnings('ignore')

# Load the data - the CSV has proper headers
df = pd.read_csv('fulldataOG(1).csv')

# Display basic info about the dataset
print("Dataset shape:", df.shape)
print("Columns:", df.columns.tolist())
print("\nFirst few rows:")
print(df.head())

# Clean column names for easier handling
df.columns = ['Sl_no', 'District_Name', 'Market_Name', 'Commodity', 
              'Variety', 'Grade', 'Min_Price', 'Max_Price', 'Modal_Price', 'Price_Date']

# Convert data types
numeric_columns = ['Min_Price', 'Max_Price', 'Modal_Price']
for col in numeric_columns:
    df[col] = pd.to_numeric(df[col], errors='coerce')

# Convert Price_Date to datetime
df['Price_Date'] = pd.to_datetime(df['Price_Date'], format='%d-%b-%y', errors='coerce')

# Remove rows with invalid data
df = df.dropna(subset=['Price_Date', 'Modal_Price']).reset_index(drop=True)

print(f"\nData shape after cleaning: {df.shape}")
print(f"Date range: {df['Price_Date'].min()} to {df['Price_Date'].max()}")
print(f"Unique commodities: {df['Commodity'].nunique()}")
print(f"Unique districts: {df['District_Name'].nunique()}")


Dataset shape: (11991, 10)
Columns: ['Column1', 'Column2', 'Column3', 'Column4', 'Column5', 'Column6', 'Column7', 'Column8', 'Column9', 'Column10']

First few rows:
  Column1                    Column2                    Column3    Column4  \
0  Sl no.              District Name                Market Name  Commodity   
1       1  Chattrapati Sambhajinagar  Chattrapati Sambhajinagar      Apple   
2       2  Chattrapati Sambhajinagar  Chattrapati Sambhajinagar      Apple   
3       3  Chattrapati Sambhajinagar  Chattrapati Sambhajinagar      Apple   
4       4  Chattrapati Sambhajinagar  Chattrapati Sambhajinagar      Apple   

   Column5 Column6                  Column7                  Column8  \
0  Variety   Grade  Min Price (Rs./Quintal)  Max Price (Rs./Quintal)   
1    Other   Local                     1000                    15000   
2    Other   Local                     3400                     9700   
3    Other   Local                     3500                     8500   
4    O

In [22]:
def create_lag_features_fixed(df):
    """
    Create lag features with proper index handling to avoid reindex errors
    """
    
    # Create a copy to avoid modifying original dataframe
    df_with_lags = df.copy()
    
    # Extract time features first
    df_with_lags['Year'] = df_with_lags['Price_Date'].dt.year
    df_with_lags['Month'] = df_with_lags['Price_Date'].dt.month
    df_with_lags['Day'] = df_with_lags['Price_Date'].dt.day
    df_with_lags['DayOfWeek'] = df_with_lags['Price_Date'].dt.dayofweek
    df_with_lags['Quarter'] = df_with_lags['Price_Date'].dt.quarter
    
    # Create price-related features
    df_with_lags['Price_Range'] = df_with_lags['Max_Price'] - df_with_lags['Min_Price']
    df_with_lags['Price_Volatility'] = np.where(
        df_with_lags['Modal_Price'] != 0, 
        df_with_lags['Price_Range'] / df_with_lags['Modal_Price'], 
        0
    )
    
    # Create seasonal indicators
    df_with_lags['Is_Summer'] = df_with_lags['Month'].isin([3, 4, 5]).astype(int)
    df_with_lags['Is_Monsoon'] = df_with_lags['Month'].isin([6, 7, 8, 9]).astype(int)
    df_with_lags['Is_Winter'] = df_with_lags['Month'].isin([10, 11, 12, 1, 2]).astype(int)
    
    # Create lag features using a more robust method
    print("Creating lag features...")
    
    # Group by commodity and district
    grouped = df_with_lags.groupby(['Commodity', 'District_Name'])
    
    # Initialize lag columns with NaN
    for lag in [1, 7, 30]:
        df_with_lags[f'Modal_Price_lag_{lag}'] = np.nan
    
    for window in [7, 30]:
        df_with_lags[f'Modal_Price_MA_{window}'] = np.nan
    
    # Create lag features group by group
    lag_data = []
    
    for name, group in grouped:
        group_sorted = group.sort_values('Price_Date').copy()
        
        # Create lag features for this group
        for lag in [1, 7, 30]:
            group_sorted[f'Modal_Price_lag_{lag}'] = group_sorted['Modal_Price'].shift(lag)
        
        # Create moving averages for this group
        for window in [7, 30]:
            group_sorted[f'Modal_Price_MA_{window}'] = (
                group_sorted['Modal_Price']
                .rolling(window=window, min_periods=1)
                .mean()
            )
        
        lag_data.append(group_sorted)
    
    # Concatenate all groups back together
    df_with_lags = pd.concat(lag_data, ignore_index=True)
    
    # Sort by original order
    df_with_lags = df_with_lags.sort_values(['Commodity', 'District_Name', 'Price_Date']).reset_index(drop=True)
    
    print("Lag features created successfully!")
    return df_with_lags

# Apply the corrected feature engineering
df = create_lag_features_fixed(df)

print(f"Final dataset shape: {df.shape}")
print("Columns with lag features:")
lag_cols = [col for col in df.columns if 'lag' in col or 'MA' in col]
print(lag_cols)


Creating lag features...
Lag features created successfully!
Final dataset shape: (11990, 25)
Columns with lag features:
['Modal_Price_lag_1', 'Modal_Price_lag_7', 'Modal_Price_lag_30', 'Modal_Price_MA_7', 'Modal_Price_MA_30']


In [23]:
# Encode categorical variables
print("Encoding categorical variables...")

le_commodity = LabelEncoder()
le_district = LabelEncoder()
le_variety = LabelEncoder()
le_grade = LabelEncoder()

df['Commodity_encoded'] = le_commodity.fit_transform(df['Commodity'].astype(str))
df['District_encoded'] = le_district.fit_transform(df['District_Name'].astype(str))
df['Variety_encoded'] = le_variety.fit_transform(df['Variety'].astype(str))
df['Grade_encoded'] = le_grade.fit_transform(df['Grade'].astype(str))

print("Categorical encoding completed!")

# Check for any remaining NaN values
print("\nChecking for missing values:")
print(df.isnull().sum())

# Display sample of processed data
print("\nSample of processed data:")
sample_cols = ['Commodity', 'District_Name', 'Modal_Price', 'Price_Date', 
               'Modal_Price_lag_1', 'Modal_Price_MA_7']
print(df[sample_cols].head(10))


Encoding categorical variables...
Categorical encoding completed!

Checking for missing values:
Sl_no                    0
District_Name            0
Market_Name              0
Commodity                0
Variety                  0
Grade                    0
Min_Price                0
Max_Price                0
Modal_Price              0
Price_Date               0
Year                     0
Month                    0
Day                      0
DayOfWeek                0
Quarter                  0
Price_Range              0
Price_Volatility         0
Is_Summer                0
Is_Monsoon               0
Is_Winter                0
Modal_Price_lag_1       49
Modal_Price_lag_7      337
Modal_Price_lag_30    1433
Modal_Price_MA_7         0
Modal_Price_MA_30        0
Commodity_encoded        0
District_encoded         0
Variety_encoded          0
Grade_encoded            0
dtype: int64

Sample of processed data:
  Commodity              District_Name  Modal_Price Price_Date  \
0     Apple  Ch

In [24]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
%pip install xgboost
import xgboost as xgb

# Define feature columns
feature_columns = [
    'Commodity_encoded', 'District_encoded', 'Variety_encoded', 'Grade_encoded',
    'Year', 'Month', 'Day', 'DayOfWeek', 'Quarter',
    'Min_Price', 'Max_Price', 'Price_Range', 'Price_Volatility',
    'Is_Summer', 'Is_Monsoon', 'Is_Winter'
]

# Add lag features
lag_features = [col for col in df.columns if 'lag' in col or 'MA' in col]
feature_columns.extend(lag_features)

print(f"Total features: {len(feature_columns)}")
print("Features:", feature_columns)

# Prepare data for ML
ml_df = df[feature_columns + ['Modal_Price']].copy()

# Remove rows with NaN values (due to lag features)
ml_df = ml_df.dropna().reset_index(drop=True)

print(f"ML training data shape after removing NaN: {ml_df.shape}")

if len(ml_df) == 0:
    print("❌ No data available after removing NaN values!")
    print("This might be due to insufficient data for lag features.")
    
    # Try with fewer lag features
    basic_features = [
        'Commodity_encoded', 'District_encoded', 'Variety_encoded', 'Grade_encoded',
        'Year', 'Month', 'Day', 'DayOfWeek', 'Quarter',
        'Min_Price', 'Max_Price', 'Price_Range', 'Price_Volatility',
        'Is_Summer', 'Is_Monsoon', 'Is_Winter'
    ]
    
    ml_df = df[basic_features + ['Modal_Price']].dropna().reset_index(drop=True)
    feature_columns = basic_features
    print(f"Using basic features only. New shape: {ml_df.shape}")

if len(ml_df) > 0:
    X = ml_df[feature_columns]
    y = ml_df['Modal_Price']
    
    # Split data
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )
    
    # Scale features
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    # Train models
    models = {
        'Random Forest': RandomForestRegressor(n_estimators=50, random_state=42, max_depth=10),
        'Gradient Boosting': GradientBoostingRegressor(n_estimators=50, random_state=42, max_depth=5),
        'Linear Regression': LinearRegression()
    }
    
    # Add XGBoost if available
    try:
        models['XGBoost'] = xgb.XGBRegressor(n_estimators=50, random_state=42, max_depth=5)
    except:
        print("XGBoost not available, skipping...")
    
    model_results = {}
    
    print("\nTraining models...")
    for name, model in models.items():
        try:
            print(f"Training {name}...")
            
            if name == 'Linear Regression':
                model.fit(X_train_scaled, y_train)
                y_pred = model.predict(X_test_scaled)
            else:
                model.fit(X_train, y_train)
                y_pred = model.predict(X_test)
            
            mae = mean_absolute_error(y_test, y_pred)
            mse = mean_squared_error(y_test, y_pred)
            r2 = r2_score(y_test, y_pred)
            
            model_results[name] = {
                'model': model,
                'MAE': mae,
                'MSE': mse,
                'R2': r2,
                'scaler': scaler if name == 'Linear Regression' else None
            }
            
            print(f"  ✓ {name} - MAE: {mae:.2f}, R²: {r2:.3f}")
            
        except Exception as e:
            print(f"  ❌ {name} failed: {str(e)}")
    
    # Find best model
    if model_results:
        best_model_name = min(model_results.keys(), key=lambda x: model_results[x]['MAE'])
        print(f"\n🏆 Best model: {best_model_name} (MAE: {model_results[best_model_name]['MAE']:.2f})")
    else:
        print("❌ No models trained successfully!")

else:
    print("❌ Cannot proceed with ML training - no valid data available!")


Note: you may need to restart the kernel to use updated packages.
Total features: 21
Features: ['Commodity_encoded', 'District_encoded', 'Variety_encoded', 'Grade_encoded', 'Year', 'Month', 'Day', 'DayOfWeek', 'Quarter', 'Min_Price', 'Max_Price', 'Price_Range', 'Price_Volatility', 'Is_Summer', 'Is_Monsoon', 'Is_Winter', 'Modal_Price_lag_1', 'Modal_Price_lag_7', 'Modal_Price_lag_30', 'Modal_Price_MA_7', 'Modal_Price_MA_30']
ML training data shape after removing NaN: (10557, 22)

Training models...
Training Random Forest...



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


  ✓ Random Forest - MAE: 52.31, R²: 0.995
Training Gradient Boosting...
  ✓ Gradient Boosting - MAE: 66.79, R²: 0.996
Training Linear Regression...
  ✓ Linear Regression - MAE: 135.94, R²: 0.924
Training XGBoost...
  ✓ XGBoost - MAE: 56.49, R²: 0.996

🏆 Best model: Random Forest (MAE: 52.31)


In [25]:
%pip install statsmodels
from statsmodels.tsa.arima.model import ARIMA
import warnings
warnings.filterwarnings('ignore')

def build_arima_models():
    """Build ARIMA models for each commodity"""
    
    print("Building ARIMA models for time series forecasting...")
    arima_models = {}
    
    commodities = df['Commodity'].unique()
    
    for commodity in commodities:
        try:
            # Get commodity data
            commodity_data = df[df['Commodity'] == commodity].copy()
            
            if len(commodity_data) < 50:  # Need sufficient data
                continue
            
            # Create time series
            ts_data = (commodity_data.groupby('Price_Date')['Modal_Price']
                      .mean()
                      .resample('D')
                      .mean()
                      .fillna(method='ffill')
                      .dropna())
            
            if len(ts_data) < 30:
                continue
            
            # Simple ARIMA model (you can optimize parameters later)
            model = ARIMA(ts_data, order=(1, 1, 1))
            fitted_model = model.fit()
            
            arima_models[commodity] = {
                'model': fitted_model,
                'data': ts_data,
                'last_date': ts_data.index[-1],
                'last_price': ts_data.iloc[-1]
            }
            
            print(f"✓ ARIMA model built for {commodity}")
            
        except Exception as e:
            print(f"❌ ARIMA failed for {commodity}: {str(e)}")
    
    print(f"ARIMA models built for {len(arima_models)} commodities")
    return arima_models

# Build ARIMA models
arima_models = build_arima_models()



[notice] A new release of pip is available: 25.0.1 -> 25.1.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Note: you may need to restart the kernel to use updated packages.
Building ARIMA models for time series forecasting...
✓ ARIMA model built for Apple
✓ ARIMA model built for Banana
✓ ARIMA model built for Brinjal
✓ ARIMA model built for Cabbage
✓ ARIMA model built for Grapes
✓ ARIMA model built for Guava
✓ ARIMA model built for Onion
✓ ARIMA model built for Papaya
✓ ARIMA model built for Potato
✓ ARIMA model built for Tomato
ARIMA models built for 10 commodities


In [26]:
def predict_future_prices(commodity, district=None, days_ahead=30):
    """
    Predict future prices for a specific commodity (and optionally district)
    """
    
    print(f"Predicting prices for {commodity}" + (f" in {district}" if district else ""))
    
    # Filter data
    if district:
        filtered_data = df[(df['Commodity'] == commodity) & 
                          (df['District_Name'] == district)].copy()
    else:
        filtered_data = df[df['Commodity'] == commodity].copy()
    
    if len(filtered_data) == 0:
        return f"No data available for {commodity}" + (f" in {district}" if district else "")
    
    # Try ARIMA first (better for time series)
    if commodity in arima_models:
        try:
            arima_model = arima_models[commodity]['model']
            forecast = arima_model.forecast(steps=days_ahead)
            
            # Create forecast dates
            last_date = arima_models[commodity]['last_date']
            forecast_dates = pd.date_range(
                start=last_date + pd.Timedelta(days=1),
                periods=days_ahead,
                freq='D'
            )
            
            return pd.DataFrame({
                'Date': forecast_dates,
                'Predicted_Price': forecast.values,
                'Method': 'ARIMA',
                'Commodity': commodity,
                'District': district if district else 'All Districts'
            })
            
        except Exception as e:
            print(f"ARIMA prediction failed: {e}, falling back to ML model")
    
    # Fallback to ML model
    try:
        # Get latest data point
        latest_data = filtered_data.sort_values('Price_Date').tail(1).copy()
        
        if len(latest_data) == 0:
            return "No recent data available"
        
        # Check if we have all required features
        missing_features = [col for col in feature_columns if col not in latest_data.columns]
        if missing_features:
            print(f"Missing features for ML prediction: {missing_features}")
            return "Cannot make ML prediction - missing features"
        
        # Prepare for prediction
        predictions = []
        current_data = latest_data.copy()
        
        best_model = model_results[best_model_name]['model']
        best_scaler = model_results[best_model_name]['scaler']
        
        for i in range(days_ahead):
            # Get features
            try:
                X_pred = current_data[feature_columns].values
                
                # Handle any remaining NaN values
                if np.isnan(X_pred).any():
                    # Fill with median values from training data
                    X_pred = np.where(np.isnan(X_pred), 
                                    np.nanmedian(X_train.values, axis=0), 
                                    X_pred)
                
                # Scale if needed
                if best_model_name == 'Linear Regression' and best_scaler:
                    X_pred = best_scaler.transform(X_pred)
                
                # Make prediction
                prediction = best_model.predict(X_pred)[0]
                
                # Ensure positive price
                prediction = max(prediction, 0)
                predictions.append(prediction)
                
                # Update for next iteration (simple approach)
                current_data['Modal_Price'] = prediction
                
            except Exception as e:
                print(f"Error in prediction step {i}: {e}")
                break
        
        # Create forecast dates
        last_date = latest_data['Price_Date'].iloc[0]
        forecast_dates = pd.date_range(
            start=last_date + pd.Timedelta(days=1),
            periods=len(predictions),
            freq='D'
        )
        
        return pd.DataFrame({
            'Date': forecast_dates,
            'Predicted_Price': predictions,
            'Method': f'ML ({best_model_name})',
            'Commodity': commodity,
            'District': district if district else 'All Districts'
        })
        
    except Exception as e:
        return f"Prediction failed: {str(e)}"

# Test the prediction function
print("Testing price prediction...")
test_commodity = df['Commodity'].iloc[0]
test_prediction = predict_future_prices(test_commodity, days_ahead=10)
print(test_prediction)


Testing price prediction...
Predicting prices for Apple
        Date  Predicted_Price Method Commodity       District
0 2025-07-28     11183.091018  ARIMA     Apple  All Districts
1 2025-07-29     11183.069101  ARIMA     Apple  All Districts
2 2025-07-30     11183.069130  ARIMA     Apple  All Districts
3 2025-07-31     11183.069130  ARIMA     Apple  All Districts
4 2025-08-01     11183.069130  ARIMA     Apple  All Districts
5 2025-08-02     11183.069130  ARIMA     Apple  All Districts
6 2025-08-03     11183.069130  ARIMA     Apple  All Districts
7 2025-08-04     11183.069130  ARIMA     Apple  All Districts
8 2025-08-05     11183.069130  ARIMA     Apple  All Districts
9 2025-08-06     11183.069130  ARIMA     Apple  All Districts


In [27]:
def market_intervention_analysis(commodity, price_increase_threshold=20):
    """
    Analyze if market intervention is needed for a specific commodity
    """
    
    print(f"\n=== Market Intervention Analysis for {commodity} ===")
    
    # Get current average price
    recent_data = df[df['Commodity'] == commodity].tail(30)
    if len(recent_data) == 0:
        return f"No data available for {commodity}"
    
    current_avg_price = recent_data['Modal_Price'].mean()
    
    # Get predictions for next 30 days
    future_prices = predict_future_prices(commodity, days_ahead=30)
    
    if isinstance(future_prices, str):
        return f"Cannot analyze {commodity}: {future_prices}"
    
    # Calculate price trends
    max_predicted_price = future_prices['Predicted_Price'].max()
    avg_predicted_price = future_prices['Predicted_Price'].mean()
    
    # Calculate percentage increase
    max_increase_pct = ((max_predicted_price - current_avg_price) / current_avg_price) * 100
    avg_increase_pct = ((avg_predicted_price - current_avg_price) / current_avg_price) * 100
    
    # Determine intervention need
    intervention_needed = max_increase_pct > price_increase_threshold
    
    # Determine urgency
    if max_increase_pct > 30:
        urgency = "HIGH"
    elif max_increase_pct > 20:
        urgency = "MEDIUM"
    else:
        urgency = "LOW"
    
    analysis_result = {
        'Commodity': commodity,
        'Current_Avg_Price': round(current_avg_price, 2),
        'Max_Predicted_Price': round(max_predicted_price, 2),
        'Avg_Predicted_Price': round(avg_predicted_price, 2),
        'Max_Price_Increase_Pct': round(max_increase_pct, 2),
        'Avg_Price_Increase_Pct': round(avg_increase_pct, 2),
        'Intervention_Needed': intervention_needed,
        'Urgency_Level': urgency,
        'Recommendation': 'RELEASE BUFFER STOCK' if intervention_needed else 'MONITOR PRICES',
        'Districts_Affected': df[df['Commodity'] == commodity]['District_Name'].nunique()
    }
    
    return analysis_result

def analyze_all_commodities():
    """Analyze all commodities for market intervention needs"""
    
    print("\n" + "="*60)
    print("COMPREHENSIVE MARKET INTERVENTION ANALYSIS")
    print("="*60)
    
    commodities = df['Commodity'].unique()
    interventions_needed = []
    stable_commodities = []
    
    for commodity in commodities:
        try:
            analysis = market_intervention_analysis(commodity)
            
            if isinstance(analysis, dict):
                if analysis['Intervention_Needed']:
                    interventions_needed.append(analysis)
                else:
                    stable_commodities.append(analysis)
            
        except Exception as e:
            print(f"Analysis failed for {commodity}: {e}")
    
    # Sort by urgency and price increase
    interventions_needed.sort(key=lambda x: (
        0 if x['Urgency_Level'] == 'HIGH' else 1 if x['Urgency_Level'] == 'MEDIUM' else 2,
        -x['Max_Price_Increase_Pct']
    ))
    
    return interventions_needed, stable_commodities

# Run comprehensive analysis
interventions_needed, stable_commodities = analyze_all_commodities()



COMPREHENSIVE MARKET INTERVENTION ANALYSIS

=== Market Intervention Analysis for Apple ===
Predicting prices for Apple

=== Market Intervention Analysis for Banana ===
Predicting prices for Banana

=== Market Intervention Analysis for Brinjal ===
Predicting prices for Brinjal

=== Market Intervention Analysis for Cabbage ===
Predicting prices for Cabbage

=== Market Intervention Analysis for Grapes ===
Predicting prices for Grapes

=== Market Intervention Analysis for Guava ===
Predicting prices for Guava

=== Market Intervention Analysis for Onion ===
Predicting prices for Onion

=== Market Intervention Analysis for Papaya ===
Predicting prices for Papaya

=== Market Intervention Analysis for Potato ===
Predicting prices for Potato

=== Market Intervention Analysis for Tomato ===
Predicting prices for Tomato


In [28]:
def generate_intervention_report():
    """Generate comprehensive intervention report"""
    
    print("\n" + "="*80)
    print("🚨 DEPARTMENT OF CONSUMER AFFAIRS - MARKET INTERVENTION REPORT")
    print("="*80)
    print(f"Report Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
    
    if interventions_needed:
        print(f"\n🔴 IMMEDIATE INTERVENTIONS REQUIRED: {len(interventions_needed)} commodities")
        print("-" * 80)
        
        # High priority interventions
        high_priority = [x for x in interventions_needed if x['Urgency_Level'] == 'HIGH']
        if high_priority:
            print("\n🚨 HIGH PRIORITY (>30% price increase expected):")
            for item in high_priority:
                print(f"  • {item['Commodity']}")
                print(f"    Current Price: ₹{item['Current_Avg_Price']}")
                print(f"    Expected Max Price: ₹{item['Max_Predicted_Price']}")
                print(f"    Price Increase: {item['Max_Price_Increase_Pct']:.1f}%")
                print(f"    Districts Affected: {item['Districts_Affected']}")
                print(f"    Action: {item['Recommendation']}")
                print()
        
        # Medium priority interventions
        medium_priority = [x for x in interventions_needed if x['Urgency_Level'] == 'MEDIUM']
        if medium_priority:
            print("🟡 MEDIUM PRIORITY (20-30% price increase expected):")
            for item in medium_priority:
                print(f"  • {item['Commodity']}: {item['Max_Price_Increase_Pct']:.1f}% increase expected")
    
    else:
        print("\n✅ NO IMMEDIATE INTERVENTIONS REQUIRED")
        print("All commodity prices are within acceptable ranges.")
    
    print(f"\n📊 MARKET SUMMARY:")
    print(f"  • Total commodities monitored: {len(df['Commodity'].unique())}")
    print(f"  • Commodities requiring intervention: {len(interventions_needed)}")
    print(f"  • Stable commodities: {len(stable_commodities)}")
    print(f"  • Districts covered: {df['District_Name'].nunique()}")
    
    # Create detailed intervention table
    if interventions_needed:
        print(f"\n📋 DETAILED INTERVENTION TABLE:")
        intervention_df = pd.DataFrame(interventions_needed)
        print(intervention_df[['Commodity', 'Current_Avg_Price', 'Max_Predicted_Price', 
                             'Max_Price_Increase_Pct', 'Urgency_Level', 'Recommendation']].to_string(index=False))
    
    return interventions_needed

# Generate the final report
final_report = generate_intervention_report()



🚨 DEPARTMENT OF CONSUMER AFFAIRS - MARKET INTERVENTION REPORT
Report Generated: 2025-07-29 21:41:30

🔴 IMMEDIATE INTERVENTIONS REQUIRED: 3 commodities
--------------------------------------------------------------------------------

🚨 HIGH PRIORITY (>30% price increase expected):
  • Tomato
    Current Price: ₹1411.67
    Expected Max Price: ₹2469.63
    Price Increase: 74.9%
    Districts Affected: 5
    Action: RELEASE BUFFER STOCK

🟡 MEDIUM PRIORITY (20-30% price increase expected):
  • Guava: 23.6% increase expected
  • Apple: 22.4% increase expected

📊 MARKET SUMMARY:
  • Total commodities monitored: 10
  • Commodities requiring intervention: 3
  • Stable commodities: 7
  • Districts covered: 5

📋 DETAILED INTERVENTION TABLE:
Commodity  Current_Avg_Price  Max_Predicted_Price  Max_Price_Increase_Pct Urgency_Level       Recommendation
   Tomato            1411.67              2469.63                   74.94          HIGH RELEASE BUFFER STOCK
    Guava            3230.00            

In [29]:
def interactive_prediction_system():
    """Interactive system for specific commodity predictions"""
    
    print("\n" + "="*50)
    print("🔮 INTERACTIVE PRICE PREDICTION SYSTEM")
    print("="*50)
    
    available_commodities = df['Commodity'].unique()
    available_districts = df['District_Name'].unique()
    
    print(f"Available commodities: {len(available_commodities)}")
    print(f"Available districts: {len(available_districts)}")
    
    # Example predictions for major commodities
    major_commodities = ['Apple', 'Banana', 'Brinjal', 'Cabbage', 'Grapes']
    available_major = [c for c in major_commodities if c in available_commodities]
    
    if not available_major:
        available_major = list(available_commodities)[:5]
    
    print(f"\nGenerating predictions for major commodities...")
    
    for commodity in available_major:
        print(f"\n--- {commodity.upper()} PRICE FORECAST ---")
        
        # Get 15-day prediction
        prediction = predict_future_prices(commodity, days_ahead=15)
        
        if isinstance(prediction, pd.DataFrame):
            print("Next 7 days:")
            display_pred = prediction.head(7)[['Date', 'Predicted_Price', 'Method']]
            display_pred['Date'] = display_pred['Date'].dt.strftime('%Y-%m-%d')
            display_pred['Predicted_Price'] = display_pred['Predicted_Price'].round(2)
            print(display_pred.to_string(index=False))
            
            # Show trend
            current_price = df[df['Commodity'] == commodity]['Modal_Price'].tail(5).mean()
            avg_predicted = prediction['Predicted_Price'].head(7).mean()
            trend = "📈 INCREASING" if avg_predicted > current_price else "📉 DECREASING"
            change = ((avg_predicted - current_price) / current_price) * 100
            
            print(f"Current avg: ₹{current_price:.2f}")
            print(f"7-day avg predicted: ₹{avg_predicted:.2f}")
            print(f"Trend: {trend} ({change:+.1f}%)")
        else:
            print(prediction)

# Run the interactive system
interactive_prediction_system()



🔮 INTERACTIVE PRICE PREDICTION SYSTEM
Available commodities: 10
Available districts: 5

Generating predictions for major commodities...

--- APPLE PRICE FORECAST ---
Predicting prices for Apple
Next 7 days:
      Date  Predicted_Price Method
2025-07-28         11183.09  ARIMA
2025-07-29         11183.07  ARIMA
2025-07-30         11183.07  ARIMA
2025-07-31         11183.07  ARIMA
2025-08-01         11183.07  ARIMA
2025-08-02         11183.07  ARIMA
2025-08-03         11183.07  ARIMA
Current avg: ₹8800.00
7-day avg predicted: ₹11183.07
Trend: 📈 INCREASING (+27.1%)

--- BANANA PRICE FORECAST ---
Predicting prices for Banana
Next 7 days:
      Date  Predicted_Price Method
2025-07-28          1286.92  ARIMA
2025-07-29          1293.49  ARIMA
2025-07-30          1293.29  ARIMA
2025-07-31          1293.30  ARIMA
2025-08-01          1293.30  ARIMA
2025-08-02          1293.30  ARIMA
2025-08-03          1293.30  ARIMA
Current avg: ₹1480.00
7-day avg predicted: ₹1292.41
Trend: 📉 DECREASING (-12.

In [30]:
def save_models_and_results():
    """Save trained models and results for future use"""
    
    import pickle
    
    # Save ML models
    with open('price_prediction_models.pkl', 'wb') as f:
        pickle.dump(model_results, f)
    
    # Save ARIMA models
    with open('arima_models.pkl', 'wb') as f:
        pickle.dump(arima_models, f)
    
    # Save intervention results
    if interventions_needed:
        intervention_df = pd.DataFrame(interventions_needed)
        intervention_df.to_csv('intervention_recommendations.csv', index=False)
    
    # Save feature columns for future predictions
    with open('feature_columns.pkl', 'wb') as f:
        pickle.dump(feature_columns, f)
    
    print("✅ Models and results saved successfully!")
    print("Files created:")
    print("  • price_prediction_models.pkl")
    print("  • arima_models.pkl") 
    print("  • intervention_recommendations.csv")
    print("  • feature_columns.pkl")

# Save everything
save_models_and_results()




✅ Models and results saved successfully!
Files created:
  • price_prediction_models.pkl
  • arima_models.pkl
  • intervention_recommendations.csv
  • feature_columns.pkl
