In [23]:
# ============================================================================
# STEP 1: INSTALL AND IMPORT REQUIRED LIBRARIES
# ============================================================================

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
from datetime import datetime, timedelta
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Machine Learning
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler

# Time Series Analysis
try:
    from statsmodels.tsa.arima.model import ARIMA
    from statsmodels.tsa.stattools import adfuller
except ImportError:
    print("Installing statsmodels...")
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "statsmodels"])
    from statsmodels.tsa.arima.model import ARIMA
    from statsmodels.tsa.stattools import adfuller

# XGBoost
try:
    import xgboost as xgb
except ImportError:
    print("Installing xgboost...")
    import subprocess
    import sys
    subprocess.check_call([sys.executable, "-m", "pip", "install", "xgboost"])
    import xgboost as xgb

# Suppress warnings
warnings.filterwarnings('ignore')

print("✅ All libraries imported successfully!")


✅ All libraries imported successfully!


In [24]:
import pandas as pd

# ============================================================================
# STEP 2: LOAD AND PREPROCESS YOUR DATA
# ============================================================================

def load_and_preprocess_data(file_path):
    """Load and preprocess your commodity price data"""
    print("📊 Loading commodity price data from your file...")

    try:
        # Load data
        df = pd.read_csv(file_path)
        print(f"✅ Data loaded successfully: {len(df)} records")

        # Show first few rows
        print("\n📋 Data Structure:")
        print(df.head())
        print(f"\n✅ Exact column names before cleaning: {list(df.columns)}")

        # Clean column names (remove extra spaces)
        df.columns = df.columns.str.strip()

        # Display cleaned column names
        print(f"✅ Cleaned column names: {list(df.columns)}")

        # Convert Price Date to datetime
        df['Date'] = pd.to_datetime(df['Price Date'], format='%d-%b-%y', errors='coerce')

        # Drop rows with missing or invalid dates
        df = df.dropna(subset=['Date'])

        # Ensure Model Price (Rs./kg) is numeric
        df['Model Price (Rs./kg)'] = pd.to_numeric(df['Model Price (Rs./kg)'], errors='coerce')

        # Drop rows with missing price
        df = df.dropna(subset=['Model Price (Rs./kg)'])

        # Rename columns for consistency
        df = df.rename(columns={
            'District Name': 'District_Name',
            'Market Name': 'Market_Name',
            'Model Price (Rs./kg)': 'Modal_Price',
            'Min Price (Rs./Quintal)': 'Min_Price_Quintal',
            'Max Price (Rs./Quintal)': 'Max_Price_Quintal',
            'Min Price (Rs./kg)': 'Min_Price',
            'Max Price (Rs./kg)': 'Max_Price'
        })

        # Remove outliers using IQR method
        numeric_columns = ['Modal_Price', 'Min_Price', 'Max_Price']
        for col in numeric_columns:
            if col in df.columns:
                Q1 = df[col].quantile(0.25)
                Q3 = df[col].quantile(0.75)
                IQR = Q3 - Q1
                lower_bound = Q1 - 1.5 * IQR
                upper_bound = Q3 + 1.5 * IQR
                df = df[(df[col] >= lower_bound) & (df[col] <= upper_bound)]

        # Sort by date
        df = df.sort_values('Date').reset_index(drop=True)

        # Final summary
        print(f"✅ Data preprocessed: {len(df)} records after cleaning")
        print(f"📅 Date range: {df['Date'].min().date()} to {df['Date'].max().date()}")
        print(f"🏷️  Commodities: {df['Commodity'].nunique()} unique commodities")
        print(f"📍 Districts: {df['District_Name'].nunique()} unique districts")
        print(f"📊 Sample commodities: {', '.join(df['Commodity'].unique()[:10])}")

        return df

    except Exception as e:
        print(f"❌ Error loading data: {str(e)}")
        return None


# Load your data
df = load_and_preprocess_data('FinaliseDEMO.csv')

if df is not None:
    print("\n🎉 Data loading completed successfully!")
else:
    print("❌ Please check your file path and data format")


📊 Loading commodity price data from your file...
✅ Data loaded successfully: 52672 records

📋 Data Structure:
   Sl no.              District Name                Market Name Commodity  \
0       1  Chattrapati Sambhajinagar  Chattrapati Sambhajinagar     Apple   
1       2  Chattrapati Sambhajinagar  Chattrapati Sambhajinagar     Apple   
2       3  Chattrapati Sambhajinagar  Chattrapati Sambhajinagar     Apple   
3       4  Chattrapati Sambhajinagar  Chattrapati Sambhajinagar     Apple   
4       5  Chattrapati Sambhajinagar  Chattrapati Sambhajinagar     Apple   

  Variety  Grade  Min Price (Rs./Quintal)  Max Price (Rs./Quintal)  \
0   Other  Local                     5500                    13000   
1   Other    FAQ                     8000                    10000   
2   Other    FAQ                    10000                    14000   
3   Other  Local                     7000                     9000   
4   Other    FAQ                     4000                    11000   

   Mod

In [25]:
# ============================================================================
# STEP 3: ADVANCED FEATURE ENGINEERING
# ============================================================================

def create_time_features(df):
    """Create comprehensive time-based features"""
    df_featured = df.copy()
    
    # Basic time features
    df_featured['Year'] = df_featured['Date'].dt.year
    df_featured['Month'] = df_featured['Date'].dt.month
    df_featured['Day'] = df_featured['Date'].dt.day
    df_featured['DayOfWeek'] = df_featured['Date'].dt.dayofweek
    df_featured['DayOfYear'] = df_featured['Date'].dt.dayofyear
    df_featured['WeekOfYear'] = df_featured['Date'].dt.isocalendar().week
    df_featured['Quarter'] = df_featured['Date'].dt.quarter
    
    # Seasonal features
    df_featured['IsSummer'] = df_featured['Month'].isin([4, 5, 6]).astype(int)
    df_featured['IsMonsoon'] = df_featured['Month'].isin([7, 8, 9]).astype(int)
    df_featured['IsWinter'] = df_featured['Month'].isin([12, 1, 2]).astype(int)
    df_featured['IsHarvestSeason'] = df_featured['Month'].isin([10, 11, 12, 1]).astype(int)
    
    return df_featured

def create_price_features(df):
    """Create price-based features for each commodity"""
    df_featured = df.copy()
    
    # Sort by commodity and date
    df_featured = df_featured.sort_values(['Commodity', 'Date']).reset_index(drop=True)
    
    for commodity in df_featured['Commodity'].unique():
        mask = df_featured['Commodity'] == commodity
        
        # Lag features (previous day prices)
        for lag in [1, 2, 3, 7, 14]:
            df_featured.loc[mask, f'Modal_Price_Lag_{lag}'] = df_featured.loc[mask, 'Modal_Price'].shift(lag)
        
        # Rolling statistics
        for window in [7, 14, 30]:
            df_featured.loc[mask, f'Modal_Price_Rolling_Mean_{window}'] = df_featured.loc[mask, 'Modal_Price'].rolling(window=window).mean()
            df_featured.loc[mask, f'Modal_Price_Rolling_Std_{window}'] = df_featured.loc[mask, 'Modal_Price'].rolling(window=window).std()
            df_featured.loc[mask, f'Modal_Price_Rolling_Min_{window}'] = df_featured.loc[mask, 'Modal_Price'].rolling(window=window).min()
            df_featured.loc[mask, f'Modal_Price_Rolling_Max_{window}'] = df_featured.loc[mask, 'Modal_Price'].rolling(window=window).max()
        
        # Price changes
        df_featured.loc[mask, 'Price_Change_1D'] = df_featured.loc[mask, 'Modal_Price'].diff()
        df_featured.loc[mask, 'Price_Change_7D'] = df_featured.loc[mask, 'Modal_Price'].diff(7)
        df_featured.loc[mask, 'Price_Pct_Change_1D'] = df_featured.loc[mask, 'Modal_Price'].pct_change()
        df_featured.loc[mask, 'Price_Pct_Change_7D'] = df_featured.loc[mask, 'Modal_Price'].pct_change(7)
    
    return df_featured

# Apply feature engineering
if df is not None:
    print("\n🔧 Creating features...")
    df_featured = create_time_features(df)
    df_featured = create_price_features(df_featured)
    
    print("✅ Feature engineering completed!")
    print(f"📊 Total features: {df_featured.shape[1]}")
    print(f"📈 Sample features: {list(df_featured.columns[-10:])}")



🔧 Creating features...
✅ Feature engineering completed!
📊 Total features: 46
📈 Sample features: ['Modal_Price_Rolling_Min_14', 'Modal_Price_Rolling_Max_14', 'Modal_Price_Rolling_Mean_30', 'Modal_Price_Rolling_Std_30', 'Modal_Price_Rolling_Min_30', 'Modal_Price_Rolling_Max_30', 'Price_Change_1D', 'Price_Change_7D', 'Price_Pct_Change_1D', 'Price_Pct_Change_7D']


In [26]:
# ============================================================================
# STEP 4: COMPREHENSIVE PREDICTION MODEL FRAMEWORK
# ============================================================================

class CommodityPricePredictor:
    def __init__(self):
        self.models = {}
        self.scalers = {}
        self.feature_columns = []
        
    def prepare_features(self, df, commodity):
        """Prepare features for modeling"""
        # Filter for specific commodity
        commodity_data = df[df['Commodity'] == commodity].copy()
        
        if len(commodity_data) < 50:  # Minimum data requirement
            return None, None, None, "Insufficient data for modeling"
        
        # Remove rows with NaN values created by feature engineering
        commodity_data = commodity_data.dropna()
        
        if len(commodity_data) < 30:
            return None, None, None, "Insufficient data after cleaning"
        
        # Define feature columns (exclude target and identification columns)
        exclude_cols = ['Date', 'Commodity', 'District_Name', 'Market_Name', 'Variety', 'Grade', 
                       'Min_Price', 'Max_Price', 'Modal_Price', 'Min_Price_Quintal', 'Max_Price_Quintal',
                       'Sl no.', 'Price Date']
        
        self.feature_columns = [col for col in commodity_data.columns if col not in exclude_cols]
        
        # Prepare features and target
        X = commodity_data[self.feature_columns]
        y = commodity_data['Modal_Price']
        dates = commodity_data['Date']
        
        return X, y, dates, None
    
    def train_arima_model(self, y, commodity):
        """Train ARIMA model"""
        try:
            # Use only recent data for ARIMA (last 100 points)
            y_recent = y.tail(min(100, len(y)))
            
            # Check stationarity
            adf_result = adfuller(y_recent.dropna())
            
            if adf_result[1] > 0.05:  # Non-stationary
                # Difference the series
                y_diff = y_recent.diff().dropna()
                if len(y_diff) > 10:
                    model = ARIMA(y_diff, order=(1, 1, 1))
                else:
                    model = ARIMA(y_recent, order=(1, 0, 1))
            else:
                model = ARIMA(y_recent, order=(1, 0, 1))
            
            fitted_model = model.fit()
            self.models[f'{commodity}_arima'] = fitted_model
            return fitted_model, None
            
        except Exception as e:
            return None, f"ARIMA modeling failed: {str(e)}"
    
    def train_ml_models(self, X, y, commodity):
        """Train multiple ML models"""
        try:
            # Split data
            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, shuffle=False, random_state=42
            )
            
            # Scale features
            scaler = StandardScaler()
            X_train_scaled = scaler.fit_transform(X_train)
            X_test_scaled = scaler.transform(X_test)
            self.scalers[commodity] = scaler
            
            models_to_train = {
                'RandomForest': RandomForestRegressor(n_estimators=50, random_state=42, max_depth=10),
                'XGBoost': xgb.XGBRegressor(n_estimators=50, random_state=42, max_depth=6)
            }
            
            model_scores = {}
            
            for model_name, model in models_to_train.items():
                try:
                    # Train model
                    if model_name == 'XGBoost':
                        model.fit(X_train_scaled, y_train)
                        y_pred = model.predict(X_test_scaled)
                    else:
                        model.fit(X_train, y_train)
                        y_pred = model.predict(X_test)
                    
                    # Calculate metrics
                    mae = mean_absolute_error(y_test, y_pred)
                    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
                    r2 = r2_score(y_test, y_pred)
                    
                    model_scores[model_name] = {
                        'model': model,
                        'mae': mae,
                        'rmse': rmse,
                        'r2': r2,
                        'score': r2  # Use R2 as primary metric
                    }
                except Exception as e:
                    print(f"Failed to train {model_name}: {e}")
                    continue
            
            if not model_scores:
                return None, "All ML models failed"
            
            # Select best model
            best_model_name = max(model_scores, key=lambda x: model_scores[x]['score'])
            best_model_info = model_scores[best_model_name]
            
            self.models[f'{commodity}_ml'] = {
                'model': best_model_info['model'],
                'type': best_model_name,
                'metrics': best_model_info
            }
            
            return best_model_info, None
            
        except Exception as e:
            return None, f"ML modeling failed: {str(e)}"
    
    def train_commodity_model(self, df, commodity):
        """Train complete model for a commodity"""
        print(f"\n🔄 Training model for {commodity}...")
        
        # Prepare features
        X, y, dates, error = self.prepare_features(df, commodity)
        if error:
            print(f"❌ {error}")
            return error
        
        # Train ARIMA model
        arima_model, arima_error = self.train_arima_model(y, commodity)
        
        # Train ML models
        ml_model, ml_error = self.train_ml_models(X, y, commodity)
        
        if arima_model is None and ml_model is None:
            error_msg = f"Both ARIMA and ML modeling failed for {commodity}"
            print(f"❌ {error_msg}")
            return error_msg
        
        success_msg = f"✅ {commodity} model trained successfully!"
        if arima_model: success_msg += " (ARIMA + ML)" if ml_model else " (ARIMA only)"
        elif ml_model: success_msg += " (ML only)"
        
        print(success_msg)
        return None

# Initialize predictor
predictor = CommodityPricePredictor()

print("✅ Prediction framework initialized!")


✅ Prediction framework initialized!


In [27]:
# ============================================================================
# STEP 5: 28-DAY PREDICTION SYSTEM
# ============================================================================

def predict_future_prices(commodity, days_ahead=28):
    """Predict future prices for specified days"""
    try:
        # Get commodity data
        commodity_data = df_featured[df_featured['Commodity'] == commodity].copy()
        if len(commodity_data) == 0:
            return f"No data available for {commodity}"
        
        # Sort by date
        commodity_data = commodity_data.sort_values('Date').reset_index(drop=True)
        
        # Get last known date and price
        last_date = commodity_data['Date'].max()
        last_price = commodity_data[commodity_data['Date'] == last_date]['Modal_Price'].iloc[-1]
        
        # Generate future dates
        future_dates = [last_date + timedelta(days=i) for i in range(1, days_ahead + 1)]
        
        predictions = []
        
        # Try ARIMA first
        if f'{commodity}_arima' in predictor.models:
            try:
                arima_model = predictor.models[f'{commodity}_arima']
                forecast = arima_model.forecast(steps=days_ahead)
                
                # ARIMA might predict differences, so we need to convert back to levels
                if hasattr(forecast, '__len__') and len(forecast) == days_ahead:
                    # If ARIMA was trained on differences, convert back to levels
                    predictions = []
                    current_price = last_price
                    for i, diff in enumerate(forecast):
                        if abs(diff) < last_price:  # Reasonable difference
                            current_price += diff
                        else:
                            # Use simple trend if difference is too large
                            current_price *= (1 + np.random.normal(0, 0.02))
                        predictions.append(max(current_price, 0.1))  # Ensure positive prices
                else:
                    predictions = None
            except Exception as e:
                print(f"ARIMA prediction failed for {commodity}: {e}")
                predictions = None
        
        # Use ML model if ARIMA fails or unavailable
        if not predictions and f'{commodity}_ml' in predictor.models:
            try:
                ml_info = predictor.models[f'{commodity}_ml']
                model = ml_info['model']
                
                # Get last row for feature generation
                last_row = commodity_data.iloc[-1].copy()
                predictions = []
                
                for i in range(days_ahead):
                    # Create feature row for prediction
                    future_date = future_dates[i]
                    
                    # Update time features
                    last_row['Year'] = future_date.year
                    last_row['Month'] = future_date.month
                    last_row['Day'] = future_date.day
                    last_row['DayOfWeek'] = future_date.weekday()
                    last_row['DayOfYear'] = future_date.timetuple().tm_yday
                    last_row['WeekOfYear'] = future_date.isocalendar()[1]
                    last_row['Quarter'] = (future_date.month - 1) // 3 + 1
                    
                    # Update seasonal features
                    last_row['IsSummer'] = 1 if future_date.month in [4, 5, 6] else 0
                    last_row['IsMonsoon'] = 1 if future_date.month in [7, 8, 9] else 0
                    last_row['IsWinter'] = 1 if future_date.month in [12, 1, 2] else 0
                    last_row['IsHarvestSeason'] = 1 if future_date.month in [10, 11, 12, 1] else 0
                    
                    # Prepare feature vector
                    X_pred = last_row[predictor.feature_columns].values.reshape(1, -1)
                    
                    # Handle scaling if required
                    if commodity in predictor.scalers and ml_info['type'] == 'XGBoost':
                        X_pred = predictor.scalers[commodity].transform(X_pred)
                    
                    # Predict
                    pred_price = model.predict(X_pred)[0]
                    predictions.append(max(pred_price, 0.1))  # Ensure positive prices
                    
                    # Update lag features for next iteration
                    last_row['Modal_Price'] = pred_price
                    
            except Exception as e:
                print(f"ML prediction failed for {commodity}: {e}")
                predictions = None
        
        # Fallback: Simple trend-based prediction
        if not predictions:
            recent_prices = commodity_data.tail(30)['Modal_Price'].values
            if len(recent_prices) > 1:
                # Calculate trend
                trend = (recent_prices[-1] - recent_prices[0]) / len(recent_prices)
                predictions = []
                current_price = last_price
                
                for i in range(days_ahead):
                    # Add trend + some noise
                    current_price += trend + np.random.normal(0, abs(trend) * 0.1)
                    predictions.append(max(current_price, 0.1))
            else:
                return f"Insufficient data for prediction: {commodity}"
        
        if not predictions:
            return f"No model available for {commodity}"
        
        # Create results DataFrame
        results_df = pd.DataFrame({
            'Date': future_dates,
            'Predicted_Price': predictions,
            'Commodity': commodity
        })
        
        return results_df
        
    except Exception as e:
        return f"Prediction failed: {str(e)}"

def predict_weekly_prices(commodity, weeks=4):
    """Predict and organize prices by weeks"""
    print(f"\n📊 Generating {weeks}-week prediction for {commodity}...")
    
    total_days = weeks * 7
    
    # Get predictions
    predictions = predict_future_prices(commodity, days_ahead=total_days)
    
    if isinstance(predictions, str):
        print(f"❌ {predictions}")
        return predictions
    
    # Organize by weeks
    weekly_predictions = {}
    
    for week in range(1, weeks + 1):
        start_idx = (week - 1) * 7
        end_idx = week * 7
        
        week_data = predictions.iloc[start_idx:end_idx].copy()
        
        # Calculate week statistics
        avg_price = week_data['Predicted_Price'].mean()
        min_price = week_data['Predicted_Price'].min()
        max_price = week_data['Predicted_Price'].max()
        
        # Calculate trend
        first_price = week_data['Predicted_Price'].iloc[0]
        last_price = week_data['Predicted_Price'].iloc[-1]
        price_change = last_price - first_price
        price_change_pct = (price_change / first_price) * 100
        
        # Determine trend direction
        if price_change_pct > 2:
            trend = "↗ Increasing"
        elif price_change_pct < -2:
            trend = "↘ Decreasing"
        else:
            trend = "→ Stable"
        
        weekly_predictions[f'Week_{week}'] = {
            'dates': week_data['Date'].tolist(),
            'prices': week_data['Predicted_Price'].tolist(),
            'avg_price': round(avg_price, 2),
            'min_price': round(min_price, 2),
            'max_price': round(max_price, 2),
            'price_change': round(price_change, 2),
            'price_change_pct': round(price_change_pct, 2),
            'trend': trend,
            'date_range': f"{week_data['Date'].iloc[0].strftime('%b %d')} - {week_data['Date'].iloc[-1].strftime('%b %d')}"
        }
    
    return weekly_predictions

print("✅ 28-day prediction system ready!")



✅ 28-day prediction system ready!


In [28]:
# ============================================================================
# STEP 6: MARKET INTERVENTION ANALYSIS SYSTEM
# ============================================================================

def market_intervention_analysis(commodity, price_increase_threshold=20):
    """Analyze if market intervention is needed"""
    print(f"\n🔍 Analyzing market intervention needs for {commodity}...")
    
    # Get recent price data (last 30 days)
    recent_data = df_featured[df_featured['Commodity'] == commodity].tail(30)
    if len(recent_data) == 0:
        return f"No data available for {commodity}"
    
    current_avg_price = recent_data['Modal_Price'].mean()
    
    # Get 28-day predictions
    future_prices = predict_future_prices(commodity, days_ahead=28)
    
    if isinstance(future_prices, str):
        return f"Cannot analyze {commodity}: {future_prices}"
    
    # Calculate metrics
    max_predicted_price = future_prices['Predicted_Price'].max()
    avg_predicted_price = future_prices['Predicted_Price'].mean()
    min_predicted_price = future_prices['Predicted_Price'].min()
    
    # Calculate percentage changes
    max_increase_pct = ((max_predicted_price - current_avg_price) / current_avg_price) * 100
    avg_increase_pct = ((avg_predicted_price - current_avg_price) / current_avg_price) * 100
    
    # Determine intervention need
    intervention_needed = max_increase_pct > price_increase_threshold
    
    # Determine urgency level
    if max_increase_pct > 30:
        urgency = "🔴 HIGH"
        recommendation = "IMMEDIATE BUFFER STOCK RELEASE"
    elif max_increase_pct > 20:
        urgency = "🟡 MEDIUM"
        recommendation = "PREPARE BUFFER STOCK RELEASE"
    else:
        urgency = "🟢 LOW"
        recommendation = "MONITOR PRICES CLOSELY"
    
    # Calculate volatility
    price_volatility = future_prices['Predicted_Price'].std()
    volatility_pct = (price_volatility / current_avg_price) * 100
    
    return {
        'Commodity': commodity,
        'Current_Avg_Price': round(current_avg_price, 2),
        'Predicted_Max_Price': round(max_predicted_price, 2),
        'Predicted_Avg_Price': round(avg_predicted_price, 2),
        'Predicted_Min_Price': round(min_predicted_price, 2),
        'Max_Price_Increase_Pct': round(max_increase_pct, 2),
        'Avg_Price_Increase_Pct': round(avg_increase_pct, 2),
        'Price_Volatility': round(volatility_pct, 2),
        'Intervention_Needed': intervention_needed,
        'Urgency_Level': urgency,
        'Recommendation': recommendation,
        'Districts_Affected': df_featured[df_featured['Commodity'] == commodity]['District_Name'].nunique(),
        'Risk_Score': round(max_increase_pct + volatility_pct, 2)
    }

def comprehensive_market_analysis():
    """Analyze all commodities for intervention needs"""
    print("\n" + "="*80)
    print("🚨 COMPREHENSIVE MARKET INTERVENTION ANALYSIS")
    print("="*80)
    
    # Get top commodities by data availability
    commodity_counts = df_featured['Commodity'].value_counts()
    top_commodities = commodity_counts.head(8).index.tolist()  # Analyze top 8 commodities
    
    print(f"📊 Analyzing {len(top_commodities)} commodities with sufficient data...")
    print(f"Selected commodities: {', '.join(top_commodities)}")
    
    interventions_needed = []
    stable_commodities = []
    failed_analyses = []
    
    for commodity in top_commodities:
        try:
            analysis = market_intervention_analysis(commodity)
            if isinstance(analysis, dict):
                if analysis['Intervention_Needed']:
                    interventions_needed.append(analysis)
                else:
                    stable_commodities.append(analysis)
            else:
                failed_analyses.append({'commodity': commodity, 'error': analysis})
        except Exception as e:
            failed_analyses.append({'commodity': commodity, 'error': str(e)})
    
    # Sort interventions by risk score (descending)
    interventions_needed.sort(key=lambda x: x['Risk_Score'], reverse=True)
    
    return interventions_needed, stable_commodities, failed_analyses

def display_intervention_dashboard(interventions_needed, stable_commodities, failed_analyses):
    """Display comprehensive intervention analysis dashboard"""
    
    print("\n" + "="*100)
    print("📊 MARKET INTERVENTION DASHBOARD")
    print("="*100)
    
    # Summary statistics
    total_analyzed = len(interventions_needed) + len(stable_commodities)
    
    print(f"\n📈 ANALYSIS SUMMARY:")
    print(f"• Total commodities analyzed: {total_analyzed}")
    print(f"• Interventions needed: {len(interventions_needed)}")
    print(f"• Stable commodities: {len(stable_commodities)}")
    print(f"• Analysis failures: {len(failed_analyses)}")
    
    # High-risk commodities
    if interventions_needed:
        print(f"\n🚨 COMMODITIES REQUIRING INTERVENTION:")
        print("-" * 120)
        print(f"{'Commodity':<15} {'Current ₹':<12} {'Max Pred ₹':<12} {'Increase %':<12} {'Volatility %':<12} {'Risk Score':<12} {'Urgency':<15}")
        print("-" * 120)
        
        for item in interventions_needed:
            print(f"{item['Commodity']:<15} "
                  f"₹{item['Current_Avg_Price']:<11.2f} "
                  f"₹{item['Predicted_Max_Price']:<11.2f} "
                  f"{item['Max_Price_Increase_Pct']:<11.1f}% "
                  f"{item['Price_Volatility']:<11.1f}% "
                  f"{item['Risk_Score']:<11.1f} "
                  f"{item['Urgency_Level']:<15}")
        
        # Top 3 high-risk commodities
        print(f"\n🔥 TOP 3 HIGH-RISK COMMODITIES:")
        for i, item in enumerate(interventions_needed[:3], 1):
            print(f"{i}. {item['Commodity']}: {item['Max_Price_Increase_Pct']:.1f}% increase predicted")
            print(f"   → {item['Recommendation']}")
    
    else:
        print("\n✅ No commodities require immediate intervention!")
    
    # Stable commodities
    if stable_commodities:
        print(f"\n💚 STABLE COMMODITIES ({len(stable_commodities)}):")
        stable_names = [item['Commodity'] for item in stable_commodities[:6]]
        print("   " + ", ".join(stable_names))
        if len(stable_commodities) > 6:
            print(f"   ... and {len(stable_commodities) - 6} more")
    
    # Failed analyses
    if failed_analyses:
        print(f"\n⚠️  ANALYSIS FAILURES ({len(failed_analyses)}):")
        for failure in failed_analyses[:3]:
            print(f"   • {failure['commodity']}: {failure['error']}")

print("✅ Market intervention analysis system ready!")


✅ Market intervention analysis system ready!


In [29]:
# ============================================================================
# STEP 7: COMPLETE EXECUTION WORKFLOW
# ============================================================================

def execute_complete_analysis():
    """Execute the complete 28-day prediction and analysis workflow"""
    
    print("\n🚀 STARTING COMPLETE 28-DAY COMMODITY PRICE ANALYSIS")
    print("=" * 80)

    if df_featured is None:
        print("❌ No data available. Please check your file.")
        return

    # Select all commodities with at least 100 records
    commodity_counts = df_featured['Commodity'].value_counts()
    selected_commodities = commodity_counts[commodity_counts >= 100].index.tolist()

    # Fallback: if no commodity has 100+ records, include all
    if not selected_commodities:
        selected_commodities = commodity_counts.index.tolist()

    print(f"\n📋 Selected commodities for analysis: {', '.join(selected_commodities)}")
    
    # STEP 1: Train models for all selected commodities
    print(f"\n{'='*50}")
    print("🎯 STEP 1: TRAINING PREDICTION MODELS")
    print("="*50)

    training_results = {}
    for commodity in selected_commodities:
        print(f"\n🔄 Training model for {commodity}...")
        error = predictor.train_commodity_model(df_featured, commodity)
        training_results[commodity] = "Success" if error is None else f"Failed: {error}"
        if error is None:
            print(f"✅ {commodity} model trained successfully! (ARIMA + ML)")
        else:
            print(f"❌ Failed to train model for {commodity}: {error}")

    # STEP 2: Generate 28-day predictions
    print(f"\n{'='*50}")
    print("📊 STEP 2: GENERATING 28-DAY PREDICTIONS")
    print("="*50)

    prediction_results = {}
    for commodity in selected_commodities:
        if "Success" in training_results[commodity]:
            weekly_pred = predict_weekly_prices(commodity, weeks=4)
            prediction_results[commodity] = weekly_pred
        else:
            print(f"⚠️  Skipping {commodity} due to training failure")

    # STEP 3: Market intervention analysis
    print(f"\n{'='*50}")
    print("🔍 STEP 3: MARKET INTERVENTION ANALYSIS")
    print("="*50)

    interventions, stable, failures = comprehensive_market_analysis()

    # STEP 4: Display results
    print(f"\n{'='*50}")
    print("📈 STEP 4: COMPREHENSIVE RESULTS")
    print("="*50)

    for commodity, weekly_data in prediction_results.items():
        if isinstance(weekly_data, dict):
            print(f"\n🏷️  {commodity.upper()} - 28-DAY WEEK-WISE PREDICTIONS:")
            print("-" * 80)
            print(f"{'Week':<8} {'Date Range':<20} {'Avg Price':<12} {'Min-Max':<18} {'Trend':<15} {'Change %'}")
            print("-" * 80)

            for week_name, week_info in weekly_data.items():
                min_max = f"₹{week_info['min_price']:.0f}-₹{week_info['max_price']:.0f}"
                print(f"{week_name:<8} "
                      f"{week_info['date_range']:<20} "
                      f"₹{week_info['avg_price']:<11.2f} "
                      f"{min_max:<18} "
                      f"{week_info['trend']:<15} "
                      f"{week_info['price_change_pct']:+.1f}%")
        else:
            print(f"\n❌ {commodity}: {weekly_data}")

    # Display dashboard
    display_intervention_dashboard(interventions, stable, failures)

    return prediction_results, interventions, stable


# =======================
# EXECUTION WRAPPER
# =======================
try:
    if df_featured is not None:
        predictions, interventions, stable_commodities = execute_complete_analysis()
        print(f"\n✅ ANALYSIS COMPLETED SUCCESSFULLY!")
        print(f"📊 Generated predictions for {len(predictions)} commodities")
        print(f"🚨 {len(interventions)} commodities need intervention")

        # Quick Summary
        print(f"\n📋 QUICK SUMMARY:")
        print(f"• Data period: {df['Date'].min().strftime('%B %d, %Y')} to {df['Date'].max().strftime('%B %d, %Y')}")
        print(f"• Total records: {len(df):,}")
        print(f"• Commodities: {', '.join(df['Commodity'].unique())}")
        print(f"• Districts: {', '.join(df['District_Name'].unique()[:5])}")
    else:
        print("❌ Please ensure your CSV file is properly loaded")

except Exception as e:
    print(f"\n❌ ANALYSIS FAILED: {str(e)}")
    print("Please ensure:")
    print("1. Your CSV file path is correct")
    print("2. Data contains required columns: Date, Commodity, Modal_Price, District_Name")
    print("3. Data has sufficient historical records for each commodity")



🚀 STARTING COMPLETE 28-DAY COMMODITY PRICE ANALYSIS

📋 Selected commodities for analysis: Onion, Tomato, Cabbage, Potato, Brinjal, Guava, Orange, Papaya, Grapes

🎯 STEP 1: TRAINING PREDICTION MODELS

🔄 Training model for Onion...

🔄 Training model for Onion...
✅ Onion model trained successfully! (ARIMA + ML)
✅ Onion model trained successfully! (ARIMA + ML)

🔄 Training model for Tomato...

🔄 Training model for Tomato...
✅ Tomato model trained successfully! (ARIMA + ML)
✅ Tomato model trained successfully! (ARIMA + ML)

🔄 Training model for Cabbage...

🔄 Training model for Cabbage...
✅ Cabbage model trained successfully! (ARIMA + ML)
✅ Cabbage model trained successfully! (ARIMA + ML)

🔄 Training model for Potato...

🔄 Training model for Potato...
✅ Potato model trained successfully! (ARIMA + ML)
✅ Potato model trained successfully! (ARIMA + ML)

🔄 Training model for Brinjal...

🔄 Training model for Brinjal...
✅ Brinjal model trained successfully! (ARIMA + ML)
✅ Brinjal model trained suc

In [30]:
# ============================================================================
# STEP 8: EXPORT RESULTS AND GENERATE REPORTS
# ============================================================================

def export_predictions_to_csv():
    """Export all predictions to CSV files"""
    
    from datetime import datetime as dt
    timestamp = dt.now().strftime("%Y%m%d_%H%M%S")
    
    try:
        # Export individual commodity predictions
        for commodity, weekly_pred in predictions.items():
            if isinstance(weekly_pred, dict):
                # Create detailed dataframe
                export_data = []
                
                for week_name, week_data in weekly_pred.items():
                    for i, (date, price) in enumerate(zip(week_data['dates'], week_data['prices'])):
                        export_data.append({
                            'Date': date.strftime('%Y-%m-%d'),
                            'Commodity': commodity,
                            'Week': week_name,
                            'Day_in_Week': i + 1,
                            'Predicted_Price': round(price, 2),
                            'Week_Avg_Price': week_data['avg_price'],
                            'Week_Trend': week_data['trend'],
                            'Week_Change_Pct': week_data['price_change_pct']
                        })
                
                # Create DataFrame and export
                export_df = pd.DataFrame(export_data)
                filename = f"{commodity}_28day_predictions_{timestamp}.csv"
                export_df.to_csv(filename, index=False)
                print(f"✅ Exported {commodity} predictions to {filename}")
        
        # Export intervention analysis
        if interventions:
            intervention_df = pd.DataFrame(interventions)
            intervention_filename = f"market_interventions_{timestamp}.csv"
            intervention_df.to_csv(intervention_filename, index=False)
            print(f"✅ Exported intervention analysis to {intervention_filename}")
        
        print(f"\n📁 All reports exported with timestamp: {timestamp}")
        
    except Exception as e:
        print(f"❌ Export failed: {str(e)}")

def generate_summary_report():
    """Generate a comprehensive summary report"""
    
    try:
        report = []
        report.append("="*80)
        report.append("📊 28-DAY COMMODITY PRICE PREDICTION - EXECUTIVE SUMMARY")
        report.append("="*80)
        
        # Analysis details
        from datetime import datetime as dt
        report.append(f"\n📅 Analysis Date: {dt.now().strftime('%B %d, %Y')}")
        report.append(f"📈 Prediction Period: 28 days (4 weeks)")
        report.append(f"🏷️  Commodities Analyzed: {len(predictions)}")
        report.append(f"📊 Data Source: {len(df)} total records")
        report.append(f"📍 Districts Covered: {', '.join(df['District_Name'].unique()[:5])}")
        
        # Key findings
        report.append(f"\n🎯 KEY FINDINGS:")
        report.append(f"• {len(interventions)} commodities require market intervention")
        report.append(f"• {len(stable_commodities)} commodities show stable price trends")
        
        if interventions:
            highest_risk = max(interventions, key=lambda x: x['Risk_Score'])
            report.append(f"• Highest risk commodity: {highest_risk['Commodity']} ({highest_risk['Risk_Score']:.1f} risk score)")
            
            high_urgency = [item for item in interventions if 'HIGH' in item['Urgency_Level']]
            report.append(f"• High urgency interventions needed: {len(high_urgency)}")
        
        # Commodity-wise summary
        report.append(f"\n📋 COMMODITY-WISE PREDICTIONS:")
        report.append("-" * 60)
        
        for commodity, weekly_pred in predictions.items():
            if isinstance(weekly_pred, dict):
                week1_avg = weekly_pred['Week_1']['avg_price']
                week4_avg = weekly_pred['Week_4']['avg_price']
                total_change = ((week4_avg - week1_avg) / week1_avg) * 100
                
                report.append(f"• {commodity}:")
                report.append(f"  - Week 1 avg: ₹{week1_avg:.2f}")
                report.append(f"  - Week 4 avg: ₹{week4_avg:.2f}")
                report.append(f"  - Overall trend: {total_change:+.1f}%")
        
        # Recommendations
        report.append(f"\n🎯 STRATEGIC RECOMMENDATIONS:")
        if interventions:
            report.append("• IMMEDIATE ACTIONS REQUIRED:")
            for item in interventions[:3]:  # Top 3
                report.append(f"  - {item['Commodity']}: {item['Recommendation']}")
        
        report.append("• MONITORING PRIORITIES:")
        report.append("  - Continue daily price tracking for all commodities")
        report.append("  - Weekly model retraining recommended")
        report.append("  - Consider expanding historical data for better accuracy")
        
        # Save report
        report_text = "\n".join(report)
        
        timestamp = dt.now().strftime("%Y%m%d_%H%M%S")
        report_filename = f"commodity_analysis_summary_{timestamp}.txt"
        
        with open(report_filename, 'w') as f:
            f.write(report_text)
        
        print(report_text)
        print(f"\n✅ Summary report saved to {report_filename}")
        
    except Exception as e:
        print(f"❌ Report generation failed: {str(e)}")

# Execute exports and reporting
if 'predictions' in locals() and predictions:
    print("\n📁 Exporting results...")
    export_predictions_to_csv()
    generate_summary_report()
    
    print("\n🎉 COMPLETE 28-DAY COMMODITY PRICE PREDICTION SYSTEM EXECUTED SUCCESSFULLY!")
    print("="*80)
    print("✅ All analysis completed")
    print("📊 Predictions generated for multiple commodities")  
    print("🚨 Market intervention recommendations provided")
    print("📁 Results exported to CSV files")
    print("📋 Summary report generated")
else:
    print("⚠️  No predictions available for export. Please run the analysis first.")



📁 Exporting results...
✅ Exported Onion predictions to Onion_28day_predictions_20250731_032540.csv
✅ Exported Tomato predictions to Tomato_28day_predictions_20250731_032540.csv
✅ Exported Cabbage predictions to Cabbage_28day_predictions_20250731_032540.csv
✅ Exported Potato predictions to Potato_28day_predictions_20250731_032540.csv
✅ Exported Brinjal predictions to Brinjal_28day_predictions_20250731_032540.csv
✅ Exported Guava predictions to Guava_28day_predictions_20250731_032540.csv
✅ Exported Orange predictions to Orange_28day_predictions_20250731_032540.csv
✅ Exported Papaya predictions to Papaya_28day_predictions_20250731_032540.csv
✅ Exported Grapes predictions to Grapes_28day_predictions_20250731_032540.csv
✅ Exported intervention analysis to market_interventions_20250731_032540.csv

📁 All reports exported with timestamp: 20250731_032540
❌ Report generation failed: 'charmap' codec can't encode character '\U0001f4ca' in position 82: character maps to <undefined>

🎉 COMPLETE 28-

In [33]:
# Run the full analysis first
results = analyze_all_commodities_with_accuracy(df, prediction_func)

# Extract only the accuracy metrics
accuracy_metrics = results[['Commodity', 'R2_Score', 'RMSE', 'MSE']]

# Print or display the result
print(accuracy_metrics)

# Optional: Save to CSV
accuracy_metrics.to_csv("Commodity_Accuracy_Only.csv", index=False)
print("✅ Accuracy-only report saved as Commodity_Accuracy_Only.csv")


  Commodity  R2_Score  RMSE    MSE
0     Onion    0.4818  4.07  16.54
✅ Accuracy-only report saved as Commodity_Accuracy_Only.csv
