In [None]:
import pandas as pd
import numpy as np

def get_standardized_sector_data_sources():
    """
    Describe the reference source for sector-wise GVA growth and how MOSPI
    categories are combined into the project's ten sectors.

    Returns:
        dict with two keys:
            'Primary_Source': name, URL, table, metric, frequency and base
                year of the reference publication.
            'Sector_Mapping_to_MOSPI': one entry per sector holding the
                'mospi_category', an optional 'additional_data' note and the
                'collection_method' description.
    """
    # MOSPI category labels that are reused by several sectors.
    trade_category = 'Trade, hotels, transport, communication and services related to broadcasting'
    financial_category = 'Financial, real estate and professional services'
    public_admin_category = 'Public administration, defence and other services'

    def sector_entry(category, method, additional=None):
        # Keep the key order: category, optional additional data, method.
        entry = {'mospi_category': category}
        if additional is not None:
            entry['additional_data'] = additional
        entry['collection_method'] = method
        return entry

    primary_source = {
        'name': 'MOSPI National Accounts Statistics',
        'url': 'https://www.mospi.gov.in/national-accounts-statistics',
        'specific_table': 'Table 1.4 - Gross Value Added by Economic Activity at 2011-12 Prices',
        'metric': 'Annual Growth Rate in GVA (%)',
        'frequency': 'Annual',
        'base_year': '2011-12 constant prices',
    }

    sector_mapping = {
        'Infrastructure & Transport': sector_entry(
            'Transport, storage, communication and services related to broadcasting',
            'Transport + 50% of Construction GVA growth',
            additional='Construction (for infrastructure)',
        ),
        'Energy & Natural Resources': sector_entry(
            'Mining and quarrying + Electricity, gas, water supply and other utility services',
            'Weighted average of Mining (30%) + Utilities (70%)',
        ),
        'Agriculture & Rural': sector_entry(
            'Agriculture, forestry and fishing',
            'Direct GVA growth rate',
        ),
        'Social Services': sector_entry(
            public_admin_category,
            'Weighted average of both categories',
            additional='Human health and social work activities',
        ),
        'Economic Services': sector_entry(
            'Manufacturing + ' + trade_category,
            'Manufacturing (60%) + Trade services (40%)',
        ),
        'Defense & Security': sector_entry(
            public_admin_category,
            'Extract defence component (approximately 25% of category)',
        ),
        'Communication & Technology': sector_entry(
            financial_category,
            'IT services growth from separate IT ministry data',
            additional='Information technology services',
        ),
        'Science & Innovation': sector_entry(
            financial_category,
            'Professional services component (20% of category)',
        ),
        'Governance & Administration': sector_entry(
            public_admin_category,
            'Administrative component (75% of category)',
        ),
        'Culture & Tourism': sector_entry(
            trade_category,
            'Hotels and restaurants component',
        ),
    }

    return {
        'Primary_Source': primary_source,
        'Sector_Mapping_to_MOSPI': sector_mapping,
    }

def create_data_collection_template(
    output_path='/Users/vvmohith/Desktop/PROJECT/phase-3(final)/mospi_gva_collection_template.csv',
):
    """Create an empty CSV template for manual MOSPI GVA data collection.

    The template has one row per fiscal year ('05-06' .. '23-24') and one
    NaN-filled growth column per MOSPI category, ready to be filled by hand.

    Args:
        output_path: Where the CSV template is written. Defaults to the
            project location used by the rest of this notebook.

    Returns:
        pandas.DataFrame: the template that was written to ``output_path``.
    """
    import os  # local import: only needed to report the written file name

    # Years for data collection
    fiscal_years = ['05-06', '06-07', '07-08', '08-09', '09-10', '10-11',
                    '11-12', '12-13', '13-14', '14-15', '15-16', '16-17',
                    '17-18', '18-19', '19-20', '20-21', '21-22', '22-23', '23-24']

    # One growth column per MOSPI category to collect.
    growth_columns = [
        'Agriculture_Forestry_Fishing_GVA_Growth',      # Agriculture, forestry and fishing
        'Mining_Quarrying_GVA_Growth',                  # Mining and quarrying
        'Manufacturing_GVA_Growth',                     # Manufacturing
        'Utilities_GVA_Growth',                         # Electricity, gas, water supply and other utility services
        'Construction_GVA_Growth',                      # Construction
        'Trade_Hotels_Transport_GVA_Growth',            # Trade, hotels, transport, communication and broadcasting services
        'Financial_Professional_Services_GVA_Growth',   # Financial, real estate and professional services
        'Public_Admin_Defence_GVA_Growth',              # Public administration, defence and other services
    ]

    collection_template = pd.DataFrame({'Fiscal_Year': fiscal_years})
    for column in growth_columns:
        # Scalar NaN broadcasts to every row and yields a float64 column.
        collection_template[column] = np.nan

    collection_template.to_csv(output_path, index=False)

    print(f"Data collection template created: {os.path.basename(output_path)}")
    print("\nInstructions:")
    print("1. Go to https://www.mospi.gov.in/national-accounts-statistics")
    print("2. Download 'National Accounts Statistics' latest release")
    print("3. Find Table 1.4 - Gross Value Added by Economic Activity")
    print("4. Extract growth rates for each category by year")
    print("5. Fill the template with actual data")

    return collection_template

def calculate_sector_growth_from_mospi_data(
    input_path='/Users/vvmohith/Desktop/PROJECT/phase-3(final)/mospi_gva_collection_template.csv',
    output_path='/Users/vvmohith/Desktop/PROJECT/phase-3(final)/standardized_sector_growth_data.csv',
):
    """
    Calculate standardized sector growth rates from MOSPI GVA data.

    This function should be run AFTER the MOSPI template has been filled in.

    Args:
        input_path: CSV holding the filled MOSPI template (a 'Fiscal_Year'
            column followed by the eight ``*_GVA_Growth`` columns).
        output_path: Where the standardized sector growth CSV is written.

    Returns:
        pandas.DataFrame with 'Fiscal_Year' plus ten ``*_Growth`` columns.
        If the template file is missing, or every value after the first
        column is NaN, the result of ``create_sample_standardized_data()``
        is returned instead (that fallback writes to its own default path).
    """
    # Only the read is guarded: a failure while writing the result must not
    # be misreported as "template not found".
    try:
        mospi_data = pd.read_csv(input_path)
    except FileNotFoundError:
        print("MOSPI template not found. Creating sample data for development...")
        return create_sample_standardized_data()

    # Check if data is filled (everything after the Fiscal_Year column is NaN)
    if mospi_data.iloc[:, 1:].isna().all().all():
        print("Please fill the MOSPI data template first!")
        return create_sample_standardized_data()

    agriculture = mospi_data['Agriculture_Forestry_Fishing_GVA_Growth']
    mining = mospi_data['Mining_Quarrying_GVA_Growth']
    manufacturing = mospi_data['Manufacturing_GVA_Growth']
    utilities = mospi_data['Utilities_GVA_Growth']
    construction = mospi_data['Construction_GVA_Growth']
    trade = mospi_data['Trade_Hotels_Transport_GVA_Growth']
    financial = mospi_data['Financial_Professional_Services_GVA_Growth']
    public_admin = mospi_data['Public_Admin_Defence_GVA_Growth']

    standardized_data = mospi_data[['Fiscal_Year']].copy()

    # Agriculture & Rural = Direct agriculture GVA
    standardized_data['Agriculture_Rural_Growth'] = agriculture

    # Energy & Natural Resources = 30% Mining + 70% Utilities
    standardized_data['Energy_Natural_Resources_Growth'] = 0.3 * mining + 0.7 * utilities

    # Economic Services = 60% Manufacturing + 40% Trade
    standardized_data['Economic_Services_Growth'] = 0.6 * manufacturing + 0.4 * trade

    # Infrastructure & Transport = 70% Trade/transport + 50% Construction
    # NOTE(review): these weights sum to 1.2, so this is not a weighted
    # average like the two sectors above - confirm the intended weights.
    standardized_data['Infrastructure_Transport_Growth'] = 0.7 * trade + 0.5 * construction

    # Social Services = 70% Public admin + 30% Financial services
    standardized_data['Social_Services_Growth'] = 0.7 * public_admin + 0.3 * financial

    # NOTE(review): the sectors below scale a growth *rate* by a share of the
    # category (e.g. 25%), which shrinks the rate itself - verify that this
    # is the intended proxy.

    # Defense & Security = 25% of Public admin
    standardized_data['Defense_Security_Growth'] = 0.25 * public_admin

    # Communication & Technology = Financial services (proxy for IT)
    standardized_data['Communication_Technology_Growth'] = financial

    # Science & Innovation = 20% of Financial professional services
    standardized_data['Science_Innovation_Growth'] = 0.2 * financial

    # Governance & Administration = 75% of Public admin
    standardized_data['Governance_Administration_Growth'] = 0.75 * public_admin

    # Culture & Tourism = Hotels portion (30%) of the Trade category
    standardized_data['Culture_Tourism_Growth'] = 0.3 * trade

    standardized_data.to_csv(output_path, index=False)

    print("Standardized sector growth data created!")
    return standardized_data

def create_sample_standardized_data(
    output_path='/Users/vvmohith/Desktop/PROJECT/phase-3(final)/standardized_sector_growth_data.csv',
):
    """Create hard-coded sample sector growth data for development.

    The values are placeholders typed into this function; they are NOT
    read from MOSPI and must be replaced with collected data before
    production use (the function prints a reminder to that effect).

    Args:
        output_path: Where the sample CSV is written. Defaults to the same
            file the real standardized data would occupy.

    Returns:
        pandas.DataFrame with 'Fiscal_Year' ('05-06' .. '23-24') and ten
        ``*_Growth`` columns, one value per fiscal year.
    """
    fiscal_years = ['05-06', '06-07', '07-08', '08-09', '09-10', '10-11',
                    '11-12', '12-13', '13-14', '14-15', '15-16', '16-17',
                    '17-18', '18-19', '19-20', '20-21', '21-22', '22-23', '23-24']

    # Each list holds one value per entry of fiscal_years, in the same order.
    sample_data = {
        'Fiscal_Year': fiscal_years,

        # Agriculture - known for volatility
        'Agriculture_Rural_Growth': [
            5.1, 4.2, 5.8, 0.1, 1.0, 8.6, 3.7, 1.4, -0.2, 0.2,
            6.3, 6.8, 5.0, 2.9, 4.3, 3.6, 3.9, 3.5, 1.4,
        ],

        # Energy - stable with crisis impacts
        'Energy_Natural_Resources_Growth': [
            7.2, 7.8, 6.1, 2.8, 5.9, 7.6, 4.8, 3.2, 5.4, 6.9,
            7.1, 6.2, 5.1, 4.3, 1.5, -4.8, 8.3, 6.4, 5.7,
        ],

        # Economic services (manufacturing + trade) - cyclical
        'Economic_Services_Growth': [
            10.2, 11.1, 8.7, 1.8, 7.9, 9.2, 6.4, 5.1, 7.0, 7.5,
            8.1, 6.8, 6.0, 5.5, 3.0, -6.2, 9.8, 7.8, 6.8,
        ],

        # Infrastructure - investment driven
        'Infrastructure_Transport_Growth': [
            11.8, 13.2, 7.9, -1.8, 8.4, 10.3, 6.1, 3.8, 6.4, 8.1,
            8.3, 6.8, 5.7, 5.2, 1.9, -7.2, 10.8, 7.8, 6.7,
        ],

        # Social Services - steady growth
        'Social_Services_Growth': [
            6.8, 7.2, 8.1, 5.6, 7.0, 8.6, 7.2, 6.5, 7.7, 8.1,
            7.9, 7.4, 6.8, 6.3, 4.6, -2.5, 7.9, 6.9, 6.4,
        ],

        # Defense - budget constrained
        'Defense_Security_Growth': [
            5.2, 6.1, 7.4, 4.0, 5.7, 6.9, 5.3, 4.5, 5.9, 6.2,
            6.0, 5.6, 4.9, 4.3, 3.2, -1.0, 5.9, 5.4, 5.1,
        ],

        # IT - high growth
        'Communication_Technology_Growth': [
            22.3, 25.1, 19.4, 12.2, 16.8, 21.7, 16.9, 14.4, 17.1, 19.3,
            18.8, 16.7, 14.2, 12.8, 9.4, 6.9, 15.7, 13.2, 11.8,
        ],

        # Science - R&D dependent
        'Science_Innovation_Growth': [
            8.4, 9.2, 7.7, 4.1, 6.8, 8.6, 6.3, 5.4, 7.1, 7.9,
            7.6, 6.9, 6.1, 5.7, 3.2, -0.8, 7.4, 6.6, 5.9,
        ],

        # Governance - administrative efficiency
        'Governance_Administration_Growth': [
            3.2, 3.8, 4.1, 2.7, 3.5, 4.3, 3.1, 2.8, 3.6, 3.9,
            3.7, 3.3, 2.9, 2.6, 1.8, 0.2, 3.1, 2.8, 2.5,
        ],

        # Tourism - highly volatile
        'Culture_Tourism_Growth': [
            11.8, 13.2, 9.6, -4.3, 7.9, 12.1, 8.4, 6.8, 9.2, 10.7,
            10.3, 8.8, 7.4, 6.9, 1.1, -14.6, 11.4, 8.7, 7.1,
        ],
    }

    df = pd.DataFrame(sample_data)
    df.to_csv(output_path, index=False)

    print("Sample standardized sector data created for development")
    print("Replace with actual MOSPI data for production use")

    return df

# Execute data collection setup when this code runs as the main module.
# The names bound here (sources, template, sample_data) are module-level.
if __name__ == "__main__":
    # Step 1: Fetch the data-source guide and print its primary-source fields
    sources = get_standardized_sector_data_sources()
    
    print("STANDARDIZED SECTOR DATA COLLECTION GUIDE")
    print("="*50)
    print(f"Primary Source: {sources['Primary_Source']['name']}")
    print(f"URL: {sources['Primary_Source']['url']}")
    print(f"Metric: {sources['Primary_Source']['metric']}")
    print(f"Table: {sources['Primary_Source']['specific_table']}")
    
    # Step 2: Create the empty collection template (writes a CSV and prints instructions)
    template = create_data_collection_template()
    
    # Step 3: Create sample data for development (writes the standardized sector CSV)
    sample_data = create_sample_standardized_data()
    
    # Report the sample's dimensions and every column except the year label
    print(f"\nStandardized sector data shape: {sample_data.shape}")
    print(f"Sectors included: {[col for col in sample_data.columns if col != 'Fiscal_Year']}")

Step 2: Collect Authentic Indian Macroeconomic Indicators

In [2]:
def collect_macro_indicators(
    output_path='/Users/vvmohith/Desktop/PROJECT/phase-3(final)/macro_indicators.csv',
):
    """Build the macroeconomic indicator table for fiscal years 05-06 to 23-24.

    The indicator values are hard-coded below (manually collected); derived
    flag columns and one-year lags are added before the table is saved.

    Args:
        output_path: Where the indicator CSV is written.

    Returns:
        pandas.DataFrame with one row per fiscal year and the columns
        'Fiscal_Year', six indicator series, 'Economic_Crisis',
        'Election_Year', 'High_Inflation', 'GDP_Growth_Lag1' and
        'Inflation_Lag1' (the lag columns are NaN in the first row).
    """
    # Values were entered by hand; the source named on each series is the
    # author's attribution and is not checked by this code.
    # NOTE(review): several series have their extreme value at the '18-19'
    # position (e.g. GDP growth -6.6, fiscal deficit -9.2) while
    # Economic_Crisis below flags '20-21' - verify that the later values are
    # aligned with the right fiscal years.
    macro_data = {
        'Fiscal_Year': ['05-06', '06-07', '07-08', '08-09', '09-10', '10-11',
                        '11-12', '12-13', '13-14', '14-15', '15-16', '16-17',
                        '17-18', '18-19', '19-20', '20-21', '21-22', '22-23', '23-24'],

        # GDP Growth Rate (%) - From MOSPI National Accounts
        'GDP_Growth_Rate': [9.3, 9.3, 9.8, 3.9, 8.5, 10.3, 6.6, 5.5, 6.4, 7.4,
                            8.0, 8.3, 4.0, -6.6, 8.7, 7.0, 6.1, 4.2, 6.8],

        # CPI Inflation (%) - From MOSPI
        'Inflation_CPI': [4.2, 6.1, 8.4, 10.9, 12.0, 7.0, 8.9, 9.3, 5.8, 5.9,
                          5.2, 4.9, 6.2, 6.6, 5.5, 5.1, 4.3, 5.7, 6.8],

        # Fiscal Deficit as % of GDP - From Economic Survey
        'Fiscal_Deficit_GDP': [-4.0, -3.4, -2.5, -6.0, -6.5, -4.8, -5.7, -4.5,
                               -4.1, -3.5, -3.9, -3.5, -3.1, -9.2, -6.7, -3.4,
                               -3.8, -3.2, -3.1],

        # Current Account Balance as % of GDP - From RBI
        'Current_Account_GDP': [-1.2, -1.0, -1.3, -2.3, -2.8, -4.2, -1.0, -1.7,
                                -1.0, -0.6, -1.0, -2.1, -0.9, 0.9, -1.2, -0.4,
                                -2.1, -0.2, -1.5],

        # USD-INR Exchange Rate - From RBI
        'Exchange_Rate_USD': [44.1, 45.3, 40.3, 46.6, 47.4, 50.0, 54.4, 58.6,
                              61.0, 63.3, 64.2, 67.2, 70.1, 74.1, 73.0, 78.0,
                              79.4, 82.7, 83.3],

        # Crude Oil Price (Brent, USD/barrel) - From World Bank
        'Crude_Oil_Price': [54.5, 65.1, 72.4, 97.0, 61.7, 79.5, 94.9, 109.5,
                            105.9, 52.4, 43.7, 54.2, 64.2, 41.8, 70.9, 77.2,
                            52.8, 94.3, 81.7],
    }

    df = pd.DataFrame(macro_data)

    # Derived 0/1 indicator columns
    df['Economic_Crisis'] = df['Fiscal_Year'].isin(['08-09', '20-21']).astype(int)
    df['Election_Year'] = df['Fiscal_Year'].isin(['09-10', '14-15', '19-20']).astype(int)
    df['High_Inflation'] = (df['Inflation_CPI'] > 8).astype(int)

    # Previous-year values; the first fiscal year has no predecessor (NaN)
    df['GDP_Growth_Lag1'] = df['GDP_Growth_Rate'].shift(1)
    df['Inflation_Lag1'] = df['Inflation_CPI'].shift(1)

    df.to_csv(output_path, index=False)

    print("Macroeconomic indicators collected and saved")
    return df

# Build the macro-indicator table (the call also writes it to CSV) and keep
# it in the module-level name macro_data.
macro_data = collect_macro_indicators()

Macroeconomic indicators collected and saved


Step 3: Create Integrated Dataset

In [3]:
def create_integrated_dataset(
    time_series_path='/Users/vvmohith/Desktop/PROJECT/final_data/standardized_budget_time_series.csv',
    macro_path='/Users/vvmohith/Desktop/PROJECT/phase-3(final)/macro_indicators.csv',
    sectoral_path='/Users/vvmohith/Desktop/PROJECT/phase-3(final)/ministry_with_sectors.csv',
    output_path='/Users/vvmohith/Desktop/PROJECT/phase-3(final)/integrated_macro_budget_dataset.csv',
):
    """Integrate ministry budgets with macro indicators.

    Reshapes the wide budget table (one row per 'Base_Ministry', one column
    per fiscal year) into one record per (fiscal year, ministry), attaches
    the ministry's sector and that year's macro indicators, then adds
    per-ministry rolling means and year-over-year growth.

    Args:
        time_series_path: CSV with a 'Base_Ministry' column; every other
            column is treated as a fiscal-year column.
        macro_path: CSV with 'Fiscal_Year' and the macro indicator columns.
        sectoral_path: CSV with 'Ministry' and 'Sector' columns.
        output_path: Where the integrated CSV is written.

    Returns:
        pandas.DataFrame sorted by ('Ministry', 'Fiscal_Year') with the
        record columns plus 'Budget_MA_3', 'Budget_MA_5' and
        'Budget_YoY_Growth'. Year columns without a matching macro row and
        NaN budgets are skipped; ministries without a sector get 'Other'.
    """
    time_series = pd.read_csv(time_series_path)
    macro_data = pd.read_csv(macro_path)
    sectoral_data = pd.read_csv(sectoral_path)

    macro_columns = [
        'GDP_Growth_Rate', 'Inflation_CPI', 'Fiscal_Deficit_GDP',
        'Current_Account_GDP', 'Exchange_Rate_USD', 'Crude_Oil_Price',
        'Economic_Crisis', 'Election_Year', 'High_Inflation',
    ]

    # Every column other than the ministry name is a fiscal-year column
    year_cols = [col for col in time_series.columns if col != 'Base_Ministry']

    # Ministry -> sector lookup built once instead of filtering the sector
    # table for every record; setdefault keeps the first match per ministry.
    sector_by_ministry = {}
    for name, sector_name in zip(sectoral_data['Ministry'], sectoral_data['Sector']):
        if not pd.isna(name):
            sector_by_ministry.setdefault(name, sector_name)

    integrated_data = []

    for year_col in year_cols:
        macro_row = macro_data[macro_data['Fiscal_Year'] == year_col]
        if macro_row.empty:
            continue

        # Macro values are the same for every ministry in this year
        macro_values = {col: macro_row[col].iloc[0] for col in macro_columns}

        for ministry, budget in zip(time_series['Base_Ministry'], time_series[year_col]):
            if pd.isna(budget):
                continue

            integrated_data.append({
                'Fiscal_Year': year_col,
                'Ministry': ministry,
                'Sector': sector_by_ministry.get(ministry, 'Other'),
                'Budget_Allocation': budget,
                **macro_values,
            })

    integrated_df = pd.DataFrame(integrated_data)

    # Chronological order within each ministry is required by the rolling
    # and pct_change computations below.
    integrated_df = integrated_df.sort_values(['Ministry', 'Fiscal_Year'])

    # NOTE(review): the rolling means include the current year's budget and
    # YoY growth is derived from it, so these columns carry information
    # about 'Budget_Allocation' for the same row - relevant if they are
    # later used as features to predict it.
    budget_by_ministry = integrated_df.groupby('Ministry')['Budget_Allocation']
    for window in [3, 5]:
        integrated_df[f'Budget_MA_{window}'] = budget_by_ministry.transform(
            lambda x: x.rolling(window=window, min_periods=1).mean()
        )

    # Year-over-year growth in percent (NaN for each ministry's first year)
    integrated_df['Budget_YoY_Growth'] = budget_by_ministry.pct_change() * 100

    integrated_df.to_csv(output_path, index=False)

    print(f"Integrated dataset created with {len(integrated_df)} records")
    print(f"Columns: {list(integrated_df.columns)}")

    return integrated_df

# Build the integrated ministry/macro dataset (the call reads the three
# input CSVs and writes the result) and keep it in integrated_data.
integrated_data = create_integrated_dataset()

Integrated dataset created with 1071 records
Columns: ['Fiscal_Year', 'Ministry', 'Sector', 'Budget_Allocation', 'GDP_Growth_Rate', 'Inflation_CPI', 'Fiscal_Deficit_GDP', 'Current_Account_GDP', 'Exchange_Rate_USD', 'Crude_Oil_Price', 'Economic_Crisis', 'Election_Year', 'High_Inflation', 'Budget_MA_3', 'Budget_MA_5', 'Budget_YoY_Growth']


Step 4: Enhanced Machine Learning Models

In [4]:
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
import matplotlib.pyplot as plt

def enhanced_ml_models(
    data_path='/Users/vvmohith/Desktop/PROJECT/phase-3(final)/integrated_macro_budget_dataset.csv',
    results_path='/Users/vvmohith/Desktop/PROJECT/phase-3(final)/enhanced_model_results.csv',
):
    """Train four regressors on the integrated dataset and report MAE / R2.

    Features are the macro indicators, the budget rolling means and YoY
    growth, plus one-hot sector dummies; the target is 'Budget_Allocation'.
    Linear and Ridge models are fit on standardized features, the tree
    ensembles on the raw features.

    Args:
        data_path: CSV produced by the integration step.
        results_path: Where the per-model MAE / R2 table is written.

    Returns:
        dict mapping model name to {'MAE': float, 'R2': float,
        'Model': fitted estimator}.
    """
    df = pd.read_csv(data_path)

    # NOTE(review): 'Budget_MA_3', 'Budget_MA_5' and 'Budget_YoY_Growth' are
    # columns of the same file as the target; if they are computed from the
    # current year's 'Budget_Allocation' they leak the target into the
    # features - verify, and consider lagged versions.
    feature_cols = [
        'GDP_Growth_Rate', 'Inflation_CPI', 'Fiscal_Deficit_GDP',
        'Current_Account_GDP', 'Exchange_Rate_USD', 'Crude_Oil_Price',
        'Economic_Crisis', 'Election_Year', 'High_Inflation',
        'Budget_MA_3', 'Budget_MA_5', 'Budget_YoY_Growth'
    ]

    # One-hot encode the sector label
    sector_dummies = pd.get_dummies(df['Sector'], prefix='Sector')

    # Combine features. A percentage-change column can hold +/-inf (growth
    # from a zero base); fillna alone leaves those in place and the
    # estimators reject them, so map them to NaN first and then to 0.
    X = pd.concat([df[feature_cols], sector_dummies], axis=1)
    X = X.replace([np.inf, -np.inf], np.nan).fillna(0)
    y = df['Budget_Allocation']

    # Random 80/20 split with a fixed seed.
    # NOTE(review): rows are ministry-year observations; a random split
    # mixes years across train and test - confirm this is intended.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Scale features for linear models (scaler is fit on the training split only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    models = {
        'Enhanced_Linear': LinearRegression(),
        'Enhanced_Ridge': Ridge(alpha=10.0),
        'Enhanced_RF': RandomForestRegressor(n_estimators=200, random_state=42),
        'Enhanced_GBM': GradientBoostingRegressor(n_estimators=200, random_state=42)
    }

    results = {}

    for name, model in models.items():
        # Linear-family models use the standardized matrices
        if 'Linear' in name or 'Ridge' in name:
            model.fit(X_train_scaled, y_train)
            y_pred = model.predict(X_test_scaled)
        else:
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)

        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        results[name] = {'MAE': mae, 'R2': r2, 'Model': model}
        print(f"{name}: MAE={mae:.2f}, R2={r2:.3f}")

    # Save only the metrics (one row per model); fitted models are returned
    results_df = pd.DataFrame({k: {metric: v[metric] for metric in ['MAE', 'R2']}
                               for k, v in results.items()}).T
    results_df.to_csv(results_path)

    return results

# Train and evaluate the enhanced models; the call prints MAE/R2 per model,
# writes the metrics CSV, and returns the per-model results dict.
enhanced_results = enhanced_ml_models()

Enhanced_Linear: MAE=5338.20, R2=0.938
Enhanced_Ridge: MAE=5404.58, R2=0.940
Enhanced_RF: MAE=2805.37, R2=0.963
Enhanced_GBM: MAE=3058.52, R2=0.953


Step 5: Deep Learning Implementation

In [5]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from sklearn.preprocessing import MinMaxScaler

def deep_learning_models(
    data_path='/Users/vvmohith/Desktop/PROJECT/phase-3(final)/integrated_macro_budget_dataset.csv',
    model_path='/Users/vvmohith/Desktop/PROJECT/phase-3(final)/lstm_budget_model.h5',
):
    """Train an LSTM that predicts the next budget from 5-year windows.

    For every ministry with at least ``sequence_length + 1`` rows, the
    feature columns are min-max scaled per ministry and cut into sliding
    windows of ``sequence_length`` consecutive rows; the target is the
    scaled 'Budget_Allocation' of the row that follows each window.

    Args:
        data_path: CSV produced by the integration step.
        model_path: Where the trained Keras model is saved.

    Returns:
        ``(model, history)`` after training, or ``None`` when no ministry
        has enough rows to form a sequence - callers that unpack the result
        must handle the ``None`` case.
    """
    df = pd.read_csv(data_path)

    sequence_length = 5

    # Features for LSTM; column 0 (Budget_Allocation) is also the target.
    # Defined once here because it does not depend on the ministry.
    features = [
        'Budget_Allocation', 'GDP_Growth_Rate', 'Inflation_CPI',
        'Fiscal_Deficit_GDP', 'Exchange_Rate_USD', 'Crude_Oil_Price',
        'Economic_Crisis', 'Election_Year'
    ]

    X_lstm, y_lstm = [], []

    for ministry in df['Ministry'].unique():
        ministry_data = df[df['Ministry'] == ministry].sort_values('Fiscal_Year')

        # Need at least one full window plus one target row
        if len(ministry_data) < sequence_length + 1:
            continue

        ministry_features = ministry_data[features].values

        # Normalize per ministry.
        # NOTE(review): the scaler is fit on the ministry's full history, so
        # each window is scaled using statistics that include later rows.
        scaler = MinMaxScaler()
        ministry_features_scaled = scaler.fit_transform(ministry_features)

        # Sliding windows: rows [i-5, i) are the input, row i's budget the target
        for i in range(sequence_length, len(ministry_features_scaled)):
            X_lstm.append(ministry_features_scaled[i-sequence_length:i])
            y_lstm.append(ministry_features_scaled[i, 0])  # Budget allocation

    if len(X_lstm) == 0:
        print("Not enough data for LSTM sequences")
        return None

    X_lstm = np.array(X_lstm)
    y_lstm = np.array(y_lstm)

    # Unshuffled 80/20 split: sequences are in ministry order, so the held-out
    # 20% is made up of the last ministries processed.
    split_idx = int(0.8 * len(X_lstm))
    X_train, X_test = X_lstm[:split_idx], X_lstm[split_idx:]
    y_train, y_test = y_lstm[:split_idx], y_lstm[split_idx:]

    # Two stacked LSTM layers with dropout, then a small dense head
    model = Sequential([
        LSTM(64, return_sequences=True, input_shape=(sequence_length, len(features))),
        Dropout(0.2),
        LSTM(32, return_sequences=False),
        Dropout(0.2),
        Dense(16, activation='relu'),
        Dense(1)
    ])

    model.compile(optimizer='adam', loss='mse', metrics=['mae'])

    # NOTE(review): the held-out split doubles as validation data, so the
    # reported val_* metrics are not an independent test estimate.
    history = model.fit(
        X_train, y_train,
        epochs=50,
        batch_size=32,
        validation_data=(X_test, y_test),
        verbose=1
    )

    model.save(model_path)

    print("LSTM model training completed")
    return model, history

# Run deep learning models.
# deep_learning_models() returns None (not a tuple) when no ministry has
# enough rows for a sequence, so guard the unpacking instead of letting it
# raise TypeError; both names are None in that case.
lstm_result = deep_learning_models()
lstm_model, history = lstm_result if lstm_result is not None else (None, None)

Epoch 1/50


  super().__init__(**kwargs)


20/20 ━━━━━━━━━━━━━━━━━━━━ 2s 15ms/step - loss: 0.1650 - mae: 0.3252 - val_loss: 0.0853 - val_mae: 0.2569
Epoch 2/50
20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0828 - mae: 0.2391 - val_loss: 0.0620 - val_mae: 0.2046
Epoch 3/50
20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0827 - mae: 0.2213 - val_loss: 0.0531 - val_mae: 0.1862
Epoch 4/50
20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0639 - mae: 0.1973 - val_loss: 0.0526 - val_mae: 0.1846
Epoch 5/50
20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0747 - mae: 0.2145 - val_loss: 0.0529 - val_mae: 0.1861
Epoch 6/50
20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0614 - mae: 0.1922 - val_loss: 0.0572 - val_mae: 0.1881
Epoch 7/50
20/20 ━━━━━━━━━━━━━━━━━━━━ 0s 5ms/step - loss: 0.0687 - mae: 0.20



LSTM model training completed
