In [None]:
!pip install pandas
!pip install numpy
!pip install matplotlib
!pip install seaborn
!pip install scikit-learn
!pip install gradio

Collecting gradio
  Downloading gradio-5.23.3-py3-none-any.whl.metadata (16 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi<1.0,>=0.115.2 (from gradio)
  Downloading fastapi-0.115.12-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.5.0-py3-none-any.whl.metadata (3.0 kB)
Collecting gradio-client==1.8.0 (from gradio)
  Downloading gradio_client-1.8.0-py3-none-any.whl.metadata (7.1 kB)
Collecting groovy~=0.1 (from gradio)
  Downloading groovy-0.1.2-py3-none-any.whl.metadata (6.1 kB)
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.18 (from gradio)
  Downloading python_multipart-0.0.20-py3-none-any.whl.metadata (1.8 kB)
Collecting ruff>=0.9.3 (from gradio)
  Downloading ruff-0.11.3-py3-none-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (25 kB)
Collecting safehttpx<0.2.0,>=0.1.6 

In [None]:
#GITHUB REPOSITORY LINK : https://github.com/Joelrajjoe/ImapctX-GDP-.git
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, cross_val_score, TimeSeriesSplit
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from sklearn.preprocessing import StandardScaler
import gradio as gr
import io
import base64
import warnings
warnings.filterwarnings('ignore')

# Module-level state shared between the analysis run and the UI callbacks.
# Both names start as None and are rebound (via `global`) inside
# run_analysis()/handle_analysis(); the handle_* callbacks read them later.
global_forecast_df = None  # forecast DataFrame produced by the most recent analysis
global_ts_df = None  # long-format (State, Year, GDP_Per_Capita) frame from the most recent analysis

# Function to load and preprocess the data
def load_data(file_obj):
    """
    Load the uploaded CSV file into a DataFrame.

    Parameters
    ----------
    file_obj : str, os.PathLike, or object with a ``name`` attribute
        Either a filesystem path, or an upload wrapper whose ``name``
        attribute holds the path of the CSV file on disk.

    Returns
    -------
    pandas.DataFrame or None
        The parsed data, or ``None`` when no file was given, the file does
        not exist, or it cannot be parsed. Failures are reported with
        ``print`` rather than raised, so callers must check for ``None``.
    """
    import os  # local import: the module's import block does not provide `os`

    if file_obj is None:
        print("Error: No file provided")
        return None

    try:
        # Plain paths are used directly. This check must come first because
        # pathlib.Path also has a `.name`, but it is only the basename.
        if isinstance(file_obj, (str, os.PathLike)):
            csv_path = file_obj
        else:
            csv_path = file_obj.name

        df = pd.read_csv(csv_path)
        print(f"Data loaded successfully with {df.shape[0]} rows and {df.shape[1]} columns.")
        return df
    except FileNotFoundError:
        print("Error: File not found")
        return None
    except Exception as e:
        # Deliberately broad: any parse/IO problem is reported and mapped to None.
        print(f"Error loading data: {e}")
        return None

# Extract GDP columns from the dataframe
def extract_gdp_columns(df):
    """
    Return the names of the columns that hold nominal NSDP per-capita values.

    A column qualifies when its name contains the text
    'NSDP Per Capita (Nominal)'. The result is a list in the frame's own
    column order (empty when nothing matches).
    """
    marker = 'NSDP Per Capita (Nominal)'
    return list(filter(lambda column: marker in column, df.columns))

# Function to extract year from column name
def extract_year(col_name):
    """
    Parse the starting year out of a GDP column name.

    The text after the last ')' is taken as the year label. For a range
    label such as '2011-12' the part before the first '-' is used; otherwise
    the whole label is parsed. Raises ValueError when that text is not an
    integer.
    """
    label = col_name.rpartition(')')[2]
    # partition() leaves the label untouched when it contains no '-'
    start_text = label.partition('-')[0]
    return int(start_text)

# Function to clean GDP values
def clean_gdp_value(value):
    """
    Turn a formatted GDP string into a float.

    Every character other than digits and '.' (currency symbol, commas,
    spaces, ...) is discarded before parsing. Strings that leave nothing
    parseable become NaN. Non-string inputs are returned unchanged.
    """
    if not isinstance(value, str):
        return value

    numeric_text = ''.join(filter(lambda ch: ch == '.' or ch.isdigit(), value))
    try:
        return float(numeric_text)
    except ValueError:
        return np.nan

# Prepare time series data with proper cleaning
def prepare_time_series_data(df, gdp_columns):
    """
    Reshape the wide state table into long (State, Year, GDP_Per_Capita) rows.

    Parameters
    ----------
    df : pandas.DataFrame
        Wide table with a 'State Name' column and one column per year.
    gdp_columns : list of str
        Names of the per-year GDP columns; each is parsed with extract_year().

    Returns
    -------
    pandas.DataFrame
        Columns 'State', 'Year', 'GDP_Per_Capita'. Rows whose cleaned GDP is
        NaN are dropped. The frame always has these three columns, even when
        there are no rows or no GDP columns.
    """
    output_columns = ['State', 'Year', 'GDP_Per_Capita']

    # The year depends only on the column name, so parse it once per column
    # instead of once per (row, column) pair.
    column_years = [(col, extract_year(col)) for col in gdp_columns]

    records = []
    for _, row in df.iterrows():
        state = row['State Name']
        for col, year in column_years:
            records.append({
                'State': state,
                'Year': year,
                'GDP_Per_Capita': clean_gdp_value(row[col]),
            })

    # Passing `columns` keeps the schema when `records` is empty; without it
    # the dropna(subset=...) below would fail on a column-less frame.
    ts_df = pd.DataFrame(records, columns=output_columns)
    return ts_df.dropna(subset=['GDP_Per_Capita'])

# Fixed feature engineering function
def create_features(df):
    """
    Build the per-state lag/growth features used for modelling.

    Parameters
    ----------
    df : pandas.DataFrame
        Long-format frame with 'State', 'Year' and 'GDP_Per_Capita' columns.
        The index is expected to be unique (features are aligned back onto
        the input rows by index). The input is not modified.

    Returns
    -------
    pandas.DataFrame
        The input rows plus 'GDP_Lag1', 'GDP_Lag2', 'Growth_Rate',
        'Growth_Rate_Lag1' and 'Rolling_Avg_3Y', with 'State' one-hot encoded
        (first category dropped). Rows with any NaN are removed, which drops
        the first two years of every state and every row of a state with
        fewer than three observations.

    Notes
    -----
    All features are computed on each state's rows ordered by 'Year', so the
    result does not depend on the row order of ``df``.

    NOTE(review): 'Growth_Rate' and 'Rolling_Avg_3Y' are derived from the
    same row's 'GDP_Per_Capita', which run_analysis() uses as the target.
    That looks like target leakage; confirm whether it is intended.
    """
    df_features = df.copy()

    # Work on a year-ordered view; results are aligned back by index.
    ordered = df_features.sort_values(['State', 'Year'])
    gdp_by_state = ordered.groupby('State', sort=False)['GDP_Per_Capita']

    lag1 = gdp_by_state.shift(1)
    lag2 = gdp_by_state.shift(2)
    growth = ordered['GDP_Per_Capita'] / lag1 - 1
    # Shift the growth series within each state, in the same year order.
    growth_lag1 = growth.groupby(ordered['State'], sort=False).shift(1)
    rolling_avg = gdp_by_state.transform(
        lambda s: s.rolling(window=3, min_periods=1).mean()
    )

    # Always create every feature column so that states (or whole datasets)
    # without enough history end up as NaN rows and are dropped below.
    df_features['GDP_Lag1'] = lag1
    df_features['GDP_Lag2'] = lag2
    df_features['Growth_Rate'] = growth
    df_features['Growth_Rate_Lag1'] = growth_lag1
    df_features['Rolling_Avg_3Y'] = rolling_avg

    # Drop rows with NaN (first two years for each state)
    df_features = df_features.dropna()

    # Create state encoding
    df_features = pd.get_dummies(df_features, columns=['State'], drop_first=True)

    return df_features

# Function to calculate Gini coefficient
def calculate_gini(array):
    """
    Calculate the Gini coefficient of inequality.

    The coefficient is 1 minus twice the area under the Lorenz curve, where
    the curve is built from the sorted values and starts at the origin
    (0 % of the population holds 0 % of the total).

    Parameters
    ----------
    array : array-like of numbers
        The values (e.g. per-state GDP per capita) to compare.

    Returns
    -------
    float
        0 for perfect equality, approaching 1 as the total concentrates in a
        single element. Returns 0 when fewer than two values are given and
        NaN when the values sum to zero (the shares are undefined).
    """
    values = np.sort(np.asarray(array, dtype=float).ravel())
    n = values.size
    if n < 2:
        return 0  # Need at least 2 points to calculate inequality

    total = values.sum()
    if total == 0:
        return np.nan  # income shares are undefined

    # Lorenz curve including the (0, 0) origin; leaving the origin out drops
    # the first trapezoid and overstates the coefficient.
    cumulative_people = np.arange(n + 1) / n
    cumulative_income = np.concatenate(([0.0], np.cumsum(values) / total))

    # Trapezoidal area under the Lorenz curve
    area = np.sum(
        (cumulative_income[:-1] + cumulative_income[1:]) / 2 * np.diff(cumulative_people)
    )
    return 1 - 2 * area

# Function to forecast future GDP
def forecast_future_gdp(best_model, scaler, feature_df, ts_df, forecast_years=3):
    """
    Generate GDP forecasts for future years using the trained model.

    Forecasts are produced recursively: each predicted value is appended to
    the state's history and feeds the features of the following year.

    Parameters
    ----------
    best_model : object with ``predict(X)``
        Fitted regressor; ``predict`` must return one value per input row.
    scaler : object with ``transform(X)``
        Fitted feature scaler, applied to every single-row feature frame.
    feature_df : pandas.DataFrame
        Training feature frame. Its columns other than 'GDP_Per_Capita' and
        'Year' define the feature set and column order fed to the model.
    ts_df : pandas.DataFrame
        Long-format history with 'State', 'Year' and 'GDP_Per_Capita'.
    forecast_years : int, default 3
        Number of years to forecast past the latest year in ``ts_df``.

    Returns
    -------
    pandas.DataFrame
        Columns 'State', 'Year', 'Forecasted_GDP', one row per state and
        forecast year. States with fewer than three observations, or without
        an observation in the latest year, are skipped.

    Notes
    -----
    For every step the features come from the last three known values
    (history plus earlier predictions): the two lags, the growth between
    them, the growth one step earlier, and their three-value mean.
    State indicator columns are taken from the training columns, so the
    encoding always matches what the model was fitted on.
    """
    latest_year = ts_df['Year'].max()

    # Feature set and order are fixed by the training frame; compute once.
    training_columns = feature_df.drop(['GDP_Per_Capita', 'Year'], axis=1).columns

    records = []
    for state in ts_df['State'].unique():
        state_data = ts_df[ts_df['State'] == state].sort_values('Year')

        # Three observations are needed for two lags plus a lagged growth rate
        if len(state_data) < 3:
            continue

        # Only forecast states whose history reaches the latest year
        if len(state_data[state_data['Year'] == latest_year]) == 0:
            continue

        # Known values in year order; predictions are appended as we go.
        history = list(state_data['GDP_Per_Capita'].to_numpy(dtype=float))
        state_column = f'State_{state}'
        state_records = []

        for step in range(1, forecast_years + 1):
            lag1, lag2, lag3 = history[-1], history[-2], history[-3]

            row = {
                'GDP_Lag1': lag1,
                'GDP_Lag2': lag2,
                'Growth_Rate': lag1 / lag2 - 1,
                'Growth_Rate_Lag1': lag2 / lag3 - 1,
                'Rolling_Avg_3Y': np.mean(history[-3:]),
            }

            # One-hot state columns exactly as they exist in the training
            # data; any other unknown training column defaults to 0.
            for col in training_columns:
                if col not in row:
                    row[col] = 1 if col == state_column else 0

            # Select and order columns like the training data
            pred_features = pd.DataFrame([row], columns=training_columns)

            pred_features_scaled = scaler.transform(pred_features)
            pred_gdp = best_model.predict(pred_features_scaled)[0]

            history.append(pred_gdp)
            state_records.append({
                'State': state,
                'Year': latest_year + step,
                'Forecasted_GDP': pred_gdp,
            })

        records.extend(state_records)

    # Build the frame once instead of concatenating row by row
    return pd.DataFrame(records, columns=['State', 'Year', 'Forecasted_GDP'])

# Create visualizations for the UI
def _encode_current_figure():
    """Render the active pyplot figure to PNG, close it, and return the PNG as base64 text."""
    buf = io.BytesIO()
    try:
        plt.savefig(buf, format='png')
    finally:
        # Always release the figure, even if rendering fails
        plt.close()
    return base64.b64encode(buf.getvalue()).decode('utf-8')


def create_visualizations(ts_df, y_test=None, y_pred=None, best_model_name=None, forecast_df=None):
    """
    Generate the analysis plots and return them as base64-encoded PNG strings.

    Parameters
    ----------
    ts_df : pandas.DataFrame
        Long-format history with 'State', 'Year' and 'GDP_Per_Capita'.
    y_test, y_pred : sequence of numbers, optional
        Actual and predicted values for the model-performance scatter plot.
    best_model_name : str, optional
        Model name shown in the scatter plot title.
    forecast_df : pandas.DataFrame, optional
        Forecast rows with 'State', 'Year' and 'Forecasted_GDP'.

    Returns
    -------
    dict
        Maps plot keys to base64 PNG text. 'top_states_trend' is always
        present. 'inequality_trend' and 'cv_trend' are included only when at
        least one year has two or more states. 'model_performance' needs
        y_test, y_pred and best_model_name. 'forecast' needs a non-empty
        forecast_df.
    """
    images = {}

    # 1. GDP trends for the five states with the highest latest-year GDP
    latest_year = ts_df['Year'].max()
    top_states = ts_df[ts_df['Year'] == latest_year].nlargest(5, 'GDP_Per_Capita')['State'].unique()
    top_states_data = ts_df[ts_df['State'].isin(top_states)]

    plt.figure(figsize=(10, 6))
    sns.lineplot(data=top_states_data, x='Year', y='GDP_Per_Capita', hue='State')
    plt.title('GDP Per Capita Trends for Top 5 States')
    plt.xlabel('Year')
    plt.ylabel('GDP Per Capita')
    plt.grid(True, alpha=0.3)
    images['top_states_trend'] = _encode_current_figure()

    # 2. Inequality metrics per year (only years with at least two states)
    yearly_inequality = {}
    for year in sorted(ts_df['Year'].unique()):
        year_data = ts_df[ts_df['Year'] == year]['GDP_Per_Capita']
        if len(year_data) > 1:  # Need at least 2 states to calculate inequality
            yearly_inequality[year] = {
                'Gini': calculate_gini(year_data),
                'Max/Min Ratio': year_data.max() / year_data.min() if year_data.min() > 0 else np.nan,
                'Standard Deviation': year_data.std(),
                'Coefficient of Variation': year_data.std() / year_data.mean() if year_data.mean() > 0 else np.nan
            }

    inequality_df = pd.DataFrame(yearly_inequality).T

    # When no year qualifies the frame has no columns at all, so guard the
    # Gini plot the same way the coefficient-of-variation plot is guarded.
    if 'Gini' in inequality_df.columns:
        plt.figure(figsize=(10, 6))
        plt.plot(inequality_df.index, inequality_df['Gini'], marker='o')
        plt.title('Gini Coefficient Over Time (Higher = More Inequality)')
        plt.xlabel('Year')
        plt.ylabel('Gini Coefficient')
        plt.grid(True, alpha=0.3)
        images['inequality_trend'] = _encode_current_figure()

    # 3. Coefficient of variation over time
    if 'Coefficient of Variation' in inequality_df.columns:
        plt.figure(figsize=(10, 6))
        plt.plot(inequality_df.index, inequality_df['Coefficient of Variation'], marker='o', color='green')
        plt.title('Coefficient of Variation Over Time (Higher = More Disparity)')
        plt.xlabel('Year')
        plt.ylabel('Coefficient of Variation')
        plt.grid(True, alpha=0.3)
        images['cv_trend'] = _encode_current_figure()

    # 4. Model performance: actual vs. predicted (if available)
    if y_test is not None and y_pred is not None and best_model_name is not None:
        plt.figure(figsize=(10, 6))
        plt.scatter(y_test, y_pred, alpha=0.5)
        min_val = min(min(y_test), min(y_pred))
        max_val = max(max(y_test), max(y_pred))
        # Diagonal reference line: points on it are perfect predictions
        plt.plot([min_val, max_val], [min_val, max_val], 'r--')
        plt.xlabel('Actual GDP Per Capita')
        plt.ylabel('Predicted GDP Per Capita')
        plt.title(f'Actual vs. Predicted GDP Per Capita using {best_model_name}')
        plt.grid(True, alpha=0.3)
        images['model_performance'] = _encode_current_figure()

    # 5. Forecast for the top states (if available)
    if forecast_df is not None and len(forecast_df) > 0:
        plt.figure(figsize=(10, 6))

        for state in top_states:
            # Historical data
            hist_data = ts_df[ts_df['State'] == state]
            plt.plot(hist_data['Year'], hist_data['GDP_Per_Capita'], marker='o', label=f"{state} (Historical)")

            # Forecast data
            fc_data = forecast_df[forecast_df['State'] == state]
            if not fc_data.empty:
                plt.plot(fc_data['Year'], fc_data['Forecasted_GDP'], marker='*', linestyle='--',
                         label=f"{state} (Forecast)")

        plt.title('GDP Per Capita Forecast for Top 5 States')
        plt.xlabel('Year')
        plt.ylabel('GDP Per Capita')
        plt.legend()
        plt.grid(True, alpha=0.3)
        images['forecast'] = _encode_current_figure()

    return images

# Run analysis function - core function that processes the data and builds models
def run_analysis(file):
    """
    Run the full pipeline on an uploaded file: load, reshape, build features,
    compare models, forecast, plot, and summarise.

    Parameters
    ----------
    file : object accepted by load_data()
        The uploaded CSV file.

    Returns
    -------
    tuple
        Always a 7-tuple
        ``(status, model_results, metrics, viz_images, feature_importance,
        social_impact, forecast_df)``.
        On success ``status`` is "Analysis complete". On any failure
        ``status`` holds the error message (including a traceback for
        unexpected exceptions) and the other six items are ``None``.
        ``feature_importance`` is ``None`` when the chosen model has no
        ``feature_importances_`` attribute.

    Side effects
    ------------
    Rebinds the module globals ``global_ts_df`` (as soon as the time series
    is prepared) and ``global_forecast_df`` (after forecasting).
    """
    global global_forecast_df, global_ts_df

    try:
        # Load data from uploaded file
        df = load_data(file)
        if df is None:
            return "Error loading data file", None, None, None, None, None, None

        # Extract GDP columns
        gdp_columns = extract_gdp_columns(df)
        if not gdp_columns:
            return "No GDP columns found in the data", None, None, None, None, None, None

        # Prepare time series data
        ts_df = prepare_time_series_data(df, gdp_columns)
        if len(ts_df) == 0:
            return "Could not prepare time series data", None, None, None, None, None, None

        # Store the original time series data globally
        # (this happens before modelling, so it is kept even if a later step fails)
        global_ts_df = ts_df

        # Create features for modeling
        try:
            feature_df = create_features(ts_df)
            if len(feature_df) == 0:
                return "Not enough time series data for feature creation", None, None, None, None, None, None
        except Exception as e:
            return f"Error in feature creation: {str(e)}", None, None, None, None, None, None

        # Prepare data for modeling: every column except the target and Year is a feature
        X = feature_df.drop(['GDP_Per_Capita', 'Year'], axis=1)
        y = feature_df['GDP_Per_Capita']

        # Split data for training and testing
        # NOTE(review): no shuffle=False is passed, so (assuming scikit-learn's
        # default) rows are shuffled here and the TimeSeriesSplit below no
        # longer sees them in chronological order - confirm this is intended.
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

        # Scale the features (fit on the training split only)
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        # Define models
        models = {
            'Linear Regression': LinearRegression(),
            'Ridge Regression': Ridge(alpha=1.0),
            'Random Forest': RandomForestRegressor(n_estimators=100, random_state=42),
            'Gradient Boosting': GradientBoostingRegressor(n_estimators=100, learning_rate=0.1, random_state=42)
        }

        # Evaluate models using cross-validation
        results = {}
        # At most 5 folds, fewer when the training split is small
        tscv = TimeSeriesSplit(n_splits=min(5, len(X_train) // 2))

        model_results = ""
        for name, model in models.items():
            try:
                cv_scores = cross_val_score(model, X_train_scaled, y_train, cv=tscv,
                                          scoring='neg_mean_squared_error')
                # Scores are negated MSE, so flip the sign before the square root
                rmse_scores = np.sqrt(-cv_scores)
                results[name] = {
                    'cv_rmse_mean': rmse_scores.mean(),
                    'cv_rmse_std': rmse_scores.std()
                }
                model_results += f"{name} - CV RMSE: {rmse_scores.mean():.2f} (+/- {rmse_scores.std():.2f})\n"
            except Exception as e:
                # A failing model is reported in the text but does not stop the others
                model_results += f"Error evaluating {name}: {str(e)}\n"

        if not results:
            return "No models could be trained successfully", None, None, None, None, None, None

        # Train the best model (lowest mean cross-validated RMSE)
        best_model_name = min(results, key=lambda x: results[x]['cv_rmse_mean'])
        best_model = models[best_model_name]

        best_model.fit(X_train_scaled, y_train)

        # Make predictions
        y_pred = best_model.predict(X_test_scaled)

        # Calculate metrics
        mse = mean_squared_error(y_test, y_pred)
        rmse = np.sqrt(mse)
        mae = mean_absolute_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        metrics = f"Best Model: {best_model_name}\n"
        metrics += f"Mean Squared Error (MSE): {mse:.2f}\n"
        metrics += f"Root Mean Squared Error (RMSE): {rmse:.2f}\n"
        metrics += f"Mean Absolute Error (MAE): {mae:.2f}\n"
        metrics += f"R² Score: {r2:.4f}"

        # Generate future forecasts
        forecast_df = forecast_future_gdp(best_model, scaler, feature_df, ts_df)

        # Store forecast data globally
        global_forecast_df = forecast_df

        # Create visualizations
        viz_images = create_visualizations(ts_df, y_test, y_pred, best_model_name, forecast_df)

        # Feature importance (only models exposing feature_importances_)
        feature_importance = None
        if hasattr(best_model, 'feature_importances_'):
            feature_importance = pd.DataFrame({
                'Feature': X.columns,
                'Importance': best_model.feature_importances_
            }).sort_values('Importance', ascending=False)

        # Social impact analysis: per-year inequality metrics across states
        yearly_inequality = {}
        for year in sorted(ts_df['Year'].unique()):
            year_data = ts_df[ts_df['Year'] == year]['GDP_Per_Capita']
            if len(year_data) > 1:
                yearly_inequality[year] = {
                    'Gini': calculate_gini(year_data),
                    'Max/Min Ratio': year_data.max() / year_data.min() if year_data.min() > 0 else np.nan,
                    'Standard Deviation': year_data.std(),
                    'Coefficient of Variation': year_data.std() / year_data.mean() if year_data.mean() > 0 else np.nan
                }

        inequality_df = pd.DataFrame(yearly_inequality).T

        social_impact = "Economic Inequality Analysis:\n\n"
        if not inequality_df.empty:
            social_impact += f"Starting Gini Coefficient (earliest year): {inequality_df['Gini'].iloc[0]:.4f}\n"
            social_impact += f"Ending Gini Coefficient (latest year): {inequality_df['Gini'].iloc[-1]:.4f}\n"

            # Equal start and end values fall into the "DECREASED" branch
            if inequality_df['Gini'].iloc[-1] > inequality_df['Gini'].iloc[0]:
                social_impact += "Trend: Inequality has INCREASED over the observed period.\n\n"
            else:
                social_impact += "Trend: Inequality has DECREASED over the observed period.\n\n"

        social_impact += "States with highest predicted growth rates:\n"
        growth_predictions = {}

        for state in forecast_df['State'].unique():
            state_data = forecast_df[forecast_df['State'] == state].sort_values('Year')
            if len(state_data) >= 2:
                # Despite the names, these are the first and last forecasted GDP values
                first_year = state_data['Forecasted_GDP'].iloc[0]
                last_year = state_data['Forecasted_GDP'].iloc[-1]
                # Percentage change from the first to the last forecast year
                growth_rate = (last_year / first_year - 1) * 100
                growth_predictions[state] = growth_rate

        # Sort by growth rate and get top 5
        top_growth_states = sorted(growth_predictions.items(), key=lambda x: x[1], reverse=True)[:5]
        for state, growth in top_growth_states:
            social_impact += f"- {state}: {growth:.2f}%\n"

        social_impact += "\nPolicy Implications:\n"
        social_impact += "1. States with lower GDP per capita require targeted development initiatives\n"
        social_impact += "2. Focus on reducing inequality through inclusive growth strategies\n"
        social_impact += "3. Implement policies to boost growth in states with poor forecast projections\n"

        return "Analysis complete", model_results, metrics, viz_images, feature_importance, social_impact, forecast_df

    except Exception as e:
        # Catch-all so the UI gets a status message instead of an exception;
        # the traceback text is included in the returned status.
        import traceback
        return f"Error in analysis: {str(e)}\n{traceback.format_exc()}", None, None, None, None, None, None

# State prediction function for the UI
def predict_state(state_name, forecast_df):
    """
    Format the forecast rows of one state as display text.

    Returns a message instead of a forecast when there is no forecast data
    or when ``state_name`` does not match any state in ``forecast_df``
    (the message then lists the available states). Otherwise returns a
    header followed by one "Year YYYY: ₹value" line per forecast year, in
    ascending year order.
    """
    if forecast_df is None or len(forecast_df) == 0:
        return "No forecast data available. Please run the analysis first."

    known_states = forecast_df['State'].unique()
    if state_name not in known_states:
        return f"No data available for {state_name}. Available states: {', '.join(known_states)}"

    rows = forecast_df[forecast_df['State'] == state_name].sort_values('Year')
    lines = [
        f"Year {int(year)}: ₹{gdp:,.2f}\n"
        for year, gdp in zip(rows['Year'], rows['Forecasted_GDP'])
    ]
    return f"GDP Per Capita Forecast for {state_name}:\n\n" + ''.join(lines)

# Get states with highest and lowest projected growth
def get_growth_analysis(forecast_df):
    """
    Summarise projected growth per state as display text.

    Growth is the percentage change from a state's first to its last
    forecast year; states with fewer than two forecast rows are ignored.
    The text lists up to five states with the highest growth, then up to
    five with the lowest (both taken from the same descending ranking, so
    the lists overlap when there are fewer than ten states). Returns a
    message instead when no forecast data is available.
    """
    if forecast_df is None or len(forecast_df) == 0:
        return "No forecast data available. Please run the analysis first."

    # sort=False keeps states in order of first appearance, which decides
    # the order of ties in the ranking below.
    growth_by_state = {}
    for state, rows in forecast_df.groupby('State', sort=False):
        gdp_path = rows.sort_values('Year')['Forecasted_GDP']
        if len(gdp_path) >= 2:
            growth_by_state[state] = (gdp_path.iloc[-1] / gdp_path.iloc[0] - 1) * 100

    ranking = sorted(growth_by_state.items(), key=lambda pair: pair[1], reverse=True)
    leaders = ranking[:5]
    laggards = ranking[-5:]

    parts = ["States with Highest Projected Growth Rates:\n\n"]
    parts.extend(f"{state}: {rate:.2f}%\n" for state, rate in leaders)
    parts.append("\nStates with Lowest Projected Growth Rates:\n\n")
    parts.extend(f"{state}: {rate:.2f}%\n" for state, rate in laggards)
    return ''.join(parts)

# Create intervention recommendations
def generate_recommendations(forecast_df, ts_df):
    """
    Build the markdown policy-recommendation text.

    States whose GDP per capita in the latest year is below that year's
    mean are ranked by projected growth (first to last forecast year, in
    percent), lowest first. Up to five of them are listed with advice that
    depends on the growth band: negative, below 5 %, or otherwise. A fixed
    overall strategy section is always appended. Returns a message instead
    when forecast or history data is missing.
    """
    if forecast_df is None or len(forecast_df) == 0 or ts_df is None:
        return "No data available. Please run the analysis first."

    # States sitting under the latest year's mean GDP
    newest = ts_df[ts_df['Year'] == ts_df['Year'].max()]
    mean_gdp = newest['GDP_Per_Capita'].mean()
    lagging_states = newest[newest['GDP_Per_Capita'] < mean_gdp]['State'].tolist()

    # Projected growth per state over the forecast horizon
    projected = {}
    for state in forecast_df['State'].unique():
        gdp_path = forecast_df[forecast_df['State'] == state].sort_values('Year')['Forecasted_GDP']
        if len(gdp_path) < 2:
            continue
        projected[state] = (gdp_path.iloc[-1] / gdp_path.iloc[0] - 1) * 100

    # Keep only the lagging states, slowest growth first
    ranked = sorted(
        ((state, rate) for state, rate in projected.items() if state in lagging_states),
        key=lambda pair: pair[1],
    )

    urgent_advice = (
        "- **URGENT ACTION NEEDED**: Negative growth projection\n",
        "- Implement economic stimulus package\n",
        "- Develop infrastructure investment plan\n",
        "- Create special economic zones to attract investment\n\n",
    )
    high_advice = (
        "- **HIGH PRIORITY**: Low growth projection\n",
        "- Focus on skill development programs\n",
        "- Provide tax incentives for new businesses\n",
        "- Improve transportation and logistics infrastructure\n\n",
    )
    medium_advice = (
        "- **MEDIUM PRIORITY**: Moderate growth but still below average\n",
        "- Enhance existing growth drivers\n",
        "- Target specific sectors with growth potential\n",
        "- Implement inclusive growth policies\n\n",
    )

    parts = [
        "## Policy Intervention Recommendations\n\n",
        "### Priority States for Intervention:\n\n",
    ]

    for state, rate in ranked[:5]:
        parts.append(f"**{state}** - Current GDP below average, Projected Growth: {rate:.2f}%\n\n")
        if rate < 0:
            advice = urgent_advice
        elif rate < 5:
            advice = high_advice
        else:
            advice = medium_advice
        parts.extend(advice)

    parts.extend((
        "### Overall Inequality Reduction Strategy:\n\n",
        "1. **Progressive Investment Allocation**: Allocate development funds inversely proportional to GDP ranking\n",
        "2. **Targeted Skill Development**: Focus education and training programs in lower-GDP states\n",
        "3. **Infrastructure Equalization**: Prioritize connectivity and utilities in underserved regions\n",
        "4. **Industrial Incentives**: Provide stronger incentives for businesses to locate in developing states\n",
        "5. **Social Safety Net**: Expand social programs in areas with lowest GDP per capita\n",
    ))

    return ''.join(parts)

# Define UI functions
def handle_analysis(file):
    """
    Run the analysis for the UI and bundle all plots into one dictionary.

    NOTE: a later definition of ``handle_analysis`` in this file rebinds the
    name, so this variant is only in effect until that definition executes.

    Parameters
    ----------
    file : uploaded file object or None
        Passed straight to run_analysis().

    Returns
    -------
    tuple
        Always five items: ``(status, model_results, metrics, viz_dict,
        social_impact)``. ``viz_dict`` maps plot keys to
        "data:image/png;base64,..." strings and is empty when no file was
        given or no plots were produced.

    Side effects
    ------------
    Rebinds the module global ``global_forecast_df`` to the forecast returned
    by run_analysis() (``None`` when the analysis failed).
    """
    global global_forecast_df, global_ts_df

    if file is None:
        # Same arity as the normal return below
        return "Please upload a file first.", None, None, {}, None

    status, model_results, metrics, viz_images, feature_importance, social_impact, forecast_df = run_analysis(file)

    # Store the forecast_df for other functions to use
    global_forecast_df = forecast_df

    # Wrap each available plot as a data URI, in a fixed key order
    plot_keys = ('top_states_trend', 'inequality_trend', 'model_performance', 'forecast', 'cv_trend')
    viz_dict = {}
    if viz_images:
        for key in plot_keys:
            if key in viz_images:
                viz_dict[key] = f"data:image/png;base64,{viz_images[key]}"

    return status, model_results, metrics, viz_dict, social_impact

# Handle state prediction
def handle_state_prediction(state_name):
    """
    UI callback: return the stored forecast text for ``state_name``.

    Reads the module-level ``global_forecast_df``; when no analysis has
    stored a forecast yet, a prompt to run the analysis is returned instead.
    """
    stored_forecast = global_forecast_df
    if stored_forecast is not None:
        return predict_state(state_name, stored_forecast)
    return "Please run the analysis first to generate forecasts."

# Handle growth analysis
def handle_growth_analysis():
    """
    UI callback: return the growth-rate summary for the stored forecast.

    Reads the module-level ``global_forecast_df``; when no analysis has
    stored a forecast yet, a prompt to run the analysis is returned instead.
    """
    stored_forecast = global_forecast_df
    if stored_forecast is not None:
        return get_growth_analysis(stored_forecast)
    return "Please run the analysis first to generate forecasts."

# Handle recommendations
def handle_recommendations():
    """
    UI callback: return policy recommendations for the stored analysis.

    Needs both module-level caches (``global_forecast_df`` and
    ``global_ts_df``); if either is still ``None`` a prompt to run the
    analysis is returned instead.
    """
    stored_forecast, stored_history = global_forecast_df, global_ts_df
    if stored_forecast is not None and stored_history is not None:
        return generate_recommendations(stored_forecast, stored_history)
    return "Please run the analysis first to generate forecasts."

# Fixed handle_analysis function with proper visualization output handling
def handle_analysis(file):
    """
    UI callback: run the analysis and return one value per output widget.

    Returns a 9-tuple ``(status, model_results, metrics, top_states_viz,
    inequality_viz, model_perf_viz, forecast_viz, cv_viz, social_impact)``.
    Each ``*_viz`` item is a "data:image/png;base64,..." string, or ``None``
    when that plot was not produced. With no file, the status asks for an
    upload and every other item is ``None``.

    Also rebinds the module global ``global_forecast_df`` to the forecast
    returned by run_analysis() (``None`` when the analysis failed).
    """
    global global_forecast_df, global_ts_df

    if file is None:
        return ("Please upload a file first.",) + (None,) * 8

    outcome = run_analysis(file)
    status, model_results, metrics, viz_images, feature_importance, social_impact, forecast_df = outcome

    # Keep the forecast around for the other callbacks
    global_forecast_df = forecast_df

    # Plot keys in the order the outputs are returned
    plot_keys = ('top_states_trend', 'inequality_trend', 'model_performance', 'forecast', 'cv_trend')
    have_images = bool(viz_images) and isinstance(viz_images, dict)
    data_uris = [
        f"data:image/png;base64,{viz_images[key]}" if have_images and key in viz_images else None
        for key in plot_keys
    ]

    return (status, model_results, metrics, *data_uris, social_impact)

# Create the Gradio interface with fixed connections
def create_interface():
    """Assemble the four-tab Gradio Blocks app and bind each button to its handler.

    Tabs built here: "Run Analysis" (file upload, status box, model text, five
    images and the social-impact text), "State Predictions", "Growth Analysis"
    and "Policy Recommendations".

    Returns:
        The constructed ``gr.Blocks`` object; launching it is left to the caller.
    """
    with gr.Blocks(title="GDP Per Capita Analysis Tool") as demo:
        gr.Markdown("# State Economic Analysis and Forecast Tool")
        gr.Markdown("Upload a CSV file with state GDP per capita data to analyze trends and generate forecasts.")

        with gr.Tab("Run Analysis"):
            with gr.Row():
                csv_upload = gr.File(label="Upload CSV Data File")

            with gr.Row():
                run_button = gr.Button("Run Analysis", variant="primary")

            with gr.Row():
                status_box = gr.Textbox(label="Status", interactive=False)

            with gr.Accordion("Model Results", open=False):
                evaluation_box = gr.Textbox(label="Model Evaluation", interactive=False)
                metrics_box = gr.Textbox(label="Best Model Metrics", interactive=False)

            with gr.Accordion("Visualizations", open=True):
                with gr.Row():
                    top_states_plot = gr.Image(label="GDP Trends for Top States")
                    inequality_plot = gr.Image(label="Inequality Trend")

                with gr.Row():
                    performance_plot = gr.Image(label="Model Performance")
                    forecast_plot = gr.Image(label="GDP Forecast")

                with gr.Row():
                    cv_plot = gr.Image(label="Coefficient of Variation Trend")

            with gr.Accordion("Social Impact Analysis", open=False):
                social_box = gr.Textbox(label="Social Impact Analysis", interactive=False)

            # Order must mirror the tuple returned by handle_analysis
            analysis_targets = [
                status_box,
                evaluation_box,
                metrics_box,
                top_states_plot,
                inequality_plot,
                performance_plot,
                forecast_plot,
                cv_plot,
                social_box,
            ]
            run_button.click(fn=handle_analysis, inputs=[csv_upload], outputs=analysis_targets)

        with gr.Tab("State Predictions"):
            with gr.Row():
                state_box = gr.Textbox(label="Enter State Name")
                lookup_button = gr.Button("Get Prediction", variant="primary")

            with gr.Row():
                prediction_box = gr.Textbox(label="Prediction Results", interactive=False)

            lookup_button.click(fn=handle_state_prediction, inputs=[state_box], outputs=[prediction_box])

        with gr.Tab("Growth Analysis"):
            with gr.Row():
                growth_button = gr.Button("Analyze Growth Rates", variant="primary")

            with gr.Row():
                growth_box = gr.Textbox(label="Growth Analysis", interactive=False)

            growth_button.click(fn=handle_growth_analysis, inputs=[], outputs=[growth_box])

        with gr.Tab("Policy Recommendations"):
            with gr.Row():
                advice_button = gr.Button("Generate Recommendations", variant="primary")

            with gr.Row():
                advice_panel = gr.Markdown(label="Policy Recommendations")

            advice_button.click(fn=handle_recommendations, inputs=[], outputs=[advice_panel])

    return demo

# Add a debugging function for visualization issues
def _figure_to_base64(fig):
    """Render ``fig`` as PNG and return the image bytes base64-encoded as text."""
    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    return base64.b64encode(buf.getvalue()).decode('utf-8')


def debug_visualizations(ts_df, y_test=None, y_pred=None, best_model_name=None, forecast_df=None):
    """Generate a minimal set of plots to troubleshoot visualization problems.

    Args:
        ts_df: Frame with ``Year``, ``State`` and ``GDP_Per_Capita`` columns, or
            ``None``/empty to skip the top-states plot.
        y_test, y_pred, best_model_name, forecast_df: Accepted for signature
            compatibility; not used by this function.

    Returns:
        A dict of base64-encoded PNG strings. ``'test_viz'`` is a fixed sanity
        plot; ``'top_states_trend'`` is added when ``ts_df`` has rows for its
        latest year and that plot succeeds. Any failure outside the top-states
        section is printed and an empty dict is returned.
    """
    try:
        images = {}

        # 1. Simple test visualization to verify plotting works
        fig = plt.figure(figsize=(8, 5))
        try:
            plt.plot([1, 2, 3, 4], [10, 20, 30, 40], marker='o')
            plt.title('Test Visualization')
            plt.xlabel('X')
            plt.ylabel('Y')
            plt.grid(True)
            images['test_viz'] = _figure_to_base64(fig)
        finally:
            # Close even when plotting/saving raises, so figures do not pile up
            plt.close(fig)

        # 2. Try the top states trend with extra error handling
        if ts_df is not None and len(ts_df) > 0:
            try:
                fig = plt.figure(figsize=(10, 6))
                try:
                    latest_year = ts_df['Year'].max()
                    latest_rows = ts_df[ts_df['Year'] == latest_year]

                    if len(latest_rows) > 0:
                        # Up to five states with the highest GDP per capita in the latest year
                        top_states = latest_rows.nlargest(min(5, len(latest_rows)), 'GDP_Per_Capita')['State'].unique()
                        top_states_data = ts_df[ts_df['State'].isin(top_states)]

                        if len(top_states_data) > 0:
                            sns.lineplot(data=top_states_data, x='Year', y='GDP_Per_Capita', hue='State')
                            plt.title('GDP Per Capita Trends for Top States')
                            plt.xlabel('Year')
                            plt.ylabel('GDP Per Capita')
                            plt.grid(True, alpha=0.3)
                            images['top_states_trend'] = _figure_to_base64(fig)
                finally:
                    # Previously skipped on errors (e.g. a missing column), leaking the figure
                    plt.close(fig)
            except Exception as e:
                print(f"Error in top states visualization: {str(e)}")

        return images
    except Exception as e:
        print(f"Debug visualization error: {str(e)}")
        return {}

# Update run_analysis to include debugging
def run_analysis_with_debug(file):
    """Call ``run_analysis`` and substitute debug plots when it produced none.

    If the images returned by ``run_analysis`` are ``None`` or empty and a
    module-level ``global_ts_df`` is available, ``debug_visualizations`` is run
    and its non-empty result replaces the images.

    Returns:
        The same 7-tuple as ``run_analysis``. If anything raises, the first
        element is an error message with the traceback and the other six are
        ``None``.
    """
    try:
        (status, model_results, metrics, viz_images,
         feature_importance, social_impact, forecast_df) = run_analysis(file)

        # If visualizations failed, try to debug
        images_missing = viz_images is None or len(viz_images) == 0
        if images_missing:
            print("Visualizations failed, running debug function...")
            ts_frame = globals().get('global_ts_df')
            if ts_frame is not None:
                fallback = debug_visualizations(ts_frame, forecast_df=forecast_df)
                if fallback and len(fallback) > 0:
                    viz_images = fallback

        return status, model_results, metrics, viz_images, feature_importance, social_impact, forecast_df
    except Exception as e:
        import traceback
        message = f"Error in analysis with debug: {str(e)}\n{traceback.format_exc()}"
        return (message,) + (None,) * 6

# Modified main function
def main():
    """Enable DEBUG-level logging, build the Gradio app and launch it.

    The app is launched with ``share=True``, which additionally serves it
    through a temporary public share URL.
    """
    import logging

    # Enable debug messages
    logging.basicConfig(level=logging.DEBUG)

    # Build the interface and start serving it, with a public share link
    create_interface().launch(share=True)

# Start the app only when this code runs as the main program (not on import)
if __name__ == "__main__":
    main()

Colab notebook detected. To show errors in colab notebook, set debug=True in launch()
* Running on public URL: https://299ce1c5559a503430.gradio.live

This share link expires in 72 hours. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)
