# WEO Economic Data Analysis & Recession Prediction

**Objective:** Load World Economic Outlook (WEO) data, clean and transform it, then use machine learning models to predict global recessions.

**Workflow:**
1. Data loading and cleaning
2. Feature engineering and recession flagging
3. Exploratory data analysis
4. Model training with full and reduced feature sets (comparing 13 vs 5 features)
5. Economy-specific analysis (Upper vs Lower economies with both feature sets)
6. Future predictions for all scenarios

**Models Used:** Logistic Regression, Random Forest, Gradient Boosting, Decision Tree, optional XGBoost, and a soft-voting Ensemble (Linear SVM, KNN, Naive Bayes, and MLP are imported but not currently trained)

In [None]:
# Core data manipulation and visualization
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
# Machine learning models
from sklearn.model_selection import train_test_split, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.svm import LinearSVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier

from sklearn.metrics import (
    classification_report, 
    accuracy_score, 
    precision_score, 
    recall_score, 
    f1_score, 
    roc_auc_score,
    confusion_matrix
)

# File handling
import csv
from pathlib import Path

# Optional XGBoost
try:
    from xgboost import XGBClassifier
    HAS_XGB = True
except ImportError:
    HAS_XGB = False

# Optional pycountry for continent mapping
try:
    import pycountry
    import pycountry_convert as pc
    HAS_PYCOUNTRY = True
except ImportError:
    HAS_PYCOUNTRY = False

# 1. Data Loading

In [None]:
p = Path(r"data.csv")
if not p.exists():
    raise FileNotFoundError(p)

# Choose the text encoding: UTF-8 when the file decodes cleanly, Latin-1 otherwise.
# Only the first 8192 characters are kept as a sample for delimiter sniffing.
try:
    sample = p.read_text(encoding="utf-8")[:8192]
except UnicodeDecodeError:
    encoding = "latin-1"
    sample = p.read_text(encoding=encoding)[:8192]
else:
    encoding = "utf-8"

# Let csv.Sniffer guess the delimiter from the sample; fall back to a comma.
try:
    delim = csv.Sniffer().sniff(sample).delimiter
except Exception:
    delim = ","

read_options = {
    "sep": delim,
    "encoding": encoding,
    "low_memory": False,
    "parse_dates": True,
}
df = pd.read_csv(p, **read_options)
print("Shape:", df.shape)
df.head()

In [None]:
# Report the raw table dimensions and list every column header.
n_rows, n_cols = df.shape
print(f"Number of columns: {n_cols}")
print(f"Number of rows: {n_rows}")
print("\nColumn names:", list(df.columns))

# 2. Data Cleaning & Transformation

## Filter to Selected Economic Indicators

In [None]:
# Remove descriptive metadata columns; only the subject code, the country
# and the per-year value columns are kept for the reshaping step.
df.drop(columns=["WEO Country Code", "ISO", "Country/Series-specific Notes", "Subject Notes", 
                 "Units", "Scale", "Estimates Start After", "Subject Descriptor"], inplace=True)

# WEO subject codes to keep. Every indicator that a later cell indexes by name
# must be listed here, otherwise the column never reaches the pivoted table.
codes = {
    # Core growth & external
    "NGDP_RPCH", "NGDPRPC", "PCPIPCH", "TX_RPCH", "TM_RPCH", "BCA_NGDPD",
    # Fiscal & debt aggregates
    "GGR_NGDP", "GGX_NGDP", "GGXWDN_NGDP", "GGXWDG_NGDP",
    # Fiscal balances: used by the fiscal-stress recession flag and by the
    # reduced feature sets (previously missing, which caused a KeyError there)
    "GGXCNL_NGDP", "GGXONLB_NGDP",
    # Savings & investment
    "NGSD_NGDP", "NID_NGDP",
    # Prices
    "PCPI"
}

col = "WEO Subject Code"

if col not in df.columns:
    raise KeyError(f"Column {col!r} not found in dataframe")

# Compare on stripped string values so padded codes still match.
df = df[df[col].astype(str).str.strip().isin(codes)].copy()
print("shape after filter:", df.shape)
df

## Data Reshaping: Wide to Long to Wide

In [None]:
id_cols = ["WEO Subject Code", "Country"]

# Identify the year columns by name (exactly four digits) instead of by
# position, so the reshape does not depend on the column order of the file.
col_labels = df.columns.astype(str).str.strip()
year_cols = df.columns[col_labels.str.fullmatch(r"\d{4}")]
if len(year_cols) == 0:
    raise ValueError("No four-digit year columns found in dataframe")

# Strip thousands separators, then coerce to numbers (unparseable -> NaN).
df[year_cols] = df[year_cols].replace({',': ''}, regex=True)
df[year_cols] = df[year_cols].apply(pd.to_numeric, errors="coerce")

# Normalise country names into identifier-like tokens (literal replacements).
df["Country"] = (
    df["Country"]
    .str.replace(" ", "_", regex=False)
    .str.replace("'", "", regex=False)
    .str.replace("-", "_", regex=False)
)

# Wide -> long: one row per (indicator, country, year).
df_long = df.melt(id_vars=id_cols, value_vars=list(year_cols),
                  var_name="Year", value_name="Value")

df_long["Year"] = df_long["Year"].astype(str).str.strip().astype(int)

# The year columns are already numeric at this point, so a plain cast is
# enough; no second string clean-up pass is needed.
df_long["Value"] = df_long["Value"].astype(float)

# Long -> wide: one row per (country, year), one column per indicator.
df_pivot = df_long.pivot_table(
    index=["Country", "Year"],
    columns="WEO Subject Code",
    values="Value",
    aggfunc="first"
).reset_index()

df_pivot.columns.name = None
df_pivot = df_pivot.set_index("Year")

df_pivot

# 3. Feature Engineering

## Add Recession Target Variable

In [None]:
# List the distinct country names present in the pivoted table.
df_pivot["Country"].unique()

In [None]:
# --- Step 1: Define IMF-recognized global recession years ---
global_recession_years = [1982, 1991, 2009, 2020]

# --- Step 2: Ensure chronological order ---
df_pivot = df_pivot.sort_index()

# All year-over-year computations are done per country.
by_country = df_pivot.groupby("Country")

# --- Step 3a: GDP-based local recession flag
# Flags a year whose real GDP per capita fell AND whose following year fell too.
gdp_decline = by_country["NGDPRPC"].transform(lambda x: x.pct_change() < 0)
gdp_decline_next = by_country["NGDPRPC"].transform(lambda x: x.pct_change().shift(-1) < 0)
flag_gdp = (gdp_decline & gdp_decline_next).astype(int)

# --- Step 3b: Investment collapse (investment share drops by more than 2 points)
flag_invest = by_country["NID_NGDP"].transform(lambda x: x.diff() < -2).astype(int)

# --- Step 3c: Savings decline (savings share drops by more than 2 points)
flag_savings = by_country["NGSD_NGDP"].transform(lambda x: x.diff() < -2).astype(int)

# --- Step 3d: Trade shock (both exports and imports decline)
flag_trade = ((df_pivot["TX_RPCH"] < 0) & (df_pivot["TM_RPCH"] < 0)).astype(int)

# --- Step 3e: Fiscal stress (large deficit + rising debt)
# Use the reported overall balance when the indicator was loaded; otherwise
# derive it as revenue minus expenditure so the cell does not fail with a
# KeyError when GGXCNL_NGDP is absent from the table.
if "GGXCNL_NGDP" in df_pivot.columns:
    fiscal_balance = df_pivot["GGXCNL_NGDP"]
else:
    fiscal_balance = df_pivot["GGR_NGDP"] - df_pivot["GGX_NGDP"]

debt_rising = by_country["GGXWDN_NGDP"].transform(lambda x: x.diff() > 3)
flag_fiscal = ((fiscal_balance < -5) & debt_rising).astype(int)

# --- Step 3f: Inflation shock (stagflation scenario)
flag_inflation = ((df_pivot["PCPIPCH"] > 10) & gdp_decline).astype(int)

# --- Step 3g: Unified local recession flag
# An unemployment flag was referenced here before but never defined (and no
# unemployment indicator is loaded), so it is not part of the union.
local_recession_flag = (
    flag_gdp | flag_invest | flag_savings |
    flag_trade | flag_fiscal | flag_inflation
)

# --- Step 3h: Combine with global recession years
is_global_year = df_pivot.index.isin(global_recession_years).astype(int)
df_pivot["Recession"] = is_global_year | local_recession_flag

# --- Step 4: Clean dataset (drop rows with any missing indicator) ---
df_pivot = df_pivot.dropna().sort_index(ascending=True)

# --- Preview ---
df_pivot


In [None]:
# Years treated as global recessions
global_recession_years = [1982, 1991, 2009, 2020]

# Keep rows flagged as recession whose year is NOT one of the global years
is_recession = df_pivot["Recession"] == 1
in_global_year = df_pivot.index.isin(global_recession_years)
local_only_recessions = df_pivot[is_recession & ~in_global_year]
local_only_recessions


## Review Remaining Countries

In [None]:
# Distinct countries present in df_pivot at this point of the notebook.
df_pivot["Country"].unique()

## Split Training and Prediction Data

In [None]:
# Rows after 2024 form the prediction set. An untouched copy (with Country and
# Recession) is kept; the model input drops both columns.
after_2024 = df_pivot.index > 2024
df_predict_original = df_pivot.loc[after_2024].copy()
df_predict = df_predict_original.drop(columns=["Recession", "Country"])
df_predict

In [None]:
# Rows up to and including 2024 are used for training and evaluation.
up_to_2024 = df_pivot.index <= 2024
df_filtered = df_pivot.loc[up_to_2024]
df_filtered

# 4. Exploratory Data Analysis

## Correlation Heatmap

In [None]:
# Pairwise correlations between the numeric features (identifier and target excluded)
feature_frame = df_filtered.drop(columns=["Country", "Recession"])
corr = feature_frame.corr()

plt.figure(figsize=(10, 8))
sns.heatmap(
    corr,
    annot=True,
    fmt=".2f",
    cmap="coolwarm",
    square=True,
)
plt.title("Correlation Heatmap of Features")
plt.show()

## Prepare Features and Target

In [None]:
# Target is the Recession flag; features are every remaining column except Country.
y = df_filtered["Recession"]
X = df_filtered.drop(columns=["Recession", "Country"])

# 5. Machine Learning Models

## Global Dataset - Full Features (13 Features)

### Define and Train All Models

In [None]:
# ============================================================
#                  TRAINING FUNCTION with SMOTE + Metrics + Confusion Matrix
# ============================================================
from imblearn.over_sampling import SMOTE
from imblearn.pipeline import Pipeline as ImbPipeline
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score,
    confusion_matrix, ConfusionMatrixDisplay
)
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, VotingClassifier
from sklearn.tree import DecisionTreeClassifier
# from xgboost import XGBClassifier   # uncomment if you use XGBoost

def _default_model_params():
    """Return the default hyperparameters, keyed by short model name."""
    return {
        'logit': {
            'C': 0.2, 'penalty': 'l2', 'solver': 'lbfgs',
            'max_iter': 5000, 'random_state': 42
        },
        'rf': {
            'n_estimators': 200, 'max_depth': 4,
            'min_samples_leaf': 20, 'min_samples_split': 20,
            'max_features': 0.3, 'random_state': 42
        },
        'gb': {
            'n_estimators': 200, 'learning_rate': 0.03,
            'max_depth': 2, 'min_samples_leaf': 20,
            'subsample': 0.6, 'random_state': 42
        },
        'dt': {
            'max_depth': 3, 'min_samples_leaf': 30, 'random_state': 42
        },
        'xgb': {
            'n_estimators': 200, 'learning_rate': 0.05,
            'max_depth': 2, 'subsample': 0.7, 'colsample_bytree': 0.6,
            'reg_alpha': 0.4, 'reg_lambda': 2.0, 'random_state': 42,
            'use_label_encoder': False, 'eval_metric': 'logloss'
        }
    }


def _smote_pipeline(step_name, estimator, scale=False):
    """Wrap *estimator* in an imblearn pipeline: optional scaler, SMOTE, model."""
    steps = [("scaler", StandardScaler())] if scale else []
    steps.append(("smote", SMOTE(random_state=42)))
    steps.append((step_name, estimator))
    return ImbPipeline(steps)


def train_all_models(X_train, y_train, X_test, y_test, model_params=None, use_xgb=False, threshold=0.5):
    """
    Train several SMOTE-backed classifiers plus a soft-voting ensemble and
    evaluate them on the train and test sets.

    Parameters
    ----------
    X_train, y_train : training features and binary target.
    X_test, y_test : held-out features and binary target.
    model_params : dict or None
        Per-model hyperparameter dicts keyed by 'logit', 'rf', 'gb', 'dt'
        and 'xgb'. Keys that are missing fall back to the defaults; a key
        that is present replaces the default dict for that model entirely.
    use_xgb : bool
        Also train XGBoost (requires the optional ``xgboost`` package).
    threshold : float
        Probability cut-off applied to the positive class for every model
        that exposes ``predict_proba``.

    Returns
    -------
    (models, results_df, confusion_mats)
        ``models`` maps display name -> fitted estimator, ``results_df`` has
        one row per model (train/test accuracy, precision, recall, F1 on the
        test set) and ``confusion_mats`` maps display name -> test confusion
        matrix.

    Raises
    ------
    ImportError
        If ``use_xgb`` is True and xgboost is not installed.
    """
    # Merge caller overrides on top of the defaults so a partial dict works.
    params = _default_model_params()
    if model_params is not None:
        params.update(model_params)

    # Build pipelines ---------------------------------------------------
    # Step names ("logit", "rf", "gb", "dt") are kept stable because other
    # code looks the fitted estimators up through named_steps.
    pipelines = {
        "Logistic Regression": _smote_pipeline(
            "logit", LogisticRegression(**params['logit']), scale=True
        ),
        "Random Forest": _smote_pipeline("rf", RandomForestClassifier(**params['rf'])),
        "Gradient Boosting": _smote_pipeline("gb", GradientBoostingClassifier(**params['gb'])),
        "Decision Tree": _smote_pipeline("dt", DecisionTreeClassifier(**params['dt'])),
    }

    if use_xgb:
        # Imported lazily: xgboost is optional and may not be installed.
        try:
            from xgboost import XGBClassifier
        except ImportError as err:
            raise ImportError(
                "use_xgb=True requires the optional 'xgboost' package"
            ) from err
        pipelines["XGBoost"] = _smote_pipeline("xgb", XGBClassifier(**params['xgb']))

    # Train models ------------------------------------------------------
    models = {}
    for name, pipe in pipelines.items():
        pipe.fit(X_train, y_train)
        models[name] = pipe

    # Ensemble (soft voting only) ---------------------------------------
    # The voters are the full pipelines, not the bare final estimators:
    # VotingClassifier clones and refits whatever it is given, so passing the
    # bare estimators would retrain them without SMOTE (and without scaling
    # for the logistic model), unlike the individual models above.
    voters = [
        ("logit", models["Logistic Regression"]),
        ("rf", models["Random Forest"]),
        ("gb", models["Gradient Boosting"]),
    ]
    if use_xgb:
        voters.append(("xgb", models["XGBoost"]))

    ensemble = VotingClassifier(estimators=voters, voting="soft")
    ensemble.fit(X_train, y_train)
    models["Ensemble"] = ensemble

    # Compute metrics ---------------------------------------------------
    results = {}
    confusion_mats = {}

    for name, m in models.items():
        if hasattr(m, "predict_proba"):
            y_pred_train = (m.predict_proba(X_train)[:, 1] >= threshold).astype(int)
            y_pred_test = (m.predict_proba(X_test)[:, 1] >= threshold).astype(int)
        else:
            y_pred_train = m.predict(X_train)
            y_pred_test = m.predict(X_test)

        results[name] = {
            "Train Accuracy": accuracy_score(y_train, y_pred_train),
            "Test Accuracy": accuracy_score(y_test, y_pred_test),
            "Precision": precision_score(y_test, y_pred_test, zero_division=0),
            "Recall": recall_score(y_test, y_pred_test, zero_division=0),
            "F1": f1_score(y_test, y_pred_test, zero_division=0)
        }

        confusion_mats[name] = confusion_matrix(y_test, y_pred_test)

    results_df = pd.DataFrame(results).T
    return models, results_df, confusion_mats


# ============================================================
#             FEATURE IMPORTANCE PLOTTING
# ============================================================
def plot_feature_importance(models, feature_names, title_prefix=""):
    """
    Draw a 2x2 grid of horizontal bar charts, one per model.

    The logistic-regression panel shows signed coefficients ordered by
    absolute size; the Random Forest, Gradient Boosting and Decision Tree
    panels show ``feature_importances_`` in ascending order. Each model is
    looked up in *models* by display name and read through the pipeline's
    ``named_steps``. *title_prefix* is prepended to every panel title.
    """
    # Logistic regression: coefficients of the first (only) row of coef_.
    weights = models.get("Logistic Regression").named_steps['logit'].coef_[0]
    logit_table = pd.DataFrame({
        "Feature": feature_names,
        "Coefficient": weights,
        "Abs_Importance": np.abs(weights)
    }).sort_values("Abs_Importance")
    panels = [("Logistic Regression", logit_table, "Coefficient")]

    # Tree-based models: (display name, pipeline step name).
    tree_models = (
        ("Random Forest", 'rf'),
        ("Gradient Boosting", 'gb'),
        ("Decision Tree", 'dt'),
    )
    for label, step in tree_models:
        fitted = models.get(label).named_steps[step]
        table = pd.DataFrame({
            "Feature": feature_names,
            "Importance": fitted.feature_importances_
        }).sort_values("Importance")
        panels.append((label, table, "Importance"))

    # Panels are laid out row by row in the order collected above.
    fig, axes = plt.subplots(2, 2, figsize=(16, 10))
    for ax, (label, table, value_column) in zip(axes.flat, panels):
        ax.barh(table["Feature"], table[value_column])
        ax.set_title(f"{title_prefix}{label}")

    plt.tight_layout()
    plt.show()


# ============================================================
#                  PREDICTION FUNCTION
# ============================================================
def make_predictions(models, df_predict, use_threshold=True, threshold=0.20):
    """
    Collect the class predictions of every model into a single dataframe.

    Each column is named after a key of *models*; rows share the index of
    *df_predict*. When *use_threshold* is true and the model exposes
    ``predict_proba``, the label is 1 where the second probability column
    is at least *threshold*; otherwise the model's own ``predict`` is used.
    """
    def _labels(estimator):
        # Thresholding only applies to models that can output probabilities.
        if not (use_threshold and hasattr(estimator, 'predict_proba')):
            return estimator.predict(df_predict)
        positive_scores = estimator.predict_proba(df_predict)[:, 1]
        return (positive_scores >= threshold).astype(int)

    columns = {label: _labels(estimator) for label, estimator in models.items()}
    return pd.DataFrame(columns, index=df_predict.index)


# ============================================================
#             CONFUSION MATRIX DISPLAY FUNCTION (with metrics incl. Accuracy)
# ============================================================
def show_confusion_matrices(confusion_mats, results_df, threshold=0.5):
    """
    Plot every confusion matrix in *confusion_mats* in a single row.

    Each panel is titled with the model name and *threshold* (used for the
    label only) and carries the model's Test Accuracy, Precision, Recall and
    F1 from *results_df* as its x-axis label.
    """
    import matplotlib.pyplot as plt
    from sklearn.metrics import ConfusionMatrixDisplay

    panel_count = len(confusion_mats)
    # squeeze=False always yields a 2-D axes array, so one model needs no special case.
    fig, grid = plt.subplots(1, panel_count, figsize=(5*panel_count, 5), squeeze=False)

    metric_columns = ("Test Accuracy", "Precision", "Recall", "F1")
    for ax, name in zip(grid.ravel(), confusion_mats):
        ConfusionMatrixDisplay(confusion_matrix=confusion_mats[name]).plot(
            cmap="Blues", ax=ax, colorbar=False
        )

        # Look the four summary metrics up by model name.
        accuracy, precision, recall, f1 = (
            results_df.loc[name, column] for column in metric_columns
        )

        ax.set_title(f"{name}\n(threshold={threshold})")
        ax.set_xlabel(
            f"Acc={accuracy:.2f}, Prec={precision:.2f}, Rec={recall:.2f}, F1={f1:.2f}"
        )

    plt.tight_layout()
    plt.show()



### Train-Test Split

In [None]:
# Positional 80/20 split: the first 80% of rows train, the remainder tests.
split_index = int(len(X) * 0.8)
X_train, y_train = X.iloc[:split_index], y.iloc[:split_index]
X_test, y_test = X.iloc[split_index:], y.iloc[split_index:]

In [None]:
# Use one decision threshold for evaluation, forecasting and the plot labels.
# make_predictions defaults to 0.20, so without passing it explicitly the
# forecasts would be cut at a different level than the reported metrics.
decision_threshold = 0.5

models, summary_df, confusion_mats = train_all_models(
    X_train, y_train, X_test, y_test, threshold=decision_threshold
)
print(summary_df)
print(confusion_mats["Ensemble"])  # example: confusion matrix for ensemble

plot_feature_importance(models, X_train.columns.tolist())

predictions = make_predictions(models, df_predict, threshold=decision_threshold)
print(predictions.head())

# Show confusion matrices with metrics underneath
show_confusion_matrices(confusion_mats, summary_df, threshold=decision_threshold)

### Reduced Global Set

In [None]:
selected_features = ['TM_RPCH', 'GGXONLB_NGDP', 'TX_RPCH', 'GGXCNL_NGDP', 'PCPI']

# Restrict the existing train/test splits to the selected columns.
X_train_reduced, X_test_reduced = (
    part[selected_features] for part in (X_train, X_test)
)

# train_all_models returns (models, metrics table, confusion matrices)
reduced_fit = train_all_models(
    X_train_reduced, y_train, X_test_reduced, y_test, threshold=0.5
)
models_reduced, summary_df_reduced, confusion_mats_reduced = reduced_fit

# Metrics table
print(summary_df_reduced)


In [None]:
# Feature-importance panels for the models trained on the reduced feature set
plot_feature_importance(
    models_reduced,
    feature_names=selected_features,
    title_prefix="Reduced Features - ",
)


In [None]:
# Forecast-year predictions using only the selected feature columns
df_predict_restricted = df_predict[selected_features]
predictions_restricted = make_predictions(
    models_reduced,
    df_predict_restricted,
    threshold=0.5,
)
print(predictions_restricted)


In [None]:

# Confusion matrices of the reduced-feature models, drawn in one figure
show_confusion_matrices(
    confusion_mats_reduced,
    summary_df_reduced,
    threshold=0.5,
)

### Split Dataset by Economy Group

In [None]:
# Map countries to continents (same logic as before)
try:
    import pycountry
    import pycountry_convert as pc
    
    def country_to_continent(name):
        try:
            lookup_name = name.replace('_', ' ')
            country = pycountry.countries.lookup(lookup_name)
            alpha2 = country.alpha_2
            cc = pc.country_alpha2_to_continent_code(alpha2)
            continent_map = {
                'AF': 'Africa',
                'AS': 'Asia',
                'EU': 'Europe',
                'NA': 'North_America',
                'OC': 'Oceania',
                'SA': 'South_America'
            }
            return continent_map.get(cc, 'Unknown')
        except Exception:
            return 'Unknown'
except ImportError:
    # Fallback mapping for common countries (extend as needed)
    fallback = {
        'United_States': 'North_America', 'Canada': 'North_America', 'Mexico': 'North_America',
        'China': 'Asia', 'India': 'Asia', 'Japan': 'Asia', 'Afghanistan': 'Asia',
        'Korea': 'Asia', 'Indonesia': 'Asia', 'Thailand': 'Asia', 'Vietnam': 'Asia',
        'Germany': 'Europe', 'France': 'Europe', 'United_Kingdom': 'Europe', 'Italy': 'Europe',
        'Spain': 'Europe', 'Russia': 'Europe', 'Turkey': 'Europe', 'Poland': 'Europe',
        'Brazil': 'South_America', 'Argentina': 'South_America', 'Chile': 'South_America',
        'Colombia': 'South_America', 'Peru': 'South_America', 'Venezuela': 'South_America',
        'Australia': 'Oceania', 'New_Zealand': 'Oceania',
        'South_Africa': 'Africa', 'Nigeria': 'Africa', 'Egypt': 'Africa', 'Zimbabwe': 'Africa',
        'Kenya': 'Africa', 'Ethiopia': 'Africa', 'Morocco': 'Africa',
        # Additional countries...
        'Albania': 'Europe', 'Algeria': 'Africa', 'Austria': 'Europe', 'Barbados': 'North_America',
        'Belgium': 'Europe', 'Bolivia': 'South_America', 'Bosnia_and_Herzegovina': 'Europe',
        'Bulgaria': 'Europe', 'Cabo_Verde': 'Africa', 'Costa_Rica': 'North_America',
        'Croatia': 'Europe', 'Cyprus': 'Europe', 'Czech_Republic': 'Europe', 'Denmark': 'Europe',
        'Dominican_Republic': 'North_America', 'Estonia': 'Europe', 'Finland': 'Europe',
        'Hungary': 'Europe', 'Iceland': 'Europe', 'Ireland': 'Europe',
        'Islamic_Republic_of_Iran': 'Asia', 'Israel': 'Asia', 'Jordan': 'Asia',
        'Kazakhstan': 'Asia', 'Latvia': 'Europe', 'Lebanon': 'Asia', 'Lithuania': 'Europe',
        'Luxembourg': 'Europe', 'Malta': 'Europe', 'Netherlands': 'Europe',
        'North_Macedonia': 'Europe', 'Norway': 'Europe', 'Pakistan': 'Asia',
        'Panama': 'North_America', 'Paraguay': 'South_America', 'Portugal': 'Europe',
        'Romania': 'Europe', 'Saudi_Arabia': 'Asia', 'Serbia': 'Europe', 'Seychelles': 'Africa',
        'Slovak_Republic': 'Europe', 'Slovenia': 'Europe', 'Sweden': 'Europe',
        'Switzerland': 'Europe', 'Syria': 'Asia', 'Taiwan_Province_of_China': 'Asia',
        'Trinidad_and_Tobago': 'North_America', 'Türkiye': 'Europe', 'Uruguay': 'South_America'
    }
    
    def country_to_continent(name):
        return fallback.get(name.replace(' ', '_'), 'Unknown')

# --- Add Continent column ---
df_filtered_copy = df_pivot.copy()
df_filtered_copy['Continent'] = df_filtered_copy['Country'].astype(str).apply(country_to_continent)

# --- Map continents to economy groups ---
continent_to_economy = {
    'Europe': 'Upper_Economies',
    'North_America': 'Upper_Economies',
    'Oceania': 'Upper_Economies',
    'Africa': 'Lower_Economies',
    'Asia': 'Lower_Economies',
    'South_America': 'Lower_Economies'
}

df_filtered_copy['EconomyGroup'] = df_filtered_copy['Continent'].map(continent_to_economy)

# --- Create Lower and Upper economy DataFrames ---
df_Lower_Economies = df_filtered_copy[df_filtered_copy['EconomyGroup'] == 'Lower_Economies'].drop(columns=['Continent','EconomyGroup'])
df_Upper_Economies = df_filtered_copy[df_filtered_copy['EconomyGroup'] == 'Upper_Economies'].drop(columns=['Continent','EconomyGroup'])

# --- Print summary ---
print("Created economy-specific DataFrames:")
print(f" - Lower_Economies: df_Lower_Economies (rows: {len(df_Lower_Economies)})")
print(f" - Upper_Economies: df_Upper_Economies (rows: {len(df_Upper_Economies)})")


In [None]:
# Display the Lower-economies subset.
df_Lower_Economies

In [None]:
# Display the Upper-economies subset.
df_Upper_Economies

## Split Prediction Dataset by Economy Group

In [None]:
# Tag each forecast row with its continent, then with its economy group.
df_predict_original['Continent'] = df_predict_original['Country'].astype(str).map(country_to_continent)

continent_to_economy = {
    **dict.fromkeys(['Europe', 'North_America', 'Oceania'], 'Upper_Economies'),
    **dict.fromkeys(['Africa', 'Asia', 'South_America'], 'Lower_Economies'),
}

df_predict_original['EconomyGroup'] = df_predict_original['Continent'].map(continent_to_economy)

# Helper columns (and Country) are removed from the per-group prediction frames.
helper_columns = ['Continent', 'EconomyGroup', 'Country']
is_lower = df_predict_original['EconomyGroup'] == 'Lower_Economies'
is_upper = df_predict_original['EconomyGroup'] == 'Upper_Economies'

df_predict_lower = df_predict_original[is_lower].drop(columns=helper_columns)
df_predict_upper = df_predict_original[is_upper].drop(columns=helper_columns)

print("Created economy-specific prediction DataFrames from df_predict_original:")
print(f" - Lower_Economies predictions: {len(df_predict_lower)} rows")
print(f" - Upper_Economies predictions: {len(df_predict_upper)} rows")

df_predict_lower.head()
df_predict_upper.head()

# 6. Economy-Specific Analysis

## Upper Economies - Full Features

In [None]:
# Target and feature matrix for the Upper-economies subset
y = df_Upper_Economies["Recession"]
X = df_Upper_Economies.drop(columns=["Recession", "Country"])

# Positional 80/20 split: the first 80% of rows train, the remainder tests.
split_index = int(len(X) * 0.8)
X_train, y_train = X.iloc[:split_index], y.iloc[:split_index]
X_test, y_test = X.iloc[split_index:], y.iloc[split_index:]

# train_all_models returns (models, metrics table, confusion matrices)
upper_fit = train_all_models(X_train, y_train, X_test, y_test, threshold=0.5)
models_upper, summary_df_upper, confusion_mats_upper = upper_fit

# Metrics table
print(summary_df_upper)

# Feature importance panels
plot_feature_importance(
    models_upper,
    X_train.columns.tolist(),
    title_prefix="Upper Economies - ",
)

# Forecast-year predictions (target/identifier columns removed if still present)
X_predict_upper = df_predict_upper.drop(columns=["Recession", "Country"], errors='ignore')
predictions_upper = make_predictions(models_upper, X_predict_upper, threshold=0.5)
print(predictions_upper)

# Confusion matrices, all in one figure
show_confusion_matrices(confusion_mats_upper, summary_df_upper, threshold=0.5)


## Lower Economies - Full Features

In [None]:
# Target and feature matrix for the Lower-economies subset
y = df_Lower_Economies["Recession"]
X = df_Lower_Economies.drop(columns=["Recession", "Country"])

# Positional 80/20 split: the first 80% of rows train, the remainder tests.
split_index = int(len(X) * 0.8)
X_train, y_train = X.iloc[:split_index], y.iloc[:split_index]
X_test, y_test = X.iloc[split_index:], y.iloc[split_index:]

# train_all_models returns (models, metrics table, confusion matrices)
lower_fit = train_all_models(X_train, y_train, X_test, y_test, threshold=0.5)
models_lower, summary_df_lower, confusion_mats_lower = lower_fit

# Metrics table
print(summary_df_lower)

# Feature importance panels
plot_feature_importance(
    models_lower,
    X_train.columns.tolist(),
    title_prefix="Lower Economies - ",
)

# Forecast-year predictions (target/identifier columns removed if still present)
X_predict_lower = df_predict_lower.drop(columns=["Recession", "Country"], errors='ignore')
predictions_lower = make_predictions(models_lower, X_predict_lower, threshold=0.5)
predictions_lower.head()

# Confusion matrices, all in one figure
show_confusion_matrices(confusion_mats_lower, summary_df_lower, threshold=0.5)


## Upper Economies - Reduced Features

In [None]:
selected_features = ['TM_RPCH', 'GGXONLB_NGDP', 'TX_RPCH', 'GGXCNL_NGDP', 'PCPI']

# Upper-economies target and the reduced feature matrix
y_upper = df_Upper_Economies["Recession"]
X_upper = df_Upper_Economies[selected_features]

# Positional 80/20 split
split_index_upper = int(len(X_upper) * 0.8)
X_train_upper, X_test_upper = X_upper.iloc[:split_index_upper], X_upper.iloc[split_index_upper:]
y_train_upper, y_test_upper = y_upper.iloc[:split_index_upper], y_upper.iloc[split_index_upper:]

# train_all_models returns (models, metrics table, confusion matrices);
# note these names replace the full-feature Upper results.
upper_reduced_fit = train_all_models(
    X_train_upper, y_train_upper, X_test_upper, y_test_upper, threshold=0.5
)
models_upper, summary_df_upper, confusion_mats_upper = upper_reduced_fit

print("Upper Economies Accuracy (Reduced Features):")
print(summary_df_upper)

# Feature importance panels
plot_feature_importance(
    models_upper,
    feature_names=selected_features,
    title_prefix="Upper Economies - Reduced Features - ",
)

# Forecast-year predictions on the selected columns
X_predict_upper_reduced = df_predict_upper[selected_features]
predictions_upper_reduced = make_predictions(models_upper, X_predict_upper_reduced, threshold=0.5)
print("Predictions for Upper Economies (Reduced Features):")
print(predictions_upper_reduced)

# Confusion matrices, all in one figure
show_confusion_matrices(confusion_mats_upper, summary_df_upper, threshold=0.5)


## Lower Economies - Reduced Features

In [None]:
# Lower-economies target and the reduced feature matrix
# (selected_features is the list defined in an earlier cell)
y_lower = df_Lower_Economies["Recession"]
X_lower = df_Lower_Economies[selected_features]

# Positional 80/20 split
split_index_lower = int(len(X_lower) * 0.8)
X_train_lower, X_test_lower = X_lower.iloc[:split_index_lower], X_lower.iloc[split_index_lower:]
y_train_lower, y_test_lower = y_lower.iloc[:split_index_lower], y_lower.iloc[split_index_lower:]

# train_all_models returns (models, metrics table, confusion matrices);
# note these names replace the full-feature Lower results.
lower_reduced_fit = train_all_models(
    X_train_lower, y_train_lower, X_test_lower, y_test_lower, threshold=0.5
)
models_lower, summary_df_lower, confusion_mats_lower = lower_reduced_fit

print("Lower Economies Accuracy (Reduced Features):")
print(summary_df_lower)

# Feature importance panels
plot_feature_importance(
    models_lower,
    feature_names=selected_features,
    title_prefix="Lower Economies - Reduced Features - ",
)

# Forecast-year predictions on the selected columns
X_predict_lower_reduced = df_predict_lower[selected_features]
predictions_lower_reduced = make_predictions(models_lower, X_predict_lower_reduced, threshold=0.5)
print("Predictions for Lower Economies (Reduced Features):")
print(predictions_lower_reduced)

# Confusion matrices, all in one figure
show_confusion_matrices(confusion_mats_lower, summary_df_lower, threshold=0.5)
