In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

def evaluate_prophet_model(df):
    """
    Fit Prophet on the first 80% of ``df`` and score it on the last 20%.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have columns ``ds`` (date) and ``y`` (actual value). Rows are
        split by position, so they are assumed to be in chronological order.

    Returns
    -------
    dict
        ``"MAE"``, ``"MSE"``, ``"RMSE"`` and ``"MAPE (%)"`` measured on the
        hold-out rows. MAPE ignores rows whose actual value is 0 (the ratio
        is undefined there) and is NaN when every actual value is 0.

    Raises
    ------
    ValueError
        If the training split would contain fewer than two rows.
    """
    # Local import: this cell sits above the cell that imports Prophet, so a
    # top-to-bottom run must not depend on that later import.
    from prophet import Prophet

    # Train-test split (last 20% as test)
    split_index = int(len(df) * 0.8)
    if split_index < 2:
        raise ValueError(
            f"Need at least 2 training rows to fit Prophet, got {split_index}"
        )
    train_df = df.iloc[:split_index]
    test_df = df.iloc[split_index:]

    # Fit Prophet
    model = Prophet()
    model.fit(train_df)

    # Predict at the hold-out timestamps themselves. make_future_dataframe()
    # steps one day at a time by default, which would compare yearly or
    # monthly observations against forecasts for the wrong dates.
    forecast = model.predict(test_df[["ds"]])

    # Plain arrays keep the arithmetic positional (no pandas index alignment)
    pred = forecast["yhat"].to_numpy(dtype=float)
    actual = test_df["y"].to_numpy(dtype=float)

    # Metrics
    mae = mean_absolute_error(actual, pred)
    mse = mean_squared_error(actual, pred)
    rmse = np.sqrt(mse)

    nonzero = actual != 0
    if nonzero.any():
        mape = np.mean(np.abs((actual[nonzero] - pred[nonzero]) / actual[nonzero])) * 100
    else:
        mape = float("nan")

    return {
        "MAE": mae,
        "MSE": mse,
        "RMSE": rmse,
        "MAPE (%)": mape
    }


In [None]:
!pip install prophet
!pip install statsmodels
!pip install scikit-learn




In [None]:
# Upload the dataset CSVs through Colab's file picker. Outside Colab the
# google.colab package does not exist; in that case nothing is uploaded and the
# CSV files are expected to already be in the working directory.
try:
    from google.colab import files
except ImportError:
    uploaded = {}
else:
    uploaded = files.upload()


In [None]:
# CSV file names passed one by one to load_and_prepare() by the evaluation loop.
# They are bare file names, so the files must be present in the working
# directory before the loop runs.
# NOTE(review): the names look like World Bank "API_<indicator>_DS2" exports
# (EG.FEC.RNEW.ZS, EG.USE.COMM.FO.ZS, EG.USE.ELEC.KH.PC) — confirm the source.
datasets = [
    "API_EG.FEC.RNEW.ZS_DS2_en_csv_v2_3233.csv",
    "API_EG.USE.COMM.FO.ZS_DS2_en_csv_v2_8577.csv",
    "API_EG.USE.ELEC.KH.PC_DS2_en_csv_v2_4464.csv"
]


In [None]:
import pandas as pd
import numpy as np

def load_and_prepare(path, min_points=10):
    """
    Read a wide "one column per year" CSV and return one row as a time series.

    The first 4 lines of the file are skipped, the first 4 columns are treated
    as metadata, and every remaining column header is parsed as a year. The
    first row that has at least ``min_points`` numeric values is returned.

    Parameters
    ----------
    path : str or file-like
        Anything accepted by ``pandas.read_csv``.
    min_points : int, default 10
        Minimum number of numeric year values a row needs to be selected.

    Returns
    -------
    pandas.DataFrame
        Columns ``ds`` (datetime, 1 January of each year) and ``y`` (float),
        with missing values and unparseable year headers dropped. The index is
        a fresh 0..n-1 range so downstream positional/label arithmetic lines
        up. An empty frame with the same two columns is returned when no row
        qualifies.
    """
    df = pd.read_csv(path, skiprows=4)  # DS2 files require skipping first 4 header rows

    # Year columns start from index 4 onward; parse the headers once, not per row.
    # Non-year headers (e.g. an unnamed trailing column) become NaT and are
    # removed by dropna() below.
    year_block = df.iloc[:, 4:]
    dates = pd.to_datetime(year_block.columns, errors='coerce', format='%Y')

    for _, row in year_block.iterrows():
        values = pd.to_numeric(row, errors='coerce')

        if values.notna().sum() >= min_points:
            # to_numpy() pairs dates and values by position instead of
            # inheriting the column labels as the frame's index.
            ts = pd.DataFrame({"ds": dates, "y": values.to_numpy()})
            return ts.dropna().reset_index(drop=True)

    # If no row found
    return pd.DataFrame(columns=["ds", "y"])


In [None]:
from prophet import Prophet
from sklearn.metrics import mean_absolute_error, mean_squared_error
import numpy as np

def evaluate_prophet(df):
    """
    Fit Prophet on the first 80% of ``df`` and evaluate it on the last 20%.

    Parameters
    ----------
    df : pandas.DataFrame
        Must have columns ``ds`` (date) and ``y`` (value). Rows are split by
        position, so they are assumed to be in chronological order.

    Returns
    -------
    dict
        ``"MAE"``, ``"MSE"``, ``"RMSE"``, ``"MAPE"`` (percent) computed on the
        hold-out rows, plus ``"model"`` (the fitted Prophet instance),
        ``"pred"`` (pandas Series of ``yhat`` for the hold-out rows) and
        ``"actual"`` (NumPy array of the hold-out ``y`` values). MAPE ignores
        rows whose actual value is 0 and is NaN when every actual value is 0.

    Raises
    ------
    ValueError
        If the training split would contain fewer than two rows.
    """
    split = int(len(df) * 0.8)
    if split < 2:
        raise ValueError(
            f"Need at least 2 training rows to fit Prophet, got {split}"
        )
    train = df.iloc[:split]
    test = df.iloc[split:]

    model = Prophet()
    model.fit(train)

    # Predict at the hold-out timestamps themselves. make_future_dataframe()
    # steps one day at a time by default, which would compare yearly or
    # monthly observations against forecasts for the wrong dates.
    forecast = model.predict(test[["ds"]])

    pred = forecast['yhat']
    actual = test['y'].to_numpy(dtype=float)

    # Plain array so the error arithmetic is positional, not index-aligned
    pred_values = pred.to_numpy(dtype=float)

    mae = mean_absolute_error(actual, pred_values)
    mse = mean_squared_error(actual, pred_values)
    rmse = np.sqrt(mse)

    nonzero = actual != 0
    if nonzero.any():
        mape = np.mean(
            np.abs((actual[nonzero] - pred_values[nonzero]) / actual[nonzero])
        ) * 100
    else:
        mape = float("nan")

    return {
        "MAE": mae,
        "MSE": mse,
        "RMSE": rmse,
        "MAPE": mape,
        "model": model,
        "pred": pred,
        "actual": actual
    }


In [None]:
from statsmodels.tsa.arima.model import ARIMA

def evaluate_arima(series, order=(5,1,0)):
    """
    Fit an ARIMA model on the first 80% of ``series`` and score the last 20%.

    Parameters
    ----------
    series : sequence or pandas.Series
        Observations; split by position, so assumed to be in time order.
    order : tuple, default (5, 1, 0)
        The ``order`` argument forwarded to ``ARIMA``.

    Returns
    -------
    dict
        ``"MAE"``, ``"MSE"``, ``"RMSE"``, ``"MAPE"`` (percent) on the hold-out
        part, plus ``"pred"`` (the forecast as returned by the fitted model)
        and ``"actual"`` (the hold-out slice of ``series``). MAPE ignores
        observations equal to 0 and is NaN when every hold-out value is 0.

    Raises
    ------
    ValueError
        If the training split would be empty.
    """
    split = int(len(series) * 0.8)
    if split < 1:
        raise ValueError(
            f"Need at least 2 observations to split and fit ARIMA, got {len(series)}"
        )
    train, test = series[:split], series[split:]

    fitted = ARIMA(train, order=order).fit()
    forecast = fitted.forecast(steps=len(test))

    # Compare by position. Subtracting two pandas Series aligns them on index
    # labels, and the forecast's index need not match the hold-out's index;
    # mismatched labels would turn every error term into NaN.
    actual_values = np.asarray(test, dtype=float)
    pred_values = np.asarray(forecast, dtype=float)

    mae = mean_absolute_error(actual_values, pred_values)
    mse = mean_squared_error(actual_values, pred_values)
    rmse = np.sqrt(mse)

    nonzero = actual_values != 0
    if nonzero.any():
        mape = np.mean(
            np.abs((actual_values[nonzero] - pred_values[nonzero]) / actual_values[nonzero])
        ) * 100
    else:
        mape = float("nan")

    return {
        "MAE": mae,
        "MSE": mse,
        "RMSE": rmse,
        "MAPE": mape,
        "pred": forecast,
        "actual": test
    }


In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def classification_metrics(actual, predicted):
    """
    Score a regression forecast as a binary "above / not above the mean" task.

    Both inputs are binarised against the mean of ``actual`` (1 when strictly
    greater than the mean, else 0) and the resulting labels are compared.

    Parameters
    ----------
    actual : array-like
        Observed values.
    predicted : array-like
        Forecast values, same length as ``actual``.

    Returns
    -------
    dict
        ``"Accuracy"``, ``"Precision"``, ``"Recall"`` and ``"F1 Score"``.
        Precision, recall and F1 are computed with ``zero_division=0``.
    """
    observed = np.array(actual)
    forecasted = np.array(predicted)

    # The cut-off comes from the observed values only and is applied to both sides
    cutoff = observed.mean()
    true_labels = (observed > cutoff).astype(int)
    pred_labels = (forecasted > cutoff).astype(int)

    scores = {"Accuracy": accuracy_score(true_labels, pred_labels)}
    for label, scorer in (
        ("Precision", precision_score),
        ("Recall", recall_score),
        ("F1 Score", f1_score),
    ):
        scores[label] = scorer(true_labels, pred_labels, zero_division=0)

    return scores


In [None]:
def print_metrics(title, metrics):
    """
    Print a titled, indented listing of a metrics mapping.

    A blank line and ``title`` are printed first, then one ``key: value`` line
    per entry. Values that are ``float`` instances are shown with four decimal
    places; every other value is shown with its default formatting.
    """
    print(f"\n{title}")
    for name in metrics:
        entry = metrics[name]
        rendered = f"{entry:.4f}" if isinstance(entry, float) else f"{entry}"
        print(f"  {name}: {rendered}")



In [None]:
print("\nüìä Evaluation Metrics for All Datasets\n")

# Only these entries of the evaluate_* result dicts are scalar scores. The
# other entries (fitted model, prediction and actual arrays) feed the
# classification step and would flood the output if printed as "metrics".
regression_keys = ("MAE", "MSE", "RMSE", "MAPE")

for file in datasets:
    print("==========================================")
    print(f"üìå Dataset: {file}")

    df = load_and_prepare(file)
    print(f"‚úî Loaded {len(df)} time-series points")

    # load_and_prepare() returns an empty frame when no row has enough numeric
    # values; neither model can be fitted on that, so skip the dataset instead
    # of aborting the whole run.
    if df.empty:
        print("  Skipped: no usable time series found in this file")
        print("==========================================\n")
        continue

    prophet_metrics = evaluate_prophet(df)
    arima_metrics = evaluate_arima(df['y'])

    # Print regression metrics (scalar scores only)
    print_metrics(
        "üîµ Prophet Regression Metrics:",
        {key: prophet_metrics[key] for key in regression_keys},
    )
    print_metrics(
        "üü† ARIMA Regression Metrics:",
        {key: arima_metrics[key] for key in regression_keys},
    )

    # Classification-style metrics: hold-out values and forecasts are
    # binarised inside classification_metrics() and compared as labels.
    prophet_class = classification_metrics(prophet_metrics["actual"], prophet_metrics["pred"])
    arima_class = classification_metrics(arima_metrics["actual"], arima_metrics["pred"])

    print_metrics("üîµ Prophet Classification Metrics:", prophet_class)
    print_metrics("üü† ARIMA Classification Metrics:", arima_class)

    print("==========================================\n")


In [None]:
# Spot-check: prepare one file on its own, then show the resulting frame and
# how many rows it holds.
df = load_and_prepare("API_EG.USE.ELEC.KH.PC_DS2_en_excel_v2_3856.csv")
print(df, f"Valid values: {len(df)}", sep="\n")
