## Import Necessary Libraries

In [None]:
import pandas as pd
import os

import model_metrics

print(model_metrics.__version__)

from model_tuner import loadObjects
from model_metrics import summarize_model_performance

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline

## Lasso Regression Metrics

In [None]:
# Directory (one level above the working directory) that holds the saved
# single-model regression artifacts.
model_path = os.path.join(os.pardir, "model_files/single_model_regression_results")

# Previously saved Lasso regression model object.
model_lasso = loadObjects(os.path.join(model_path, "lasso_regression_model.pkl"))

# Load the persisted test and validation splits, in the same order as before.
# NOTE: pd.read_parquet returns a DataFrame, so y_test / y_valid are
# DataFrames here rather than Series.
X_test, y_test, X_valid, y_valid = (
    pd.read_parquet(os.path.join(model_path, filename))
    for filename in (
        "X_test.parquet",
        "y_test.parquet",
        "X_valid.parquet",
        "y_valid.parquet",
    )
)

## Summarize Model Performance

In [None]:
# Models to summarize in this section: only the loaded Lasso model.
pipelines_or_models_lasso = [model_lasso]

# Display titles, one per entry in the list of models/predictions being
# summarized. Only the Lasso model is evaluated in this section, so exactly
# one title is supplied (the previous extra "Random Forest" title had no
# matching model or prediction array).
model_title = ["Lasso Regression"]

In [None]:
# One array of test-set predictions per model, in the same order as
# pipelines_or_models_lasso.
y_pred = [
    fitted_model.predict(X_test) for fitted_model in pipelines_or_models_lasso
]

In [None]:
# Summarize regression performance from the precomputed test-set predictions.
# This call supplies ``y_pred`` and ``y`` directly instead of the fitted
# model(s) and feature matrix.
summarize_model_performance(
    model_type="regression",
    model_title=model_title,
    y=y_test,
    y_pred=y_pred,
    # Optional arguments left disabled for this run:
    # model=pipelines_or_models_lasso,
    # X=X_test,
    # model_threshold=thresholds,
    # return_df=True,
    # overall_only=True,
    # decimal_places=2,
)

# NOTE: the result is not captured here. To keep it, assign the call to a
# variable and enable ``return_df=True`` as the later cells do.

In [None]:
# Load the diabetes dataset as a single DataFrame and separate the "target"
# column (label) from the remaining feature columns.
diabetes = load_diabetes(as_frame=True).frame
X = diabetes.drop(columns="target")
y = diabetes["target"]

# Hold out 20% of the rows for testing with a fixed seed.
# NOTE: this rebinds X_test / y_test, replacing any values assigned to those
# names earlier in the session.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Ordinary least squares on the raw (unscaled) features.
linear_model = LinearRegression().fit(X_train, y_train)

# Random forest on the raw (unscaled) features; seeded for repeatability.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train, y_train
)

# Ridge regression wrapped in a pipeline: the StandardScaler step is part of
# the model, so callers pass raw features and scaling happens inside.
ridge_model = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("estimator", Ridge(alpha=1.0)),
    ]
).fit(X_train, y_train)

# Score every model on the same raw test split; only the Ridge pipeline
# rescales its input, and it does so internally.
print("Linear Regression Score:", linear_model.score(X_test, y_test))
print("Ridge Regression Score:", ridge_model.score(X_test, y_test))
print("Random Forest Score:", rf_model.score(X_test, y_test))

In [None]:
# Test-set predictions for the linear and ridge models, in that order.
y_pred_2 = [
    estimator.predict(X_test) for estimator in (linear_model, ridge_model)
]

In [None]:
# Summarize the linear and ridge models together on the test split and keep
# the result (return_df=True) for display.
regression_metrics = summarize_model_performance(
    model_type="regression",
    model=[linear_model, ridge_model],
    model_title=["Linear Regression", "Ridge Regression"],
    X=X_test,
    y=y_test,
    overall_only=False,
    return_df=True,
    # Alternative input left disabled: precomputed predictions.
    # y_pred=y_pred_2,
)

# Show the resulting table as the cell output.
regression_metrics

In [None]:
# Summarize the random forest on its own and keep the result
# (return_df=True) for display.
regression_metrics = summarize_model_performance(
    model_type="regression",
    model=[rf_model],
    model_title=["Random Forest"],
    X=X_test,  # raw (unscaled) test features, as used to train the forest
    y=y_test,
    overall_only=False,
    return_df=True,
)

# Show the resulting table as the cell output.
regression_metrics

In [None]:
# Summarize all three models in a single call and keep the result
# (return_df=True) for display.
regression_metrics = summarize_model_performance(
    model_type="regression",
    model=[linear_model, ridge_model, rf_model],
    model_title=["Linear Regression", "Ridge Regression", "Random Forest"],
    # The same raw X_test is passed for every model; the Ridge pipeline
    # applies its own StandardScaler step internally.
    X=X_test,
    y=y_test,
    overall_only=False,
    return_df=True,
)

# Show the resulting table as the cell output.
regression_metrics