## Import Necessary Libraries

In [1]:
import pandas as pd
import os

import model_metrics

print(model_metrics.__version__)

from model_tuner import loadObjects
from model_metrics import summarize_model_performance

from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression, Ridge
from sklearn.ensemble import RandomForestRegressor
from sklearn.pipeline import Pipeline

0.0.2a


## Lasso Regression Metrics

In [2]:
# Directory that holds the pickled Lasso model and its saved evaluation splits.
model_path = os.path.join(os.pardir, "model_files/single_model_regression_results")

# Restore the previously fitted Lasso model from disk.
model_lasso = loadObjects(os.path.join(model_path, "lasso_regression_model.pkl"))

# Read the saved test and validation splits, in the same order as the
# target names on the left-hand side.
X_test, y_test, X_valid, y_valid = (
    pd.read_parquet(os.path.join(model_path, f"{split_name}.parquet"))
    for split_name in ("X_test", "y_test", "X_valid", "y_valid")
)

Object loaded!


## Summarize Model Performance

In [3]:
# Models to summarize in this section: only the loaded Lasso model.
pipelines_or_models_lasso = [
    model_lasso,
]

# Display titles, one per entry of pipelines_or_models_lasso, in the same order.
# (A stray "Random Forest" title with no matching model was removed.)
model_titles = ["Lasso Regression"]

In [4]:
# Print the regression metrics table for the loaded Lasso model, evaluated on
# the test split read from parquet. The return value is not captured.
summarize_model_performance(
    model=pipelines_or_models_lasso,
    model_titles=model_titles,
    model_type="regression",
    X=X_test,
    y=y_test,
    return_df=False,
)

Model Performance Metrics
-------------------------------------------------------------------------------------------------------------------
             Model |            Metric |      MAE |    MAPE |        MSE |     RMSE |   Expl. Var. |   R^2 Score
-------------------------------------------------------------------------------------------------------------------
  Lasso Regression |   Overall Metrics |   53.549 |   42.74 |   4106.445 |   64.082 |        0.316 |        0.29
-------------------------------------------------------------------------------------------------------------------


In [5]:
# Load the scikit-learn diabetes data as a single DataFrame, then separate the
# "target" column from the feature columns.
diabetes = load_diabetes(as_frame=True).frame
y = diabetes["target"]
X = diabetes.drop(columns=["target"])

# Hold out 20% of the rows for testing (seeded for repeatability).
# NOTE: this rebinds X_test / y_test, replacing the parquet-loaded frames of
# the same names used for the Lasso model earlier in the notebook.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Plain linear regression, fitted directly on the raw feature columns.
linear_model = LinearRegression().fit(X_train, y_train)

# Random forest on the same raw features; seeded so results are repeatable.
rf_model = RandomForestRegressor(n_estimators=100, random_state=42).fit(
    X_train, y_train
)

# Ridge regression behind a StandardScaler. The pipeline performs the scaling
# itself, so it is fitted and scored on the same raw frames as the others.
ridge_model = Pipeline(
    steps=[
        ("scaler", StandardScaler()),
        ("estimator", Ridge(alpha=1.0)),
    ]
).fit(X_train, y_train)

# Score every model on the identical held-out split.
print("Linear Regression Score:", linear_model.score(X_test, y_test))
print("Ridge Regression Score:", ridge_model.score(X_test, y_test))
print("Random Forest Score:", rf_model.score(X_test, y_test))

Linear Regression Score: 0.4526027629719195
Ridge Regression Score: 0.45414652070698225
Random Forest Score: 0.4428225673999313


In [6]:
# Summarize both linear models on the held-out split in a single call and keep
# the returned table. overall_only=False is passed so the table is not limited
# to the overall metrics rows.
regression_metrics = summarize_model_performance(
    model=[linear_model, ridge_model],
    model_titles=["Linear Regression", "Ridge Regression"],
    model_type="regression",
    X=X_test,
    y=y_test,
    overall_only=False,
    return_df=True,
)

# Show the returned table as the cell output.
regression_metrics

Unnamed: 0,Model,Metric,Variable,Coefficient,MAE,MAPE,MSE,RMSE,Expl. Var.,R^2 Score
,Linear Regression,Overall Metrics,,,42.794,37.5,2900.194,53.853,0.455,0.453
,Linear Regression,Coefficient,const,151.346,,,,,,
,Linear Regression,Coefficient,age,37.904,,,,,,
,Linear Regression,Coefficient,sex,-241.964,,,,,,
,Linear Regression,Coefficient,bmi,542.429,,,,,,
,Linear Regression,Coefficient,bp,347.704,,,,,,
,Linear Regression,Coefficient,s1,-931.489,,,,,,
,Linear Regression,Coefficient,s2,518.062,,,,,,
,Linear Regression,Coefficient,s3,163.42,,,,,,
,Linear Regression,Coefficient,s4,275.318,,,,,,


In [7]:
# Summarize the random forest alone on the held-out split and keep the
# returned table. overall_only=False is passed so the table is not limited to
# the overall metrics rows.
regression_metrics = summarize_model_performance(
    model=[rf_model],
    model_titles=["Random Forest"],
    model_type="regression",
    X=X_test,  # raw (unscaled) test features
    y=y_test,
    overall_only=False,
    return_df=True,
)

# Show the returned table as the cell output.
regression_metrics

Unnamed: 0,Model,Metric,Feat. Imp.,MAE,MAPE,MSE,RMSE,Expl. Var.,R^2 Score
,Random Forest,Overall Metrics,,44.053,40.005,2952.011,54.332,0.443,0.443
,Random Forest,Feat. Imp.,0.059,,,,,,
,Random Forest,Feat. Imp.,0.01,,,,,,
,Random Forest,Feat. Imp.,0.355,,,,,,
,Random Forest,Feat. Imp.,0.088,,,,,,
,Random Forest,Feat. Imp.,0.053,,,,,,
,Random Forest,Feat. Imp.,0.057,,,,,,
,Random Forest,Feat. Imp.,0.051,,,,,,
,Random Forest,Feat. Imp.,0.024,,,,,,
,Random Forest,Feat. Imp.,0.231,,,,,,


In [8]:
# Summarize all three fitted models in a single call and keep the returned
# table. overall_only=False is passed so the table is not limited to the
# overall metrics rows.
regression_metrics = summarize_model_performance(
    model=[linear_model, ridge_model, rf_model],
    model_titles=["Linear Regression", "Ridge Regression", "Random Forest"],
    model_type="regression",
    # Every model receives the same raw X_test; the Ridge pipeline applies its
    # own StandardScaler step internally.
    X=X_test,
    y=y_test,
    overall_only=False,
    return_df=True,
)

# Show the returned table as the cell output.
regression_metrics

Unnamed: 0,Model,Metric,Variable,Coefficient,Feat. Imp.,MAE,MAPE,MSE,RMSE,Expl. Var.,R^2 Score
,Linear Regression,Overall Metrics,,,,42.794,37.5,2900.194,53.853,0.455,0.453
,Linear Regression,Coefficient,const,151.346,,,,,,,
,Linear Regression,Coefficient,age,37.904,,,,,,,
,Linear Regression,Coefficient,sex,-241.964,,,,,,,
,Linear Regression,Coefficient,bmi,542.429,,,,,,,
,Linear Regression,Coefficient,bp,347.704,,,,,,,
,Linear Regression,Coefficient,s1,-931.489,,,,,,,
,Linear Regression,Coefficient,s2,518.062,,,,,,,
,Linear Regression,Coefficient,s3,163.42,,,,,,,
,Linear Regression,Coefficient,s4,275.318,,,,,,,
