# Import Libraries

In [1]:
import os

import pandas as pd
import numpy as np
from pathlib import Path

from datetime import datetime

from sklearn.linear_model import Lasso, Ridge, LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

ModuleNotFoundError: No module named 'pandas'

# Settings

In [None]:
# directory that holds the preprocessed data files
data_file_path: Path = Path("../data")

# name of the column the models are trained to predict
TARGET_VAR: str = "price_per_dozen"

def test_model(model, X_train, y_train, X_test, y_test):
    """
    Trains the given regression model, makes predictions, and evaluates performance.

    Prints the train and test R^2 scores followed by a one-row table of the
    test-set error metrics.

    Args:
        model: The ML model to train; it must provide ``fit`` and ``predict``.
        X_train: Training features.
        y_train: Training targets.
        X_test: Test features.
        y_test: Test targets.

    Returns:
        dict: The model's class name under "Model", plus the test-set
        "Mean Squared Error", "Root Mean Squared Error",
        "Mean Absolute Error" and "r-squared".
    """
    model.fit(X_train, y_train)
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)

    # accuracy_score is a classification metric and rejects continuous
    # targets with a ValueError, so goodness of fit is reported as R^2.
    r2_train = r2_score(y_train, y_pred_train)
    r2 = r2_score(y_test, y_pred_test)

    print(f"\n{model.__class__.__name__} on Enhanced Features:")
    print(f"Train R^2: {r2_train:.4f}")
    print(f"Test R^2:  {r2:.4f}")

    # Error metrics are computed on the held-out test set only
    mse = mean_squared_error(y_test, y_pred_test)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred_test)

    # Store metrics in a dictionary
    model_results = {
        "Model": model.__class__.__name__,
        "Mean Squared Error": mse,
        "Root Mean Squared Error": rmse,
        "Mean Absolute Error": mae,
        "r-squared": r2,
    }

    # Convert metrics into DataFrame for readability
    df_metrics = pd.DataFrame([model_results])

    print("\nEvaluation Metrics:")
    print(df_metrics.to_string(index=False))

    return model_results

# Load Data

In [None]:
# Read the model-ready dataset; the path is joined with pathlib's "/" operator
# instead of interpolating the directory into a string.
# NOTE(review): the original note here read "change the merged egg price" --
# presumably a reminder to point this at the merged egg-price file; confirm.
dataset = pd.read_csv(data_file_path / "data_ready_for_model.csv")

In [None]:
# Target (Y): kept as a single-column DataFrame holding only TARGET_VAR
Y = dataset[[TARGET_VAR]]
# Features (X): every remaining column
X = dataset.drop(columns=[TARGET_VAR])

# Hold out 30% of the rows for testing; the fixed seed makes the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=42,
)

# Candidate regressors, keyed by the label used in the results
models = {
    "Linear Regression": LinearRegression(),
    "Ridge Regression": Ridge(alpha=1.0),
    "Lasso Regression": Lasso(alpha=0.1),
}

# Fit and score each candidate, collecting its metrics under its label
results = {}

for name, model in models.items():
    results[name] = test_model(
        model,
        X_train=X_train,
        y_train=y_train,
        X_test=X_test,
        y_test=y_test,
    )