In [1]:
import os

In [2]:
%pwd

'/Users/josianetsh/Downloads/josie-end-to-end-mlops-project/research'

In [3]:
os.chdir("../")

In [4]:
%pwd

'/Users/josianetsh/Downloads/josie-end-to-end-mlops-project'

In [5]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class HyperparameterConfig:
    root_dir: Path
    data_path: Path

In [6]:
from mlProject.constants import *
from mlProject.utils.common import read_yaml, create_directories

In [7]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])


    
    def get_hyperparameter_config(self) -> HyperparameterConfig:
        config = self.config.hyperparameter

        create_directories([config.root_dir])

        hyperparameter_config = HyperparameterConfig(
            root_dir=config.root_dir,
            data_path=config.data_path
        )

        return hyperparameter_config

In [8]:
import numpy as np
import ray
from ray import tune
from ray.train import report
from ray.tune.schedulers import ASHAScheduler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import ElasticNet
from sklearn.datasets import load_wine
from sklearn.preprocessing import StandardScaler

In [9]:
import pandas as pd
class HyperparameterTuning:
    def __init__(self, config: HyperparameterConfig):
        self.config = config


    def hypertuning(self): 
        data = pd.read_csv(self.config.data_path)
        X = data.drop(columns=['quality']) 
        y = data['quality']
        scaler = StandardScaler()
        X = scaler.fit_transform(X)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        return X_train, y_train

    def objective(self, config):
        X_train, y_train = self.hypertuning()
        
        model = ElasticNet(alpha=config["alpha"], l1_ratio=config["l1_ratio"], random_state=42)
        score = cross_val_score(model, X_train, y_train, cv=5, scoring="r2").mean()
        report({"r2_score": score})

    def run_tuning(self):
        search_space = {
            "alpha": tune.loguniform(1e-3, 1e1),  # Search between 0.001 and 10
            "l1_ratio": tune.uniform(0, 1)        # Search between 0 and 1
        }

        ray.init(ignore_reinit_error=True)  # Initialize Ray
        scheduler=ASHAScheduler()
        analysis = tune.run(
            self.objective,  # Function to optimize
            config=search_space,
            metric="r2_score",
            mode="max",  # Maximize R² score
            num_samples=50,  # Number of trials
            scheduler=scheduler
        )

        best_params = analysis.best_config
        print(f"Best Hyperparameters Found: {best_params}")

        return best_params  # Return the best parameters

In [None]:
try:
    config = ConfigurationManager()
    hyperparameter_config = config.get_hyperparameter_config()
    hyperparameter = HyperparameterTuning(config=hyperparameter_config)
    best_params = hyperparameter.run_tuning()
    print(f"Best Found Parameters: {best_params}")
except Exception as e:
    raise e

In [None]:
# 🚀 Step 1: Load and Preprocess Dataset
data = load_wine()
X = data.data  # Features
y = data.target  # Target variable

scaler = StandardScaler()
X = scaler.fit_transform(X)  # Standardize features

# Split data into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
def objective(config):
    """Train ElasticNet model with given hyperparameters and evaluate."""
    model = ElasticNet(alpha=config["alpha"], l1_ratio=config["l1_ratio"], random_state=42)
    
    # Perform cross-validation to evaluate model performance
    score = cross_val_score(model, X_train, y_train, cv=5, scoring="r2").mean()
    
    # Report the score to Ray Tune
    report({"r2_score": score})

In [None]:
# 🚀 Step 3: Define Search Space for Hyperparameters
search_space = {
    "alpha": tune.loguniform(1e-3, 1e1),  # Search between 0.001 and 10
    "l1_ratio": tune.uniform(0, 1)        # Search between 0 and 1
}

In [None]:
# 🚀 Step 4: Run Hyperparameter Tuning **Before Final Training**
ray.init(ignore_reinit_error=True)  # Initialize Ray
scheduler=ASHAScheduler()
analysis = tune.run(
    objective,  # Function to optimize
    config=search_space,
    metric="r2_score",
    mode="max",  # Maximize R² score
    num_samples=50,  # Number of trials
    scheduler=scheduler
)

2025-02-02 13:15:44,556	INFO worker.py:1476 -- Calling ray.init() again after it has already been called.
2025-02-02 13:15:44,557	INFO tune.py:645 -- [output] This uses the legacy output and progress reporter, as Jupyter notebooks are not supported by the new engine, yet. For more information, please see https://github.com/ray-project/ray/issues/36949


0,1
Current time:,2025-02-02 13:15:47
Running for:,00:00:03.00
Memory:,14.3/18.0 GiB

Trial name,status,loc,alpha,l1_ratio,iter,total time (s),r2_score
objective_b8050_00000,TERMINATED,127.0.0.1:71452,0.0394204,0.333384,1,0.00355911,0.870223
objective_b8050_00001,TERMINATED,127.0.0.1:71449,0.548407,0.00777416,1,0.00366807,0.839826
objective_b8050_00002,TERMINATED,127.0.0.1:71450,0.989669,0.194223,1,0.00521278,0.634392
objective_b8050_00003,TERMINATED,127.0.0.1:71451,0.0036066,0.895322,1,0.0128603,0.871651
objective_b8050_00004,TERMINATED,127.0.0.1:71452,0.0862072,0.76955,1,0.00373316,0.840103
objective_b8050_00005,TERMINATED,127.0.0.1:71453,0.0359554,0.933261,1,0.00398993,0.862316
objective_b8050_00006,TERMINATED,127.0.0.1:71454,0.0259093,0.794674,1,0.00353408,0.8676
objective_b8050_00007,TERMINATED,127.0.0.1:71453,0.00546309,0.90232,1,0.00349116,0.871926
objective_b8050_00008,TERMINATED,127.0.0.1:71456,0.0173314,0.956259,1,0.00519896,0.86867
objective_b8050_00009,TERMINATED,127.0.0.1:71457,0.253955,0.639989,1,0.00715017,0.755848


Trial name,r2_score
objective_b8050_00000,0.870223
objective_b8050_00001,0.839826
objective_b8050_00002,0.634392
objective_b8050_00003,0.871651
objective_b8050_00004,0.840103
objective_b8050_00005,0.862316
objective_b8050_00006,0.8676
objective_b8050_00007,0.871926
objective_b8050_00008,0.86867
objective_b8050_00009,0.755848


2025-02-02 13:15:47,423	ERROR worker.py:405 -- Unhandled error (suppress with 'RAY_IGNORE_UNHANDLED_ERRORS=1'): The worker died unexpectedly while executing this task. Check python-core-worker-*.log files for more information.
2025-02-02 13:15:47,564	INFO tune.py:1143 -- Total run time: 3.01 seconds (2.98 seconds for the tuning loop).


In [None]:
# Get the best hyperparameters
best_params = analysis.best_config
print(f"Best Hyperparameters Found: {best_params}")

# 🚀 Step 5: Train Final Model with Best Hyperparameters
best_model = ElasticNet(alpha=best_params["alpha"], l1_ratio=best_params["l1_ratio"], random_state=42)
best_model.fit(X_train, y_train)

# 🚀 Step 6: Evaluate Final Model
final_score = best_model.score(X_test, y_test)
print(f"Final Model R² Score: {final_score:.4f}")

Best Hyperparameters Found: {'alpha': 0.02972918069245933, 'l1_ratio': 0.05439806050504137}
Final Model R² Score: 0.8815
