In [16]:
import os

In [17]:
%pwd

'/Users/josianetsh/Downloads/josie-end-to-end-mlops-project'

In [18]:
os.chdir("../")

In [19]:
%pwd

'/Users/josianetsh/Downloads'

In [20]:
from dataclasses import dataclass
from pathlib import Path


@dataclass(frozen=True)
class HyperparameterConfig:
    root_dir: Path
    data_path: Path

In [21]:
from mlProject.constants import *
from mlProject.utils.common import read_yaml, create_directories

In [22]:
class ConfigurationManager:
    def __init__(
        self,
        config_filepath = CONFIG_FILE_PATH,
        params_filepath = PARAMS_FILE_PATH,
        schema_filepath = SCHEMA_FILE_PATH):

        self.config = read_yaml(config_filepath)
        self.params = read_yaml(params_filepath)
        self.schema = read_yaml(schema_filepath)

        create_directories([self.config.artifacts_root])


    
    def get_hyperparameter_config(self) -> HyperparameterConfig:
        config = self.config.hyperparameter

        create_directories([config.root_dir])

        hyperparameter_config = HyperparameterConfig(
            root_dir=config.root_dir,
            data_path=config.data_path
        )

        return hyperparameter_config

In [23]:
import numpy as np
import ray
from ray import tune
from ray.train import report
from ray.tune.schedulers import ASHAScheduler
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.linear_model import ElasticNet
from sklearn.datasets import load_wine
from sklearn.preprocessing import StandardScaler

In [24]:
import pandas as pd
data = pd.read_csv('artifacts/data_ingestion/winequality-red.csv')

X = data.drop(columns=['quality']) 
y = data['quality']
scaler = StandardScaler()
X = scaler.fit_transform(X)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)



FileNotFoundError: [Errno 2] No such file or directory: 'artifacts/data_ingestion/winequality-red.csv'

In [None]:
def objective(config):
    """Train ElasticNet model with given hyperparameters and evaluate."""
    model = ElasticNet(alpha=config["alpha"], l1_ratio=config["l1_ratio"], random_state=42)
    
    # Perform cross-validation to evaluate model performance
    score = cross_val_score(model, X_train, y_train, cv=5, scoring="r2").mean()
    
    # Report the score to Ray Tune
    report({"r2_score": score})

In [None]:
# 🚀 Step 3: Define Search Space for Hyperparameters
search_space = {
    "alpha": tune.loguniform(1e-3, 1e1),  # Search between 0.001 and 10
    "l1_ratio": tune.uniform(0, 1)        # Search between 0 and 1
}

In [None]:

ray.init(ignore_reinit_error=True)  # Initialize Ray

scheduler=ASHAScheduler()
analysis = tune.run(
    objective,  # Function to optimize
    config=search_space,
    metric="r2_score",
    mode="max",  # Maximize R² score
    num_samples=50,  # Number of trials
    scheduler=scheduler
)

In [27]:
best_params = analysis.best_config
print(f"Best Hyperparameters Found: {best_params}")

# 🚀 Step 5: Train Final Model with Best Hyperparameters
best_model = ElasticNet(alpha=best_params["alpha"], l1_ratio=best_params["l1_ratio"], random_state=42)
best_model.fit(X_train, y_train)

# 🚀 Step 6: Evaluate Final Model
final_score = best_model.score(X_test, y_test)
print(f"Final Model R² Score: {final_score:.4f}")

Best Hyperparameters Found: {'alpha': 0.011981961403437809, 'l1_ratio': 0.6834636263051413}
Final Model R² Score: 0.3990


In [None]:
import pandas as pd
class HyperparameterTuning:
    def __init__(self, config: HyperparameterConfig):
        self.config = config


    def hypertuning(self): 
        data = pd.read_csv(self.config.data_path)
        X = data.drop(columns=['quality']) 
        y = data['quality']
        scaler = StandardScaler()
        X = scaler.fit_transform(X)
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
        return X_train, y_train

    def objective(self, config):
        X_train, y_train = self.hypertuning()
        
        model = ElasticNet(alpha=config["alpha"], l1_ratio=config["l1_ratio"], random_state=42)
        score = cross_val_score(model, X_train, y_train, cv=5, scoring="r2").mean()
        report({"r2_score": score})

    def run_tuning(self):
        search_space = {
            "alpha": tune.loguniform(1e-3, 1e1),  # Search between 0.001 and 10
            "l1_ratio": tune.uniform(0, 1)        # Search between 0 and 1
        }

        ray.init(ignore_reinit_error=True)  # Initialize Ray
        scheduler=ASHAScheduler()
        analysis = tune.run(
            self.objective,  # Function to optimize
            config=search_space,
            metric="r2_score",
            mode="max",  # Maximize R² score
            num_samples=50,  # Number of trials
            scheduler=scheduler
        )

        best_params = analysis.best_config
        print(f"Best Hyperparameters Found: {best_params}")

        return best_params  # Return the best parameters

In [None]:
try:
    config = ConfigurationManager()
    hyperparameter_config = config.get_hyperparameter_config()
    hyperparameter = HyperparameterTuning(config=hyperparameter_config)
    best_params = hyperparameter.run_tuning()
    print(f"Best Found Parameters: {best_params}")
except Exception as e:
    raise e