# Step 1: Install and Set Up Environment

In [None]:
# Install dependencies (uncomment on first run; %pip installs into the active kernel's environment)
# %pip install mlflow scikit-learn pandas seaborn jupyter

# Step 2: Import Necessary Libraries

In [None]:
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor, GradientBoostingRegressor
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
import pandas as pd
import seaborn as sns
from mlflow.models.signature import infer_signature
import numpy as np

# Step 3: Load Data

In [None]:
# Red-wine CSV from the UCI repository; fields are separated by semicolons.
url = (
    "https://archive.ics.uci.edu/ml/machine-learning-databases/"
    "wine-quality/winequality-red.csv"
)
df = pd.read_csv(url, delimiter=";")

# Preview the first rows.
df.head()

In [None]:
# "quality" is the prediction target; every remaining column is a feature.
y = df["quality"]
X = df.drop(columns=["quality"])

# Step 4: Train-Test Split

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Step 5: Define Models and Hyperparameters


In [None]:
# Candidate regressors, keyed by the name later used for the MLflow run and artifact path.
models = dict(
    LinearRegression=LinearRegression(),
    RandomForest=RandomForestRegressor(
        n_estimators=100,
        max_depth=5,
        random_state=42,
    ),
    GradientBoosting=GradientBoostingRegressor(
        n_estimators=100,
        learning_rate=0.1,
        random_state=42,
    ),
)


# Step 6: Set Experiment

In [None]:
# Set (or get) the experiment that the runs below are logged under.
experiment = mlflow.set_experiment("Wine_Quality_Prediction_exp")

# Report the experiment's metadata as aligned "label : value" rows.
experiment_fields = [
    ("Name", experiment.name),
    ("ID", experiment.experiment_id),
    ("Artifact Location", experiment.artifact_location),
    ("Lifecycle Stage", experiment.lifecycle_stage),
    ("Creation Time", experiment.creation_time),
    ("Last Update Time", experiment.last_update_time),
    ("Tags", experiment.tags),
]
# Labels are left-padded to 18 characters so the colons line up.
summary_rows = "\n".join(
    f"{label:<18}: {value}" for label, value in experiment_fields
)
print(f"\n{summary_rows}\n")


# Step 7: Training and Logging with MLflow

In [None]:
run_ids = {}  # model name -> MLflow run ID of the run that trained it

# Train each candidate in its own MLflow run, logging params, metrics and the model.
for model_name, model in models.items():
    with mlflow.start_run(run_name=model_name) as run:
        model.fit(X_train, y_train)
        preds = model.predict(X_test)

        # Evaluate on the held-out split.
        rmse = np.sqrt(mean_squared_error(y_test, preds))
        r2 = r2_score(y_test, preds)

        # Gather every parameter first and send them in one batched call rather
        # than one tracking request per hyperparameter.
        # LinearRegression's estimator params are deliberately not logged.
        run_params = {"model_type": model_name}
        if model_name != "LinearRegression":
            run_params.update(model.get_params())
        mlflow.log_params(run_params)

        mlflow.log_metrics({"rmse": rmse, "r2_score": r2})

        # A single-row example plus the inferred input/output schema are stored
        # alongside the model artifact.
        input_example = X_test.iloc[:1]
        signature = infer_signature(X_test, preds)

        mlflow.sklearn.log_model(
            sk_model=model,
            artifact_path=model_name,
            input_example=input_example,
            signature=signature,
        )

        print(f"{model_name} - RMSE: {rmse:.4f} | R2: {r2:.4f}")

        # Save run ID for later
        run_ids[model_name] = run.info.run_id


# Step 8: Load and Use the Model from MLflow

In [None]:
# Load the GradientBoosting model back from MLflow.
# The run ID comes from `run_ids`, which the training loop fills in, so this
# cell works on a fresh Restart & Run All without pasting an ID by hand.
run_id = run_ids["GradientBoosting"]
model_uri = f"runs:/{run_id}/GradientBoosting"
loaded_model = mlflow.sklearn.load_model(model_uri)

# Use the model for prediction
loaded_preds = loaded_model.predict(X_test)

# Step 9: Register the Model

In [None]:
# Register the logged GradientBoosting model under "WineQualityPredictor-GB".
gb_model_uri = f"runs:/{run_id}/GradientBoosting"
mlflow.register_model(model_uri=gb_model_uri, name="WineQualityPredictor-GB")

# Step 10: Tag Model Versions

In [None]:
from mlflow.tracking import MlflowClient

registered_name = "WineQualityPredictor-GB"
client = MlflowClient()

# Tag the newest registered version instead of a hard-coded 1: each execution
# of the registration cell adds a new version, so "1" is only right on the
# very first run.
registered_versions = client.search_model_versions(f"name='{registered_name}'")
if not registered_versions:
    raise RuntimeError(
        f"No versions of '{registered_name}' found; run the registration cell first."
    )
version_to_tag = max(int(mv.version) for mv in registered_versions)

client.set_model_version_tag(
    name=registered_name,
    version=version_to_tag,  # Newest version of the registered model
    key="stage",
    value="production",
)