In [1]:

# Data and modelling
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score

# Load the breast-cancer dataset bundled with scikit-learn and pull out
# the feature matrix and the target vector.
data = load_breast_cancer()
X = data.data
y = data.target

# Hold out 25% of the samples for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=42,
)

# Fit a linear-kernel support vector classifier on the training portion
# (fit() returns the estimator itself, so construction and fitting can be chained).
svm = SVC(kernel='linear', random_state=42).fit(X_train, y_train)

# Predict on the held-out portion and report the fraction classified correctly.
y_pred = svm.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
print(f"Model Accuracy: {accuracy:.2f}")


Model Accuracy: 0.96


In [3]:
# Hyperparameter tuning with MLflow tracking integrated
#import os
#os.environ['MLFLOW_EXPERIMENTAL_DISABLE_ENV_MANAGER'] = '1'

from sklearn.model_selection import GridSearchCV
import mlflow
import mlflow.sklearn

# Define the parameter grid as one sub-grid per kernel.
# `gamma` only influences the rbf kernel here; crossing it with `linear` would
# refit identical linear models once per gamma value and could report a
# meaningless gamma among the best parameters.
param_grid = [
    {'kernel': ['linear'], 'C': [0.1, 1, 10]},
    {'kernel': ['rbf'], 'C': [0.1, 1, 10], 'gamma': [0.01, 0.1, 1]},
]

# Initialize GridSearchCV: 7-fold cross-validation, candidates ranked by accuracy.
# NOTE(review): features are passed unscaled; rbf SVMs are scale-sensitive, so a
# StandardScaler pipeline may be worth trying.
grid_search = GridSearchCV(SVC(random_state=42), param_grid, scoring='accuracy', cv=7)

# Start MLflow tracking. nested=True lets the run start even when another run is
# still active in this kernel (it is then recorded as a child of that run).
with mlflow.start_run(run_name="SVM Hyperparameter Tuning", nested=True):
    grid_search.fit(X_train, y_train)

    # Log the winning hyperparameters and their mean cross-validated accuracy
    mlflow.log_params(grid_search.best_params_)
    mlflow.log_metric("best_accuracy", grid_search.best_score_)

    # Score the refit best model on the held-out test split so the result is
    # directly comparable with the baseline accuracy reported earlier.
    best_model = grid_search.best_estimator_
    test_accuracy = accuracy_score(y_test, best_model.predict(X_test))
    mlflow.log_metric("test_accuracy", test_accuracy)

    # Log the model
    mlflow.sklearn.log_model(best_model, "model")

    print(f"Best Parameters: {grid_search.best_params_}")
    print(f"Best Accuracy: {grid_search.best_score_:.2f}")
    print(f"Test Accuracy: {test_accuracy:.2f}")

ModuleNotFoundError: No module named 'mlflow'