<a href="https://colab.research.google.com/github/mimrancomsats/ProgrammingforAI_FALL24/blob/main/Lab_15_MLFlow/Lab_15MLflow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **MLflow**

**MLflow is an open-source platform, purpose-built to assist machine learning practitioners and teams in handling the complexities of the machine learning process. MLflow focuses on the full lifecycle for machine learning projects, ensuring that each phase is manageable, traceable, and reproducible.**

**In this notebook, we are going to use MLflow for two purposes: Experiment Tracking and Model Inference.**

*   **Experiment Tracking**
*   **Model Inference**

**The installation process of MLflow is described at the following link:**




https://mlflow.org/docs/latest/getting-started/intro-quickstart/index.html

# **MLflow Library Installation**

In [None]:
!pip install --quiet mlflow

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m27.4/27.4 MB[0m [31m58.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.9/5.9 MB[0m [31m91.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m233.5/233.5 kB[0m [31m14.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m147.8/147.8 kB[0m [31m8.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m114.9/114.9 kB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m85.0/85.0 kB[0m [31m5.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m623.0/623.0 kB[0m [31m32.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m203.2/203.2 kB[0m [31m14.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

# **Sklearn Pipeline Implementation (KNN)**

In [None]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import FunctionTransformer, OneHotEncoder, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Read the Titanic passenger records from the CSV in the working directory
data = pd.read_csv(filepath_or_buffer='titanic.csv')

# Custom function to impute missing values in 'Embarked' column
def impute_embarked(X):
    """Return a copy of ``X`` with missing 'Embarked' values filled by the column mode.

    The mode is computed from the frame passed in, so the fill value depends
    on the batch being transformed. The input frame is left untouched. If
    every value is missing there is no mode and the column is returned as-is.
    """
    X = X.copy()  # never write into the caller's frame (it may be a slice)
    modes = X['Embarked'].mode()
    if not modes.empty:
        X['Embarked'] = X['Embarked'].fillna(modes.iloc[0])
    return X

# Custom function to create the 'FamilySize' feature
def create_family_size(X):
    """Return a copy of ``X`` with a 'FamilySize' column appended.

    'FamilySize' is 'SibSp' + 'Parch' + 1, the extra 1 counting the passenger.
    The input frame is not modified.
    """
    return X.assign(FamilySize=X['SibSp'] + X['Parch'] + 1)

# Custom function to drop columns that are not needed for model training
def drop_columns(X):
    """Return ``X`` without the 'SibSp' and 'Parch' columns."""
    redundant = ['SibSp', 'Parch']
    return X.drop(columns=redundant)

# Function to create 'FamilySize' and drop 'SibSp' and 'Parch' columns
def family_size(X):
    """Add 'FamilySize' via create_family_size, then remove 'SibSp' and 'Parch' via drop_columns."""
    return drop_columns(create_family_size(X))

# Pipeline to preprocess 'Age' column
age_pipeline = Pipeline(steps=[
    ('age_imputer', SimpleImputer(strategy='mean')),  # Fill missing 'Age' with the mean learned at fit time
    ('age_scaler', MinMaxScaler())  # Scale 'Age' feature
])

# Pipeline to preprocess 'Fare' column
fare_pipeline = Pipeline(steps=[
    # A no-op when 'Fare' has no gaps, but it keeps predict() from failing on
    # a passenger with a missing fare (KNN rejects NaN inputs).
    ('fare_imputer', SimpleImputer(strategy='mean')),
    ('fare_scaler', MinMaxScaler())  # Scale 'Fare' feature
])

# Pipeline to create and scale the 'FamilySize' feature
family_size_pipeline = Pipeline(steps=[
    ('family_size_creator', FunctionTransformer(family_size)),
    ('family_size_scaler', MinMaxScaler()),  # Scale 'FamilySize'
])

# Pipeline to preprocess 'Embarked' column
embarked_pipeline = Pipeline(steps=[
    # The most frequent port is learned from the training data at fit time.
    # The stateless impute_embarked helper recomputed it on every batch it
    # was given, including the batch being predicted.
    ('embarked_imputer', SimpleImputer(strategy='most_frequent')),
    # Unseen categories at predict time encode as all zeros instead of raising
    ('embarked_onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Create a ColumnTransformer to preprocess all relevant features
knn_preprocessor = ColumnTransformer(transformers=[
    ('drop', 'drop', ['PassengerId', 'Name', 'Ticket', 'Cabin']),  # Drop irrelevant columns
    ('age_encoder', age_pipeline, ['Age']),  # Preprocess 'Age'
    ('fare_encoder', fare_pipeline, ['Fare']),  # Preprocess 'Fare'
    ('family_size', family_size_pipeline, ['SibSp', 'Parch']),  # Build and scale 'FamilySize'
    ('embarked_encoder', embarked_pipeline, ['Embarked']),  # Preprocess 'Embarked'
    ('sex_encoder', OneHotEncoder(handle_unknown='ignore'), ['Sex']),  # One-hot encode 'Sex'
    ('pclass_scaler', MinMaxScaler(), ['Pclass']),  # Scale 'Pclass'
], remainder='passthrough')

# Create a complete pipeline with preprocessing and the KNN classifier
knn_pipeline = Pipeline(steps=[
    ('knn_preprocessor', knn_preprocessor),  # Data preprocessing steps
    ('knn_classifier', KNeighborsClassifier(n_neighbors=5))  # KNN Classifier
])

# Separate features and target variable
X = data.drop('Survived', axis=1)
y = data['Survived']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit preprocessing and classifier together on the training rows only
knn_pipeline.fit(X_train, y_train)

# Make predictions on the test set
y_pred = knn_pipeline.predict(X_test)

# Evaluate the model performance
knn_accuracy = accuracy_score(y_test, y_pred)
print(f"\nKNN Model Accuracy: {knn_accuracy:.2f}")

# Confusion matrix for evaluating the model
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Classification report (uncomment for per-class precision/recall)
#knn_report_dict = classification_report(y_test, y_pred, output_dict=True)
#print("\nKNN Classification Report:")
#print(knn_report_dict)



KNN Model Accuracy: 0.80
Confusion Matrix:
[[90 15]
 [21 53]]


# **Experiment Tracking (KNN)**

In [None]:
import mlflow
import mlflow.sklearn

# Set the tracking URI and experiment name
mlflow.set_tracking_uri(uri="http://3.91.21.217:5000")
mlflow.set_experiment("KNN Experiment")

# Read the hyperparameters from the estimator inside the pipeline so the
# logged values cannot drift from what was actually trained.
knn_params = knn_pipeline.named_steps['knn_classifier'].get_params()

# Start a new MLflow run
with mlflow.start_run():

    # Log the parameters related to the KNN model
    mlflow.log_param("model", "KNN")
    mlflow.log_param("n_neighbors", knn_params["n_neighbors"])
    mlflow.log_param("metric", knn_params["metric"])

    # Log the accuracy metric
    mlflow.log_metric("accuracy", knn_accuracy)

    # Log the whole pipeline (preprocessing + classifier) as the model artifact
    mlflow.sklearn.log_model(knn_pipeline, "KNN Algorithm")


2024/12/18 12:35:16 INFO mlflow.tracking.fluent: Experiment with name 'KNN Experiment' does not exist. Creating a new experiment.


🏃 View run useful-eel-882 at: http://3.91.21.217:5000/#/experiments/592108578757940918/runs/ce6c402698ad4d34bffa684e4d086fba
🧪 View experiment at: http://3.91.21.217:5000/#/experiments/592108578757940918


# **Sklearn Pipeline Implementation (Decision Tree)**

In [None]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import FunctionTransformer, OneHotEncoder, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Read the Titanic passenger records from the CSV in the working directory
data = pd.read_csv(filepath_or_buffer='titanic.csv')

# Custom function to impute missing values in the 'Embarked' column
def impute_embarked(X):
    """Return a copy of ``X`` with missing 'Embarked' values filled by the column mode.

    The mode is computed from the frame passed in, so the fill value depends
    on the batch being transformed. The input frame is left untouched. If
    every value is missing there is no mode and the column is returned as-is.
    """
    X = X.copy()  # never write into the caller's frame (it may be a slice)
    modes = X['Embarked'].mode()
    if not modes.empty:
        X['Embarked'] = X['Embarked'].fillna(modes.iloc[0])
    return X

# Custom function to create the 'FamilySize' feature
def create_family_size(X):
    """Return a copy of ``X`` with a 'FamilySize' column appended.

    'FamilySize' is 'SibSp' + 'Parch' + 1, the extra 1 counting the passenger.
    The input frame is not modified.
    """
    return X.assign(FamilySize=X['SibSp'] + X['Parch'] + 1)

# Custom function to drop specified columns
def drop_columns(X):
    """Return ``X`` without the 'SibSp' and 'Parch' columns."""
    redundant = ['SibSp', 'Parch']
    return X.drop(columns=redundant)

# Function to create 'FamilySize' and drop 'SibSp' and 'Parch' columns
def family_size(X):
    """Add 'FamilySize' via create_family_size, then remove 'SibSp' and 'Parch' via drop_columns."""
    return drop_columns(create_family_size(X))

# Create pipelines for 'Age'
age_pipeline = Pipeline(steps=[
    ('age_imputer', SimpleImputer(strategy='mean')),  # Fill missing 'Age' with the mean learned at fit time
    ('age_scaler', MinMaxScaler())  # Scale 'Age' feature
])

# Create pipelines for 'Fare'
fare_pipeline = Pipeline(steps=[
    # A no-op when 'Fare' has no gaps; keeps a missing fare at predict time
    # from reaching the classifier as NaN.
    ('fare_imputer', SimpleImputer(strategy='mean')),
    ('fare_scaler', MinMaxScaler())  # Scale 'Fare' feature
])

# Create pipelines for 'FamilySize'
family_size_pipeline = Pipeline(steps=[
    ('family_size_creator', FunctionTransformer(family_size)),
    ('family_size_scaler', MinMaxScaler())  # Scale 'FamilySize' feature
])

# Create pipelines for 'Embarked'
embarked_pipeline = Pipeline(steps=[
    # The most frequent port is learned from the training data at fit time.
    # The stateless impute_embarked helper recomputed it on every batch it
    # was given, including the batch being predicted.
    ('embarked_imputer', SimpleImputer(strategy='most_frequent')),
    # Unseen categories at predict time encode as all zeros instead of raising
    ('embarked_onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Create a ColumnTransformer to preprocess the data
dt_preprocessor = ColumnTransformer(transformers=[
    ('drop', 'drop', ['PassengerId', 'Name', 'Ticket', 'Cabin']),  # Drop irrelevant columns
    ('age_encoder', age_pipeline, ['Age']),  # Preprocess 'Age'
    ('fare_encoder', fare_pipeline, ['Fare']),  # Preprocess 'Fare'
    ('family_size', family_size_pipeline, ['SibSp', 'Parch']),  # Build and scale 'FamilySize'
    ('embarked_encoder', embarked_pipeline, ['Embarked']),  # Preprocess 'Embarked'
    ('sex_encoder', OneHotEncoder(handle_unknown='ignore'), ['Sex']),  # One-hot encode 'Sex'
    ('pclass_scaler', MinMaxScaler(), ['Pclass']),  # Scale 'Pclass'
], remainder='passthrough')

# Create a complete pipeline that includes preprocessing and the Decision Tree classifier
dt_pipeline = Pipeline(steps=[
    ('dt_preprocessor', dt_preprocessor),  # Data preprocessing steps
    ('dt_classifier', DecisionTreeClassifier(random_state=42))  # Decision Tree Classifier
])

# Separate features and target variable
X = data.drop('Survived', axis=1)
y = data['Survived']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit preprocessing and classifier together on the training rows only
dt_pipeline.fit(X_train, y_train)

# Make predictions on the test set
y_pred = dt_pipeline.predict(X_test)

# Evaluate the model performance
dt_accuracy = accuracy_score(y_test, y_pred)
print(f"\nDecision Tree Model Accuracy: {dt_accuracy:.2f}")

# Confusion matrix for evaluating the model
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Classification report (uncomment for per-class precision/recall)
#dt_report_dict = classification_report(y_test, y_pred, output_dict=True)
#print("\nDecision Tree Classification Report:")
#print(dt_report_dict)



Decision Tree Model Accuracy: 0.77
Confusion Matrix:
[[82 23]
 [19 55]]


# **Experiment Tracking (Decision Tree)**

In [None]:
import mlflow
import mlflow.sklearn

# Set the tracking URI and experiment name
mlflow.set_tracking_uri(uri="http://3.91.21.217:5000")
mlflow.set_experiment("Decision Tree Experiment")

# Read the hyperparameters from the estimator inside the pipeline so the
# logged values cannot drift from what was actually trained.
dt_params = dt_pipeline.named_steps['dt_classifier'].get_params()

# Start a new MLflow run
with mlflow.start_run():

    # Log the parameters related to the Decision Tree model
    mlflow.log_param("model", "Decision Tree")
    mlflow.log_param("criterion", dt_params["criterion"])
    mlflow.log_param("random_state", dt_params["random_state"])

    # Log the accuracy metric
    mlflow.log_metric("accuracy", dt_accuracy)

    # Log the whole pipeline (preprocessing + classifier) as the model artifact
    mlflow.sklearn.log_model(dt_pipeline, "Decision Tree Algorithm")


2024/12/18 12:35:38 INFO mlflow.tracking.fluent: Experiment with name 'Decision Tree Experiment' does not exist. Creating a new experiment.


🏃 View run able-cod-526 at: http://3.91.21.217:5000/#/experiments/191437345984426865/runs/0ca2dc6c60344c70a8513a409e7d1b3f
🧪 View experiment at: http://3.91.21.217:5000/#/experiments/191437345984426865


# **Sklearn Pipeline Implementation (Random Forest)**

In [None]:
import pandas as pd
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import FunctionTransformer, OneHotEncoder, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier  # Import RandomForestClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Read the Titanic passenger records from the CSV in the working directory
data = pd.read_csv(filepath_or_buffer='titanic.csv')

# Custom function to impute missing values in the 'Embarked' column
def impute_embarked(X):
    """Return a copy of ``X`` with missing 'Embarked' values filled by the column mode.

    The mode is computed from the frame passed in, so the fill value depends
    on the batch being transformed. The input frame is left untouched. If
    every value is missing there is no mode and the column is returned as-is.
    """
    X = X.copy()  # never write into the caller's frame (it may be a slice)
    modes = X['Embarked'].mode()
    if not modes.empty:
        X['Embarked'] = X['Embarked'].fillna(modes.iloc[0])
    return X

# Custom function to create the 'FamilySize' feature
def create_family_size(X):
    """Return a copy of ``X`` with a 'FamilySize' column appended.

    'FamilySize' is 'SibSp' + 'Parch' + 1, the extra 1 counting the passenger.
    The input frame is not modified.
    """
    return X.assign(FamilySize=X['SibSp'] + X['Parch'] + 1)

# Custom function to drop specified columns
def drop_columns(X):
    """Return ``X`` without the 'SibSp' and 'Parch' columns."""
    redundant = ['SibSp', 'Parch']
    return X.drop(columns=redundant)

# Function to create 'FamilySize' and drop 'SibSp' and 'Parch' columns
def family_size(X):
    """Add 'FamilySize' via create_family_size, then remove 'SibSp' and 'Parch' via drop_columns."""
    return drop_columns(create_family_size(X))

# Create pipelines for 'Age'
age_pipeline = Pipeline(steps=[
    ('age_imputer', SimpleImputer(strategy='mean')),  # Fill missing 'Age' with the mean learned at fit time
    ('age_scaler', MinMaxScaler())  # Scale 'Age' feature
])

# Create pipelines for 'Fare'
fare_pipeline = Pipeline(steps=[
    # A no-op when 'Fare' has no gaps; keeps a missing fare at predict time
    # from reaching the classifier as NaN.
    ('fare_imputer', SimpleImputer(strategy='mean')),
    ('fare_scaler', MinMaxScaler())  # Scale 'Fare' feature
])

# Create pipelines for 'FamilySize'
family_size_pipeline = Pipeline(steps=[
    ('family_size_creator', FunctionTransformer(family_size)),
    ('family_size_scaler', MinMaxScaler())  # Scale 'FamilySize' feature
])

# Create pipelines for 'Embarked'
embarked_pipeline = Pipeline(steps=[
    # The most frequent port is learned from the training data at fit time.
    # The stateless impute_embarked helper recomputed it on every batch it
    # was given, including the batch being predicted.
    ('embarked_imputer', SimpleImputer(strategy='most_frequent')),
    # Unseen categories at predict time encode as all zeros instead of raising
    ('embarked_onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Create a ColumnTransformer to preprocess the data
rf_preprocessor = ColumnTransformer(transformers=[
    ('drop', 'drop', ['PassengerId', 'Name', 'Ticket', 'Cabin']),  # Drop irrelevant columns
    ('age_encoder', age_pipeline, ['Age']),  # Preprocess 'Age'
    ('fare_encoder', fare_pipeline, ['Fare']),  # Preprocess 'Fare'
    ('family_size', family_size_pipeline, ['SibSp', 'Parch']),  # Build and scale 'FamilySize'
    ('embarked_encoder', embarked_pipeline, ['Embarked']),  # Preprocess 'Embarked'
    ('sex_encoder', OneHotEncoder(handle_unknown='ignore'), ['Sex']),  # One-hot encode 'Sex'
    ('pclass_scaler', MinMaxScaler(), ['Pclass']),  # Scale 'Pclass'
], remainder='passthrough')

# Create a complete pipeline that includes preprocessing and the Random Forest classifier
rf_pipeline = Pipeline(steps=[
    ('rf_preprocessor', rf_preprocessor),  # Data preprocessing steps
    ('rf_classifier', RandomForestClassifier(random_state=42))  # Random Forest Classifier
])

# Separate features and target variable
X = data.drop('Survived', axis=1)
y = data['Survived']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Fit preprocessing and classifier together on the training rows only
rf_pipeline.fit(X_train, y_train)

# Make predictions on the test set
y_pred = rf_pipeline.predict(X_test)

# Evaluate the model performance
rf_accuracy = accuracy_score(y_test, y_pred)
print(f"\nRandom Forest Model Accuracy: {rf_accuracy:.2f}")

# Confusion matrix for evaluating the model
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))

# Classification report (uncomment for per-class precision/recall)
#rf_report_dict = classification_report(y_test, y_pred, output_dict=True)
#print("\nRandom Forest Classification Report:")
#print(rf_report_dict)



Random Forest Model Accuracy: 0.82
Confusion Matrix:
[[91 14]
 [19 55]]


# **Experiment Tracking (Random Forest)**

In [None]:
import mlflow
import mlflow.sklearn

# Set the tracking URI and experiment name for Random Forest
mlflow.set_tracking_uri(uri="http://3.91.21.217:5000")
mlflow.set_experiment("Random Forest Experiment")

# Read the hyperparameters from the estimator inside the pipeline so the
# logged values cannot drift from what was actually trained.
rf_params = rf_pipeline.named_steps['rf_classifier'].get_params()

# Start a new MLflow run
with mlflow.start_run():

    # Log the hyperparameters
    mlflow.log_param("model", "Random Forest")
    mlflow.log_param("n_estimators", rf_params["n_estimators"])
    mlflow.log_param("random_state", rf_params["random_state"])

    # Log the accuracy metric
    mlflow.log_metric("accuracy", rf_accuracy)

    # Log the whole pipeline (preprocessing + classifier) as the model artifact
    mlflow.sklearn.log_model(rf_pipeline, "Random Forest Algorithm")


2024/12/18 12:35:45 INFO mlflow.tracking.fluent: Experiment with name 'Random Forest Experiment' does not exist. Creating a new experiment.


🏃 View run efficient-gnat-96 at: http://3.91.21.217:5000/#/experiments/532351182075092251/runs/ab56cd620c6449e3928b39184d0ccc05
🧪 View experiment at: http://3.91.21.217:5000/#/experiments/532351182075092251


# **Sklearn Preprocessing Pipeline with a Keras Model (ANN)**

In [None]:
import pandas as pd
import tensorflow as tf
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import FunctionTransformer, OneHotEncoder, MinMaxScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix

# Read the Titanic passenger records from the CSV in the working directory
data = pd.read_csv(filepath_or_buffer='titanic.csv')

# Define custom functions for preprocessing
def impute_embarked(X):
    """Return a copy of ``X`` with missing 'Embarked' values filled by the column mode.

    The mode is computed from the frame passed in, so the fill value depends
    on the batch being transformed. The input frame is left untouched. If
    every value is missing there is no mode and the column is returned as-is.
    """
    X = X.copy()  # never write into the caller's frame (it may be a slice)
    modes = X['Embarked'].mode()
    if not modes.empty:
        X['Embarked'] = X['Embarked'].fillna(modes.iloc[0])
    return X

def create_family_size(X):
    """Return a copy of ``X`` with a 'FamilySize' column appended.

    'FamilySize' is 'SibSp' + 'Parch' + 1, the extra 1 counting the passenger.
    The input frame is not modified.
    """
    return X.assign(FamilySize=X['SibSp'] + X['Parch'] + 1)

def drop_columns(X):
    """Return ``X`` without the 'SibSp' and 'Parch' columns."""
    redundant = ['SibSp', 'Parch']
    return X.drop(columns=redundant)

def family_size(X):
    """Add 'FamilySize' via create_family_size, then remove 'SibSp' and 'Parch' via drop_columns."""
    return drop_columns(create_family_size(X))

# Create pipelines for different features
age_pipeline = Pipeline(steps=[
    ('age_imputer', SimpleImputer(strategy='mean')),  # Fill missing Age with the mean learned at fit time
    ('age_scaler', MinMaxScaler())  # Scale Age
])

fare_pipeline = Pipeline(steps=[
    # A no-op when Fare has no gaps; keeps a missing fare at predict time
    # from reaching the network as NaN.
    ('fare_imputer', SimpleImputer(strategy='mean')),
    ('fare_scaler', MinMaxScaler())  # Scale Fare
])

family_size_pipeline = Pipeline(steps=[
    ('family_size_creator', FunctionTransformer(family_size)),
    ('family_size_scaler', MinMaxScaler()),  # Scale FamilySize
])

embarked_pipeline = Pipeline(steps=[
    # The most frequent port is learned from the training data at fit time.
    # The stateless impute_embarked helper recomputed it on every batch it
    # was given, including the batch being predicted.
    ('embarked_imputer', SimpleImputer(strategy='most_frequent')),
    # Unseen categories at predict time encode as all zeros instead of raising
    ('embarked_onehot', OneHotEncoder(handle_unknown='ignore'))
])

# Column transformer for preprocessing
preprocessor = ColumnTransformer(transformers=[
    ('drop', 'drop', ['PassengerId', 'Name', 'Ticket', 'Cabin']),  # Drop irrelevant columns
    ('age_encoder', age_pipeline, ['Age']),
    ('fare_encoder', fare_pipeline, ['Fare']),
    ('family_size', family_size_pipeline, ['SibSp', 'Parch']),  # Build and scale FamilySize
    ('embarked_encoder', embarked_pipeline, ['Embarked']),
    ('sex_encoder', OneHotEncoder(handle_unknown='ignore'), ['Sex']),
    ('scaler', MinMaxScaler(), ['Pclass']),  # Scale Pclass
], remainder='passthrough')

# Build the ANN model
def ann_model(input_shape):
    """Build (but do not compile) the feed-forward binary classifier.

    Layers, in order: an input of ``input_shape``, Dense(64, relu),
    Dense(32, relu) and a single sigmoid output unit.
    """
    network = tf.keras.Sequential()
    network.add(tf.keras.layers.Input(shape=input_shape))  # Input layer
    for units in (64, 32):
        network.add(tf.keras.layers.Dense(units, activation='relu'))  # Hidden layer
    network.add(tf.keras.layers.Dense(1, activation='sigmoid'))  # Output layer (binary classification)
    return network

# Prepare the data
X = data.drop('Survived', axis=1)
y = data['Survived']

# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Preprocess the data (the network is not part of the sklearn pipeline):
# statistics are fitted on the training rows and reused for the test rows.
X_train_processed = preprocessor.fit_transform(X_train)
X_test_processed = preprocessor.transform(X_test)

print(f"X_train shape: {X_train_processed.shape}")
print(f"X_test shape: {X_test_processed.shape}")

# Seed Python, NumPy and TensorFlow so weight initialisation and batch
# shuffling are repeatable from run to run.
tf.keras.utils.set_random_seed(42)

# Build the ANN model; its input width is the number of preprocessed features
model = ann_model(input_shape=(X_train_processed.shape[1],))
print(f"Model input shape: {model.input_shape}")

# Model summary: summary() prints the table itself and returns None, so
# wrapping it in print() only added a stray "None" to the output.
model.summary()

# Compile the ANN model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Train the ANN model; 20% of the training rows are held out for validation
model.fit(X_train_processed, y_train, epochs=10, batch_size=32, validation_split=0.2, verbose=1)

# Make predictions on the test set
y_pred = model.predict(X_test_processed)
# Threshold the sigmoid output at 0.5 and flatten (n, 1) -> (n,) class labels
y_pred = (y_pred > 0.5).astype("int32").ravel()

# Evaluate the model
ann_accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy: {ann_accuracy:.2f}")

# Confusion Matrix
print("Confusion Matrix:")
print(confusion_matrix(y_test, y_pred))


X_train shape: (712, 9)
X_test shape: (179, 9)
Model input shape: (None, 9)


None
Epoch 1/10
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 28ms/step - accuracy: 0.7216 - loss: 0.6370 - val_accuracy: 0.8252 - val_loss: 0.5446
Epoch 2/10
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.7669 - loss: 0.5724 - val_accuracy: 0.8601 - val_loss: 0.4869
Epoch 3/10
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7856 - loss: 0.5291 - val_accuracy: 0.8252 - val_loss: 0.4489
Epoch 4/10
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.7838 - loss: 0.5037 - val_accuracy: 0.8322 - val_loss: 0.4322
Epoch 5/10
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.8148 - loss: 0.4575 - val_accuracy: 0.8322 - val_loss: 0.4172
Epoch 6/10
[1m18/18[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step - accuracy: 0.7926 - loss: 0.4681 - val_accuracy: 0.8322 - val_loss: 0.4210
Epoch 7/10
[1m18/18[0m [32m━━━━

# **Experiment Tracking (ANN)**

In [None]:
import mlflow
import mlflow.sklearn

# `model` is a tf.keras network, so it is logged with MLflow's TensorFlow
# flavor rather than the scikit-learn one.
import mlflow.tensorflow

# Set the tracking URI and experiment name for the ANN
mlflow.set_tracking_uri(uri="http://3.91.21.217:5000")
mlflow.set_experiment("ANN Experiment")

# Start a new MLflow run
with mlflow.start_run():

    # Log the hyperparameters used to build and train the network
    mlflow.log_param("model", "ANN")
    mlflow.log_param("hidden_layers", 2)
    mlflow.log_param("optimizer", "adam")
    mlflow.log_param("batch_size", 32)
    mlflow.log_param("learning_rate", "constant")
    mlflow.log_param("learning_rate_init", 0.001)
    mlflow.log_param("epochs", 10)
    mlflow.log_param("verbose", True)
    mlflow.log_param("validation_fraction", 0.2)

    # Log the accuracy metric
    mlflow.log_metric("accuracy", ann_accuracy)

    # Log the ANN model (the `model` variable). Only the network is logged:
    # inputs must already be transformed with `preprocessor` before predict().
    mlflow.tensorflow.log_model(model, "ANN Algorithm")


2024/12/18 12:36:00 INFO mlflow.tracking.fluent: Experiment with name 'ANN Experiment' does not exist. Creating a new experiment.


🏃 View run sophisticated-pug-39 at: http://3.91.21.217:5000/#/experiments/294652062718827529/runs/197d438e29e34d2a9106d88365c63c12
🧪 View experiment at: http://3.91.21.217:5000/#/experiments/294652062718827529


# **Model Inference**

In [None]:
# Load the model back for predictions as a generic Python Function (pyfunc) model.
# Cell for ubuntu-MLFlow EC2 Machine.
# NOTE(review): "models:/KNN/1" assumes a model named 'KNN' (version 1) exists in
# the registry; the tracking cells only log artifacts, so confirm it was registered.
#model_uri = f"models:/Random Forest@champion"
loaded_model = mlflow.pyfunc.load_model("models:/KNN/1")

predictions = loaded_model.predict(X_test)

# Work on a copy so the extra columns never leak back into X_test
result = X_test.copy()
result["actual_class"] = y_test
result["predicted_class"] = predictions

accuracy = accuracy_score(y_test, predictions)
print(f"\nModel Accuracy: {accuracy:.2f}")

# Show the first 20 rows with the actual and predicted class side by side
result.head(20)

# Lab Task

Perform the steps mentioned above on the following dataset:

https://www.kaggle.com/datasets/kamilpytlak/personal-key-indicators-of-heart-disease