#### **AML Assignment 02 : Version Control**
#### **Task 2.2: Model version control and Experiment tracking**


- Name: Soumyajoy Kundu
- Roll No: MDS202349

----


* in `train.ipynb` track the experiments and model versions using mlflow
    * build, track, and register 3 benchmark models using MLflow
    * checkout and print the model selection metric AUCPR for each of the three benchmark models

---
**References**: (ML Experiment Tracking)
* https://mlflow.org/docs/latest/tracking.html
* https://mlflow.org/docs/latest/getting-started/intro-quickstart/index.html
* https://www.datarevenue.com/en-blog/how-we-track-machine-learning-experiments-with-mlflow
* https://towardsdatascience.com/experiment-tracking-with-mlflow-in-10-minutes-f7c2128b8f2c
https://madewithml.com/courses/mlops/experiment-tracking/



In [1]:
!pip install mlflow



### Importing Necessary Libraries

In [2]:
import nltk
import pandas as pd
import numpy as np
import sklearn
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.metrics import accuracy_score, classification_report
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize
from sklearn.pipeline import make_pipeline
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn import metrics

import random
random.seed(42)

from urllib.parse import urlparse
import seaborn as sns
import matplotlib.pyplot as plt
import re

from sklearn.naive_bayes import MultinomialNB
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import mlflow
import mlflow.sklearn
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score

%matplotlib inline
import matplotlib.pyplot as plt

nltk.download('stopwords')

import warnings
warnings.filterwarnings("ignore")

[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


### Loading Datasets

In [3]:
raw_data = pd.read_csv("SMSSpamCollection.csv")
raw_data

Unnamed: 0,Label,Message
0,ham,"Go until jurong point, crazy.. Available only ..."
1,ham,Ok lar... Joking wif u oni...
2,spam,Free entry in 2 a wkly comp to win FA Cup fina...
3,ham,U dun say so early hor... U c already then say...
4,ham,"Nah I don't think he goes to usf, he lives aro..."
...,...,...
5569,spam,This is the 2nd time we have tried 2 contact u...
5570,ham,Will ü b going to esplanade fr home?
5571,ham,"Pity, * was in mood for that. So...any other s..."
5572,ham,The guy did some bitching but I acted like i'd...


In [4]:
train = pd.read_csv("train.csv")
val = pd.read_csv("validation.csv")
test = pd.read_csv("test.csv")

### Data Preparation
* Preparing the attributes and labels for training

In [5]:
X_train, y_train = train["Message"], train["Label"]
X_val, y_val = val["Message"], val["Label"]
X_test, y_test = test["Message"], test["Label"]

In [6]:
def evaluate_model(model, X_test, y_test):
    """
    Evaluate the final model on the test set.
    """
    y_pred = model.predict(X_test)
    tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

    precision = precision_score(y_test, y_pred)
    recall = recall_score(y_test, y_pred)
    f1_score_val = f1_score(y_test, y_pred)
    accuracy = accuracy_score(y_test, y_pred)
    roc_auc_val = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1]) if hasattr(model, 'predict_proba') else 0



    return {
        "Precision": round(precision,4),
        "Recall": round(recall,4),
        "F1-Score": round(f1_score_val,4),
        "Accuracy": round(accuracy,4),
        "ROC-AUC": round(roc_auc_val,4)
    }

### Training 3 Benchmark Models

### Naive Bayes Classifier

In [7]:
# Naive Bayes
print("Naive Bayes\n\n")
pipeline_nb = make_pipeline(CountVectorizer(), MultinomialNB(alpha = 0.1))
pipeline_nb.fit(X_train, y_train)

# Evaluate the model on validation set
predictions = pipeline_nb.predict(X_val)

# Evaluation Metrics
metrics = evaluate_model(pipeline_nb, X_val, y_val)

print("On validation Dataset:", end = "\n")
print("Accuracy : " + str(round(metrics['Accuracy']*100, 2)) + "%")
print("Precision : " + str(round(metrics['Precision']*100, 2)) + "%")
print("Recall : " + str(round(metrics['Recall']*100, 2)) + "%")
print("f1 score : " + str(round(metrics['F1-Score']*100, 2)) + "%")
print("AUCPR : " + str(round(metrics['ROC-AUC']*100, 2)) + "%")

Naive Bayes


On validation Dataset:
Accuracy : 97.87%
Precision : 91.74%
Recall : 92.5%
f1 score : 92.12%
AUCPR : 98.12%


### Logistic Regression

In [8]:
# Logistic Regression
print("Logistic Regression\n\n")
pipeline_lr = make_pipeline(CountVectorizer(), LogisticRegression(random_state = 42))
pipeline_lr.fit(X_train, y_train)

# Evaluate the model on validation set
predictions = pipeline_lr.predict(X_val)

# Evaluation Metrics
metrics = evaluate_model(pipeline_lr, X_val, y_val)

print("On validation Dataset:", end = "\n")
print("Accuracy : " + str(round(metrics['Accuracy']*100, 2)) + "%")
print("Precision : " + str(round(metrics['Precision']*100, 2)) + "%")
print("Recall : " + str(round(metrics['Recall']*100, 2)) + "%")
print("f1 score : " + str(round(metrics['F1-Score']*100, 2)) + "%")
print("AUCPR : " + str(round(metrics['ROC-AUC']*100, 2)) + "%")

Logistic Regression


On validation Dataset:
Accuracy : 97.53%
Precision : 97.12%
Recall : 84.17%
f1 score : 90.18%
AUCPR : 98.22%


### Random Forest Classifier

In [9]:
# Random Forest Classifier
print("Random Forest Classifier\n\n")
pipeline_rf = make_pipeline(CountVectorizer(), RandomForestClassifier(random_state = 42, max_depth=60, n_jobs=-1))
pipeline_rf.fit(X_train, y_train)

# Evaluate the model on validation set
predictions = pipeline_rf.predict(X_val)

# Evaluation Metrics
metrics = evaluate_model(pipeline_rf, X_val, y_val)

print("On validation Dataset:", end = "\n")
print("Accuracy : " + str(round(metrics['Accuracy']*100, 2)) + "%")
print("Precision : " + str(round(metrics['Precision']*100, 2)) + "%")
print("Recall : " + str(round(metrics['Recall']*100, 2)) + "%")
print("f1 score : " + str(round(metrics['F1-Score']*100, 2)) + "%")
print("AUCPR : " + str(round(metrics['ROC-AUC']*100, 2)) + "%")

Random Forest Classifier


On validation Dataset:
Accuracy : 97.2%
Precision : 100.0%
Recall : 79.17%
f1 score : 88.37%
AUCPR : 97.87%


In [10]:
# Naive Bayes
with mlflow.start_run(run_name="Naive Bayes"):
    y_pred = pipeline_nb.predict(X_test)
    mlflow.log_param("model_name", "Naive Bayes")
    mlflow.log_metric("accuracy", accuracy_score(y_pred, y_test))
    mlflow.log_metric("precision", precision_score(y_pred, y_test))
    mlflow.log_metric("recall", recall_score(y_pred, y_test))
    mlflow.log_metric("f1 score", f1_score(y_pred, y_test))
    mlflow.log_metric("AUCPR", roc_auc_score(y_pred, y_test))
    mlflow.log_dict(np.array(confusion_matrix(y_test, y_pred)).tolist(), "confusion_matrix.json")
    mlflow.sklearn.log_model(pipeline_nb, "model")

    tracking_url_type = urlparse(mlflow.get_tracking_uri()).scheme
    mlflow.sklearn.log_model(
        sk_model=pipeline_nb,
        artifact_path="sklearn-model",
        registered_model_name="Naive Bayes model"
    )
    if tracking_url_type != "file":
        mlflow.sklearn.log_model(pipeline_nb, "model", registered_model_name="Naive Bayes")
    else:
        mlflow.sklearn.log_model(pipeline_nb, "model")

Registered model 'Naive Bayes model' already exists. Creating a new version of this model...
Created version '2' of model 'Naive Bayes model'.


In [11]:
# Logistic Regression
with mlflow.start_run(run_name="Logistic Regression"):
    y_pred = pipeline_lr.predict(X_test)
    mlflow.log_param("model_name", "Logistic Regression")
    mlflow.log_metric("accuracy", accuracy_score(y_pred, y_test))
    mlflow.log_metric("precision", precision_score(y_pred, y_test))
    mlflow.log_metric("recall", recall_score(y_pred, y_test))
    mlflow.log_metric("f1 score", f1_score(y_pred, y_test))
    mlflow.log_metric("AUCPR", roc_auc_score(y_pred, y_test))
    mlflow.log_dict(np.array(confusion_matrix(y_test, y_pred)).tolist(), "confusion_matrix.json")
    mlflow.sklearn.log_model(pipeline_lr, "model")

    tracking_url_type = urlparse(mlflow.get_tracking_uri()).scheme
    mlflow.sklearn.log_model(
        sk_model=pipeline_nb,
        artifact_path="sklearn-model",
        registered_model_name="Logistic Regression model"
    )
    if tracking_url_type != "file":
        mlflow.sklearn.log_model(pipeline_lr, "model", registered_model_name="Logistic Regression")
    else:
        mlflow.sklearn.log_model(pipeline_lr, "model")

Registered model 'Logistic Regression model' already exists. Creating a new version of this model...
Created version '2' of model 'Logistic Regression model'.


In [None]:
# Random Forest
with mlflow.start_run(run_name="Random Forest"):
    y_pred = pipeline_rf.predict(X_test)
    mlflow.log_param("model_name", "Random Forest")
    mlflow.log_metric("accuracy", accuracy_score(y_pred, y_test))
    mlflow.log_metric("precision", precision_score(y_pred, y_test))
    mlflow.log_metric("recall", recall_score(y_pred, y_test))
    mlflow.log_metric("f1 score", f1_score(y_pred, y_test))
    mlflow.log_metric("AUCPR", roc_auc_score(y_pred, y_test))
    mlflow.log_dict(np.array(confusion_matrix(y_test, y_pred)).tolist(), "confusion_matrix.json")
    mlflow.sklearn.log_model(pipeline_rf, "model")

    tracking_url_type = urlparse(mlflow.get_tracking_uri()).scheme
    mlflow.sklearn.log_model(
        sk_model=pipeline_rf,
        artifact_path="sklearn-model",
        registered_model_name="Random Forest model"
    )
    if tracking_url_type != "file":
        mlflow.sklearn.log_model(pipeline_rf, "model", registered_model_name="Random Forest")
    else:
        mlflow.sklearn.log_model(pipeline_rf, "model")

Registered model 'Random Forest model' already exists. Creating a new version of this model...
Created version '2' of model 'Random Forest model'.
