# Boston Housing Classification SVM MLFlow

In [1]:
import sys
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import mlflow as mlf
import seaborn as sns
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline, Pipeline


import pickle
sys.path.append("..")

In [2]:
# Relative path of the Boston housing CSV file that is loaded into a DataFrame.
inputFile = "../data/Boston_Housing_Data.csv"

## Read the data into DataFrame

In [None]:
# Load the raw data set; the file uses ';' as its field delimiter.
df = pd.read_csv(inputFile, delimiter=";")
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() only appended a stray "None".
df.info()

## Feature selection 

In [None]:
# Split the frame into predictors and target: the "MEDV" and "CAT" columns are
# removed from the feature matrix, and "CAT" alone is copied out (as a
# one-column DataFrame) to serve as the label.
df_features = df.drop(columns=["MEDV", "CAT"])
df_labels = df.loc[:, ["CAT"]].copy()
display(df_features)
display(df_labels)

## Train validate test split

In [None]:
# First hold out 20% of all rows as the test set, then split a further 20% of
# the remaining rows off as the validation set. Both splits use the same seed.
X_train, X_test, y_train, y_test = train_test_split(
    df_features, df_labels, test_size=0.2, random_state=1234
)
X_train, X_validate, y_train, y_validate = train_test_split(
    X_train, y_train, test_size=0.2, random_state=1234
)

# Show every partition: features (train, test, validate), then labels.
for _partition in (X_train, X_test, X_validate, y_train, y_test, y_validate):
    display(_partition)

### Data standardization

In [6]:
# Fit the scaler on the training features only, then apply that same fitted
# transformation to all three partitions. transform='pandas' makes
# transform() return DataFrames.
scaler = StandardScaler()
scaler.set_output(transform='pandas')
scaler.fit(X_train)
X_train, X_validate, X_test = [
    scaler.transform(features) for features in (X_train, X_validate, X_test)
]

### Build the model and the train method

In [7]:
# Linear support vector classifier using the hinge loss; this is the estimator
# that is passed to train() when the experiment is run.
lsvc = LinearSVC(
    C=0.5,
    dual='auto',
    loss='hinge',
    max_iter=8000,
)

def train(sk_model, X, y):
    """Fit ``sk_model`` on ``X``/``y`` and record its training accuracy.

    The accuracy on the training data itself is logged to MLflow under the
    metric name ``train_acc`` and printed.

    Args:
        sk_model: Estimator exposing ``fit`` and ``score``.
        X: Training features.
        y: Training labels.

    Returns:
        Whatever ``sk_model.fit(X, y)`` returns (the fitted estimator).
    """
    fitted = sk_model.fit(X, y)
    accuracy = fitted.score(X, y)
    mlf.log_metric("train_acc", accuracy)
    print(f"Train Accuracy: {accuracy:.3%}")
    return fitted

#lsvc_model = train(lsvc,X_train,y_train["CAT"])

### Evaluate the model

In [8]:
def evaluate(sk_model, X, y):
    """Score a fitted model on ``X``/``y`` and log the results to MLflow.

    Logs the metrics ``eval_acc`` and ``auc_score``, prints both, and saves a
    confusion-matrix heatmap to ``sklearn_conf_matrix.png``, which is then
    logged as an MLflow artifact.

    Args:
        sk_model: Fitted estimator exposing ``score`` and ``predict``.
        X: Features to evaluate on.
        y: True labels belonging to ``X``.
    """
    eval_acc = sk_model.score(X, y)
    preds = sk_model.predict(X)
    # NOTE: despite its name, "auc_score" is computed with accuracy_score, i.e.
    # it is plain accuracy rather than an area under the ROC curve. The metric
    # key is kept unchanged so existing logged runs remain comparable.
    auc_score = accuracy_score(y, preds)
    mlf.log_metric("eval_acc", eval_acc)
    mlf.log_metric("auc_score", auc_score)
    print(f"Auc Score: {auc_score:.3%}")
    print(f"Eval Accuracy: {eval_acc:.3%}")

    # Build the matrix from the labels passed in (``y``), not from the global
    # ``y_test``: the predictions were made for ``X``, so only ``y`` is
    # guaranteed to line up with them.
    conf_matrix = confusion_matrix(y, preds)
    ax = sns.heatmap(conf_matrix, annot=True, fmt='g')
    # Flip both axes so the last class is drawn first on each axis.
    ax.invert_xaxis()
    ax.invert_yaxis()
    plt.ylabel('Actual')
    plt.xlabel('Predicted')
    plt.title("Confusion Matrix")
    plt.savefig("sklearn_conf_matrix.png")
    mlf.log_artifact("sklearn_conf_matrix.png")


### Run and log experiments and models

In [None]:
# Train, evaluate and log the model inside a single MLflow run.
model_name = "svm_model"
model_uid = None
mlf.set_experiment("bosten_housing_svm_experiment")
# The context manager ends the run on exit, so no explicit end_run() is needed.
with mlf.start_run() as run:
    lsvc_model = train(lsvc, X_train, y_train["CAT"])
    evaluate(lsvc_model, X_test, y_test["CAT"])
    # Derive the output schema from the model's own predictions on the same
    # inputs; pairing X_train with the unrelated y_test DataFrame described an
    # output that the model does not produce.
    inferred_signature = mlf.models.infer_signature(
        X_train, lsvc_model.predict(X_train)
    )
    mlf.sklearn.log_model(lsvc_model, model_name, signature=inferred_signature)
    # run_id replaces the deprecated run_uuid alias.
    model_uid = run.info.run_id
    print("Model run: ", model_uid)

## Load the last model

In [10]:
# Reload the model logged by the run above through its "runs:/<run id>/<name>" URI.
model_uri = "/".join(("runs:", model_uid, model_name))
loaded_model = mlf.sklearn.load_model(model_uri)

## Test the model

In [None]:

# Predict on the held-out test features with the reloaded model and report the
# misclassification rate (1 - accuracy).
y_pred = loaded_model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
test_error = 1.0 - accuracy
print("Test Error = ", test_error)