### Installing dependencies

In [None]:
import pandas as pd
import mlflow
import mlflow.sklearn
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.svm import SVC
import pickle
from sklearn.neighbors import KNeighborsClassifier
import os


### Loading the training and test datasets

In [3]:

train_df = pd.read_csv('../Data/train.csv')
test_df = pd.read_csv ('../Data/test.csv')

# printing the first 5 records
print(train_df.head())
print(test_df.head())


   Unnamed: 0  Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin  \
0           0            2      108             80              0        0   
1           1            0      173             78             32      265   
2           2            1      120             80             48      200   
3           3            0      121             66             30      165   
4           4            1      126             56             29      152   

    BMI  DiabetesPedigreeFunction  Age  Outcome  
0  27.0                     0.259   52        1  
1  46.5                     1.159   58        0  
2  38.9                     1.162   41        0  
3  34.3                     0.203   33        1  
4  28.7                     0.801   21        0  
   Unnamed: 0  Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin  \
0         614            2       87             58             16       52   
1         615           10       92             62              0        0 

### Setting the MLFlow URI  to store the models' logs and results of the expreiments.
### Setting the path to store the trained models. 
### Three ML algorithms implemented.

In [11]:
mlflow.set_tracking_uri('file:///C:/Users/HP/MLOPS_Assignment1/notebooks/new_mlruns')
randomforest_fpath = 'C:/Users/HP/MLOPS_Assignment1/Models/random_forest_model.pkl'
svm_fpath = 'C:/Users/HP/MLOPS_Assignment1/Models/svm_model.pkl'
knn_fpath = 'C:/Users/HP/MLOPS_Assignment1/Models/knn_model.pkl'

### Preparing the training and testing data used for training and testing the 3 models

In [12]:
X_train = train_df.drop(columns=["Outcome"])  
y_train = train_df["Outcome"]
X_test = test_df.drop(columns=["Outcome"])
y_test = test_df["Outcome"]

### Random Forest ML Model 

In [23]:
mlflow.set_experiment("RandomForest_Model")

# Start an MLflow run
with mlflow.start_run():
    # Define model parameters
    n_estimators = 100
    max_depth = 10

    # Log parameters
    mlflow.log_param("n_estimators", n_estimators)
    mlflow.log_param("max_depth", max_depth)

   
    # Train the model
    model = RandomForestClassifier(n_estimators=n_estimators, max_depth=max_depth, random_state=42)
    model.fit(X_train, y_train)

    # Predict on test set
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)

    # Log metrics
    mlflow.log_metric("accuracy", accuracy)

    # Log the model
    mlflow.sklearn.log_model(model, "random_forest_model")

    print(f"Model logged with accuracy: {accuracy}")

#print("Current tracking URI:", mlflow.get_tracking_uri())
    

2024/08/05 21:28:36 INFO mlflow.tracking.fluent: Experiment with name 'RandomForest_Model' does not exist. Creating a new experiment.


Model logged with accuracy: 0.8116883116883117


### Support Vector Machine (SVM) ML Model 

In [15]:
mlflow.set_experiment("SVM_MODEL")

with mlflow.start_run():
    # Define model parameters
    C = 1.0
    kernel = 'linear'

    # Log parameters
    mlflow.log_param("C", C)
    mlflow.log_param("kernel", kernel)


    # Train the model
    model = SVC(C=C, kernel=kernel, random_state=42)
    model.fit(X_train, y_train)

    # Predict on test set
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)

    # Log metrics
    mlflow.log_metric("accuracy", accuracy)

    # Log the model in MLflow
    mlflow.sklearn.log_model(model, "svm_model")
    print(f"Model logged with accuracy: {accuracy}")
   


2024/08/05 21:14:42 INFO mlflow.tracking.fluent: Experiment with name 'SVM_MODEL' does not exist. Creating a new experiment.


Model logged with accuracy: 0.8246753246753247


### K-Nearest Neighbour ML Model 

In [24]:
mlflow.set_experiment("KNN_MODEL")


with mlflow.start_run():
    
    n_neighbors = 5
    weights = 'uniform'  # or 'distance'

    # parameter logging
    mlflow.log_param("n_neighbors", n_neighbors)
    mlflow.log_param("weights", weights)

   
    # Train the model
    model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights)
    model.fit(X_train, y_train)

    # Predict on test set
    predictions = model.predict(X_test)
    accuracy = accuracy_score(y_test, predictions)

    # Log metrics
    mlflow.log_metric("accuracy", accuracy)

    # Log the model in MLflow
    mlflow.sklearn.log_model(model, "knn_model")

    # Save the model to your 'models/saved_model/' directory
    

    print(f"Model logged with accuracy: {accuracy}")
    #print(f"Model saved to: {knn_fpath}")




Model logged with accuracy: 0.7337662337662337


### Serializing the model in a pickle file


In [25]:
with open(randomforest_fpath, 'wb') as f:
    pickle.dump(model, f)

print(f"Model saved to: {randomforest_fpath}")

Model saved to: C:/Users/HP/MLOPS_Assignment1/Models/random_forest_model.pkl


In [20]:
with open(svm_fpath, 'wb') as f:
    pickle.dump(model, f)

print(f"Model saved to: {svm_fpath}")

Model saved to: C:/Users/HP/MLOPS_Assignment1/Models/svm_model.pkl


In [21]:

with open(knn_fpath, 'wb') as f:
    pickle.dump(model, f)

print(f"Model saved to: {knn_fpath}")

Model saved to: C:/Users/HP/MLOPS_Assignment1/Models/knn_model.pkl


In [17]:
!mlflow ui

^C


In [7]:
import mlflow

print("Current tracking URI:", mlflow.get_tracking_uri())

Current tracking URI: file:///C:/Users/HP/MLOPS_Assignment1/notebooks/mlruns
