In [1]:
import pandas as pd
import mlflow
import mlflow.sklearn

### Initialise MLflow tracking

In [2]:
# Autologging is currently switched off (the call below is left commented out).
# mlflow.sklearn.autolog()

# Point the MLflow client at the tracking server on localhost, port 5000.
TRACKING_URI = "http://127.0.0.1:5000"
mlflow.set_tracking_uri(uri=TRACKING_URI)

In [3]:
# Select the experiment that runs in this notebook are recorded under.
# The call is kept as the last expression so the returned Experiment is displayed.
EXPERIMENT_NAME = "Deployment of ML Models"
mlflow.set_experiment(EXPERIMENT_NAME)

<Experiment: artifact_location='mlflow-artifacts:/175961737334469267', creation_time=1738901739416, experiment_id='175961737334469267', last_update_time=1738901739416, lifecycle_stage='active', name='Deployment of ML Models', tags={}>

In [4]:
# Load the Titanic passenger table; the relative path is resolved against the
# kernel's current working directory.
DATA_PATH = "titanic.csv"
df = pd.read_csv(DATA_PATH)

### Objective
Create an ML classifier to predict whether a passenger survived the Titanic disaster.

In [5]:
# Fill missing values.
# A blanket fillna(0) would record a missing Age as "0 years old" and would put
# the integer 0 into the string-valued Embarked column; both columns feed the
# model, so they are imputed with a neutral statistic instead (median / most
# frequent value). Every other column keeps the original 0 placeholder.
feature_fill = {"Age": df["Age"].median()}
embarked_modes = df["Embarked"].mode()
if not embarked_modes.empty:  # mode() is empty when the whole column is missing
    feature_fill["Embarked"] = embarked_modes.iloc[0]
df = df.fillna(feature_fill).fillna(0)

In [6]:
# Print each column's dtype and non-null count (sanity check after filling
# missing values).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          891 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        891 non-null    object 
 11  Embarked     891 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


In [7]:
# Preview the first three rows of the filled frame.
df.head(n=3)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
0,1,0,3,"Braund, Mr. Owen Harris",male,22.0,1,0,A/5 21171,7.25,0,S
1,2,1,1,"Cumings, Mrs. John Bradley (Florence Briggs Th...",female,38.0,1,0,PC 17599,71.2833,C85,C
2,3,1,3,"Heikkinen, Miss. Laina",female,26.0,0,0,STON/O2. 3101282,7.925,0,S


In [8]:
# Integer-encode the Sex column using pandas category codes.
sex_as_category = df["Sex"].astype("category")
df["gender_enc"] = sex_as_category.cat.codes

In [9]:
# Integer-encode the Embarked column using pandas category codes.
embarked_as_category = df["Embarked"].astype("category")
df["embark_enc"] = embarked_as_category.cat.codes

In [10]:
# Feature matrix: the numeric / encoded columns the classifier is trained on.
feature_columns = [
    "Pclass",
    "Age",
    "gender_enc",
    "embark_enc",
    "Fare",
    "SibSp",
    "Parch",
]
X = df[feature_columns]

# Target vector: the Survived column.
Y = df["Survived"]

### Split the data into train and test sets

In [11]:
from sklearn.model_selection import train_test_split

# Hold out 40% of the rows for testing; the fixed seed keeps the split
# identical across reruns.
split_parts = train_test_split(
    X,
    Y,
    test_size=0.4,
    random_state=42,
)
X_train, X_test, y_train, y_test = split_parts

### Start MLflow run

In [12]:
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

In [13]:
with mlflow.start_run():
    # step 1: define the hyperparameters once, so the values used to build the
    # model and the values logged to MLflow cannot drift apart.
    # random_state pins the tree's internal randomness (feature permutation and
    # tie-breaking between equally good splits) so reruns give the same model.
    model_params = {"criterion": "entropy", "max_depth": 5, "random_state": 42}
    model = DecisionTreeClassifier(**model_params)
    mlflow.log_params(model_params)

    # step 2: train the model on the training split
    model.fit(X_train, y_train)
    mlflow.log_params({"train_size": X_train.shape[0]})

    # step 3: predict on the held-out test split and log accuracy as a percentage
    y_pred = model.predict(X_test)
    acc = accuracy_score(y_test, y_pred) * 100
    mlflow.log_metric("accuracy", acc)
    mlflow.set_tag("Training info", "Basic Decision Tree model on titanic dataset")

    # step 4: log the model and register it in the model registry.
    # input_example is saved alongside the model artifact and only needs to
    # illustrate the input schema, so pass a few rows rather than the entire
    # training split.
    # NOTE(review): artifact_path says "iris" although this is the Titanic
    # model; left unchanged so any existing references to that path keep working.
    model_info = mlflow.sklearn.log_model(
        sk_model=model,
        artifact_path="iris_model",
        input_example=X_train.head(5),
        registered_model_name="my_first_model",
    )
    

Successfully registered model 'my_first_model'.
2025/02/07 09:55:23 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: my_first_model, version 1


🏃 View run angry-crab-461 at: http://127.0.0.1:5000/#/experiments/175961737334469267/runs/c976222375754a97890d9e85dd14146a
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/175961737334469267


Created version '1' of model 'my_first_model'.
