In [18]:
import pandas as pd

# Read the car-purchase CSV (path is relative to the notebook's working directory)
# into a DataFrame, then preview its first rows as the cell output.
dataset = pd.read_csv(
    filepath_or_buffer='./Datasets/car_data.csv',
)
dataset.head()

Unnamed: 0,User ID,Gender,Age,AnnualSalary,Purchased
0,385,Male,35,20000,0
1,681,Male,40,43500,0
2,353,Male,49,74000,0
3,895,Male,40,107500,1
4,661,Male,25,79000,0


In [19]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split

# Columns used as model features; 'User ID' is deliberately not listed here.
input_cols = [
    'Gender',
    'Age',
    'AnnualSalary',
]
# Single target column, kept as a list so dataset[output_cols] yields a DataFrame.
output_cols = ['Purchased']

In [20]:
# Encode the categorical "Gender" column as a single numeric indicator column.
#
# NOTE: this overwrites dataset["Gender"] in place, so the cell is not idempotent:
# run it once per freshly loaded `dataset` (a second run would re-encode the
# already-numeric values instead of the original labels).
one_hot = OneHotEncoder()
gender_one_hot = one_hot.fit_transform(dataset["Gender"].values.reshape(-1, 1)).toarray()

# The encoder produces one indicator column per category, i.e. a 2-D matrix, while
# dataset["Gender"] can hold only one column. Pick column 0 explicitly (the indicator
# for the first entry of one_hot.categories_[0]) instead of assigning the whole matrix
# and relying on pandas to resolve the shape mismatch.
dataset["Gender"] = gender_one_hot[:, 0]
dataset["Gender"]

0      0.0
1      0.0
2      0.0
3      0.0
4      0.0
      ... 
995    0.0
996    1.0
997    1.0
998    1.0
999    1.0
Name: Gender, Length: 1000, dtype: float64

In [21]:
# Split features and target into train/test parts: 20% of the rows are held out,
# and the fixed random_state makes the split repeatable across runs.
X_train, X_test, y_train, y_test = train_test_split(
    dataset[input_cols],
    dataset[output_cols],
    test_size=0.2,
    random_state=0,
)

In [22]:
# Print a summary of the DataFrame: column names, non-null counts, dtypes and memory usage.
dataset.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   User ID       1000 non-null   int64  
 1   Gender        1000 non-null   float64
 2   Age           1000 non-null   int64  
 3   AnnualSalary  1000 non-null   int64  
 4   Purchased     1000 non-null   int64  
dtypes: float64(1), int64(4)
memory usage: 39.2 KB


In [24]:
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score

# Hyperparameters for the XGBoost classifier, kept in one dict so the same values
# can be unpacked into the model constructor and logged alongside the run.
params = dict(
    n_estimators=100,
    learning_rate=0.1,
    max_depth=6,
    subsample=0.8,
    colsample_bytree=0.8,
    gamma=0,
    reg_alpha=0,
    reg_lambda=1,
)

# Instantiate the classifier from the hyperparameter dict defined above.
model_1 = XGBClassifier(**params)
# y_train is a single-column DataFrame; flatten it to a 1-D label array before fitting.
model_1.fit(
    X=X_train,
    y=y_train.to_numpy().ravel(),
)

In [26]:
# Score the fitted model on the held-out split: fraction of test rows whose
# predicted label matches the true label.
accuracy = accuracy_score(
    y_true=y_test,
    y_pred=model_1.predict(X_test),
)
print(f"Accuracy: {accuracy}")

Accuracy: 0.93


In [28]:
import mlflow
from mlflow.models.signature import infer_signature

# Send this run to the MLflow tracking server on localhost and file it under the
# "car_data_xgb1" experiment.
mlflow.set_tracking_uri(uri="http://127.0.0.1:5000")
mlflow.set_experiment("car_data_xgb1")

# Everything logged inside the context manager belongs to one run, which is
# closed automatically when the block exits.
with mlflow.start_run():
    # Record the hyperparameters and the held-out accuracy computed earlier.
    mlflow.log_params(params)
    mlflow.log_metric("accuracy", accuracy)

    # Derive the model's input/output schema from the training features and the
    # predictions the fitted model makes on them.
    signature = infer_signature(X_train, model_1.predict(X_train))

    # Log the fitted model and register it in the model registry under the same name.
    mlflow.sklearn.log_model(
        model_1,
        "model",
        signature=signature,
        # An input example only needs a few representative rows; passing the whole
        # training split would store the entire training set with the model artifact.
        input_example=X_train.head(5),
        registered_model_name="car_data_xgb1",
    )

2024/11/22 10:44:10 INFO mlflow.tracking.fluent: Experiment with name 'car_data_xgb1' does not exist. Creating a new experiment.


Downloading artifacts:   0%|          | 0/7 [00:00<?, ?it/s]

Successfully registered model 'car_data_xgb1'.
2024/11/22 10:44:17 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: car_data_xgb1, version 1


🏃 View run gifted-shark-96 at: http://127.0.0.1:5000/#/experiments/224260765374330983/runs/a222e6f971d14c18930c7ac19cd5f086
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/224260765374330983


Created version '1' of model 'car_data_xgb1'.
