In [1]:
# Adding needed libraries and reading data
import mlflow
import numpy as np
import pandas as pd
from sklearn import ensemble
from mlflow.models import infer_signature
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error

# Load the raw training table from the working directory. Later cells read the
# columns 'Id', 'SalePrice' and ten numeric predictors from it.
train = pd.read_csv('./train.csv')

In [2]:
# Send all MLflow tracking calls to the tracking server at 127.0.0.1:8080.
# NOTE(review): presumably the server has to be running before the logging cells
# execute (e.g. `mlflow server --host 127.0.0.1 --port 8080`) — confirm.
mlflow.set_tracking_uri(uri="http://127.0.0.1:8080")

In [3]:
# Columns used by this baseline: 'Id' (row key, never fed to the model), ten
# numeric predictors, and the 'SalePrice' target.
selected = ['Id', 'LotFrontage', 'LotArea', 'OverallQual', 'YearBuilt',
       'YearRemodAdd', 'GrLivArea', 'FullBath', 'HalfBath', 'BedroomAbvGr',
       'TotRmsAbvGrd','SalePrice']

# Work on an explicit copy and leave `train` untouched, so the cell can be re-run
# on a live kernel. Rebinding `train` and popping 'SalePrice' from it would make a
# second run fail with a KeyError on the column selection. The copy also avoids
# pandas' chained-assignment warning on the imputation below.
features = train[selected].copy()
train_labels = features.pop('SalePrice')

# Impute missing lot frontage with the column mean.
# NOTE(review): the mean is taken over all rows, including the ones that land in
# the test split below; compute it on the training split only if strict
# hold-out hygiene is required.
features['LotFrontage'] = features['LotFrontage'].fillna(features['LotFrontage'].mean())

# Model the log of the sale price; every metric below is therefore in log units.
train_labels = np.log(train_labels)

# Model matrix: everything except 'Id', numeric columns only, as a bare ndarray.
train_features = features.drop('Id', axis=1).select_dtypes(include=[np.number]).values

### Splitting
x_train, x_test, y_train, y_test = train_test_split(train_features, train_labels, test_size=0.1, random_state=0)

# 'random_state' pins the per-split feature subsampling that max_features='sqrt'
# enables; without it two runs of this cell produce different models and metrics.
params = {'n_estimators':3000, 'learning_rate':0.05, 'max_depth':3, 'max_features':'sqrt','min_samples_leaf':15, 'min_samples_split':10, 'loss':'huber', 'random_state':0}

gbr = ensemble.GradientBoostingRegressor(**params).fit(x_train, y_train)

# Predict on the test set
y_pred = gbr.predict(x_test)

# Evaluate the model (MSE is computed once and reused for RMSE)
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)
mae = mean_absolute_error(y_test, y_pred)

print("R-Square: ",r2)
print("RMSE: ",rmse)
print("MSE: ",mse)
print("MAE: ",mae)

R-Square:  0.866163361135729
RMSE:  0.1353756124299287
MSE:  0.01832655644077827
MAE:  0.09136979809551828


In [4]:
# Shell command for starting a local tracking server on this host/port:
#   mlflow server --host 127.0.0.1 --port 8080
# Select the experiment that subsequent runs are recorded under. The captured log
# output shows MLflow creating it when no experiment with this name exists.
mlflow.set_experiment("house price prediction one")

2025/01/14 21:25:14 INFO mlflow.tracking.fluent: Experiment with name 'house price prediction one' does not exist. Creating a new experiment.


<Experiment: artifact_location='mlflow-artifacts:/613472460124130980', creation_time=1736864714178, experiment_id='613472460124130980', last_update_time=1736864714178, lifecycle_stage='active', name='house price prediction one', tags={}>

In [5]:
# Record the training run in MLflow: hyperparameters, hold-out metrics, a
# descriptive tag, and the fitted model (which is also registered by name).
with mlflow.start_run():
    # Log the hyperparameters
    mlflow.log_params(params)

    # Log the hold-out evaluation metrics in a single call
    mlflow.log_metrics({"R2": r2, "RMSE": rmse, "MSE": mse, "MAE": mae})

    # Set a tag that we can use to remind ourselves what this run was for.
    # The model is a gradient boosting regressor, not a linear regression.
    mlflow.set_tag("Training Info", "Gradient boosting regressor for house price prediction")

    # Infer the model signature from the training inputs and the model's outputs
    signature = infer_signature(x_train, gbr.predict(x_train))

    # Log the model. The input example is a handful of rows only: passing the
    # whole training matrix would store all of it alongside the model artifact.
    model_info = mlflow.sklearn.log_model(
        sk_model=gbr,
        artifact_path="house_pred",
        signature=signature,
        input_example=x_train[:5],
        registered_model_name="tracking-quickstart-house-price-prediction",
    )

Successfully registered model 'tracking-quickstart-house-price-prediction'.
2025/01/14 21:25:34 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: tracking-quickstart-house-price-prediction, version 1


🏃 View run casual-shrimp-6 at: http://127.0.0.1:8080/#/experiments/613472460124130980/runs/d9d9c63a15704193bb167993ae987963
🧪 View experiment at: http://127.0.0.1:8080/#/experiments/613472460124130980


Created version '1' of model 'tracking-quickstart-house-price-prediction'.


In [7]:
# Load the logged model back through MLflow's pyfunc interface, addressed by the
# URI on the object that log_model returned.
loaded_model = mlflow.pyfunc.load_model(model_info.model_uri)

In [9]:
# Show the columns of the feature frame. 'Id' is still listed here; it is dropped
# wherever the model matrix or its column labels are built.
features.columns

Index(['Id', 'LotFrontage', 'LotArea', 'OverallQual', 'YearBuilt',
       'YearRemodAdd', 'GrLivArea', 'FullBath', 'HalfBath', 'BedroomAbvGr',
       'TotRmsAbvGrd'],
      dtype='object')

In [11]:
# Score the held-out rows with the model that was reloaded from MLflow
predictions = loaded_model.predict(x_test)

# x_test is a bare array, so take the column labels from the feature frame
# ('Id' was never a model input)
feature_names = features.columns.drop('Id')

# Put the held-out features next to the observed and predicted targets.
# Both target columns hold log(SalePrice) values for a regression model; the
# "*_class" labels are kept unchanged for anything that already reads them.
result = pd.DataFrame(x_test, columns=feature_names).assign(
    actual_class=y_test.values,
    predicted_class=predictions,
)

result.head(4)

Unnamed: 0,LotFrontage,LotArea,OverallQual,YearBuilt,YearRemodAdd,GrLivArea,FullBath,HalfBath,BedroomAbvGr,TotRmsAbvGrd,actual_class,predicted_class
0,70.049958,32668.0,6.0,1957.0,1975.0,2515.0,3.0,0.0,4.0,9.0,12.209188,12.482566
1,79.0,9490.0,6.0,1941.0,1950.0,1578.0,1.0,0.0,3.0,5.0,11.798104,11.826081
2,70.049958,7015.0,5.0,1950.0,1950.0,1203.0,1.0,0.0,3.0,5.0,11.608236,11.720792
3,83.0,10005.0,7.0,1977.0,1977.0,2022.0,2.0,1.0,4.0,8.0,12.165251,12.169749
