In [55]:
#import required libraries
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn.ensemble import RandomForestRegressor
import mlflow

In [56]:
#import Data
data_path = "C:\\Users\\saikumar.godha\\Downloads\\house_data.csv"
Data = pd.read_csv(data_path)
Data.head(8)

Unnamed: 0,id,date,price,bedrooms,bathrooms,sqft_living,sqft_lot,floors,waterfront,view,...,grade,sqft_above,sqft_basement,yr_built,yr_renovated,zipcode,lat,long,sqft_living15,sqft_lot15
0,7129300520,20141013T000000,221900.0,3,1.0,1180,5650,1.0,0,0,...,7,1180,0,1955,0,98178,47.5112,-122.257,1340,5650
1,6414100192,20141209T000000,538000.0,3,2.25,2570,7242,2.0,0,0,...,7,2170,400,1951,1991,98125,47.721,-122.319,1690,7639
2,5631500400,20150225T000000,180000.0,2,1.0,770,10000,1.0,0,0,...,6,770,0,1933,0,98028,47.7379,-122.233,2720,8062
3,2487200875,20141209T000000,604000.0,4,3.0,1960,5000,1.0,0,0,...,7,1050,910,1965,0,98136,47.5208,-122.393,1360,5000
4,1954400510,20150218T000000,510000.0,3,2.0,1680,8080,1.0,0,0,...,8,1680,0,1987,0,98074,47.6168,-122.045,1800,7503
5,7237550310,20140512T000000,1225000.0,4,4.5,5420,101930,1.0,0,0,...,11,3890,1530,2001,0,98053,47.6561,-122.005,4760,101930
6,1321400060,20140627T000000,257500.0,3,2.25,1715,6819,2.0,0,0,...,7,1715,0,1995,0,98003,47.3097,-122.327,2238,6819
7,2008000270,20150115T000000,291850.0,3,1.5,1060,9711,1.0,0,0,...,7,1060,0,1963,0,98198,47.4095,-122.315,1650,9711


In [23]:
# To run mlflow server run below first command 
# mlflow ui
# mlflow server --backend-store-uri sqlite:///mlflow.db --default-artifact-root ./artifacts --host 127.0.0.1 --port 5000
remote_server_uri = "http://127.0.0.1:5000" # set to your server URI
mlflow.set_tracking_uri(remote_server_uri)  # or set the MLFLOW_TRACKING_URI in the env

In [24]:
mlflow.tracking.get_tracking_uri()

'http://127.0.0.1:5000'

In [57]:
Data = Data.drop('date', axis=1)
Data = Data.drop('id',axis=1)
Data = Data.drop('zipcode',axis=1)

In [62]:
def train(test_size = 0.3):
    with mlflow.start_run(run_name='PARENT_RUN') as parent_run:
        mlflow.log_param("parent", "yes")
        with mlflow.start_run(run_name='DATA_PREP', nested=True) as child_run_1:
            mlflow.log_param("child", "yes")
            X = Data.drop('price',axis =1).values
            y = Data['price'].values
            X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = test_size, random_state = 101)
        with mlflow.start_run(run_name='MODEL', nested=True) as child_run_2:
            mlflow.log_param("child", "yes")
            m = RandomForestRegressor(n_jobs=-1, oob_score=True)
            m.fit(X_train,y_train)
            y_pred = m.predict(X_test)
        with mlflow.start_run(run_name='EVALUATION', nested=True) as child_run_3:
            mlflow.log_param("child", "yes")
            mae = metrics.mean_absolute_error(y_test, y_pred)  
            mse = metrics.mean_squared_error(y_test, y_pred)
            rmse = np.sqrt(metrics.mean_squared_error(y_test, y_pred))
            mape = np.sqrt(metrics.mean_absolute_percentage_error(y_test, y_pred))
            VarScore = metrics.explained_variance_score(y_test,y_pred)

        # Log parameter, metrics, and model to MLflow
        print("test_size =",test_size)
        print(f"mae : {mae}, mse : {mse}, rmse : {rmse}, mape : {mape}, Varscore : {VarScore}")
        mlflow.log_param(key="test_size", value=test_size)
        mlflow.log_metrics({"mae": mae, "mse" : mse, "rmse" : rmse, "mape" : mape, "VarScore": VarScore})
        mlflow.log_artifact(data_path)
        print("Save to: {}".format(mlflow.get_artifact_uri()))
        mlflow.sklearn.log_model(m, "model")

In [63]:
train(0.3)

test_size = 0.3
mae : 70489.75463907249, mse : 17440541735.822155, rmse : 132062.6432259409, mape : 0.36666817306886773, Varscore : 0.8756709848913576
Save to: ./artifacts/0/f9c84727b42642b8ad399e88b7013c25/artifacts


In [64]:
train(0.25)

test_size = 0.25
mae : 70495.63558523815, mse : 18136454293.280117, rmse : 134671.6536368367, mape : 0.36572478868337627, Varscore : 0.8720730327665146
Save to: ./artifacts/0/e6ccdf07b59e430aaa99d8c29875133f/artifacts


In [65]:
train(0.2)

test_size = 0.2
mae : 68855.43840552562, mse : 17067126542.254047, rmse : 130641.21303116428, mape : 0.36337063321566443, Varscore : 0.8749046561565601
Save to: ./artifacts/0/252c08037f23436c8d133d8dcf9c0f0e/artifacts
