### Importing Libraries

In [8]:
import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

### Loading the dataset

In [9]:
# Read the housing table and discard every row that contains a missing value.
df = pd.read_csv('housing.csv').dropna()

# Swap the text labels in 'ocean_proximity' for integer codes so the column is
# numeric. The fitted encoder is kept in `label_encoder` for later reuse.
# NOTE(review): integer codes give the categories an arbitrary numeric order,
# which linear models may read as meaningful — confirm this is acceptable or
# consider one-hot encoding.
label_encoder = LabelEncoder()
label_encoder.fit(df['ocean_proximity'])
df['ocean_proximity'] = label_encoder.transform(df['ocean_proximity'])

In [10]:
# The target is 'median_house_value'; every remaining column is a feature.
y = df['median_house_value']
X = df.drop(columns='median_house_value')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [11]:
# Candidate regressors keyed by the display name used when printing results
# and naming MLflow runs.
models = dict(
    LinearRegression=LinearRegression(),
    RandomForest=RandomForestRegressor(n_estimators=100, random_state=42),
    SVR=SVR(kernel="linear"),
)

### Training and evaluating the models

In [12]:
# Fit each candidate on the training split and report its test-set error.
# NOTE: `y_pred`, `mse` and `r2` are overwritten on every iteration, so after
# the loop they describe only the last model in `models`.
for model_name, model in models.items():
    # scikit-learn's fit() returns the estimator, so predict can be chained.
    y_pred = model.fit(X_train, y_train).predict(X_test)

    mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

    print(
        f"{model_name} Mean Squared Error: {mse:.4f}\n"
        f"{model_name} R-squared: {r2:.4f}"
    )

LinearRegression Mean Squared Error: 4924108937.9120
LinearRegression R-squared: 0.6399
RandomForest Mean Squared Error: 2449175194.7867
RandomForest R-squared: 0.8209
SVR Mean Squared Error: 9008898130.6787
SVR R-squared: 0.3412


In [14]:
import dagshub

# Initialise the DagsHub client for this repository with its MLflow
# integration switched on. In the recorded run this call prompted for OAuth
# authorisation in the browser.
dagshub.init(
    repo_owner='kavinmahendran09',
    repo_name='housing-prediction-mlflow',
    mlflow=True,
)


Output()



Open the following link in your browser to authorize the client:
https://dagshub.com/login/oauth/authorize?state=1fb8947f-982a-4dc6-ad58-918d39f9647d&client_id=32b60ba385aa7cecf24046d8195a71c07dd345d9657977863b52e7748e0f0f28&middleman_request_id=9a59c465e67ef33aefc5d23f249922bc6b4cae3af2cdc2a5b767971559cf15f7




### Logging model metrics and parameters to MLflow

In [None]:
# Send every run to the MLflow tracking server hosted for this repository.
mlflow.set_tracking_uri("https://dagshub.com/kavinmahendran09/housing-prediction-mlflow.mlflow")

# Group the runs under a single experiment.
mlflow.set_experiment("Housing_Price_Prediction")

# Log one MLflow run per fitted model: its name, key hyperparameters,
# test-set metrics and the serialized estimator.
for model_name, model in models.items():
    # Score this specific model. The training loop leaves the globals `mse`
    # and `r2` holding only the last model's values, so logging those would
    # attach identical (wrong) metrics to every run.
    run_predictions = model.predict(X_test)
    run_mse = mean_squared_error(y_test, run_predictions)
    run_r2 = r2_score(y_test, run_predictions)

    with mlflow.start_run(run_name=model_name):
        mlflow.log_param("model_name", model_name)

        # Read hyperparameters from the estimator itself so the logged values
        # cannot drift from what the model was actually built with.
        if model_name == "RandomForest":
            mlflow.log_param("n_estimators", model.n_estimators)
        elif model_name == "SVR":
            mlflow.log_param("kernel", model.kernel)

        mlflow.log_metric("Mean_Squared_Error", run_mse)
        mlflow.log_metric("R_squared", run_r2)

        # The artifact path equals the model name; the registration cell
        # relies on this when it builds its "runs:/<run_id>/<name>" URI.
        mlflow.sklearn.log_model(model, model_name)


2025/01/02 11:56:52 INFO mlflow.tracking.fluent: Experiment with name 'Housing_Price_Prediction' does not exist. Creating a new experiment.


🏃 View run LinearRegression at: https://dagshub.com/kavinmahendran09/housing-prediction-mlflow.mlflow/#/experiments/0/runs/085497999a6f4ea5979ba84d519d0581
🧪 View experiment at: https://dagshub.com/kavinmahendran09/housing-prediction-mlflow.mlflow/#/experiments/0




🏃 View run RandomForest at: https://dagshub.com/kavinmahendran09/housing-prediction-mlflow.mlflow/#/experiments/0/runs/d0f4e43aa74c47cf882e00cfd2ae2991
🧪 View experiment at: https://dagshub.com/kavinmahendran09/housing-prediction-mlflow.mlflow/#/experiments/0




🏃 View run SVR at: https://dagshub.com/kavinmahendran09/housing-prediction-mlflow.mlflow/#/experiments/0/runs/8ef986c3fee047ab8bf84a6a4165ae82
🧪 View experiment at: https://dagshub.com/kavinmahendran09/housing-prediction-mlflow.mlflow/#/experiments/0


### Registering the best-performing model

In [16]:
# Register the RandomForest artifact in the MLflow model registry.
model_name = "RandomForest"
# NOTE(review): this run id is pinned to the RandomForest run shown in the
# recorded logging output. Re-running the logging cell creates new runs, so
# update this value if a fresh run should be registered instead.
run_id = "d0f4e43aa74c47cf882e00cfd2ae2991"
# "runs:/<run_id>/<artifact_path>", where the artifact path is the model name
# used when the model was logged.
model_uri = "runs:/{}/{}".format(run_id, model_name)

result = mlflow.register_model(model_uri=model_uri, name=model_name)

Successfully registered model 'RandomForest'.
2025/01/02 12:16:41 INFO mlflow.store.model_registry.abstract_store: Waiting up to 300 seconds for model version to finish creation. Model name: RandomForest, version 1
Created version '1' of model 'RandomForest'.


### Loading the registered model and running it locally

In [18]:
# Fetch a specific registered version and check it on the held-out features.
model_name = "RandomForest"
model_version = "1"

# Registry URIs take the form "models:/<name>/<version>".
model_uri = "models:/" + model_name + "/" + model_version

loaded_model = mlflow.sklearn.load_model(model_uri=model_uri)
y_pred = loaded_model.predict(X_test)
# Show only the first few predictions rather than dumping the whole array.
print(y_pred[:5])

[234644.05 155161.   194443.   136627.   162542.  ]
