In [1]:
import mlflow
import mlflow.sklearn
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from sklearn.svm import SVR
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import LabelEncoder

In [2]:
# Load the housing data, discard every row that has a missing value, and
# replace the string-valued 'ocean_proximity' column with integer codes.
# The encoder is created first and kept in `label_encoder` so the learned
# category -> code mapping stays available after this cell.
# NOTE(review): integer codes impose an ordering on what looks like a nominal
# feature; one-hot encoding may suit the linear models better - confirm.
label_encoder = LabelEncoder()
df = (
    pd.read_csv('housing.csv')
    .dropna()
    .assign(
        ocean_proximity=lambda frame: label_encoder.fit_transform(
            frame['ocean_proximity']
        )
    )
)

In [3]:
# Target vector: the column the models are asked to predict.
y = df['median_house_value']
# Feature matrix: every remaining column.
X = df.drop(columns='median_house_value')

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split
# identical from run to run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [4]:
# Candidate regressors keyed by display name. The name is reused further down
# as the label in printed reports and as the MLflow run / artifact name.
# Insertion order is the order in which the models are trained.
models = dict([
    ("LinearRegression", LinearRegression()),
    ("RandomForest", RandomForestRegressor(n_estimators=100, random_state=42)),
    ("SVR", SVR(kernel="linear")),
])

In [5]:
# Train each candidate on the training split and report its error on the
# held-out split.
for model_name, model in models.items():
    # fit() returns the estimator itself, so training and prediction chain.
    y_pred = model.fit(X_train, y_train).predict(X_test)

    mse, r2 = mean_squared_error(y_test, y_pred), r2_score(y_test, y_pred)

    # One print call, one line per metric.
    print(
        f"{model_name} Mean Squared Error: {mse:.4f}",
        f"{model_name} R-squared: {r2:.4f}",
        sep="\n",
    )

LinearRegression Mean Squared Error: 4924108937.9120
LinearRegression R-squared: 0.6399
RandomForest Mean Squared Error: 2449175194.7867
RandomForest R-squared: 0.8209
SVR Mean Squared Error: 9008898130.6787
SVR R-squared: 0.3412


In [8]:
# Point the MLflow client at the local tracking server and select (or create)
# the experiment that groups all runs from this notebook.
mlflow.set_tracking_uri("http://127.0.0.1:5000/")
mlflow.set_experiment("Housing_Price_Predictions")

# Hyperparameters to record for each model, by name only. The values are read
# from the estimator itself at logging time, so the tracked value can never
# drift from what the model was actually constructed with.
tracked_params = {
    "RandomForest": ("n_estimators",),
    "SVR": ("kernel",),
}

for model_name, model in models.items():
    # One MLflow run per model; the context manager closes the run even if
    # training or logging raises.
    with mlflow.start_run(run_name=model_name):
        mlflow.log_param("model_name", model_name)

        # Log the selected hyperparameters straight from the estimator.
        estimator_params = model.get_params()
        for param_name in tracked_params.get(model_name, ()):
            mlflow.log_param(param_name, estimator_params[param_name])

        # Fit inside the run so this cell does not depend on an earlier cell
        # having already trained the model.
        model.fit(X_train, y_train)

        y_pred = model.predict(X_test)

        mse = mean_squared_error(y_test, y_pred)
        r2 = r2_score(y_test, y_pred)

        mlflow.log_metric("Mean_Squared_Error", mse)
        mlflow.log_metric("R_squared", r2)

        # Persist the fitted estimator as a run artifact named after the model.
        mlflow.sklearn.log_model(model, model_name)


2025/01/02 01:35:09 INFO mlflow.tracking.fluent: Experiment with name 'Housing_Price_Predictions' does not exist. Creating a new experiment.


🏃 View run LinearRegression at: http://127.0.0.1:5000/#/experiments/594681456156421096/runs/d83de5b22ad5413c9cc8539ba918ee0e
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/594681456156421096




🏃 View run RandomForest at: http://127.0.0.1:5000/#/experiments/594681456156421096/runs/357e513f5cb84c59b313148c75f074c0
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/594681456156421096




🏃 View run SVR at: http://127.0.0.1:5000/#/experiments/594681456156421096/runs/f9aa6c6c0e884169988a5647513265e7
🧪 View experiment at: http://127.0.0.1:5000/#/experiments/594681456156421096
