Used libraries:

In [167]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import ElasticNetCV
from sklearn.linear_model import ElasticNet
import optuna
import pickle

## Preprocessing

In [157]:
# Import dataset
train_df = pd.read_csv('../../backend/data/training-housing-dataset.csv')

In [158]:
train_df.head()

Unnamed: 0.1,Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,total_rooms,area_per_room,amenity_count
0,0,1.0,0.553672,4,2,2,1,0,0,0,1,1,1,1,6,0.243257,2
1,1,0.990741,0.20904,4,2,2,1,0,0,1,0,2,0,2,6,0.08353,2
2,2,0.990741,0.694915,3,2,2,1,0,0,0,0,0,1,1,5,0.390416,1
3,3,0.972222,0.491525,4,1,2,1,0,1,0,0,2,0,1,5,0.277298,2
4,4,0.972222,0.559322,4,2,2,1,1,1,0,1,1,1,0,6,0.245876,4


In [159]:
# Scaling Data
scaled_columns = ["price", "area", "area_per_room"]
scaler = MinMaxScaler()
train_df[scaled_columns] = scaler.fit_transform(train_df[scaled_columns])

In [160]:
# Creating training and testing datasets
X = train_df.drop(columns="price")
Y = train_df['price']

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.15, random_state=42)

In [161]:
# Creating objective function for model optimization

def objective_elastic(trial):
    alpha = trial.suggest_loguniform('alpha', 1e-4, 1e1)
    l1_ratio = trial.suggest_uniform('l1_ratio', 0.0, 1.0)
    
    # Creating, training and predicting Elastic Net model with suggested hyperparams
    model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    model.fit(X_train, Y_train)
    y_pred = model.predict(X_test)
    mse=mean_squared_error(Y_test, y_pred)
    
    return mse

In [164]:
# Creating study and optimizing
study = optuna.create_study(direction='minimize') # To minimize MSE
study.optimize(objective_elastic, n_trials=100)

[I 2024-09-28 18:36:49,225] A new study created in memory with name: no-name-d7f9d386-ef60-4e02-a504-19753000e448
[I 2024-09-28 18:36:49,232] Trial 0 finished with value: 0.005451283292583611 and parameters: {'alpha': 2.2280544857699813, 'l1_ratio': 0.43281725937580506}. Best is trial 0 with value: 0.005451283292583611.
[I 2024-09-28 18:36:49,236] Trial 1 finished with value: 0.005238833486875389 and parameters: {'alpha': 1.0368093051885305, 'l1_ratio': 0.27269313831764597}. Best is trial 1 with value: 0.005238833486875389.
[I 2024-09-28 18:36:49,240] Trial 2 finished with value: 0.005217758527315619 and parameters: {'alpha': 0.6786818736051592, 'l1_ratio': 0.3005175603303166}. Best is trial 2 with value: 0.005217758527315619.
[I 2024-09-28 18:36:49,245] Trial 3 finished with value: 0.005302481308538445 and parameters: {'alpha': 0.0004933793356601141, 'l1_ratio': 0.15028716534069275}. Best is trial 2 with value: 0.005217758527315619.
[I 2024-09-28 18:36:49,248] Trial 4 finished with va

In [165]:
print('Best parameters found: ', study.best_params)
print("Best MSE achieved", study.best_value)

Best parameters found:  {'alpha': 0.017793310889306406, 'l1_ratio': 0.5346506694761376}
Best MSE achieved 0.005168986430905092


In [166]:
# Training final model with best parameters
best_params = study.best_params
final_model = ElasticNet(alpha=best_params['alpha'], l1_ratio=best_params['l1_ratio'], random_state=42)
# Training model on entire set
final_model.fit(X_train, Y_train)

# Model Evaluation
y_pred = final_model.predict(X_test)
mse = mean_squared_error(Y_test, y_pred)
print(f"Test MSE with optimized hyperparameters: {mse}")

Test MSE with optimized hyperparameters: 0.005168986430905092


In [168]:
# Saving model for use with website

with open('elastic_net_model.pkl', 'wb') as file:
    pickle.dump(final_model, file)