Used libraries:

In [187]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import ElasticNetCV
from sklearn.linear_model import ElasticNet
import optuna
import pickle

## Preprocessing

In [188]:
# Import dataset
train_df = pd.read_csv('/Users/kehindeslaptop/Code/projects/home-price-prediction/backend/data/training-housing-dataset.csv', index_col=0)

In [189]:
train_df.head()

Unnamed: 0,price,area,bedrooms,bathrooms,stories,mainroad,guestroom,basement,hotwaterheating,airconditioning,parking,prefarea,furnishingstatus,total_rooms,area_per_room,amenity_count
0,1.0,0.553672,4,2,2,1,0,0,0,1,1,1,1,6,0.243257,2
1,0.990741,0.20904,4,2,2,1,0,0,1,0,2,0,2,6,0.08353,2
2,0.990741,0.694915,3,2,2,1,0,0,0,0,0,1,1,5,0.390416,1
3,0.972222,0.491525,4,1,2,1,0,1,0,0,2,0,1,5,0.277298,2
4,0.972222,0.559322,4,2,2,1,1,1,0,1,1,1,0,6,0.245876,4


In [190]:
# Scaling Data
scaled_columns = ["price", "area", "area_per_room"]
scaler = MinMaxScaler()
train_df[scaled_columns] = scaler.fit_transform(train_df[scaled_columns])

In [204]:
# Saving the scaler for inverse scaling later:

with open('scaler.pkl', 'wb') as file:
    pickle.dump(scaler, file)

In [203]:
# Creating training and testing datasets
X = train_df.drop(columns="price")
Y = train_df['price']

X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.15, random_state=42)

In [192]:
# Creating objective function for model optimization

def objective_elastic(trial):
    alpha = trial.suggest_loguniform('alpha', 1e-4, 1e1)
    l1_ratio = trial.suggest_uniform('l1_ratio', 0.0, 1.0)
    
    # Creating, training and predicting Elastic Net model with suggested hyperparams
    model = ElasticNet(alpha=alpha, l1_ratio=l1_ratio, random_state=42)
    model.fit(X_train, Y_train)
    y_pred = model.predict(X_test)
    mse=mean_squared_error(Y_test, y_pred)
    
    return mse

In [193]:
# Creating study and optimizing
study = optuna.create_study(direction='minimize') # To minimize MSE
study.optimize(objective_elastic, n_trials=100)

[I 2024-10-04 02:45:51,707] A new study created in memory with name: no-name-ea757cde-169d-42d0-909b-10da095540d2
  alpha = trial.suggest_loguniform('alpha', 1e-4, 1e1)
  l1_ratio = trial.suggest_uniform('l1_ratio', 0.0, 1.0)
[I 2024-10-04 02:45:51,711] Trial 0 finished with value: 0.06386752433309419 and parameters: {'alpha': 2.416077374290232, 'l1_ratio': 0.17999495959226808}. Best is trial 0 with value: 0.06386752433309419.
  alpha = trial.suggest_loguniform('alpha', 1e-4, 1e1)
  l1_ratio = trial.suggest_uniform('l1_ratio', 0.0, 1.0)
  model = cd_fast.enet_coordinate_descent(
[I 2024-10-04 02:45:51,721] Trial 1 finished with value: 0.020592741315531436 and parameters: {'alpha': 0.00017796793872529537, 'l1_ratio': 0.1945133463465717}. Best is trial 1 with value: 0.020592741315531436.
  alpha = trial.suggest_loguniform('alpha', 1e-4, 1e1)
  l1_ratio = trial.suggest_uniform('l1_ratio', 0.0, 1.0)
[I 2024-10-04 02:45:51,723] Trial 2 finished with value: 0.03725735305905552 and parameters

In [194]:
print('Best parameters found: ', study.best_params)
print("Best MSE achieved", study.best_value)

Best parameters found:  {'alpha': 0.0001519866894121754, 'l1_ratio': 0.6057601265861304}
Best MSE achieved 0.020590204725694842


In [195]:
# Training final model with best parameters
best_params = study.best_params
final_model = ElasticNet(alpha=best_params['alpha'], l1_ratio=best_params['l1_ratio'], random_state=42)
# Training model on entire set
final_model.fit(X_train, Y_train)

# Model Evaluation
y_pred = final_model.predict(X_test)
mse = mean_squared_error(Y_test, y_pred)
r2 = r2_score(Y_test, y_pred)
print(f"Test MSE with optimized hyperparameters: {mse}")


Test MSE with optimized hyperparameters: 0.020590204725694842


In [196]:
r2 = r2_score(Y_test, y_pred)
print(f"Test r2 with optimized hyperparameters: {r2}")

Test r2 with optimized hyperparameters: 0.6717206488290728


In [197]:
# Saving model for use with website

with open('elastic_net_model.pkl', 'wb') as file:
    pickle.dump(final_model, file)

## Model Testing

There are discrepancies between dataset prices and estimated price outputs.

In [205]:
print(f"Min price: {train_df['price'].min()}")
print(f"Max price: {train_df['price'].max()}")

Min price: 0.0
Max price: 1.0


NameError: name 'prediction_scaled' is not defined