In [1]:
%pip install boto3 xgboost

Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 24.2 -> 24.3.1
[notice] To update, run: python.exe -m pip install --upgrade pip


Load dependencies

In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor
from sklearn.model_selection import train_test_split

Loading data

In [2]:
csv = pd.read_csv('data/energy_bill_data.csv')

X = csv.drop('amount_paid', axis=1)
X['housearea'] = X['housearea'] * 0.3
X['ave_monthly_income'] = X['ave_monthly_income'] / 8
X['city'] = 'Rijeka'

def determine_cities(value):
    if str(value) == 'Rijeka':
        return 1
    if str(value) == 'Zagreb':
        return 2
    if str(value) == 'Timisoara':
        return 3

X['city'] = X['city'].apply(determine_cities)

y = csv['amount_paid']
y = y / 7.5

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [3]:
average_monthly_income = (X['ave_monthly_income'] / 8).mean()

print(average_monthly_income)

385.70291484374997


Inspect data

In [4]:
X_test.head()
# y_test.head()

Unnamed: 0,num_rooms,num_people,housearea,is_ac,is_tv,is_flat,ave_monthly_income,num_children,is_urban,city
521,2,2,228.279,0,1,0,2246.76375,1,1,1
737,2,2,223.665,0,0,1,1849.305,0,1,1
740,1,6,259.824,1,1,0,1619.92125,1,1,1
660,1,6,268.956,1,1,1,1356.6775,0,1,1
411,1,9,263.901,0,1,0,4293.2675,1,0,1


Set parameters

In [5]:
N_ESTIMATORS = 67
MAX_DEPTH = 2
LEARNING_RATE = 0.26
MIN_CHILD_WEIGHT = 4
GAMMA = 0.18

POPULATION_SIZE = 2000
NUM_GENERATIONS = 200
NUMBER_OF_CHILDREN = 1000
MUTATION_CHANCE = 0.2

Setup model

In [6]:
xgb_reg = XGBRegressor(
    n_estimators=N_ESTIMATORS,
    max_depth=MAX_DEPTH,
    learning_rate=LEARNING_RATE,
    min_child_weight=MIN_CHILD_WEIGHT,
    gamma=GAMMA
)

Train model

In [7]:
xgb_reg.fit(X_train, y_train)

Predict test data

In [8]:
y_pred = xgb_reg.predict(X_test)

regression metrics for fitness evaluation, e.g., mean squared error (MSE)

In [9]:
rmse = np.sqrt(mean_squared_error(y_test, y_pred))

rmse

8.364980348248551

Create model.json

In [11]:
model_path = './CostOfLivingPredictor/model.json'
xgb_reg.save_model(model_path)