# TRAINING HOMES MODELS (APARTMENTS, HOUSES, OSLO)

In [1]:
# Imports and one-time session setup. The statement order is deliberate and
# kept as in the original: the star import runs before the working directory
# changes, the project imports run after it.
import os
import numpy as np
import pandas as pd
pd.set_option('display.max_columns', None)  # never truncate columns in displayed frames
from common_functions_training import *
from xgboost import XGBRegressor
import seaborn as sns
import matplotlib.pyplot as plt
# Move two directory levels up exactly once per kernel session. An unguarded
# chdir climbs two more levels every time this cell is re-run, which silently
# changes where relative paths resolve.
# NOTE(review): presumably "../.." is the project root that makes `src`
# importable — confirm.
if "_PROJECT_ROOT" not in globals():
    os.chdir("../..")
    _PROJECT_ROOT = os.getcwd()
from sibr_module import BigQuery, Logger, CStorage
from src.sibr_market_training import Train



In [2]:
# Session-wide configuration: one dataset name drives the logger name, the
# BigQuery client and the trainer, so changing `dataset` changes all three.
dataset = 'homes'
logger = Logger(f'train{dataset.capitalize()}')
bq = BigQuery(logger=logger, dataset=dataset)
cs = CStorage(logger=logger, bucket_name='sibr-market')
# Forwarded as `save_to_gc` to every t.train(...) call in this notebook.
save_to_gc = True
# Pass the `dataset` variable instead of repeating the literal 'homes', so the
# trainer cannot drift out of sync with the logger / BigQuery client above.
t = Train(dataset=dataset, logger=logger, log_target=True)

2025-08-05 12:59:59,392 - trainHomes - INFO - Cloud Logging is disabled. Using local logging to /Users/sigvardbratlie/Documents/Projects/sibr_market_training/logfiles/trainHomes.log
2025-08-05 12:59:59,396 - trainHomes - INFO - BigQuery client initialized with project_id: sibr-market
2025-08-05 12:59:59,398 - trainHomes - INFO - Google Cloud Storage client initialized with bucket: sibr-market
2025-08-05 12:59:59,401 - trainHomes - INFO - BigQuery client initialized with project_id: sibr-market
2025-08-05 12:59:59,403 - trainHomes - INFO - Google Cloud Storage client initialized with bucket: sibr-market
2025-08-05 12:59:59,404 - trainHomes - DEBUG - Dataset: homes | | Replace: False


In [3]:
# Column-name lists, one per model (apartments / houses / Oslo). They are not
# referenced by any cell visible in this notebook.
# NOTE(review): presumably low-importance features to drop — confirm or remove.
a_n = ['ownership_type_annet', 'utdanning_Personer', 'utdanning_PersonerProsent', 'ownership_type_obligasjon']
h_n = ['property_type_bygård_flermannsbolig', 'ownership_type_obligasjon', 'ownership_type_annet', 'eq_lift_tmp_True', 'utdanning_PersonerProsent']
o_n = ['eq_aircondition_True', 'ownership_type_annet', 'ownership_type_obligasjon']

# Read the training data through the trainer's own BigQuery client and pick
# out one frame per model.
data = t.bq.read_homes(task = "train")
df_a = data.get("homes_apartments")
df_h = data.get("homes_houses")
df_o = data.get("homes_oslo")

# `.get` returns None for an absent key; fail here with a clear message rather
# than later, inside a training run, with an obscure one.
_missing = [
    key
    for key, frame in (
        ("homes_apartments", df_a),
        ("homes_houses", df_h),
        ("homes_oslo", df_o),
    )
    if frame is None
]
if _missing:
    raise KeyError(f"read_homes(task='train') returned no data for: {_missing}")

2025-08-05 13:00:10,236 - trainHomes - INFO - 100732 rows read from homes. Query: 
                            SELECT
                a.*,
                CASE
                    WH... (truncated)
2025-08-05 13:00:17,971 - trainHomes - INFO - 81995 rows read from homes. Query: 
                            SELECT
                a.*,
                CASE
                    WH... (truncated)
2025-08-05 13:00:30,427 - trainHomes - INFO - 29553 rows read from homes. Query: 
            WITH OsloHomes AS (
                SELECT
                h.*,
                go.BYDE... (truncated)


## Model Training with XGBoost | Apartments

In [4]:
# Hyperparameters for the apartments model.
# NOTE(review): the `model__` prefix presumably addresses a pipeline step
# named "model" — confirm against Train.train.
params_a = {
    'model__colsample_bytree': np.float64(0.7669644012595116),
    'model__learning_rate': np.float64(0.0212853192163843),
    'model__max_depth': 5,
    'model__n_estimators': 1357,
    'model__random_state': 43,
    'model__reg_lambda': np.float64(8.974374279504338e-07),
    'model__subsample': np.float64(0.6673164168691722),
}

# Train the apartments model. Named `pipeline_a` to match `pipeline_h` and
# `pipeline_o`; the original name was the misspelling `pipline_a`.
pipeline_a = t.train(
    df=df_a,
    params=params_a,
    target='price',
    data_name='homes_apartments',
    model=XGBRegressor,
    save_to_gc=save_to_gc,
    log_target=True,
)
# Backward-compatible alias for any cell that still uses the old spelling.
pipline_a = pipeline_a

2025-08-05 13:05:12,541 - trainHomes - INFO - 
 
TRAINING XGBRegressor model for HOMES_APARTMENTS
2025-08-05 13:05:12,582 - trainHomes - INFO - Train set size: 80585, Test set size: 20147
2025-08-05 13:05:12,582 - trainHomes - INFO - Target: price and log_target: True
2025-08-05 13:05:18,226 - trainHomes - INFO - MSE test: 598873276416.0,r2 test: 0.9386155605316162, mse train: 467523010560.0, r2 train 0.9526061415672302 for homes_apartments with target price and log_target True
2025-08-05 13:05:18,667 - trainHomes - INFO - Blob models.json downloaded to /tmp/models.json.
2025-08-05 13:05:18,674 - trainHomes - INFO - Oppdaterer eksisterende modell 'homes_apartments' i manifestet.
2025-08-05 13:05:18,932 - trainHomes - INFO - File /tmp/models.json uploaded to models.json in bucket sibr-market.
2025-08-05 13:05:19,490 - trainHomes - INFO - File /tmp/tmp_file.pkl uploaded to models/XGBRegressor_homes_apartments.pkl in bucket sibr-market.


In [7]:
# Hyperparameters for the houses model.
# NOTE(review): these values are identical to params_a — confirm the houses
# model was meant to reuse the apartments tuning.
params_h = dict(
    model__colsample_bytree=np.float64(0.7669644012595116),
    model__learning_rate=np.float64(0.0212853192163843),
    model__max_depth=5,
    model__n_estimators=1357,
    model__random_state=43,
    model__reg_lambda=np.float64(8.974374279504338e-07),
    model__subsample=np.float64(0.6673164168691722),
)

# Train the houses model on df_h with the shared upload setting.
pipeline_h = t.train(
    df=df_h,
    params=params_h,
    target='price',
    data_name='homes_houses',
    model=XGBRegressor,
    save_to_gc=save_to_gc,
    log_target=True,
)

2025-08-05 13:45:50,806 - trainHomes - INFO - 
 
TRAINING XGBRegressor model for HOMES_HOUSES
2025-08-05 13:45:50,847 - trainHomes - INFO - Train set size: 65596, Test set size: 16399
2025-08-05 13:45:50,848 - trainHomes - INFO - Target: price and log_target: True
2025-08-05 13:45:55,746 - trainHomes - INFO - MSE test: 1923616473088.0,r2 test: 0.8864208459854126, mse train: 1551114174464.0, r2 train 0.9078566431999207 for homes_houses with target price and log_target True
2025-08-05 13:45:55,951 - trainHomes - INFO - Blob models.json downloaded to /tmp/models.json.
2025-08-05 13:45:55,958 - trainHomes - INFO - Oppdaterer eksisterende modell 'homes_houses' i manifestet.
2025-08-05 13:45:56,142 - trainHomes - INFO - File /tmp/models.json uploaded to models.json in bucket sibr-market.
2025-08-05 13:45:56,664 - trainHomes - INFO - File /tmp/tmp_file.pkl uploaded to models/XGBRegressor_homes_houses.pkl in bucket sibr-market.


In [8]:
# Hyperparameters for the Oslo model.
params_o = dict(
    model__learning_rate=np.float64(0.03750796359625606),
    model__max_depth=4,
    model__n_estimators=978,
    model__random_state=52,
    model__subsample=np.float64(0.8347004662655393),
)

# Train the Oslo model. The frame comes from the "homes_oslo" key of the
# loaded data, while the model is registered as 'homes_apartments_oslo'.
pipeline_o = t.train(
    df=df_o,
    params=params_o,
    target='price',
    data_name='homes_apartments_oslo',
    model=XGBRegressor,
    save_to_gc=save_to_gc,
    log_target=True,
)

2025-08-02 22:10:25,802 - trainHomes - INFO - 
 
TRAINING XGBRegressor model for HOMES_APARTMENTS_OSLO
2025-08-02 22:10:25,815 - trainHomes - INFO - Train set size: 23102, Test set size: 5776
2025-08-02 22:10:25,816 - trainHomes - INFO - Target: price and log_target: True
2025-08-02 22:10:27,745 - trainHomes - INFO - MSE test: 774767575040.0,r2 test: 0.9316033124923706, mse train: 466613174272.0, r2 train 0.9589529037475586 for homes_apartments_oslo with target price and log_target True
2025-08-02 22:10:27,836 - trainHomes - INFO - Blob models.json downloaded to /tmp/models.json.
2025-08-02 22:10:27,956 - trainHomes - INFO - File /tmp/models.json uploaded to models.json in bucket sibr-market.
2025-08-02 22:10:28,343 - trainHomes - INFO - File /tmp/tmp_file.pkl uploaded to models/XGBRegressor_homes_apartments_oslo.pkl in bucket sibr-market.


In [8]:
# Fetch the model manifest from the bucket and load it into memory so the
# recorded scores can be inspected below.
results = cs.download(
    'models.json',
    read_in_file=True,
)

2025-08-05 13:52:37,329 - trainHomes - INFO - Read in models.json


In [9]:
# Show test-set R² (`r2_score`) next to training R² (`r2_train`) for each
# registered model.
score_columns = ['dataset', 'r2_score', 'r2_train']
results[score_columns]

Unnamed: 0,dataset,r2_score,r2_train
0,cars_el,0.979806,0.984906
1,cars_fossil,0.968327,0.968209
2,homes_apartments_oslo,0.936778,0.955281
3,rentals,0.842147,0.879048
4,rentals_oslo,0.866926,0.908223
5,rentals_co-living,0.34284,0.63536
6,homes_apartments,0.938616,0.952606
7,homes_houses,0.886421,0.907857
