In [10]:
#sklearn libraries
from sklearn.preprocessing import OneHotEncoder, StandardScaler, PolynomialFeatures
from sklearn.model_selection import cross_val_score, train_test_split, StratifiedKFold
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_absolute_percentage_error

#models
from lightgbm import LGBMRegressor

#pandas
import pandas as pd
import numpy as np

#optimization
import optuna

#plotting lib
import plotly.express as px

#python libraries
import pickle
import json
import glob

In [3]:
class CONFIG:
    """Column names used to select model input features from a data frame."""

    # Columns listed as numeric features.
    numeric_features = [
        "no_units",
        "area",
        "unit_price",
        "sale_year",
        "tenure_yrs",
    ]

    # Columns listed as categorical features.
    categorical_features = [
        'type',
        'post_district',
        'market_segment',
        'type_sale',
        'type_area',
        'floor_level',
        'sale_month',
    ]

In [6]:
# glob.glob returns matches in arbitrary (filesystem-dependent) order.
# Sort the paths so that a path's position in the list is deterministic
# across machines and re-runs; the position is reported as the fold number
# when predicting.
model_fps = sorted(glob.glob("model_pipeline*.pkl"))
model_fps

['model_pipeline_fold_0.pkl',
 'model_pipeline_fold_1.pkl',
 'model_pipeline_fold_2.pkl',
 'model_pipeline_fold_3.pkl',
 'model_pipeline_fold_4.pkl',
 'model_pipeline_fold_5.pkl',
 'model_pipeline_fold_6.pkl',
 'model_pipeline_fold_7.pkl',
 'model_pipeline_fold_8.pkl',
 'model_pipeline_fold_9.pkl']

In [7]:
# Read the test CSV, then keep only the model input columns:
# the numeric features first, followed by the categorical ones.
test_data = pd.read_csv(filepath_or_buffer="../data/pr_test_df.csv")
test_features = test_data[
    [*CONFIG.numeric_features, *CONFIG.categorical_features]
]
test_features

Unnamed: 0,no_units,area,unit_price,sale_year,tenure_yrs,type,post_district,market_segment,type_sale,type_area,floor_level,sale_month
0,1.0,807.0,1714.0,2022,999999,Apartment,5,RCR,New Sale,Strata,01 to 05,6
1,1.0,484.0,1689.0,2022,999999,Apartment,5,RCR,Resale,Strata,01 to 05,6
2,1.0,484.0,1796.0,2022,999999,Apartment,5,RCR,New Sale,Strata,01 to 05,6
3,1.0,484.0,1811.0,2022,999999,Apartment,5,RCR,New Sale,Strata,01 to 05,6
4,1.0,484.0,1796.0,2022,999999,Apartment,5,RCR,New Sale,Strata,01 to 05,6
...,...,...,...,...,...,...,...,...,...,...,...,...
117339,1.0,947.0,732.0,2022,99,Executive Condominium,27,OCR,New Sale,Strata,01 to 05,6
117340,1.0,883.0,832.0,2022,99,Executive Condominium,27,OCR,New Sale,Strata,06 to 10,6
117341,1.0,775.0,805.0,2022,99,Executive Condominium,27,OCR,New Sale,Strata,01 to 05,6
117342,1.0,775.0,802.0,2022,99,Executive Condominium,27,OCR,New Sale,Strata,01 to 05,6


In [8]:
def get_prediction(model_fp, features):
    """Unpickle a model from ``model_fp`` and return its predictions.

    Parameters
    ----------
    model_fp
        Path to a pickle file. The unpickled object must expose a
        ``predict`` method.
    features
        Input handed unchanged to ``predict``.

    Returns
    -------
    Whatever the unpickled object's ``predict(features)`` returns.
    """
    # NOTE(review): pickle.load can execute arbitrary code while loading;
    # only point this at model files from a trusted source.
    with open(model_fp, "rb") as model_file:
        fitted_pipeline = pickle.load(model_file)

    return fitted_pipeline.predict(features)

In [11]:
# Collect one prediction result per model file, in the order of model_fps.
# The number printed is the position of the file in that list.
all_preds = []
for num_fold, model_fp in enumerate(model_fps):
    print("Predicting for fold: ", num_fold)
    fold_preds = get_prediction(model_fp, test_features)
    all_preds.append(fold_preds)

In [16]:
# Fail loudly when no model produced predictions. np.mean over an empty
# list returns NaN (emitting only a RuntimeWarning), and that NaN would then
# be broadcast into the whole prediction column without any error.
if len(all_preds) == 0:
    raise ValueError("all_preds is empty: no model predictions to average")

# Average element-wise across the per-model predictions (axis 0 runs over
# the entries of all_preds), then attach the result as a new column.
mean_pred = np.mean(all_preds, axis=0)
test_data['predicted_resale_price'] = mean_pred

In [17]:
# Write the test rows, including the averaged prediction column, to CSV
# without the DataFrame index.
test_data.to_csv(
    path_or_buf="../data/predicted_resale_price.csv",
    index=False,
)