In [89]:
import joblib 
import tensorflow as tf
import pickle
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import pandas as pd
tfk = tf.keras

In [3]:
# Load the persisted tree model (presumably a random-forest regressor, per the file name).
# NOTE: joblib files are pickle-based, so only load artifacts from a trusted source.
rf_mdl = joblib.load("saved_models/rf_price_regressor.sav")

In [5]:
# Restore the saved Keras network. The custom_objects entry maps the name
# "leaky_relu" to tf.nn.leaky_relu for deserialization.
nn_mdl = tfk.models.load_model(
    "saved_models/final_embed_mdl.h5",
    custom_objects={"leaky_relu": tf.nn.leaky_relu},
)

In [8]:
# Read the one-hot encoded dataset; its non-price columns are the inputs to rf_mdl.
tree_data = pd.read_csv("../Processed_Data/car_onehot_data.csv")

In [37]:
# Separate the regression target ("price") from the predictor columns.
features = tree_data.drop(columns="price")
y = tree_data["price"]

In [12]:
# Score every row of the one-hot dataset with the tree model.
# NOTE(review): if this file contains the rows the model was trained on, the
# metrics computed from these predictions are in-sample — confirm.
pred_tree = rf_mdl.predict(features)

In [13]:
# Load the merged dataset and discard its "binned_year" column.
nn_data_1 = (
    pd.read_csv("../Processed_Data/car_data_merged.csv")
    .drop(columns="binned_year")
)

In [14]:
# Pull just the "year" column (a Series) from the raw data file.
nn_data_2 = pd.read_csv("../Processed_Data/car_data_raw.csv")["year"]

In [15]:
# Attach the "year" Series to the NN frame by matching row index labels.
# NOTE(review): assumes both CSVs list the same rows in the same order — confirm.
nn_data = pd.merge(nn_data_1, nn_data_2, left_index=True, right_index=True)

In [38]:
# Separate the regression target ("price") from the NN input columns.
features_nn = nn_data.drop(columns="price")
y_nn = nn_data["price"]

In [19]:
# Load the per-column category lookup that data_convert indexes by column name.
# NOTE: pickle.load can execute arbitrary code — only open files from a trusted source.
with open("saved_models/cate_map.pkl","rb") as f:
    cate_map = pickle.load(f)

In [22]:
# Columns encoded by data_convert, in the order the encoded arrays are produced.
# NOTE(review): presumably this order must match the network's input order — confirm.
EMBED_COLS = [
    "model",
    "transmission",
    "brand",
    "fuelType",
    "year",
    "mil_cat",
    "binned_mpg",
    "engine_binned",
]

In [28]:
def data_convert(inputs, embed_cols=None, mapping=None):
    """Encode the categorical columns of ``inputs`` as one array per column.

    Each column is passed through ``Series.map`` with its per-column lookup;
    values the lookup does not cover become NaN and are then replaced by 0.

    Parameters
    ----------
    inputs : pandas.DataFrame
        Frame containing every column to encode.
    embed_cols : sequence of str, optional
        Columns to encode, in output order. Defaults to the notebook-level
        ``EMBED_COLS``.
    mapping : dict, optional
        ``{column: lookup}`` where each lookup is anything ``Series.map``
        accepts. Defaults to the notebook-level ``cate_map``.

    Returns
    -------
    list of numpy.ndarray
        One 1-D array per encoded column, in the order of ``embed_cols``.

    Raises
    ------
    KeyError
        If a requested column is missing from ``inputs`` or from ``mapping``.
    """
    # Fall back to the notebook globals only when the caller passes nothing,
    # so existing one-argument calls behave exactly as before.
    cols = EMBED_COLS if embed_cols is None else embed_cols
    lookup = cate_map if mapping is None else mapping
    # NOTE(review): unseen categories are encoded as 0, which collides with any
    # category the lookup itself assigns to 0 — confirm 0 is reserved for "unknown".
    return [inputs[col].map(lookup[col]).fillna(0).values for col in cols]

In [39]:
# Encode the NN features into a list with one array per column in EMBED_COLS.
features_nn_list = data_convert(features_nn)

In [45]:
import numpy as np
# Sanity check: the two datasets must carry the same target value on every row,
# since both models' predictions are scored and blended against one target.
np.all(y_nn == y)

True

In [46]:
# Predict with the network on the encoded column arrays; downstream cells
# flatten this result before scoring or blending it.
nn_pred = nn_mdl.predict(features_nn_list,batch_size=512)

In [73]:
# R^2 of the neural network on its own (predictions flattened to 1-D).
r2_score(y_true=y_nn, y_pred=nn_pred.ravel())

0.9535953420924875

In [51]:
# R^2 of the tree model on its own.
r2_score(y_true=y, y_pred=pred_tree)

0.9544754703908783

In [71]:
# Equal-weight blend of the two models' predictions.
combined = 0.5*nn_pred.flatten() + 0.5*pred_tree

In [74]:
# R^2 of the equal-weight blend.
r2_score(y_true=y, y_pred=combined)

0.957517875711409

In [69]:
# Inspect the shape of the tree predictions (they are added element-wise to nn_pred.flatten()).
pred_tree.shape

(99186,)

In [106]:

# Sweep the NN weight from 0 to 1 in steps of 0.05 and print every blend that
# sets a new best R^2 (the tree model gets weight 1 - ratio).
nn_flat = nn_pred.flatten()  # loop-invariant: flatten once instead of per ratio
highest = -np.inf  # R^2 can be negative, so 0 is not a safe starting "best"
# linspace gives exactly 21 points; a fractional arange step can miscount the endpoint.
for ratio in np.linspace(0.0, 1.0, 21):
    combined = nn_flat*ratio + pred_tree*(1-ratio)
    score = r2_score(y_nn,combined)
    if score > highest:
        print(f"{score} : {ratio}")
        highest = score

0.9544754703908783 : 0.0
0.9550931331868288 : 0.05
0.9556411465914821 : 0.1
0.9561195106458565 : 0.15000000000000002
0.9565282252268973 : 0.2
0.9568672904185751 : 0.25
0.9571367063546814 : 0.30000000000000004
0.9573364726624928 : 0.35000000000000003
0.9574665898021618 : 0.4
0.9575270573197281 : 0.45


In [105]:
# Sweep the NN weight from 0 to 1 in steps of 0.05 and print every blend that
# sets a new lowest RMSE (the tree model gets weight 1 - ratio).
nn_flat = nn_pred.flatten()  # loop-invariant: flatten once instead of per ratio
lowest = np.inf  # no magic threshold: the first score always becomes the baseline
# linspace gives exactly 21 points; a fractional arange step can miscount the endpoint.
for ratio in np.linspace(0.0, 1.0, 21):
    combined = nn_flat*ratio + pred_tree*(1-ratio)
    score = np.sqrt(mean_squared_error(y_nn,combined))
    if score < lowest:
        print(f"{score} : {ratio}")
        lowest = score

2105.210357315049 : 0.0
2090.8801615391635 : 0.05
2078.083147757314 : 0.1
2066.847793568279 : 0.15000000000000002
2057.199688959005 : 0.2
2049.1612518889574 : 0.25
2042.7514818801278 : 0.30000000000000004
2037.985755097286 : 0.35000000000000003
2034.875611730179 : 0.4
2033.4286593391807 : 0.45


In [104]:
# Sweep the NN weight from 0 to 1 in steps of 0.05 and print every blend that
# sets a new lowest worst-case relative error, max(|y - blend| / y).
# NOTE(review): the division assumes y contains no zeros — confirm.
nn_flat = nn_pred.flatten()  # loop-invariant: flatten once instead of per ratio
lowest = np.inf  # no magic threshold: the first score always becomes the baseline
# linspace gives exactly 21 points; a fractional arange step can miscount the endpoint.
for ratio in np.linspace(0.0, 1.0, 21):
    combined = nn_flat*ratio + pred_tree*(1-ratio)
    score = np.max(np.abs(y-combined)/y)
    if score < lowest:
        print(f"{score} : {ratio}")
        lowest = score

4.961258158353994 : 0.0
4.676192968823474 : 0.05
4.391127779292955 : 0.1
4.10606259188171 : 0.15000000000000002
3.8209974002319145 : 0.2
3.535932208582119 : 0.25
3.2508670254094274 : 0.30000000000000004
3.14942218692841 : 0.35000000000000003
3.1373220965597066 : 0.4
3.1252218302971815 : 0.45
3.1131217399284776 : 0.5
3.1010216495597747 : 0.55
3.0889215591910713 : 0.6000000000000001
3.0768212929285452 : 0.65
3.0647212025598423 : 0.7000000000000001
3.052621112191139 : 0.75


### Chosen blend ratio: 0.55 (weight on the neural network; the tree model gets 0.45)

In [122]:
# Final blend: nn_weight on the network, the remainder on the tree model.
nn_weight = 0.55
mdl_score = nn_weight*nn_pred.flatten() + (1 - nn_weight)*pred_tree

In [123]:
# R^2 of the final blend.
r2_score(y_true=y, y_pred=mdl_score)

0.9574390447138389

In [124]:
# RMSE of the final blend (square root of the mean squared error).
np.sqrt(mean_squared_error(y_true=y, y_pred=mdl_score))

2035.5344086189054

In [125]:
# Mean absolute error of the final blend.
mean_absolute_error(y_true=y, y_pred=mdl_score)

1295.3861573898191

In [126]:
# Worst-case relative error of the final blend: largest |y - prediction| / y over all rows.
np.max(np.abs(y-mdl_score)/y)

3.1010216495597747

In [129]:
# Display the NN feature frame (the columns of nn_data other than "price").
features_nn

Unnamed: 0,model,transmission,fuelType,brand,mil_cat,binned_mpg,engine_binned,year
0,GT86,Manual,Petrol,toyota,2,1,0,2016
1,GT86,Manual,Petrol,toyota,2,1,0,2017
2,GT86,Manual,Petrol,toyota,2,1,0,2015
3,GT86,Manual,Petrol,toyota,2,1,0,2017
4,GT86,Manual,Petrol,toyota,3,1,0,2017
...,...,...,...,...,...,...,...,...
99181,A3,Manual,Petrol,audi,1,2,0,2020
99182,A3,Manual,Petrol,audi,0,2,0,2020
99183,A3,Manual,Petrol,audi,0,2,0,2020
99184,Q3,Automatic,Petrol,audi,1,2,0,2017


In [130]:
# Display the one-hot feature frame that is passed to rf_mdl.predict.
features

Unnamed: 0,binned_year,mil_cat,binned_mpg,engine_binned,model_ 1 Series,model_ 2 Series,model_ 3 Series,model_ 4 Series,model_ 5 Series,model_ 6 Series,...,fuelType_Petrol,brand_audi,brand_bmw,brand_ford,brand_hyundi,brand_merc,brand_skoda,brand_toyota,brand_vauxhall,brand_vw
0,3,2,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,0,0
1,3,2,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,0,0
2,2,2,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,0,0
3,3,2,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,0,0
4,3,3,1,0,0,0,0,0,0,0,...,1,0,0,0,0,0,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
99181,4,1,2,0,0,0,0,0,0,0,...,1,1,0,0,0,0,0,0,0,0
99182,4,0,2,0,0,0,0,0,0,0,...,1,1,0,0,0,0,0,0,0,0
99183,4,0,2,0,0,0,0,0,0,0,...,1,1,0,0,0,0,0,0,0,0
99184,3,1,2,0,0,0,0,0,0,0,...,1,1,0,0,0,0,0,0,0,0
