# Performance comparison

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import r2_score
import pickle

In [2]:
df = pd.read_csv('data/laptop_test.csv')
df = df.dropna()

In [3]:
df.head()

Unnamed: 0,company,product,typename,inches,cpu,ram(GB),gpu,opsys,weight(kg),resolution,screentype,touchscreen,cpu(GHz),memory_1_storage_gb,memory_1_type,memory_2_storage_gb,memory_2_type,log_price
0,16,334,3,13.3,57,8,53,5,1.05,3,0,0,2.5,256.0,3,0.0,3,7.421776
1,7,417,3,13.3,57,8,53,5,1.11,3,0,0,2.5,256.0,3,0.0,3,7.046647
2,10,202,3,15.6,0,6,18,5,2.4,3,2,0,2.4,1000.0,1,0.0,3,6.212606
3,10,225,3,15.6,69,8,89,5,2.6,3,0,0,2.6,1000.0,1,0.0,3,6.801283
4,7,110,4,12.5,52,4,47,5,1.26,0,2,0,2.3,256.0,3,0.0,3,7.126087


In [4]:
target = "log_price"
numericFeatures = ["inches", "ram(GB)", "weight(kg)", "cpu(GHz)", "memory_1_storage_gb", "memory_2_storage_gb"]
catFeatures = ["company", "product", "typename", "cpu", "gpu", "opsys", "resolution", "screentype", "touchscreen", "memory_1_type", "memory_2_type"]

features = df.columns.tolist()
features.remove(target)

## Predictions on test data
Never seen dataest

### Random Forest

In [19]:
random_forest = pickle.load(open("models/random_forest.sav", 'rb'))
random_forest_pred = random_forest.predict(df[features])
random_forest_r2 = r2_score(random_forest_pred, df[target])
print("Random Forest\nR2: {:.4f} %".format(random_forest_r2*100))

Random Forest
R2: 93.2558 %


### XGBoost

In [20]:
xgb = pickle.load(open("models/xgboost.sav", 'rb'))
xgb_pred = xgb.predict(df[features])
xgb_r2 = r2_score(xgb_pred, df[target])
print("XGBoost\nR2: {:.4f} %".format(xgb_r2*100))

XGBoost
R2: 91.6318 %


### Neural Network

In [14]:
import pickle
from keras.models import load_model

In [15]:
df_nn = df.copy()

In [16]:
scaler = pickle.load(open('scaler/sc_nn.pkl','rb'))

X_test_scaled = scaler.transform(df_nn[features])

In [22]:
def create_model( nl1=1, nl2=1,  nl3=1, 
                 nn1=1000, nn2=500, nn3 = 200, lr=0.01, momentum = 0.9,  decay=0., l1=0.01, l2=0.01,
                act = 'relu', dropout=0, input_shape=1000, output_shape=1):
    
    opt = keras.optimizers.SGD(lr=lr, momentum=momentum)
    reg = keras.regularizers.l1_l2(l1=l1, l2=l2)
                                                     
    model = models.Sequential()
    
    # for the firt layer we need to specify the input dimensions
    first=True
    
    for i in range(nl1):
        if first:
            model.add(layers.Dense(nn1, input_dim=input_shape, activation=act, kernel_regularizer=reg))
            first=False
        else: 
            model.add(layers.Dense(nn1, activation=act, kernel_regularizer=reg))
        if dropout!=0:
            model.add(layers.Dropout(dropout))
            
    for i in range(nl2):
        if first:
            model.add(layers.Dense(nn2, input_dim=input_shape, activation=act, kernel_regularizer=reg))
            first=False
        else: 
            model.add(layers.Dense(nn2, activation=act, kernel_regularizer=reg))
        if dropout!=0:
            model.add(layers.Dropout(dropout))
            
    for i in range(nl3):
        if first:
            model.add(layers.Dense(nn3, input_dim=input_shape, activation=act, kernel_regularizer=reg))
            first=False
        else: 
            model.add(layers.Dense(nn3, activation=act, kernel_regularizer=reg))
        if dropout!=0:
            model.add(layers.Dropout(dropout))
            
    model.add(layers.Dense(1))
    model.compile(loss='mse', optimizer=opt, metrics=['mae'],)
    return model

In [27]:
from keras.wrappers.scikit_learn import KerasRegressor
neural_network = KerasRegressor(build_fn=create_model, epochs=100, batch_size=32, verbose=1)

neural_network.model = load_model('models/nn.h5')

neural_network_preds = neural_network.predict(X_test_scaled)
neural_network_preds

  neural_network = KerasRegressor(build_fn=create_model, epochs=100, batch_size=32, verbose=1)




array([7.1421566, 7.0317883, 5.9706273, 6.801144 , 7.1081395, 6.974774 ,
       6.832797 , 5.9673543, 7.34105  , 5.6879096, 7.9339056, 6.4437485,
       7.490829 , 6.5773406, 7.5080333, 6.839004 , 7.1936827, 6.9406724,
       6.85024  , 7.8126516, 7.926122 , 6.536908 , 7.1194916, 5.716884 ,
       7.061089 , 6.6591487, 6.667244 , 6.598046 , 7.421298 , 7.0144553,
       6.174635 , 7.4343076, 7.4985256, 6.915441 , 7.0507407, 6.5701795,
       6.565206 , 6.1132674, 7.145263 , 5.7452054, 7.5490246, 6.2667317,
       6.4336205, 7.8575473, 6.3592005, 6.9912324, 7.5103927, 5.429512 ,
       6.7449346, 7.270629 , 7.3596883, 6.3274035, 6.824941 , 6.7615004,
       6.2851834, 6.9860616, 5.4293404, 6.8826833, 6.833511 , 6.801144 ,
       7.3963027, 6.8211327, 6.208205 , 6.410998 , 5.6321416, 7.5299907,
       6.1244   , 6.774446 , 6.9196024, 7.851482 , 8.091667 , 6.6719975,
       6.5899425, 6.607064 , 5.9132323, 5.9403653, 6.626693 , 7.0731616,
       7.1000338, 7.4712825, 7.137821 , 6.873023 , 

In [29]:
neural_network_r2 = r2_score(neural_network_preds, df_nn[target])
print("Neural Network\nR2: {:.4f} %".format(neural_network_r2*100))

Neural Network
R2: 86.5452 %


## Summary

In [67]:
print("Random Forest\nR2: {0:.1f} %\n\nXGBoost\nR2: {1:.1f} %\n\nNeural Network\nR2: {2:.1f} %".format(random_forest_r2*100, xgb_r2*100, neural_network_r2*100,1))

Random Forest
R2: 93.3 %

XGBoost
R2: 91.6 %

Neural Network
R2: 86.5 %


In [63]:
final_results = pd.DataFrame({
    'Random Forest': np.round(np.exp(random_forest_pred)).astype('int64'),
    'XGBoost': np.round(np.exp(xgb_pred)).astype('int64'),
    'Neural Network': np.round(np.exp(neural_network_preds)).astype('int64'),
    'Real Values': np.round(np.exp(df_nn[target])).astype('int64')})
final_results

Unnamed: 0,Random Forest,XGBoost,Neural Network,Real Values
0,1247,1364,1264,1672
1,1144,1237,1132,1149
2,521,448,392,499
3,899,825,899,899
4,1484,1312,1222,1244
...,...,...,...,...
126,1337,1328,1111,1099
127,570,618,751,649
128,1518,1854,1755,1379
129,1151,1165,1522,699
