In [1]:
import warnings 
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
import keras.backend as K

Using TensorFlow backend.


In [2]:
def mean_absolute_precision_error(y_pred, y_true):
    """Return the mean absolute percentage error, expressed in percent.

    Note the argument order: predictions first, ground truth second.

    Parameters
    ----------
    y_pred : array-like supporting element-wise arithmetic
        Predicted values.
    y_true : array-like supporting element-wise arithmetic
        Reference values; each error is divided by the matching entry.

    Returns
    -------
    The mean of ``|y_true - y_pred| / |y_true|`` multiplied by 100.
    """
    relative_error = (y_true - y_pred) / y_true
    absolute_relative_error = np.abs(relative_error)
    return np.mean(absolute_relative_error) * 100

In [3]:
# ---- Reproducibility -------------------------------------------------------
# Value handed to np.random.seed in the feature-selection cell.
SEED = 7

# ---- Feature selection -----------------------------------------------------
# Which column groups are fed to the network. Recognised values:
# 'cont' 'pred' 'all' 'all_but_pred' 'cont_and_pred' 'all_but_state_cols'
FEATURE_TYPE = 'cont_and_pred'

# ---- Network / training hyper-parameters -----------------------------------
BATCH_SIZE = 100

# Widths of the hidden Dense layers built in base_model. A third hidden layer
# is currently disabled (its candidate sizes were 1024 and 512).
LAYER_1_SIZE, LAYER_2_SIZE, LAYER_4_SIZE = 512, 128, 16

# Rate passed to every Dropout layer in base_model (0.0 disables dropout).
dropout_rate = 0.0

In [4]:
# Load the one-hot encoded train / test frames.
train = pd.read_pickle('dataFrames/train_OneHotEncoding_new_June14th.pkl')
test = pd.read_pickle('dataFrames/test_OneHotEncoding_new_June14th.pkl')

# Column groups that keep their original names.
continuous_cols = [
    'destinationLatitude', 'destinationLongitude', 'distanceKM',
    'sourceLatitude', 'sourceLongitude', 'taxiDurationMin', 'weight',
    'source', 'destination',
]

pred_cols = ['y_avg_lgb_xgb', 'y_gboost', 'y_xgb', 'y_bag', 'y_knn', 'y_dec', 'y_lgb']
#pred_cols = [ 'y_gboost', 'y_xgb', 'y_lgb']

vehicle_cols = [
    'vehicleType_joft', 'vehicleType_khavar', 'vehicleType_tak', 'vehicleType_treili',
    'vehicleOption_bari', 'vehicleOption_hichkodam', 'vehicleOption_kafi',
    'vehicleOption_kompressi', 'vehicleOption_labehdar', 'vehicleOption_mosaghaf_chadori',
    'vehicleOption_mosaghaf_felezi', 'vehicleOption_transit_chadori', 'vehicleOption_yakhchali',
]

# Every remaining train column (apart from ID and price) is treated as categorical.
categorical_cols = train.columns.drop(
    continuous_cols + pred_cols + vehicle_cols + ['ID', 'price']
).tolist()

# Categorical columns are renamed to their position, as a string: '0', '1', ...
NOM = len(train[categorical_cols].columns)
renaming_dict = {
    original_name: str(position)
    for position, original_name in enumerate(train[categorical_cols].columns)
}


def _rename_and_attach(frame):
    """Return the renamed categorical columns of ``frame`` followed by the
    continuous, prediction and vehicle columns, then ``ID`` and ``price``."""
    renamed = frame[categorical_cols].rename(columns=renaming_dict)
    for column in continuous_cols + pred_cols + vehicle_cols + ['ID', 'price']:
        renamed[column] = frame[column]
    return renamed


train_renamed = _rename_and_attach(train)
test_renamed = _rename_and_attach(test)

# Hold out 20% of the training rows for validation.
X_train, X_val = train_test_split(train_renamed, test_size=0.2, random_state=42)
X_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,vehicleOption_hichkodam,vehicleOption_kafi,vehicleOption_kompressi,vehicleOption_labehdar,vehicleOption_mosaghaf_chadori,vehicleOption_mosaghaf_felezi,vehicleOption_transit_chadori,vehicleOption_yakhchali,ID,price
6987,0,0,0,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,52642600134,24500000.0
42079,0,0,0,0,0,0,0,0,0,1,...,0,0,0,1,0,0,0,0,52386856932,9000000.0
36862,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,19056360759,8000000.0
28387,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,54034736201,3450000.0
35474,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,64195974128,3300000.0


In [5]:
def _select_features(frame, feature_type):
    """Return the model-input columns of ``frame`` for ``feature_type``.

    Parameters
    ----------
    frame : pandas.DataFrame
        A frame carrying the renamed categorical columns plus the
        continuous, prediction and vehicle columns, ``ID`` and ``price``.
    feature_type : str
        One of 'all', 'all_but_pred', 'all_but_state_cols', 'cont_and_pred',
        'pred', 'cont'.

    Raises
    ------
    ValueError
        If ``feature_type`` is not one of the recognised names, instead of
        leaving the input frames undefined and failing later with a NameError.
    """
    non_feature_cols = ['ID', 'price']
    # The first two modes are defined by exclusion, the rest by inclusion.
    if feature_type == 'all':
        return frame.drop(non_feature_cols, axis=1)
    if feature_type == 'all_but_pred':
        return frame.drop(non_feature_cols + pred_cols, axis=1)

    column_groups = {
        'all_but_state_cols': continuous_cols + pred_cols + vehicle_cols,
        'cont_and_pred': continuous_cols + pred_cols,
        # The original 'pred' branch built train_input/test_input from
        # continuous_cols while the split frames used pred_cols; all four
        # frames now use pred_cols.
        'pred': pred_cols,
        'cont': continuous_cols,
    }
    if feature_type not in column_groups:
        raise ValueError(
            "Unknown FEATURE_TYPE %r; expected one of %s"
            % (feature_type, ['all', 'all_but_pred'] + sorted(column_groups))
        )
    return frame[column_groups[feature_type]]


# The same selection is applied to every frame so their columns always match.
X_input_train = _select_features(X_train, FEATURE_TYPE)
X_input_val   = _select_features(X_val, FEATURE_TYPE)
train_input   = _select_features(train_renamed, FEATURE_TYPE)
test_input    = _select_features(test_renamed, FEATURE_TYPE)

# Number of input features; read by base_model when the network is built.
INPUT_SHAPE  = X_input_train.shape[1]
y_input_train = X_train.price

# Seeds NumPy's global RNG only.
seed = SEED
np.random.seed(seed)

In [6]:
def MAPE_loss(y_true, y_pred):
    """Mean absolute percentage error, in percent, usable as a Keras loss.

    Parameters
    ----------
    y_true : backend tensor
        Target values.
    y_pred : backend tensor
        Model outputs.

    Returns
    -------
    Backend scalar tensor: mean of ``|y_true - y_pred| / |y_true|`` times 100.
    """
    # Floor |y_true| at the backend epsilon so a zero target cannot turn the
    # loss into inf/NaN. For |y_true| >= epsilon the result equals the
    # unguarded |(y_true - y_pred) / y_true|.
    denominator = K.clip(K.abs(y_true), K.epsilon(), np.inf)
    return K.mean(K.abs(y_true - y_pred) / denominator) * 100

In [7]:
def base_model():
    """Build and compile the feed-forward regression network.

    Architecture: three ReLU hidden layers of LAYER_1_SIZE, LAYER_2_SIZE and
    LAYER_4_SIZE units, with Dropout(dropout_rate) after the first two,
    followed by a single linear output unit. The input width is read from the
    module-level INPUT_SHAPE at call time, so INPUT_SHAPE must be set before
    the model is built.

    Returns
    -------
    A compiled ``Sequential`` model using ``MAPE_loss`` and the Adam optimizer.
    """
    model = Sequential()
    # `kernel_initializer` is the Keras 2 name of the legacy `init` argument.
    model.add(Dense(LAYER_1_SIZE, input_dim=INPUT_SHAPE,
                    kernel_initializer='lecun_uniform', activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(LAYER_2_SIZE, kernel_initializer='lecun_uniform', activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(LAYER_4_SIZE, kernel_initializer='lecun_uniform', activation='relu'))
    # Linear output unit for the regression target.
    model.add(Dense(1, kernel_initializer='lecun_uniform'))
    model.compile(loss=MAPE_loss, optimizer='adam')
    return model

In [8]:
#INPUT_SHAPE  = X_input_train.shape[1]
#clf_val = KerasRegressor(build_fn=base_model,  batch_size= 100, verbose=1, epochs=250)
#clf_val.fit(X_input_train,y_input_train)
#preds_val = clf_val.predict(X_input_val)
#score = mean_absolute_precision_error(preds_val, X_val.price)
#print('%.2f' % score)

# Final

In [9]:
# Final model: fit on every labelled row of train_input (not only the
# X_input_train split).
# base_model reads INPUT_SHAPE when it is called, so refresh it from the frame
# that is actually being fitted.
INPUT_SHAPE  = train_input.shape[1]
# Use the configured BATCH_SIZE instead of repeating the literal 100.
clf = KerasRegressor(build_fn=base_model, epochs=350, batch_size=BATCH_SIZE, verbose=1)
clf.fit(train_input, train_renamed.price)

Epoch 1/350
Epoch 2/350
Epoch 3/350
Epoch 4/350
Epoch 5/350
Epoch 6/350
Epoch 7/350
Epoch 8/350
Epoch 9/350
Epoch 10/350
Epoch 11/350
Epoch 12/350
Epoch 13/350
Epoch 14/350
Epoch 15/350
Epoch 16/350
Epoch 17/350
Epoch 18/350
Epoch 19/350
Epoch 20/350
Epoch 21/350
Epoch 22/350
Epoch 23/350
Epoch 24/350
Epoch 25/350
Epoch 26/350
Epoch 27/350
Epoch 28/350
Epoch 29/350
Epoch 30/350
Epoch 31/350
Epoch 32/350
Epoch 33/350
Epoch 34/350
Epoch 35/350
Epoch 36/350
Epoch 37/350
Epoch 38/350
Epoch 39/350
Epoch 40/350
Epoch 41/350
Epoch 42/350
Epoch 43/350
Epoch 44/350
Epoch 45/350
Epoch 46/350
Epoch 47/350
Epoch 48/350
Epoch 49/350
Epoch 50/350
Epoch 51/350
Epoch 52/350
Epoch 53/350
Epoch 54/350
Epoch 55/350
Epoch 56/350
Epoch 57/350
Epoch 58/350
Epoch 59/350
Epoch 60/350
Epoch 61/350
Epoch 62/350
Epoch 63/350
Epoch 64/350
Epoch 65/350
Epoch 66/350
Epoch 67/350
Epoch 68/350
Epoch 69/350
Epoch 70/350
Epoch 71/350
Epoch 72/350
Epoch 73/350
Epoch 74/350
Epoch 75/350
Epoch 76/350
Epoch 77/350
Epoch 78

Epoch 94/350
Epoch 95/350
Epoch 96/350
Epoch 97/350
Epoch 98/350
Epoch 99/350
Epoch 100/350
Epoch 101/350
Epoch 102/350
Epoch 103/350
Epoch 104/350
Epoch 105/350
Epoch 106/350
Epoch 107/350
Epoch 108/350
Epoch 109/350
Epoch 110/350
Epoch 111/350
Epoch 112/350
Epoch 113/350
Epoch 114/350
Epoch 115/350
Epoch 116/350
Epoch 117/350
Epoch 118/350
Epoch 119/350
Epoch 120/350
Epoch 121/350
Epoch 122/350
Epoch 123/350
Epoch 124/350
Epoch 125/350
Epoch 126/350
Epoch 127/350
Epoch 128/350
Epoch 129/350
Epoch 130/350
Epoch 131/350
Epoch 132/350
Epoch 133/350
Epoch 134/350
Epoch 135/350
Epoch 136/350
Epoch 137/350
Epoch 138/350
Epoch 139/350
Epoch 140/350
Epoch 141/350
Epoch 142/350
Epoch 143/350
Epoch 144/350
Epoch 145/350
Epoch 146/350
Epoch 147/350
Epoch 148/350
Epoch 149/350
Epoch 150/350
Epoch 151/350
Epoch 152/350
Epoch 153/350
Epoch 154/350
Epoch 155/350
Epoch 156/350
Epoch 157/350
Epoch 158/350
Epoch 159/350
Epoch 160/350
Epoch 161/350
Epoch 162/350
Epoch 163/350
Epoch 164/350
Epoch 165/35

Epoch 185/350
Epoch 186/350
Epoch 187/350
Epoch 188/350
Epoch 189/350
Epoch 190/350
Epoch 191/350
Epoch 192/350
Epoch 193/350
Epoch 194/350
Epoch 195/350
Epoch 196/350
Epoch 197/350
Epoch 198/350
Epoch 199/350
Epoch 200/350
Epoch 201/350
Epoch 202/350
Epoch 203/350
Epoch 204/350
Epoch 205/350
Epoch 206/350
Epoch 207/350
Epoch 208/350
Epoch 209/350
Epoch 210/350
Epoch 211/350
Epoch 212/350
Epoch 213/350
Epoch 214/350
Epoch 215/350
Epoch 216/350
Epoch 217/350
Epoch 218/350
Epoch 219/350
Epoch 220/350
Epoch 221/350
Epoch 222/350
Epoch 223/350
Epoch 224/350
Epoch 225/350
Epoch 226/350
Epoch 227/350
Epoch 228/350
Epoch 229/350
Epoch 230/350
Epoch 231/350
Epoch 232/350
Epoch 233/350
Epoch 234/350
Epoch 235/350
Epoch 236/350
Epoch 237/350
Epoch 238/350
Epoch 239/350
Epoch 240/350
Epoch 241/350
Epoch 242/350
Epoch 243/350
Epoch 244/350
Epoch 245/350
Epoch 246/350
Epoch 247/350
Epoch 248/350
Epoch 249/350
Epoch 250/350
Epoch 251/350
Epoch 252/350
Epoch 253/350
Epoch 254/350
Epoch 255/350
Epoch 

Epoch 276/350
Epoch 277/350
Epoch 278/350
Epoch 279/350
Epoch 280/350
Epoch 281/350
Epoch 282/350
Epoch 283/350
Epoch 284/350
Epoch 285/350
Epoch 286/350
Epoch 287/350
Epoch 288/350
Epoch 289/350
Epoch 290/350
Epoch 291/350
Epoch 292/350
Epoch 293/350
Epoch 294/350
Epoch 295/350
Epoch 296/350
Epoch 297/350
Epoch 298/350
Epoch 299/350
Epoch 300/350
Epoch 301/350
Epoch 302/350
Epoch 303/350
Epoch 304/350
Epoch 305/350
Epoch 306/350
Epoch 307/350
Epoch 308/350
Epoch 309/350
Epoch 310/350
Epoch 311/350
Epoch 312/350
Epoch 313/350
Epoch 314/350
Epoch 315/350
Epoch 316/350
Epoch 317/350
Epoch 318/350
Epoch 319/350
Epoch 320/350
Epoch 321/350
Epoch 322/350
Epoch 323/350
Epoch 324/350
Epoch 325/350
Epoch 326/350
Epoch 327/350
Epoch 328/350
Epoch 329/350
Epoch 330/350
Epoch 331/350
Epoch 332/350
Epoch 333/350
Epoch 334/350
Epoch 335/350
Epoch 336/350
Epoch 337/350
Epoch 338/350
Epoch 339/350
Epoch 340/350
Epoch 341/350
Epoch 342/350
Epoch 343/350
Epoch 344/350
Epoch 345/350
Epoch 346/350
Epoch 

<keras.callbacks.History at 0x7faff4419048>

In [10]:
# Caveat: clf was fitted on train_input, which is built from the same
# train_renamed frame that X_val was split from, so the validation rows were
# seen during training and this score is in-sample.
preds2 = clf.predict(X_input_val)
score = mean_absolute_precision_error(y_pred=preds2, y_true=X_val.price)
print('{:.2f}'.format(score))

15.29


In [11]:
# Predict on the test features, then convert each prediction to a Python int
# (int() truncates toward zero).
preds = clf.predict(test_input)
y_preds_test = list(map(int, preds))



In [12]:
filename = "submission60.csv"
# Write one "ID,price" row per test record.
# IDs and predictions are paired by position: `test.ID[i]` is a label lookup
# and only lines up with the i-th prediction when `test` has a default
# 0..n-1 index.
with open(filename, "w") as outputfile:
    outputfile.write("ID,price\n")
    for row_id, price in zip(test['ID'].values, y_preds_test):
        # y_preds_test already holds ints, so ceil is a no-op here; it is kept
        # so the written values stay unchanged.
        outputfile.write(str(row_id) + "," + str(int(np.ceil(price))) + "\n")

# This is the 14.96 score model

# GridSearch

In [None]:
# Candidate hyper-parameter values (wider ranges are kept as comments).
epochs = [10]  # , 100, 150, 200, 250]
batch_size = [20]  # , 60, 80, 100, 128, 150, 200]

param_grid = {'epochs': epochs, 'batch_size': batch_size}

# The epochs / batch_size given here are only the wrapper's defaults; the grid
# search sets the candidate values from param_grid.
clf_test = KerasRegressor(build_fn=base_model, epochs=2, batch_size=100, verbose=1)

grid = GridSearchCV(estimator=clf_test, param_grid=param_grid, n_jobs=1)
grid_result = grid.fit(train_input, train_renamed.price)

# Report the best candidate, then the mean/std test score of every candidate.
print("Best: {:f} using {}".format(grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for mean, stdev, param in zip(means, stds, params):
    print("{:f} ({:f}) with: {!r}".format(mean, stdev, param))