In [1]:
import warnings 
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, GridSearchCV
import keras.backend as K

Using TensorFlow backend.


In [2]:
def mean_absolute_precision_error(y_pred, y_true):
    """Return the mean absolute percentage error (MAPE), in percent.

    Note the argument order: predictions first, reference values second.

    Both inputs are converted to float NumPy arrays before any arithmetic, so
    plain lists/tuples are accepted and pandas objects are compared
    position-by-position instead of being aligned on their index labels.

    Parameters
    ----------
    y_pred : array-like
        Predicted values.
    y_true : array-like
        Reference values. Every error is divided by the matching entry, so a
        zero here produces ``inf``/``nan`` in the result.

    Returns
    -------
    float
        ``mean(|(y_true - y_pred) / y_true|) * 100``.
    """
    y_pred = np.asarray(y_pred, dtype=float)
    y_true = np.asarray(y_true, dtype=float)
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [3]:
# --- Experiment configuration ------------------------------------------------

# Seed value and the name of the feature subset to feed the network.
# Recognised FEATURE_TYPE values:
#   'cont', 'pred', 'all', 'all_but_pred', 'cont_and_pred', 'all_but_state_cols'
SEED = 7
FEATURE_TYPE = 'cont_and_pred'

# Hidden-layer widths and dropout rate read by base_model.
# LAYER_3_SIZE is intentionally left undefined (no active layer uses it).
LAYER_1_SIZE = LAYER_2_SIZE = 512
LAYER_4_SIZE = 16
dropout_rate = 0.0

# Intended mini-batch size for model fitting.
BATCH_SIZE = 100

In [4]:
# Load the pre-built one-hot-encoded frames.
# NOTE(review): unpickling can execute arbitrary code - only read pickle files
# that come from a trusted source.
train = pd.read_pickle('dataFrames/train_OneHotEncoding_new_June14th.pkl')
test = pd.read_pickle('dataFrames/test_OneHotEncoding_new_June14th.pkl')

# Column groups that keep their original names.
continuous_cols = [
    'destinationLatitude', 'destinationLongitude', 'distanceKM',
    'sourceLatitude', 'sourceLongitude', 'taxiDurationMin', 'weight',
    'source', 'destination',
]

pred_cols = [
    'y_avg_lgb_xgb', 'y_gboost', 'y_xgb', 'y_bag', 'y_knn', 'y_dec', 'y_lgb',
]

vehicle_cols = [
    'vehicleType_joft', 'vehicleType_khavar', 'vehicleType_tak',
    'vehicleType_treili',
    'vehicleOption_bari', 'vehicleOption_hichkodam', 'vehicleOption_kafi',
    'vehicleOption_kompressi', 'vehicleOption_labehdar',
    'vehicleOption_mosaghaf_chadori', 'vehicleOption_mosaghaf_felezi',
    'vehicleOption_transit_chadori', 'vehicleOption_yakhchali',
]

# Every remaining column (except ID and price) is treated as categorical.
categorical_cols = train.columns.drop(
    continuous_cols + pred_cols + vehicle_cols + ['ID', 'price']
).tolist()

# Map each categorical column name to its position as a string: '0', '1', ...
NOM = train[categorical_cols].shape[1]
renaming_dict = {
    old_name: str(position)
    for position, old_name in enumerate(train[categorical_cols].columns)
}

train_renamed = train[categorical_cols].rename(columns=renaming_dict)
test_renamed = test[categorical_cols].rename(columns=renaming_dict)

# Append the untouched columns after the renamed ones; ID and price go last.
carried_cols = continuous_cols + pred_cols + vehicle_cols + ['ID', 'price']
for source_frame, renamed_frame in ((train, train_renamed), (test, test_renamed)):
    for col_name in carried_cols:
        renamed_frame[col_name] = source_frame[col_name]

# Hold out 20% of the training rows for validation (fixed random_state).
X_train, X_val = train_test_split(train_renamed, test_size=0.2, random_state=42)
X_train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,vehicleOption_hichkodam,vehicleOption_kafi,vehicleOption_kompressi,vehicleOption_labehdar,vehicleOption_mosaghaf_chadori,vehicleOption_mosaghaf_felezi,vehicleOption_transit_chadori,vehicleOption_yakhchali,ID,price
6987,0,0,0,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,0,52642600134,24500000.0
42079,0,0,0,0,0,0,0,0,0,1,...,0,0,0,1,0,0,0,0,52386856932,9000000.0
36862,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,19056360759,8000000.0
28387,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,1,0,0,54034736201,3450000.0
35474,0,0,0,0,1,0,0,0,0,0,...,0,0,0,0,0,1,0,0,64195974128,3300000.0


In [5]:
# Build the four model-input frames for the configured FEATURE_TYPE.
#
# The same selection rule is applied to every frame, so the train/validation
# split (X_input_*) and the full train/test frames (train_input / test_input)
# always carry the same columns. Previously the 'pred' option selected
# pred_cols for the split frames but continuous_cols for the full frames.

# Feature sets defined by what is removed from the frame ...
_DROP_SPECS = {
    'all': ['ID', 'price'],
    'all_but_pred': ['ID', 'price'] + pred_cols,
}
# ... and feature sets defined by what is kept.
_KEEP_SPECS = {
    'all_but_state_cols': continuous_cols + pred_cols + vehicle_cols,
    'cont_and_pred': continuous_cols + pred_cols,
    'pred': pred_cols,
    'cont': continuous_cols,
}


def _select_features(frame):
    """Return the columns of `frame` selected by the global FEATURE_TYPE.

    Raises ValueError when FEATURE_TYPE is not one of the known options, so a
    typo in the configuration fails here instead of as a NameError later.
    """
    if FEATURE_TYPE in _DROP_SPECS:
        return frame.drop(_DROP_SPECS[FEATURE_TYPE], axis=1)
    if FEATURE_TYPE in _KEEP_SPECS:
        return frame[_KEEP_SPECS[FEATURE_TYPE]]
    raise ValueError(
        "Unknown FEATURE_TYPE %r; expected one of %s"
        % (FEATURE_TYPE, sorted(list(_DROP_SPECS) + list(_KEEP_SPECS)))
    )


X_input_train = _select_features(X_train)
X_input_val = _select_features(X_val)
train_input = _select_features(train_renamed)
test_input = _select_features(test_renamed)

# Number of input features; base_model reads this global for its first layer.
INPUT_SHAPE = X_input_train.shape[1]
y_input_train = X_train.price

# Seeds NumPy's global RNG only.
# NOTE(review): the Keras backend may use its own random state that is not
# seeded here - confirm if run-to-run reproducibility matters.
seed = SEED
np.random.seed(seed)

In [6]:
def MAPE_loss(y_true, y_pred):
    """Keras loss: mean absolute percentage error, expressed in percent.

    Computes ``100 * mean(|y_true - y_pred| / max(|y_true|, K.epsilon()))``
    with backend ops. The denominator is floored at ``K.epsilon()`` so a zero
    target cannot turn the loss into inf/NaN; for targets whose magnitude is
    at least epsilon the value equals ``|(y_true - y_pred) / y_true|``.

    Parameters
    ----------
    y_true : tensor
        Target values supplied by Keras.
    y_pred : tensor
        Model outputs.

    Returns
    -------
    tensor
        The mean taken over all elements (no axis is given to ``K.mean``),
        multiplied by 100.
    """
    abs_error = K.abs(y_true - y_pred)
    safe_denominator = K.maximum(K.abs(y_true), K.epsilon())
    return K.mean(abs_error / safe_denominator) * 100

In [7]:
def base_model():
    """Build and compile the feed-forward regression network.

    Layer stack, in order:
    Dense(LAYER_1_SIZE, relu) -> Dropout -> Dense(LAYER_2_SIZE, relu)
    -> Dropout -> Dense(LAYER_4_SIZE, relu) -> Dense(1).

    All Dense layers use the 'lecun_uniform' initializer, passed through the
    Keras 2 ``kernel_initializer`` keyword (the Keras 1 spelling ``init`` is
    deprecated). The model is compiled with MAPE_loss and the 'adam'
    optimizer.

    The globals INPUT_SHAPE, LAYER_1_SIZE, LAYER_2_SIZE, LAYER_4_SIZE and
    dropout_rate are read when the function is called, so they must be set
    before the model is built.

    Returns
    -------
    Sequential
        The compiled model.
    """
    model = Sequential()
    model.add(Dense(LAYER_1_SIZE, input_dim=INPUT_SHAPE,
                    kernel_initializer='lecun_uniform', activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(LAYER_2_SIZE,
                    kernel_initializer='lecun_uniform', activation='relu'))
    model.add(Dropout(dropout_rate))
    model.add(Dense(LAYER_4_SIZE,
                    kernel_initializer='lecun_uniform', activation='relu'))
    # Single output unit with no activation argument: the regression output.
    model.add(Dense(1, kernel_initializer='lecun_uniform'))
    model.compile(loss=MAPE_loss, optimizer='adam')
    return model

In [None]:
#INPUT_SHAPE  = X_input_train.shape[1]
#clf_val = KerasRegressor(build_fn=base_model,  batch_size= 100, verbose=1, epochs=250)
#clf_val.fit(X_input_train,y_input_train)
#preds_val = clf_val.predict(X_input_val)
#score = mean_absolute_precision_error(preds_val, X_val.price)
#print('%.2f' % score)

# Final

In [10]:
# Fit the final model on the complete training frame.
# base_model reads INPUT_SHAPE when it builds the first layer, so refresh it
# from the frame that is actually being fitted.
INPUT_SHAPE = train_input.shape[1]
# Use the configured BATCH_SIZE instead of repeating the literal 100 here.
clf = KerasRegressor(build_fn=base_model, epochs=250, batch_size=BATCH_SIZE, verbose=1)
clf.fit(train_input, train_renamed.price)

Epoch 1/250
Epoch 2/250
Epoch 3/250
Epoch 4/250
Epoch 5/250
Epoch 6/250
Epoch 7/250
Epoch 8/250
Epoch 9/250
Epoch 10/250
Epoch 11/250
Epoch 12/250
Epoch 13/250
Epoch 14/250
Epoch 15/250
Epoch 16/250
Epoch 17/250
Epoch 18/250
Epoch 19/250
Epoch 20/250
Epoch 21/250
Epoch 22/250
Epoch 23/250
Epoch 24/250
Epoch 25/250
Epoch 26/250
Epoch 27/250
Epoch 28/250
Epoch 29/250
Epoch 30/250
Epoch 31/250
Epoch 32/250
Epoch 33/250
Epoch 34/250
Epoch 35/250
Epoch 36/250
Epoch 37/250
Epoch 38/250
Epoch 39/250
Epoch 40/250
Epoch 41/250
Epoch 42/250
Epoch 43/250
Epoch 44/250
Epoch 45/250
Epoch 46/250
Epoch 47/250
Epoch 48/250
Epoch 49/250
Epoch 50/250
Epoch 51/250
Epoch 52/250
Epoch 53/250
Epoch 54/250
Epoch 55/250
Epoch 56/250
Epoch 57/250
Epoch 58/250
Epoch 59/250
Epoch 60/250
Epoch 61/250
Epoch 62/250
Epoch 63/250
Epoch 64/250
Epoch 65/250
Epoch 66/250
Epoch 67/250
Epoch 68/250
Epoch 69/250
Epoch 70/250
Epoch 71/250
Epoch 72/250
Epoch 73/250
Epoch 74/250
Epoch 75/250
Epoch 76/250
Epoch 77/250
Epoch 78

Epoch 94/250
Epoch 95/250
Epoch 96/250
Epoch 97/250
Epoch 98/250
Epoch 99/250
Epoch 100/250
Epoch 101/250
Epoch 102/250
Epoch 103/250
Epoch 104/250
Epoch 105/250
Epoch 106/250
Epoch 107/250
Epoch 108/250
Epoch 109/250
Epoch 110/250
Epoch 111/250
Epoch 112/250
Epoch 113/250
Epoch 114/250
Epoch 115/250
Epoch 116/250
Epoch 117/250
Epoch 118/250
Epoch 119/250
Epoch 120/250
Epoch 121/250
Epoch 122/250
Epoch 123/250
Epoch 124/250
Epoch 125/250
Epoch 126/250
Epoch 127/250
Epoch 128/250
Epoch 129/250
Epoch 130/250
Epoch 131/250
Epoch 132/250
Epoch 133/250
Epoch 134/250
Epoch 135/250
Epoch 136/250
Epoch 137/250
Epoch 138/250
Epoch 139/250
Epoch 140/250
Epoch 141/250
Epoch 142/250
Epoch 143/250
Epoch 144/250
Epoch 145/250
Epoch 146/250
Epoch 147/250
Epoch 148/250
Epoch 149/250
Epoch 150/250
Epoch 151/250
Epoch 152/250
Epoch 153/250
Epoch 154/250
Epoch 155/250
Epoch 156/250
Epoch 157/250
Epoch 158/250
Epoch 159/250
Epoch 160/250
Epoch 161/250
Epoch 162/250
Epoch 163/250
Epoch 164/250
Epoch 165/25

Epoch 185/250
Epoch 186/250
Epoch 187/250
Epoch 188/250
Epoch 189/250
Epoch 190/250
Epoch 191/250
Epoch 192/250
Epoch 193/250
Epoch 194/250
Epoch 195/250
Epoch 196/250
Epoch 197/250
Epoch 198/250
Epoch 199/250
Epoch 200/250
Epoch 201/250
Epoch 202/250
Epoch 203/250
Epoch 204/250
Epoch 205/250
Epoch 206/250
Epoch 207/250
Epoch 208/250
Epoch 209/250
Epoch 210/250
Epoch 211/250
Epoch 212/250
Epoch 213/250
Epoch 214/250
Epoch 215/250
Epoch 216/250
Epoch 217/250
Epoch 218/250
Epoch 219/250
Epoch 220/250
Epoch 221/250
Epoch 222/250
Epoch 223/250
Epoch 224/250
Epoch 225/250
Epoch 226/250
Epoch 227/250
Epoch 228/250
Epoch 229/250
Epoch 230/250
Epoch 231/250
Epoch 232/250
Epoch 233/250
Epoch 234/250
Epoch 235/250
Epoch 236/250
Epoch 237/250
Epoch 238/250
Epoch 239/250
Epoch 240/250
Epoch 241/250
Epoch 242/250
Epoch 243/250
Epoch 244/250
Epoch 245/250
Epoch 246/250
Epoch 247/250
Epoch 248/250
Epoch 249/250
Epoch 250/250


<keras.callbacks.History at 0x7fa3584552b0>

In [11]:
# Score the fitted model on the validation split and print MAPE in percent.
# NOTE(review): clf was fitted on train_renamed, and X_val was split from that
# same frame, so these rows were part of the training data - the printed value
# is not a held-out estimate.
preds2 = clf.predict(X_input_val)
score = mean_absolute_precision_error(y_pred=preds2, y_true=X_val.price)
print('%.2f' % score)

15.31


In [13]:
# Predict on the test features, then convert every prediction with int(),
# which truncates toward zero.
preds = clf.predict(test_input)
y_preds_test = list(map(int, preds))



In [14]:
# Write the submission file: a header line followed by one "ID,price" row per
# prediction.
filename = "submission59.csv"

# Pair IDs with predictions by position. Looking IDs up with test.ID[i] is a
# label lookup and only matches row i when the index happens to be 0..n-1.
submission_ids = test['ID'].tolist()
if len(submission_ids) != len(y_preds_test):
    raise ValueError(
        "Got %d predictions for %d test rows" % (len(y_preds_test), len(submission_ids))
    )

with open(filename, "w") as outputfile:
    outputfile.write("ID,price\n")
    for row_id, predicted_price in zip(submission_ids, y_preds_test):
        # ceil is a no-op for values that are already ints; it is kept so the
        # written price stays integral if y_preds_test ever holds floats.
        outputfile.write(str(row_id) + "," + str(int(np.ceil(predicted_price))) + "\n")

# GridSearch

In [None]:
# Candidate values for the search (one value each at the moment).
epochs = [10]       # other values tried: 100, 150, 200, 250
batch_size = [20]   # other values tried: 60, 80, 100, 128, 150, 200
param_grid = {'epochs': epochs, 'batch_size': batch_size}

# The epochs / batch_size given here are replaced by the grid values for each
# candidate that GridSearchCV evaluates.
clf_test = KerasRegressor(build_fn=base_model, epochs=2, batch_size=100, verbose=1)

grid = GridSearchCV(estimator=clf_test, param_grid=param_grid, n_jobs=1)
grid_result = grid.fit(train_input, train_renamed.price)

# Report the best candidate, then mean/std test score for every candidate.
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for idx in range(len(params)):
    print("%f (%f) with: %r" % (means[idx], stds[idx], params[idx]))