In [1]:
import warnings 
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasRegressor
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.model_selection import train_test_split, KFold
import keras.backend as K

Using TensorFlow backend.


In [2]:
def mean_absolute_precision_error(y_pred, y_true):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

In [3]:
BATCH_SIZE   = 25 
EPOCHS       = 1000 
LAYER_1_SIZE = 1024
LAYER_2_SIZE = 1024
LAYER_3_SIZE = 1024
LAYER_4_SIZE = 64

FEATURE_TYPE = 'pred'  # 'cont' 'pred' 'all'
SEED         = 7

In [4]:
test   = pd.read_pickle('dataFrames/test_OneHotEncoding_new_June14th.pkl')
train  = pd.read_pickle('dataFrames/train_OneHotEncoding_new_June14th.pkl')

continuous_cols = ['destinationLatitude', 'destinationLongitude', 'distanceKM', 'sourceLatitude', 
                   'sourceLongitude', 'taxiDurationMin', 'weight', 'source', 'destination', 
                   'y_avg_lgb_xgb','y_gboost', 'y_xgb', 'y_bag', 'y_knn', 'y_dec', 'y_lgb' ]

pred_cols = ['y_avg_lgb_xgb','y_gboost','y_xgb','y_bag','y_knn','y_dec','y_lgb']

pred_cols = [ 'y_gboost', 'y_xgb', 'y_lgb']

categorical_cols = train.columns.drop(continuous_cols + ['ID', 'price']).tolist()

NOM = train[categorical_cols].shape[1]
renaming_dict = dict(zip(train[categorical_cols].columns, [str(x) for x in list(range(NOM)) ]))

train_renamed = train[categorical_cols].rename(columns=renaming_dict)
test_renamed  = test[categorical_cols].rename(columns=renaming_dict)

for column in continuous_cols:
    train_renamed[column] = train[column]
    test_renamed[column] = test[column]
    
test_renamed['ID']   = test['ID']
train_renamed['ID'] = train['ID']
test_renamed['price'] = test['price']
train_renamed['price'] = train['price']

X_train, X_val = train_test_split(train_renamed, test_size=0.2, random_state=42)

In [5]:
if FEATURE_TYPE=='all':
    X_input_train = X_train.drop(['ID','price'], axis=1)
    X_input_val   = X_val.drop(['ID','price'], axis=1)
    train_input   = train_renamed.drop(['ID','price'], axis=1)
    test_input   = test_renamed.drop(['ID','price'], axis=1)
elif FEATURE_TYPE=='cont':
    X_input_train = X_train[continuous_cols]
    X_input_val   = X_val[continuous_cols]
    train_input   = train_renamed[continuous_cols]
    test_input    = test_renamed[continuous_cols]
elif FEATURE_TYPE=='pred':
    X_input_train = X_train[pred_cols]
    X_input_val   = X_val[pred_cols]
    train_input   = train_renamed[continuous_cols]
    test_input    = test_renamed[continuous_cols]

INPUT_SHAPE  = X_input_train.shape[1]
y_input_train = X_train.price

seed = SEED
np.random.seed(seed)

In [6]:
def MAPE_loss(y_true, y_pred):
    return K.mean(K.abs((y_true - y_pred) / y_true)) * 100

In [7]:
def base_model():
    model = Sequential()
    model.add(Dense(LAYER_1_SIZE, input_dim=INPUT_SHAPE, init='normal', activation='relu'))
    model.add(Dense(LAYER_2_SIZE, init='normal', activation='relu'))
    model.add(Dense(LAYER_3_SIZE, init='normal', activation='relu'))
    model.add(Dense(LAYER_4_SIZE, init='normal', activation='relu'))
    #model.add(Dense(LAYER_5_SIZE, init='normal', activation='relu'))
    model.add(Dense(1, init='normal'))
    model.compile(loss=MAPE_loss, optimizer = 'adam')
    return model

In [None]:
clf_val = KerasRegressor(build_fn=base_model,  batch_size=BATCH_SIZE, verbose=1)

In [None]:
clf_val.fit(X_input_train,y_input_train)
preds_val = clf_val.predict(X_input_val)

In [None]:
score = mean_absolute_precision_error(preds_val, X_val.price)
print('%.2f' % score)

# Final Training

In [8]:
INPUT_SHAPE  = train_input.shape[1]
clf = KerasRegressor(build_fn=base_model, nb_epoch=EPOCHS, batch_size=BATCH_SIZE, verbose=1)
clf.fit(train_input,train_renamed.price)

Epoch 1/1


<keras.callbacks.History at 0x1a201bc7f0>

In [None]:
preds2 = clf.predict(X_input_val)
score  = mean_absolute_precision_error(preds2, X_val.price)
print('%.2f' % score)

In [9]:
preds          = clf.predict(test_input)
y_preds_test   = [int(x) for x in preds]



In [10]:
filename = "/Users/mohsenkiskani/Downloads/Ubaar/submissions/submission51.csv"
with open(filename,"w+") as outputfile:
    outputfile.write("ID,price\n")
    for i in range(len(y_preds_test)):
        outputfile.write(str(test.ID[i])+","+str(int(np.ceil(y_preds_test[i])))+"\n")