In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from keras.models import Sequential
from keras.layers import Dense, Activation
import keras.backend as K

In [None]:
import tensorflow as tf

In [None]:
# Report how many GPU devices TensorFlow can see before any training starts.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpu_devices))

In [None]:
def map_hotel_group(group):
    """Return the identifier-style name for a raw hotel group label.

    Spaces are replaced by underscores, and the labels 'Boss Western' and
    'Independant' receive a ``_Group`` suffix.

    Args:
        group: Raw group label as it appears in the hotel features data.

    Returns:
        The corresponding normalised group name.

    Raises:
        KeyError: If ``group`` is not one of the known labels.
    """
    normalised_names = {
        'Boss Western': 'Boss_Western_Group',
        'Accar Hotels': 'Accar_Hotels',
        'Independant': 'Independant_Group',
        'Yin Yang': 'Yin_Yang',
        'Chillton Worldwide': 'Chillton_Worldwide',
        'Morriott International': 'Morriott_International',
    }
    return normalised_names[group]

def map_hotel_brand(brand):
    """Return the identifier-style name for a raw hotel brand label.

    Spaces and dots are replaced by underscores, and the labels
    'Boss Western' and 'Independant' receive a ``_Brand`` suffix.

    Args:
        brand: Raw brand label as it appears in the hotel features data.

    Returns:
        The corresponding normalised brand name.

    Raises:
        KeyError: If ``brand`` is not one of the known labels.
    """
    normalised_names = {
        'J.Halliday Inn': 'J_Halliday_Inn',
        'Marcure': 'Marcure',
        'Independant': 'Independant_Brand',
        'Ibas': 'Ibas',
        'Safitel': 'Safitel',
        '8 Premium': '8_Premium',
        'Tripletree': 'Tripletree',
        'CourtYord': 'CourtYord',
        'Royal Lotus': 'Royal_Lotus',
        'Boss Western': 'Boss_Western_Brand',
        'Corlton': 'Corlton',
        'Navatel': 'Navatel',
        'Ardisson': 'Ardisson',
        'Morriot': 'Morriot',
        'Chill Garden Inn': 'Chill_Garden_Inn',
        'Quadrupletree': 'Quadrupletree',
    }
    return normalised_names[brand]

def _attach_hotel_features(frame, hotels):
    """Join hotel attributes onto ``frame`` and normalise brand/group labels."""
    merged = pd.merge(frame, hotels, how='inner', on='hotel_id')

    # The merge yields city_x / city_y (pandas' suffixes for a column present
    # on both sides); keep the left-hand one under its original name.
    merged = merged.drop(columns='city_y').rename(columns={'city_x': 'city'})

    # Element-wise Series.map instead of DataFrame.apply(axis=1): only one
    # column is needed, so there is no reason to build a Series per row.
    merged['brand'] = merged['brand'].map(map_hotel_brand)
    merged['group'] = merged['group'].map(map_hotel_group)
    return merged


def load_full_feature_set(data_dir='/kaggle/input/defi-ia'):
    """Load the raw CSV files and build the training and test feature frames.

    Reads ``all_queries.csv``, ``all_prices.csv``, ``features_hotels.csv`` and
    ``test_set.csv`` from ``data_dir``. Queries are de-duplicated on
    (language, city, date, mobile), joined with their prices and with the
    hotel features, and the brand/group labels are normalised through
    ``map_hotel_brand`` / ``map_hotel_group``.

    Args:
        data_dir: Directory containing the four CSV files. Defaults to the
            Kaggle input location used by this notebook.

    Returns:
        A tuple ``(X_train, X_test)`` of DataFrames. ``X_train`` holds the
        feature columns followed by ``price``; ``X_test`` holds ``index``
        followed by the same feature columns in the same order.

    Raises:
        KeyError: If a brand or group label is not known to the mapping
            helpers, or an expected column is missing from the input files.
    """
    # load data
    queries = pd.read_csv(f'{data_dir}/all_queries.csv')
    prices = pd.read_csv(f'{data_dir}/all_prices.csv')
    hotels = pd.read_csv(f'{data_dir}/features_hotels.csv')
    test = pd.read_csv(f'{data_dir}/test_set.csv')

    # keep a single query per (language, city, date, mobile) combination
    queries = queries.drop_duplicates(subset=['language', 'city', 'date', 'mobile'])

    # Shared feature ordering so that train and test line up column by column.
    feature_columns = ['city', 'language', 'date', 'mobile',
                       'stock', 'group', 'brand', 'parking', 'pool', 'hotel_id',
                       'children_policy']

    ### X_TRAIN ###
    # queries -> prices -> hotel features; the explicit column selection
    # discards every column that is not listed (identifiers, avatar data).
    X_train = pd.merge(queries, prices, how='inner', on='queryId')
    X_train = _attach_hotel_features(X_train, hotels)
    X_train = X_train[feature_columns + ['price']]

    ### X_TEST ###
    # The test set needs only the hotel features; `index` is kept so that
    # predictions can be matched back to the submission rows.
    X_test = _attach_hotel_features(test, hotels)
    X_test = X_test[['index'] + feature_columns]

    return X_train, X_test

In [None]:
# Build the merged train / test frames from the raw CSV files.
X_train, X_test = load_full_feature_set()

# Split off the submission key and the regression target.
test_idxs = X_test.pop('index')
y_train = X_train.pop('price')

# hotel_id is not used as a model input.
X_train = X_train.drop(columns='hotel_id')
X_test = X_test.drop(columns='hotel_id')

# One-hot encode the categorical columns. An explicit float dtype keeps the
# frame purely numeric: pandas >= 2.0 emits bool dummies by default, and a mix
# of bool and integer columns converts to an object array that Keras rejects.
# NOTE(review): assumes the remaining columns (date, stock) are already
# numeric - confirm against the CSV schema.
categories = ['city', 'language', 'mobile', 'group', 'brand', 'parking', 'pool', 'children_policy']
X_train = pd.get_dummies(X_train, columns=categories, dtype=np.float32)
X_test = pd.get_dummies(X_test, columns=categories, dtype=np.float32)

# The two frames are encoded independently, so a category value that occurs in
# only one of them would give different column sets. Align the test frame to
# the training layout: missing dummies become all-zero columns, dummies unseen
# during training are dropped, and the column order matches the model input.
X_test = X_test.reindex(columns=X_train.columns, fill_value=0)

In [None]:
# Sanity check on the encoded frames. Index.equals compares labels and order
# and simply returns False when the two column sets differ in length, whereas
# an element-wise `==` between Index objects of different lengths raises.
print(f'X_train columns == X_test columns: {X_train.columns.equals(X_test.columns)}')
print(f'Number of training samples: {X_train.shape[0]}')
print(f'Number of features: {X_train.shape[1]}')

In [None]:
# Fully connected regression network: 64 -> 64 -> 32 -> 16 -> 1.
model = Sequential()
# Only the first layer declares the input width; every later layer takes its
# input size from the output of the layer before it.
model.add(Dense(units=64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(units=64, activation='relu'))
model.add(Dense(units=32, activation='relu'))
model.add(Dense(units=16, activation='relu'))
# Single unit without activation: the raw predicted price.
model.add(Dense(1))

# Optimise mean squared error and report RMSE alongside it during training.
model.compile(loss='mean_squared_error',
              metrics=[tf.keras.metrics.RootMeanSquaredError(name='rmse')],
              optimizer='adadelta')

In [None]:
# Train on the full training set: 100 epochs, mini-batches of 32 samples.
model.fit(
    X_train,
    y_train,
    epochs=100,
    batch_size=32,
    verbose=2,
)

In [None]:
# The network ends in a single-unit layer, so predict() returns one column per
# row. Squeeze only that trailing axis: a bare squeeze() would also collapse
# the row axis when the test frame holds a single row, leaving a 0-d array.
predictions = model.predict(X_test)
predictions = predictions.squeeze(axis=1)

In [None]:
# Pair every prediction with its original test-set index, order the rows by
# that index and write the submission file without the DataFrame index.
filename = './deep_reg_submission.csv'

submission = (
    pd.DataFrame({'index': test_idxs, 'price': predictions})
    .sort_values(by='index')
)
submission.to_csv(filename, index=False)