# Task for Today  

***

## Hotel Classification  

Given *data about guests at two different hotels (one city hotel and one resort hotel)*, let's try to predict which **hotel a guest is staying at**.  
  
We will use a TensorFlow ANN to make our predictions.

# Getting Started

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.model_selection import train_test_split

import tensorflow as tf

In [None]:
# Load the hotel bookings CSV (Kaggle-style relative input path) into a DataFrame.
bookings_csv_path = '../input/hotel-booking-demand/hotel_bookings.csv'
data = pd.read_csv(bookings_csv_path)

In [None]:
# Notebook cell output: preview of the raw DataFrame as loaded from the CSV.
data

In [None]:
# Print column names, dtypes, and non-null counts to see which columns need cleaning.
data.info()

# Preprocessing

In [None]:
# Count missing values per column.
data.isna().sum()

In [None]:
# Remove the 'company' column entirely rather than imputing it.
data = data.drop(columns='company')

In [None]:
# Impute missing values in these columns with each column's own mean.
mean_imputed_columns = ['children', 'agent']
data[mean_imputed_columns] = data[mean_imputed_columns].fillna(
    data[mean_imputed_columns].mean()
)

## Encoding

In [None]:
def get_categorical_uniques(df):
    """Map each object-dtype column of *df* to the list of its unique values.

    Columns whose dtype is not 'object' are omitted from the result.
    Unique values appear in the order returned by ``Series.unique()``.
    """
    uniques_by_column = {}
    for name, dtype in df.dtypes.items():
        if dtype == 'object':
            uniques_by_column[name] = list(df[name].unique())
    return uniques_by_column

In [None]:
# List the unique values of every object-dtype column to decide how to encode each one.
get_categorical_uniques(data)

In [None]:
# Split the reservation status date string into integer year and month columns.
# The slices [0:4] and [5:7] assume a 'YYYY-MM-DD'-style string — confirm against the data.
# The builtin int is used because the np.int alias was removed in NumPy 1.24
# and now raises AttributeError.
data['reservation_year'] = data['reservation_status_date'].apply(lambda x: int(x[0:4]))
data['reservation_month'] = data['reservation_status_date'].apply(lambda x: int(x[5:7]))

# The raw date string is no longer needed once its parts are extracted.
data = data.drop('reservation_status_date', axis=1)

In [None]:
# Notebook cell output: preview the frame with the new reservation_year / reservation_month columns.
data

In [None]:
# Re-list the remaining object-dtype columns now that the date string column is gone.
get_categorical_uniques(data)

In [None]:
# Treat the literal 'Undefined' meal category as a missing value.
# np.nan is the canonical spelling; the np.NaN alias was removed in NumPy 2.0.
data['meal'] = data['meal'].replace('Undefined', np.nan)

In [None]:
# Column to predict.
target = 'hotel'

# Categorical column with a natural order (encoded as an integer position).
ordinal_features = ['arrival_date_month']

# Categorical columns with no natural order (one-hot encoded).
nominal_features = [
    'meal',
    'country',
    'market_segment',
    'distribution_channel',
    'reserved_room_type',
    'assigned_room_type',
    'deposit_type',
    'customer_type',
    'reservation_status',
]

In [None]:
# Calendar order of month names; a month's list position is its ordinal code (January -> 0).
month_ordering = [
    'January', 'February', 'March', 'April',
    'May', 'June', 'July', 'August',
    'September', 'October', 'November', 'December',
]

In [None]:
# Replace each month name with its 0-based position in month_ordering.
# list.index raises ValueError for a name that is not in the list.
data['arrival_date_month'] = data['arrival_date_month'].apply(month_ordering.index)

# Visualizing Correlations

In [None]:
# Every column whose dtype is not 'object' takes part in the correlation matrix.
numerical_columns = [name for name, dtype in data.dtypes.items() if dtype != 'object']

corr = data[numerical_columns].corr()

# Annotated heatmap with the colour scale pinned to the full [-1, 1] correlation range.
plt.figure(figsize=(18, 15))
sns.heatmap(
    corr,
    annot=True,
    vmin=-1.0,
    vmax=1.0,
    cmap='mako',
)
plt.show()

In [None]:
# Drop two columns after inspecting the heatmap (presumably redundant with other features).
redundant_columns = ['arrival_date_week_number', 'reservation_year']
data = data.drop(columns=redundant_columns)

# Back to Encoding

In [None]:
# Notebook cell output: preview the frame before one-hot encoding the nominal columns.
data

In [None]:
def onehot_encode(df, column, prefix):
    """Return a new frame with *column* replaced by its one-hot indicator columns.

    Args:
        df: Source DataFrame; it is not modified.
        column: Name of the column to expand.
        prefix: Prefix given to each generated indicator column name.

    Returns:
        A DataFrame holding the remaining original columns followed by the
        indicator columns produced by ``pd.get_dummies``.
    """
    indicator_columns = pd.get_dummies(df[column], prefix=prefix)
    # pd.concat builds a fresh frame, so the caller's DataFrame stays untouched.
    return pd.concat([df, indicator_columns], axis=1).drop(columns=column)

In [None]:
# Short column-name prefixes, paired by position with the nominal feature list.
onehot_prefixes = [
    'm',
    'c',
    'ms',
    'dc',
    'rt',
    'at',
    'dt',
    'ct',
    'rs',
]

In [None]:
# One-hot encode each nominal feature in turn, using its positionally matched prefix.
# zip stops at the shorter list, so both lists must be the same length.
for feature_name, dummy_prefix in zip(nominal_features, onehot_prefixes):
    data = onehot_encode(data, column=feature_name, prefix=dummy_prefix)

In [None]:
# Notebook cell output: preview the frame after one-hot encoding.
data

In [None]:
# Fit the encoder on the hotel names, then replace the names with their integer codes.
label_encoder = LabelEncoder().fit(data['hotel'])
data['hotel'] = label_encoder.transform(data['hotel'])

In [None]:
# Show which integer code corresponds to which original hotel label.
dict(enumerate(label_encoder.classes_))

In [None]:
# Notebook cell output: preview the fully encoded frame.
data

## Splitting and Scaling

In [None]:
# Separate the label column (y) from the feature matrix (X).
y = data[target]
X = data.drop(columns=target)

In [None]:
scaler = StandardScaler()

X = scaler.fit_transform(X)

In [None]:
X_train, X_test, y_train, y_test = train_test_split(X, y, train_size=0.7, random_state=46)

# Modeling and Training

In [None]:
# (rows, features) of the feature matrix; the feature count is the model's input width.
X.shape

In [None]:
# Mean of the integer-encoded labels; with two classes coded 0/1 this is the share of class 1.
y.mean()

In [None]:
# Fully connected binary classifier: two 64-unit ReLU hidden layers and one sigmoid output.
# The input width comes from the training matrix instead of a hard-coded 246,
# so the model still builds if the number of encoded feature columns changes.
inputs = tf.keras.Input(shape=(X_train.shape[1],))
x = tf.keras.layers.Dense(64, activation='relu')(inputs)
x = tf.keras.layers.Dense(64, activation='relu')(x)
outputs = tf.keras.layers.Dense(1, activation='sigmoid')(x)

model = tf.keras.Model(inputs, outputs)


# Binary cross-entropy pairs with the sigmoid output; accuracy and AUC are tracked as metrics.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=[
        'accuracy',
        tf.keras.metrics.AUC(name='auc')
    ]
)


batch_size = 32
epochs = 7

# 20% of the training data is set aside for validation (validation_split);
# `history` records the per-epoch metrics for later plotting.
history = model.fit(
    X_train,
    y_train,
    validation_split=0.2,
    batch_size=batch_size,
    epochs=epochs
)

# Results

In [None]:
# Plot training vs. validation loss per epoch.
# With several y columns (wide-form input) Plotly Express names the x axis 'index'
# and the y axis 'value', so those are the keys `labels` must use to rename them.
fig = px.line(
    history.history,
    y=['loss', 'val_loss'],
    labels={'index': "Epoch", 'value': "Loss"},
    title="Loss Over Time"
)

fig.show()

In [None]:
# 0-based index of the epoch with the lowest validation loss.
np.argmin(history.history['val_loss'])

In [None]:
# Evaluate on the held-out test split; reports the loss and the metrics given to compile (accuracy, auc).
model.evaluate(X_test, y_test)

# Data Every Day  

This notebook is featured on Data Every Day, a YouTube series where I train models on a new dataset each day.  

***

Check it out!  
https://youtu.be/RMFL36yY8C0