In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt
import seaborn as sns
# Make seaborn's 'magma' palette the default colour cycle for later plots.
sns.set_palette('magma')
# IPython magic: draw matplotlib figures inline in the notebook output.
%matplotlib inline

In [None]:
# Location of the hotel bookings CSV, relative to the notebook's working directory.
# Named DATA_PATH rather than `dir` so the built-in dir() is not shadowed for
# the rest of the kernel session.
DATA_PATH = "../input/hotel-booking-demand/hotel_bookings.csv"

# Read the bookings table into the working frame `df` used by every later cell.
df = pd.read_csv(DATA_PATH)

In [None]:
# Print a summary of the frame: column names, non-null counts and dtypes.
df.info()

In [None]:
import calendar

# Lookup from English month name to month number (January -> 1 ... December -> 12).
# calendar.month_name[0] is an empty string placeholder, so it is skipped rather
# than being mapped to a non-existent month 0.
cal = {name: number for number, name in enumerate(calendar.month_name) if name}

# Convert the month names to numbers only while the column still holds names.
# Without this guard, re-running the cell would push the already-numeric values
# through the name lookup and turn the whole column into NaN.
if not pd.api.types.is_numeric_dtype(df['arrival_date_month']):
    df['arrival_date_month'] = df['arrival_date_month'].map(cal)

In [None]:
# Show the first five rows to check the month column after the conversion.
df.head(n=5)

In [None]:
# Percentage of missing values in each column (missing count * 100 / row count).
(df.isna().sum() * 100) / df.shape[0]

In [None]:
# Remove the agent, company and reservation_status_date columns from the frame.
df = df.drop(columns=['agent', 'company', 'reservation_status_date'])

In [None]:
# Keep only the rows that have no missing value in any remaining column.
df = df.dropna(axis=0, how='any')

In [None]:
# Heatmap of the pairwise correlations between the numeric columns.
# The frame still contains string columns at this point (they are one-hot
# encoded further down), and recent pandas versions raise on them instead of
# silently skipping them, so the numeric/bool columns are selected explicitly.
plt.figure(figsize=(10,8))
sns.heatmap(df.select_dtypes(include=['number', 'bool']).corr(), cmap='plasma')

In [None]:
# Correlation of every numeric column with the cancellation flag, ascending.
# - Numeric/bool columns are selected explicitly because recent pandas versions
#   raise when corr() meets string columns.
# - The target's own entry (always 1.0) is removed by label rather than by
#   position, so the result does not depend on the column order of the frame.
(
    df.select_dtypes(include=['number', 'bool'])
    .corr()['is_canceled']
    .drop('is_canceled')
    .sort_values()
)

In [None]:
# Mean of every numeric column per arrival month.
# numeric_only=True keeps the string columns out of the aggregation; recent
# pandas versions raise a TypeError when asked to average them.
df.groupby('arrival_date_month').mean(numeric_only=True)

In [None]:
# Number of bookings per arrival month, one colour per month.
sns.set_style('whitegrid')
plt.figure(figsize=(12,8))
# - `x` is passed by keyword: newer seaborn releases treat the first positional
#   argument as `data`, so the positional form breaks there.
# - dodge=False: hue is the same variable as x, and with dodging enabled each
#   bar is squeezed into one of twelve per-category slots (thin, offset bars).
sns.countplot(x='arrival_date_month', data=df, hue='arrival_date_month', dodge=False)
plt.xlabel("Month")

In [None]:
# Non-null counts of every column per country, largest 'hotel' count first.
df.groupby('country').count().sort_values(by='hotel', ascending=False)

In [None]:
# Display the previous_cancellations column.
df.loc[:, 'previous_cancellations']

In [None]:
# Two views of the same counts: bookings per hotel split by arrival month,
# then bookings per arrival month split by hotel.
# `x` is passed by keyword because newer seaborn releases treat the first
# positional argument as `data`, which breaks the positional form.
plt.figure(figsize=(12,8))
sns.countplot(x='hotel', data=df, hue='arrival_date_month')

plt.figure(figsize=(12,8))
sns.countplot(x='arrival_date_month', data=df, hue='hotel')

In [None]:
# Bookings per arrival month, split by the cancellation flag.
# `x` is passed by keyword because newer seaborn releases treat the first
# positional argument as `data`, which breaks the positional form.
plt.figure(figsize=(12,8))
sns.countplot(x='arrival_date_month', data=df, hue='is_canceled')

In [None]:
# Bookings per hotel, split by the cancellation flag.
# `x` is passed by keyword because newer seaborn releases treat the first
# positional argument as `data`, which breaks the positional form.
plt.figure(figsize=(12,8))
sns.countplot(x='hotel', data=df, hue='is_canceled')
# Replace the raw hue labels with readable names, in hue order.
plt.legend(["Not Cancelled","Cancelled"])
plt.ylabel("Count")

In [None]:
# Distribution of `adr` for the two hotels, overlaid on one set of axes.
plt.figure(figsize=(20,8))
# Each distribution carries its own label and the legend is built after both
# are drawn. Calling plt.legend([...]) before anything is plotted, as the cell
# did previously, has no artists to attach to and leaves the legend empty.
ax = sns.distplot(df[df["hotel"] == "Resort Hotel"]['adr'], bins=20, label="Resort")
sns.distplot(df[df["hotel"] == "City Hotel"]['adr'], bins=20, ax=ax, label="City")
ax.legend()

In [None]:
# Booking counts per customer type, then per meal plan.
# The column is named via `x=` with `data=df`: newer seaborn releases treat a
# lone positional argument as `data`, so passing the Series positionally no
# longer produces a count per category.
plt.figure(figsize=(12,6))
sns.countplot(x='customer_type', data=df)

plt.figure(figsize=(12,6))
sns.countplot(x='meal', data=df)


In [None]:
# Booking counts per deposit type.
# The column is named via `x=` with `data=df`: newer seaborn releases treat a
# lone positional argument as `data`, so passing the Series positionally no
# longer produces a count per category.
plt.figure(figsize=(12,4))
sns.countplot(x='deposit_type', data=df)

In [None]:
# Number of columns in the frame before one-hot encoding.
df.shape[1]

## Data Modelling

In [None]:
# One-hot encode the categorical columns, using each column's own name as the
# prefix of its dummy columns.
#
# NOTE(review): reservation_status looks like the final outcome of the booking,
# i.e. a restatement of the is_canceled target — confirm against the dataset
# description. It is dropped here instead of being encoded so that the model
# cannot read the answer straight from its inputs.
#
# - `columns=` names the encoded columns explicitly, so the prefix list can
#   never get out of step with whatever dtypes pandas infers.
# - dtype='uint8' pins the dummy dtype to integers. Recent pandas versions
#   default to bool, and a frame mixing bool and numeric columns turns into an
#   object array under `.values`, which the network cannot consume.
df = pd.get_dummies(
    df.drop(columns=['reservation_status']),
    columns=['hotel', 'meal', 'country', 'market_segment',
             'distribution_channel', 'reserved_room_type', 'assigned_room_type',
             'deposit_type', 'customer_type'],
    prefix=['hotel', 'meal', 'country', 'market_segment',
            'distribution_channel', 'reserved_room_type', 'assigned_room_type',
            'deposit_type', 'customer_type'],
    dtype='uint8',
)

In [None]:
# Show the first five rows of the one-hot encoded frame.
df.head(n=5)

In [None]:
# Count how many columns there are of each dtype after encoding.
df.dtypes.value_counts()

In [None]:
# Target vector: the cancellation flag.
y = df['is_canceled']
# Feature matrix: every column except the target.
X = df.drop(columns=['is_canceled'])

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
 # Hold out 30% of the rows for testing; the fixed seed keeps the split repeatable.
 X_train, X_test, y_train, y_test = train_test_split(
     X,
     y,
     test_size=0.3,
     random_state=42,
 )

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout

In [None]:
# (rows, columns) of the training feature matrix.
X_train.shape

In [None]:
# Fully connected binary classifier: two ReLU hidden layers (246 and 123 units),
# each followed by 30% dropout, and a single sigmoid output unit.
model = Sequential([
    Dense(246, activation='relu'),
    Dropout(0.3),
    Dense(123, activation='relu'),
    Dropout(0.3),
    Dense(1, activation='sigmoid'),
])

# Adam optimiser with binary cross-entropy loss; accuracy is tracked as a metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)


In [None]:
from tensorflow.keras.callbacks import EarlyStopping

In [None]:
# Stop training once the validation loss has not improved for 5 epochs in a row.
early = EarlyStopping(monitor='val_loss', patience=5)

In [None]:
# Train for up to 10 epochs in batches of 64, with early stopping.
# - Training and validation inputs are passed in the same form (float32 NumPy
#   arrays). Previously only the training side was converted with `.values`,
#   while validation received DataFrames.
# - The float32 cast also guards against a mixed-dtype frame becoming an object
#   array under `.values`.
# NOTE(review): the held-out test split is used as the validation set here, so
# early stopping is tuned on the same rows that are later used for evaluation;
# consider a separate validation split.
model.fit(
    x=X_train.values.astype('float32'),
    y=y_train.values,
    validation_data=(X_test.values.astype('float32'), y_test.values),
    batch_size=64,
    epochs=10,
    verbose=1,
    callbacks=[early],
)

In [None]:
# Per-epoch training history (one column per recorded metric) as a DataFrame.
metrics = pd.DataFrame.from_dict(model.history.history)

In [None]:
# Display the per-epoch history table.
metrics

In [None]:
# Training vs. validation loss per epoch.
metrics.plot(y=['loss', 'val_loss'])

In [None]:
# Training vs. validation accuracy per epoch.
metrics.plot(y=['accuracy', 'val_accuracy'])

In [None]:
# Loss and accuracy on the held-out split.
# Inputs are passed as float32 NumPy arrays, the same form the training inputs
# take after `.values`, instead of raw DataFrames.
model.evaluate(X_test.values.astype('float32'), y_test.values)

In [None]:
# Predicted class labels (0/1) for the held-out split.
# Sequential.predict_classes has been removed from recent TensorFlow releases.
# For a single sigmoid output, thresholding the predicted probability at 0.5
# gives the class labels.
pred = (model.predict(X_test.values.astype('float32')) > 0.5).astype('int32')

In [None]:
from sklearn.metrics import confusion_matrix,classification_report

In [None]:
# Confusion matrix of true vs. predicted labels.
# fmt='d' prints the cell counts as plain integers; the default annotation
# format abbreviates large counts into scientific notation.
ax = sns.heatmap(confusion_matrix(y_test, pred), annot=True, fmt='d', cmap='magma')
# confusion_matrix puts the true labels on rows and the predictions on columns.
ax.set(xlabel='Predicted label', ylabel='True label')

In [None]:
# Per-class precision, recall and F1 on the held-out split.
print(classification_report(y_true=y_test, y_pred=pred))

In [None]:
from tensorflow.keras.models import save_model

In [None]:
# Write the trained model to an HDF5 file in the working directory.
model.save(filepath="model_1.h5")

# Fin.