In [223]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

In [204]:
df = pd.read_csv("hotel_bookings (1).csv")

In [205]:
df.head()

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,deposit_type,agent,company,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date
0,Resort Hotel,0,342,2015,July,27,1,0,0,2,...,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01
1,Resort Hotel,0,737,2015,July,27,1,0,0,2,...,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01
2,Resort Hotel,0,7,2015,July,27,1,0,1,1,...,No Deposit,,,0,Transient,75.0,0,0,Check-Out,2015-07-02
3,Resort Hotel,0,13,2015,July,27,1,0,1,1,...,No Deposit,304.0,,0,Transient,75.0,0,0,Check-Out,2015-07-02
4,Resort Hotel,0,14,2015,July,27,1,0,2,2,...,No Deposit,240.0,,0,Transient,98.0,0,1,Check-Out,2015-07-03


In [229]:
#Changing the Month into the number representation. Example- Jan - 1, Feb - 2
df['arrival_date_month'] = pd.to_datetime(df['arrival_date_month'], format='%B').dt.month

#Compling all the date related columns and making a column of datatype datetime
df['arrival_date'] = pd.to_datetime(df[['arrival_date_year', 'arrival_date_month', 'arrival_date_day_of_month']].rename(columns={
    'arrival_date_year': 'year',
    'arrival_date_month': 'month',
    'arrival_date_day_of_month': 'day'
}))

KeyError: 'arrival_date_month'

In [230]:
#Dropping the unwanted columns
df.drop(columns=['arrival_date_year',
       'arrival_date_month', 'arrival_date_week_number',
       'arrival_date_day_of_month'],inplace=True)

KeyError: "['arrival_date_year', 'arrival_date_month', 'arrival_date_week_number', 'arrival_date_day_of_month'] not found in axis"

In [208]:
df.columns

Index(['hotel', 'is_canceled', 'lead_time', 'stays_in_weekend_nights',
       'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
       'country', 'market_segment', 'distribution_channel',
       'is_repeated_guest', 'previous_cancellations',
       'previous_bookings_not_canceled', 'reserved_room_type',
       'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
       'company', 'days_in_waiting_list', 'customer_type', 'adr',
       'required_car_parking_spaces', 'total_of_special_requests',
       'reservation_status', 'reservation_status_date', 'arrival_date'],
      dtype='object')

In [209]:
df.isnull().sum()

hotel                                  0
is_canceled                            0
lead_time                              0
stays_in_weekend_nights                0
stays_in_week_nights                   0
adults                                 0
children                               4
babies                                 0
meal                                   0
country                              488
market_segment                         0
distribution_channel                   0
is_repeated_guest                      0
previous_cancellations                 0
previous_bookings_not_canceled         0
reserved_room_type                     0
assigned_room_type                     0
booking_changes                        0
deposit_type                           0
agent                              16340
company                           112593
days_in_waiting_list                   0
customer_type                          0
adr                                    0
required_car_par

In [210]:
df['children'] = df['children'].fillna(0)

In [213]:
#Dropping this column because it has many null values
df.drop(columns=['company'],inplace =True)

In [214]:
df.dtypes

hotel                                     object
is_canceled                                int64
lead_time                                  int64
stays_in_weekend_nights                    int64
stays_in_week_nights                       int64
adults                                     int64
children                                 float64
babies                                     int64
meal                                      object
country                                   object
market_segment                            object
distribution_channel                      object
is_repeated_guest                          int64
previous_cancellations                     int64
previous_bookings_not_canceled             int64
reserved_room_type                        object
assigned_room_type                        object
booking_changes                            int64
deposit_type                              object
agent                                    float64
days_in_waiting_list

In [215]:
#Removing the rows with null values and undefined data
df = df[df['meal']!="Undefined"]
df = df[df['distribution_channel']!= "Undefined"]
df = df[pd.notna(df['agent'])]
df = df[pd.notna(df['country'])]

In [216]:
#One- Hot Encoding for the Categorical Columns

df = pd.get_dummies(df, columns=["hotel",'meal','country','market_segment','distribution_channel','reserved_room_type','assigned_room_type','deposit_type','customer_type','reservation_status'])

In [217]:
#Changing the reservation_status_date column to datetime
df.reservation_status_date = pd.to_datetime(df.reservation_status_date)

In [218]:
#Making new columns from the arival_date column so the the data can be fed into the neural network

df['arrival_year'] = df['arrival_date'].dt.year
df['arrival_month'] = df['arrival_date'].dt.month
df['arrival_day'] = df['arrival_date'].dt.day
df['arrival_day_of_week'] = df['arrival_date'].dt.weekday 
df['arrival_day_of_year'] = df['arrival_date'].dt.dayofyear
df['arrival_is_weekend'] = df['arrival_day_of_week'].isin([5, 6]).astype(int)

In [219]:
#Making new columns from the reservation_status_date column so the the data can be fed into the neural network

df['reservation_year'] = df['reservation_status_date'].dt.year
df['reservation_month'] = df['reservation_status_date'].dt.month
df['reservation_day'] = df['reservation_status_date'].dt.day
df['reservation_day_of_week'] = df['reservation_status_date'].dt.weekday  # 0 = Monday, 6 = Sunday
df['reservation_day_of_year'] = df['reservation_status_date'].dt.dayofyear
df['reservation_is_weekend'] = df['reservation_day_of_week'].isin([5, 6]).astype(int)

In [220]:
#Dropping the unwanted columns

df.drop(columns=['arrival_date','reservation_status_date'],inplace=True)

In [221]:
#One-Hot encoding the date related columns as they are cotegorical columns

df = pd.get_dummies(df, columns=['arrival_year','arrival_month','arrival_day','arrival_day_of_week','arrival_day_of_year','arrival_is_weekend','reservation_year','reservation_month','reservation_day','reservation_day_of_week','reservation_day_of_year','reservation_is_weekend'])

In [225]:
#Dividing the data into X and y
X = df.drop('is_canceled', axis=1)
y = df['is_canceled']

# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [226]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

# Initialize the model
model = Sequential()

# Add layers
model.add(Dense(64, input_dim=X_train.shape[1], activation='relu'))
model.add(Dense(32, activation='relu'))
model.add(Dense(1, activation='sigmoid'))  

# Compile the model
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])



  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


In [227]:
model.fit(X_train, y_train, epochs=20, batch_size=32, validation_split=0.2)


Epoch 1/20
[1m2042/2042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 3ms/step - accuracy: 0.9144 - loss: 0.2733 - val_accuracy: 1.0000 - val_loss: 7.9030e-04
Epoch 2/20
[1m2042/2042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 1.0000 - loss: 4.6724e-04 - val_accuracy: 1.0000 - val_loss: 1.7350e-04
Epoch 3/20
[1m2042/2042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 3ms/step - accuracy: 1.0000 - loss: 1.0387e-04 - val_accuracy: 1.0000 - val_loss: 3.8809e-05
Epoch 4/20
[1m2042/2042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 1.0000 - loss: 2.1617e-05 - val_accuracy: 1.0000 - val_loss: 9.8279e-06
Epoch 5/20
[1m2042/2042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 3ms/step - accuracy: 1.0000 - loss: 5.2775e-06 - val_accuracy: 1.0000 - val_loss: 2.8820e-06
Epoch 6/20
[1m2042/2042[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 3ms/step - accuracy: 1.0000 - loss: 1.3944e-06 - val_accuracy:

<keras.src.callbacks.history.History at 0x2b4b529f6e0>

In [228]:
#Measuring the accuracy of the code

loss,accuracy = model.evaluate(X_test, y_test)

accuracy

[1m638/638[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - accuracy: 1.0000 - loss: 3.6613e-06


1.0