In [202]:
import pandas as pd
import numpy as np
from plotly import graph_objs as go
from plotly.offline import init_notebook_mode, iplot
import plotly.io as pio
pio.renderers.default='notebook'
import plotly.figure_factory as ff
from plotly.subplots import make_subplots
from codes import country_codes
import seaborn as sns
sns.set(style="whitegrid")
pd.set_option("display.max_columns", 36)

In [4]:
hb_df = pd.read_csv('hotel_bookings.csv')
hb_df.shape

(119390, 32)

In [6]:
hb_df.head(2)

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,deposit_type,agent,company,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date
0,Resort Hotel,0,342,2015,July,27,1,0,0,2,...,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01
1,Resort Hotel,0,737,2015,July,27,1,0,0,2,...,No Deposit,,,0,Transient,0.0,0,0,Check-Out,2015-07-01


In [7]:
hb_df.columns

Index(['hotel', 'is_canceled', 'lead_time', 'arrival_date_year',
       'arrival_date_month', 'arrival_date_week_number',
       'arrival_date_day_of_month', 'stays_in_weekend_nights',
       'stays_in_week_nights', 'adults', 'children', 'babies', 'meal',
       'country', 'market_segment', 'distribution_channel',
       'is_repeated_guest', 'previous_cancellations',
       'previous_bookings_not_canceled', 'reserved_room_type',
       'assigned_room_type', 'booking_changes', 'deposit_type', 'agent',
       'company', 'days_in_waiting_list', 'customer_type', 'adr',
       'required_car_parking_spaces', 'total_of_special_requests',
       'reservation_status', 'reservation_status_date'],
      dtype='object')

In [21]:
for i in hb_df.columns:
    print('data type for {}: {}'.format(i, hb_df[i].dtype))

data type for hotel: object
data type for is_canceled: int64
data type for lead_time: int64
data type for arrival_date_year: int64
data type for arrival_date_month: object
data type for arrival_date_week_number: int64
data type for arrival_date_day_of_month: int64
data type for stays_in_weekend_nights: int64
data type for stays_in_week_nights: int64
data type for adults: int64
data type for children: float64
data type for babies: int64
data type for meal: object
data type for country: object
data type for market_segment: object
data type for distribution_channel: object
data type for is_repeated_guest: int64
data type for previous_cancellations: int64
data type for previous_bookings_not_canceled: int64
data type for reserved_room_type: object
data type for assigned_room_type: object
data type for booking_changes: int64
data type for deposit_type: object
data type for agent: float64
data type for company: float64
data type for days_in_waiting_list: int64
data type for customer_type: obj

In [17]:
missing_values = hb_df.isnull().sum()
missing_values[missing_values>0]

children         4
country        488
agent        16340
company     112593
dtype: int64

In [38]:
hb_df.fillna({'children': 0.0, 'country': 'Unknown', 'agent': 0, 'company': 0}, inplace=True)
for_dropping = hb_df[(hb_df['adults'] == 0) & (hb_df['children'] == 0) & (hb_df['babies'] == 0)].index.tolist()
hb_df.drop(axis = 0, index = for_dropping, inplace = True)

In [173]:
ch = hb_df[(hb_df['hotel'] == 'City Hotel') & (hb_df['is_canceled'] !=0)]
rh = hb_df[(hb_df['hotel'] == 'Resort Hotel') & (hb_df['is_canceled'] !=0)]

In [182]:
countries = hb_df['country'].value_counts().reset_index()
countries.rename(columns = {'index' : 'country', 'country': 'no_of_guests'}, inplace = True)
countries['%_of_total_guests'] = (countries['no_of_guests']/countries['no_of_guests'].sum())*100

In [183]:
top_countries = countries.loc[0:9]
others = {'country':'Others', 'no_of_guests': countries.loc[10:]['no_of_guests'].sum(), '%_of_total_guests': countries.loc[10:]['%_of_total_guests'].sum()}
top_countries = top_countries.append(others, ignore_index=True)

In [184]:
trace = go.Pie(labels = top_countries['country'], values = top_countries['no_of_guests'], pull=[0.04, 0], marker=dict(colors=["#8ao49b", "#z028de"]))
layout = go.Layout(title="Top 10 Countries Where Guests Visit From", height=500, legend=dict(x=0.1, y=1.1))
fig = go.Figure(data = [trace], layout = layout)
fig.show()

In [185]:
for_dropping = countries[(countries['country'] == 'CN')].index.tolist()
countries.drop(axis = 0, index = for_dropping, inplace = True)
for_dropping = countries[(countries['country'] == 'Unknown')].index.tolist()
countries.drop(axis = 0, index = for_dropping, inplace = True)

In [190]:
country_codes = {v: k for k, v in country_codes.items()}

In [191]:
countries['country_name'] = ''
for i, r in countries.iterrows():
    if r['country'] in list(country_codes.keys()):
        countries.loc[i,'country_names'] = country_codes[r['country']]

In [192]:
data = [dict(
            type = 'choropleth',
            locationmode='country names',
            locations = list(countries['country_names']),
            z = list(countries['no_of_guests']),
            colorscale = [[0,"rgb(5, 10, 172)"],[0.65,"rgb(40, 60, 190)"],[0.75,"rgb(70, 100, 245)"],\
                        [0.80,"rgb(90, 120, 245)"],[0.9,"rgb(106, 137, 247)"],[1,"rgb(220, 220, 220)"]],
            autocolorscale = False,
            reversescale = True,
            marker = dict(
                line = dict (
                    color = 'gray',
                    width = 0.4
                ) ),
          ) ]

layout = dict(
        title = 'Countries Where Guests Visit From',
        height = 700,
        width = 1000,
        geo = dict(
            showframe = False,
            showcoastlines = True,
            projection = dict(
                type = 'mercator'
            )
        )
    )



fig = go.Figure(data=data, layout=layout)
fig.show()

In [201]:
ch['adr_pp'] = ch['adr']/(ch['adults'] + ch['children'])
rh['adr_pp'] = rh['adr']/(rh['adults'] + rh['children'])

print("""From all non-cnceled bookings, across all room types and meals, the average prices are:
Resort hotel: €{:.2f} per night per person.
City hotel: €{:.2f} per night per person."""
      .format(rh["adr_pp"].mean(), ch["adr_pp"].mean()))

From all non-cnceled bookings, across all room types and meals, the average prices are:
Resort hotel: €51.06 per night per person.
City hotel: €57.39 per night per person.
