## Plotly visuals

In this notebook, I will plot a couple of maps to show the distribution of hotel bookings across different countries over the past three years.

#### Installing necessary packages and importing

In [2]:
#pip install chart-studio

In [3]:
#pip install dash==1.11.0

In [4]:
#pip install plotly==4.6.0

In [1]:
%matplotlib inline

In [2]:
# Standard plotly imports
import chart_studio.plotly as py
import plotly.graph_objs as go
from plotly.offline import iplot, init_notebook_mode
# Using plotly + cufflinks in offline mode
import cufflinks
# Put both cufflinks and plotly into offline/notebook mode before any chart is drawn.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook — confirm against the plotly documentation.
cufflinks.go_offline(connected=True)
init_notebook_mode(connected=True)

In [3]:
import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
import missingno
from sklearn.impute import SimpleImputer

#### Reading the dataframe and repeating the missing-value imputation from the previous notebook

In [4]:
# Load the bookings data; the path is relative, so the CSV must be reachable
# from the kernel's working directory.
df_hotel = pd.read_csv("hotel_bookings.csv")

In [5]:
# Missing 'children' values are replaced with 0. The result is assigned back to
# the column: calling fillna(inplace=True) on a selected column is chained
# assignment, which is deprecated and silently does nothing under pandas
# copy-on-write.
df_hotel['children'] = df_hotel['children'].fillna(0)

# Missing country codes are replaced with the most frequent country code.
imp_mode = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
temp_country = imp_mode.fit_transform(df_hotel['country'].values.reshape(-1,1))
# The imputer works on a single-column 2-D array; flatten it back to 1-D before
# storing it as a DataFrame column.
df_hotel['country'] = temp_country.ravel()

# 'agent' and 'company' are not used anywhere below. errors='ignore' keeps the
# cell re-runnable once the columns are already gone.
df_hotel = df_hotel.drop(columns=['agent','company'], errors='ignore')

In [6]:
# Preview the first rows after imputation and column removal.
df_hotel.head()

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,assigned_room_type,booking_changes,deposit_type,days_in_waiting_list,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date
0,Resort Hotel,0,342,2015,July,27,1,0,0,2,...,C,3,No Deposit,0,Transient,0.0,0,0,Check-Out,2015-07-01
1,Resort Hotel,0,737,2015,July,27,1,0,0,2,...,C,4,No Deposit,0,Transient,0.0,0,0,Check-Out,2015-07-01
2,Resort Hotel,0,7,2015,July,27,1,0,1,1,...,C,0,No Deposit,0,Transient,75.0,0,0,Check-Out,2015-07-02
3,Resort Hotel,0,13,2015,July,27,1,0,1,1,...,A,0,No Deposit,0,Transient,75.0,0,0,Check-Out,2015-07-02
4,Resort Hotel,0,14,2015,July,27,1,0,2,2,...,A,0,No Deposit,0,Transient,98.0,0,1,Check-Out,2015-07-03


#### To plot the countries on a map, I'm importing a CSV file with latitude and longitude values

In [7]:
# Country lookup table: names, ISO-style codes and average latitude/longitude
# per country (see the column headers in the preview below).
country_info = pd.read_csv("country_codes.csv")
country_info.head()

Unnamed: 0,Country,Alpha-2 code,Alpha-3 code,Numeric code,Latitude (average),Longitude (average)
0,Afghanistan,AF,AFG,4,33.0,65.0
1,Albania,AL,ALB,8,41.0,20.0
2,Algeria,DZ,DZA,12,28.0,3.0
3,American Samoa,AS,ASM,16,-14.3333,-170.0
4,Andorra,AD,AND,20,42.5,1.6


#### Keeping only necessary columns and merging with the main dataframe

In [8]:
# Columns of the country table that are required for the merge and the maps.
needed = [
    'Country',
    'Alpha-3 code',
    'Latitude (average)',
    'Longitude (average)',
]

In [9]:
# Narrow the country table down to the columns listed in `needed`.
country1 = country_info.loc[:, needed]

In [10]:
# Left join: every booking is kept, and the country name and coordinates are
# attached wherever the booking's country code matches an Alpha-3 code.
df1 = df_hotel.merge(
    country1,
    how='left',
    left_on='country',
    right_on='Alpha-3 code',
)

In [11]:
# Check that the country name, code and coordinates were attached by the merge.
df1.head()

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date,Country,Alpha-3 code,Latitude (average),Longitude (average)
0,Resort Hotel,0,342,2015,July,27,1,0,0,2,...,Transient,0.0,0,0,Check-Out,2015-07-01,Portugal,PRT,39.5,-8.0
1,Resort Hotel,0,737,2015,July,27,1,0,0,2,...,Transient,0.0,0,0,Check-Out,2015-07-01,Portugal,PRT,39.5,-8.0
2,Resort Hotel,0,7,2015,July,27,1,0,1,1,...,Transient,75.0,0,0,Check-Out,2015-07-02,United Kingdom,GBR,54.0,-2.0
3,Resort Hotel,0,13,2015,July,27,1,0,1,1,...,Transient,75.0,0,0,Check-Out,2015-07-02,United Kingdom,GBR,54.0,-2.0
4,Resort Hotel,0,14,2015,July,27,1,0,2,2,...,Transient,98.0,0,1,Check-Out,2015-07-03,United Kingdom,GBR,54.0,-2.0


In [12]:
# Give the merged geo columns shorter names for use in the plotting code.
df1.rename(
    columns={
        'Latitude (average)': 'latitude',
        'Longitude (average)': 'longitude',
        'Alpha-3 code': '3_letter_code',
    },
    inplace=True,
)

#### Importing plotly and chart_studio and setting access token and api key

In [13]:
import os

import plotly
import chart_studio

# Credentials are read from the environment so that no secret is stored in the
# notebook. Set CHART_STUDIO_USERNAME, CHART_STUDIO_API_KEY and
# MAPBOX_ACCESS_TOKEN before starting the kernel; a missing variable raises
# KeyError here instead of failing later while plotting.
# NOTE(review): any API key or access token that was ever committed with this
# notebook should be treated as leaked and rotated.
chart_studio.tools.set_credentials_file(
    username=os.environ['CHART_STUDIO_USERNAME'],
    api_key=os.environ['CHART_STUDIO_API_KEY'],
)
mapbox_access_token = os.environ['MAPBOX_ACCESS_TOKEN']

In [14]:
# Distinct hotel categories found in the merged data, as a plain Python list.
hotel_type = df1['hotel'].unique().tolist()
hotel_type

['Resort Hotel', 'City Hotel']

In [15]:
import chart_studio.plotly as py

In [16]:
# Preview the merged frame with the renamed geo columns.
df1.head()

Unnamed: 0,hotel,is_canceled,lead_time,arrival_date_year,arrival_date_month,arrival_date_week_number,arrival_date_day_of_month,stays_in_weekend_nights,stays_in_week_nights,adults,...,customer_type,adr,required_car_parking_spaces,total_of_special_requests,reservation_status,reservation_status_date,Country,3_letter_code,latitude,longitude
0,Resort Hotel,0,342,2015,July,27,1,0,0,2,...,Transient,0.0,0,0,Check-Out,2015-07-01,Portugal,PRT,39.5,-8.0
1,Resort Hotel,0,737,2015,July,27,1,0,0,2,...,Transient,0.0,0,0,Check-Out,2015-07-01,Portugal,PRT,39.5,-8.0
2,Resort Hotel,0,7,2015,July,27,1,0,1,1,...,Transient,75.0,0,0,Check-Out,2015-07-02,United Kingdom,GBR,54.0,-2.0
3,Resort Hotel,0,13,2015,July,27,1,0,1,1,...,Transient,75.0,0,0,Check-Out,2015-07-02,United Kingdom,GBR,54.0,-2.0
4,Resort Hotel,0,14,2015,July,27,1,0,2,2,...,Transient,98.0,0,1,Check-Out,2015-07-03,United Kingdom,GBR,54.0,-2.0


#### Counting the bookings per arrival year, country and hotel type

In [17]:
# One row per (year, country, hotel, latitude, longitude) combination.
# 'lead_time' only serves as a column to count, so after count() it holds the
# number of bookings in each group.
# NOTE(review): bookings whose country code found no match in the country table
# have NaN coordinates and are presumably dropped by groupby — confirm.
df2 = (
    df1
    .groupby(
        ["arrival_date_year", "country", "hotel", "latitude", "longitude"],
        as_index=False,
    )["lead_time"]
    .count()
)
df2.head()

Unnamed: 0,arrival_date_year,country,hotel,latitude,longitude,lead_time
0,2015,AGO,City Hotel,-12.5,18.5,49
1,2015,AGO,Resort Hotel,-12.5,18.5,1
2,2015,ALB,City Hotel,41.0,20.0,1
3,2015,ALB,Resort Hotel,41.0,20.0,1
4,2015,ARG,City Hotel,-34.0,-64.0,6


In [18]:
# The counted column now represents the number of bookings, so name it that.
df2 = df2.rename(columns={'lead_time': 'bookings'})
df2.shape

(627, 6)

In [19]:
# Preview the grouped counts with the renamed 'bookings' column.
df2.head()

Unnamed: 0,arrival_date_year,country,hotel,latitude,longitude,bookings
0,2015,AGO,City Hotel,-12.5,18.5,49
1,2015,AGO,Resort Hotel,-12.5,18.5,1
2,2015,ALB,City Hotel,41.0,20.0,1
3,2015,ALB,Resort Hotel,41.0,20.0,1
4,2015,ARG,City Hotel,-34.0,-64.0,6


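#### Keeping only year/country/hotel groups that have more than five bookings

Note that the threshold is applied to each (year, country, hotel) group, before the yearly counts are summed per country below.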

In [20]:
# Drop groups with five or fewer bookings.
df2 = df2.loc[df2['bookings'].gt(5)]

In [21]:
# Preview the groups that remain after the bookings filter.
df2.head()

Unnamed: 0,arrival_date_year,country,hotel,latitude,longitude,bookings
0,2015,AGO,City Hotel,-12.5,18.5,49
4,2015,ARG,City Hotel,-34.0,-64.0,6
5,2015,ARG,Resort Hotel,-34.0,-64.0,8
7,2015,AUS,City Hotel,-27.0,133.0,15
8,2015,AUS,Resort Hotel,-27.0,133.0,12


#### Subsetting only resort hotel bookings to plot on the map

In [22]:
# Keep only the rows that belong to the resort hotel.
df2_resort = df2[df2['hotel'].eq('Resort Hotel')]
df2_resort.head()

Unnamed: 0,arrival_date_year,country,hotel,latitude,longitude,bookings
5,2015,ARG,Resort Hotel,-34.0,-64.0,8
8,2015,AUS,Resort Hotel,-27.0,133.0,12
10,2015,AUT,Resort Hotel,47.3333,13.3333,43
13,2015,BEL,Resort Hotel,50.8333,4.0,87
18,2015,BRA,Resort Hotel,-10.0,-55.0,48


In [23]:
# Every remaining row is a resort booking, so the 'hotel' column is redundant here.
# drop() returns a new frame that is assigned back instead of mutating in place:
# df2_resort is a filtered slice of df2, and an in-place drop on it triggers
# SettingWithCopyWarning.
df2_resort = df2_resort.drop(columns='hotel')
df2_resort.head()

Unnamed: 0,arrival_date_year,country,latitude,longitude,bookings
5,2015,ARG,-34.0,-64.0,8
8,2015,AUS,-27.0,133.0,12
10,2015,AUT,47.3333,13.3333,43
13,2015,BEL,50.8333,4.0,87
18,2015,BRA,-10.0,-55.0,48


In [24]:
# Collapse the per-year rows into one total per country (coordinates are kept
# as grouping keys so they survive the aggregation).
df2_resort = (
    df2_resort
    .groupby(["country", "latitude", "longitude"], as_index=False)["bookings"]
    .sum()
)

In [25]:
# Preview the per-country resort booking totals.
df2_resort.head()

Unnamed: 0,country,latitude,longitude,bookings
0,AGO,-12.5,18.5,23
1,ARE,24.0,54.0,7
2,ARG,-34.0,-64.0,57
3,AUS,-27.0,133.0,87
4,AUT,47.3333,13.3333,210


In [26]:
# Renumber the rows 0..n-1 without keeping the old index as a column.
df2_resort = df2_resort.reset_index(drop=True)

#### Creating a new column that would be a scaled version of the bookings column

This will serve as the size of bubble in the map

In [27]:
from sklearn import preprocessing

# Rescale the booking counts into the range 5-25 so they can be used as marker sizes.
# NOTE(review): the preview below shows values very close to 5, which suggests a
# few very large countries dominate the range — confirm the bubbles are distinguishable.
mm_scaler = preprocessing.MinMaxScaler(feature_range = (5,25))
# The scaler expects a 2-D array, hence the single-column frame selection.
df4 = df2_resort[['bookings']].to_numpy()
scaled_bookings = mm_scaler.fit_transform(df4)
scale_df = pd.DataFrame(scaled_bookings, columns=["scaled_booking"])
scale_df.head()

Unnamed: 0,scaled_booking
0,5.018797
1,5.001106
2,5.056391
3,5.089562
4,5.225564


In [28]:
# Attach the scaled values; both frames carry a 0..n-1 index, so rows line up.
df2_resort['scaled_booking'] = scale_df['scaled_booking']

In [29]:
# Preview the resort totals together with their scaled marker sizes.
df2_resort.head()

Unnamed: 0,country,latitude,longitude,bookings,scaled_booking
0,AGO,-12.5,18.5,23,5.018797
1,ARE,24.0,54.0,7,5.001106
2,ARG,-34.0,-64.0,57,5.056391
3,AUS,-27.0,133.0,87,5.089562
4,AUT,47.3333,13.3333,210,5.225564


#### Creating a column to identify the type of hotel

In [30]:
# Label every row with its hotel type; the plotting loop filters on this column.
df2_resort = df2_resort.assign(hotel='Resort Hotel')

#### Creating the map

We plot the coordinates using the latitude and longitude values. The size of each bubble grows with the number of bookings.

In [31]:
data = []

# Build one scattermapbox trace per hotel type that is present in df2_resort.
for hotel in hotel_type:
    # Filter once and take every per-point attribute from the same subset, so
    # coordinates, hover text and marker sizes always have matching lengths.
    hotel_rows = df2_resort.loc[df2_resort['hotel'] == hotel]
    if hotel_rows.empty:
        # df2_resort only holds 'Resort Hotel' rows; skipping the other hotel
        # types avoids an empty trace and a misleading legend entry.
        continue
    hotel_dict = dict(
            lat = hotel_rows['latitude'],
            lon = hotel_rows['longitude'],
            hovertext = hotel_rows['country'],
            name = hotel,
            marker = dict(size = hotel_rows['scaled_booking'], opacity = 0.5),
            type = 'scattermapbox'
        )
    data.append(hotel_dict)

#Customise layout
layout = dict(
    height = 800,
    # top, bottom, left and right margins
    margin = dict(t = 0, b = 0, l = 0, r = 0),
    font = dict(color = '#FFFFFF', size = 11),
    paper_bgcolor = '#000000',
    mapbox = dict(
        #We have defined the access token earlier
        accesstoken = mapbox_access_token,
        bearing = 0,
        #Centre of the map: mean of the distinct latitude / longitude values
        center = dict(
            lat = df2_resort['latitude'].unique().mean(),
            lon = df2_resort['longitude'].unique().mean()
        ),
        pitch = 0,

        #default level of zoom
        zoom = 1,

        #default map style
        style = 'dark'
    )
)

In [32]:
# Single caption for the resort map; the displayed text previously had a
# spelling mistake ("Distibution").
annotations = [dict(

              #Text displayed on map
              text = 'Distribution of Resort Hotel bookings',

              #Font and border characteristics
              font = dict(color = '#FFFFFF', size = 14), borderpad = 10,

              #Position of the caption, in paper (figure-relative) coordinates
              x = 0.05, y = 0.05, xref = 'paper', yref = 'paper', align = 'left',

              #Don't show arrow and set background color
              showarrow = False, bgcolor = 'black'
              )]

#Assign annotations to the layout
layout['annotations'] = annotations

### Plotting the data for Resort Hotels

In [33]:
# Bundle traces and layout into a figure and render it through chart_studio.
figure = {'data': data, 'layout': layout}
py.iplot(figure, filename = 'Resort_bookings')

## Similarly, for City Hotels

In [65]:
# Keep only the rows that belong to the city hotel.
df2_city = df2[df2['hotel'].eq('City Hotel')]
df2_city.head()

Unnamed: 0,arrival_date_year,country,hotel,latitude,longitude,bookings
0,2015,AGO,City Hotel,-12.5,18.5,49
4,2015,ARG,City Hotel,-34.0,-64.0,6
7,2015,AUS,City Hotel,-27.0,133.0,15
9,2015,AUT,City Hotel,47.3333,13.3333,66
12,2015,BEL,City Hotel,50.8333,4.0,124


In [66]:
# Collapse the per-year rows into one total per country.
df2_city = (
    df2_city
    .groupby(["country", "latitude", "longitude"], as_index=False)["bookings"]
    .sum()
)

In [68]:
# Renumber the rows 0..n-1 without keeping the old index as a column.
df2_city = df2_city.reset_index(drop=True)
df2_city.head()

Unnamed: 0,country,latitude,longitude,bookings
0,AGO,-12.5,18.5,338
1,ARE,24.0,54.0,40
2,ARG,-34.0,-64.0,157
3,AUS,-27.0,133.0,339
4,AUT,47.3333,13.3333,1053


#### Introducing a scaled column to serve as the size of bubble

In [69]:
# Rescale the city booking counts into the range 5-25 for use as marker sizes.
mm_scaler = preprocessing.MinMaxScaler(feature_range = (5,25))
# The scaler expects a 2-D array, hence the single-column frame selection.
df4 = df2_city[['bookings']].to_numpy()
scaled_bookings = mm_scaler.fit_transform(df4)
scale_df = pd.DataFrame(scaled_bookings, columns=["scaled_booking"])
scale_df.head()

Unnamed: 0,scaled_booking
0,5.214346
1,5.021951
2,5.097489
3,5.214991
4,5.675964


In [70]:
# Attach the scaled marker sizes (indexes line up at 0..n-1) and label every
# row with its hotel type for the plotting loop.
df2_city = df2_city.assign(
    scaled_booking=scale_df['scaled_booking'],
    hotel='City Hotel',
)
df2_city.head()

Unnamed: 0,country,latitude,longitude,bookings,scaled_booking,hotel
0,AGO,-12.5,18.5,338,5.214346,City Hotel
1,ARE,24.0,54.0,40,5.021951,City Hotel
2,ARG,-34.0,-64.0,157,5.097489,City Hotel
3,AUS,-27.0,133.0,339,5.214991,City Hotel
4,AUT,47.3333,13.3333,1053,5.675964,City Hotel


#### Plotting the data for city hotels

In [71]:
data = []

# Build one scattermapbox trace per hotel type that is present in df2_city.
for hotel in hotel_type:
    # Filter once and take every per-point attribute from the same subset, so
    # coordinates, hover text and marker sizes always have matching lengths.
    hotel_rows = df2_city.loc[df2_city['hotel'] == hotel]
    if hotel_rows.empty:
        # df2_city only holds 'City Hotel' rows; skipping the other hotel types
        # avoids an empty trace and a misleading legend entry.
        continue
    hotel_dict = dict(
            lat = hotel_rows['latitude'],
            lon = hotel_rows['longitude'],
            hovertext = hotel_rows['country'],
            name = hotel,
            marker = dict(size = hotel_rows['scaled_booking'], opacity = 0.5),
            type = 'scattermapbox'
        )
    data.append(hotel_dict)

#Customise layout
layout = dict(
    height = 800,
    # top, bottom, left and right margins
    margin = dict(t = 0, b = 0, l = 0, r = 0),
    font = dict(color = '#FFFFFF', size = 11),
    paper_bgcolor = '#000000',
    mapbox = dict(

        accesstoken = mapbox_access_token,
        bearing = 0,

        #Centre of map: mean of the distinct latitude / longitude values
        center = dict(
            lat = df2_city['latitude'].unique().mean(),
            lon = df2_city['longitude'].unique().mean()
        ),
        pitch = 0,

        zoom = 1,
        style = 'dark'
    )
)

In [72]:
# Single caption for the city map; the displayed text previously had a
# spelling mistake ("Distibution").
annotations = [dict(

              #Text to display
              text = 'Distribution of City Hotel bookings',

              #Font and border characteristics
              font = dict(color = '#FFFFFF', size = 14), borderpad = 10,

              #Position of the caption, in paper (figure-relative) coordinates
              x = 0.05, y = 0.05, xref = 'paper', yref = 'paper', align = 'left',

              #Don't show arrow and set background color
              showarrow = False, bgcolor = 'black'
              )]

#Assign the annotations to the layout
layout['annotations'] = annotations

In [73]:
# Bundle traces and layout into a figure and render it through chart_studio.
figure = {'data': data, 'layout': layout}
py.iplot(figure, filename = 'City_hotels')