# Exploratory Data Analysis Project : Hotel Data set

 


done : How much do guests pay for a room per night? <br>
done : How does the price per night vary over the year? <br>
done : Which are the busiest months? <br>
done : How long do people stay at the hotels? <br>
done : Bookings by market segment? <br>
done : How many bookings were canceled? <br>
done : Which month has the highest number of cancellations? <br>

In [None]:
import pandas as pd 
import numpy as np 
import plotly.express as px
import plotly.graph_objs as go

%matplotlib inline

!pip install sort-dataframeby-monthorweek
!pip install sorted-months-weekdays

In [None]:
# Show every column when a frame is displayed, then load the bookings CSV.
pd.set_option('display.max_columns', None)
df = pd.read_csv('../input/hotel-booking-data-1-lakh-records/hotel_bookings.csv')
df.head()

In [None]:
df.shape

In [None]:
df.describe()

In [None]:
df.dtypes

In [None]:
df.isnull().sum()

In [None]:
# Replace every missing value, in every column, with 0 (in place).
# NOTE(review): this also writes 0 into any non-numeric or identifier-like
# column that had gaps (e.g. `agent`, which is later cast to int and ranked) --
# confirm that a blanket 0 is the intended placeholder there.

df.fillna(0, inplace = True)

In [None]:
df.hotel.unique()

In [None]:
df['hotel'] = df['hotel'].replace('Resort Hotel', 'Resort')

In [None]:
df.hotel.value_counts()

## Confirmed Booking vs Bookings Cancellation Detailed Analysis

In [None]:
df.head(2)

In [None]:
# Count of cancelled (1) versus confirmed (0) bookings.
fig = px.histogram(
    df,
    x='is_canceled',
    color='is_canceled',
    opacity=0.8,
    template='plotly_dark',
    labels={'is_canceled': 'Booking Type'},
    title='Number of Bookings: (Cancelled-1 & Confirmed-0)',
)
# The 0/1 flag is shown on a category axis so each value gets its own bar.
fig.update_xaxes(type='category')
fig.update_layout(
    barmode='group',
    yaxis_title='Number of Bookings',
    height=400,
    width=600,
)
fig.show()

In [None]:
# Bookings per assigned room type, coloured by cancellation flag.
fig = px.histogram(
    df,
    x='assigned_room_type',
    color='is_canceled',
    opacity=0.8,
    template='plotly_dark',
    labels={'is_canceled': 'Booking Status'},
    title='Number of Bookings: Canceled(1) or Not canceled(0)',
)
fig.update_xaxes(type='category')
fig.update_layout(
    barmode='group',
    xaxis_title='Assigned Room Class',
    yaxis_title='Number of bookings',
    height=520,
    width=1000,
)
fig.show()

In [None]:
# Bookings per property type, coloured by cancellation flag.
fig = px.histogram(
    df,
    x='hotel',
    color='is_canceled',
    opacity=0.8,
    template='plotly_dark',
    labels={'is_canceled': 'Booking Status'},
    title='Resort VS City Hotel',
)
fig.update_layout(
    barmode='group',
    xaxis_title='',
    yaxis_title='Number of bookings',
    height=400,
    width=900,
)

fig.show()

In [None]:
# Confirmed (not cancelled) bookings.
# An explicit copy makes this an independent frame: later cells drop rows from
# it and add/rename columns in place, which should not operate on a slice of df.
df_booked = df[df['is_canceled'] == 0].copy()
df_booked.shape

In [None]:
# Cancelled bookings.
# An explicit copy makes this an independent frame: a later cell renames and
# moves a column on it in place, which should not operate on a slice of df.
df_cancelled = df[df['is_canceled'] == 1].copy()
df_cancelled.shape

In [None]:
# Split the confirmed bookings by property type.
# NOTE: both frames are taken from df_booked as it stands at this point; later
# cells that drop rows from df_booked or rename its columns do not refresh them.
df_resort = df_booked[df_booked['hotel']=='Resort']
df_city_hotel = df_booked[df_booked['hotel']=='City Hotel']

In [None]:
# Percentage of all bookings that were confirmed; the remainder is cancelled.
confirmed_rows = df_booked['is_canceled'].count()
all_rows = df['is_canceled'].count()
book_per = confirmed_rows / all_rows * 100

labels = ['Confirmed Bookings', 'Cancelled Bookings']
values = [book_per, 100 - book_per]

fig = go.Figure(data=[go.Pie(labels=labels, values=values)])
fig.update_layout(
    title_text='Confirmed vs Cancelled %',
    template='presentation',
    height=420,
    width=600,
)
fig.show()


In [None]:
from plotly.subplots import make_subplots

# Percentage of each property's bookings that were confirmed.
city_confirmed = (df_booked['hotel'] == 'City Hotel').sum()
city_total = (df['hotel'] == 'City Hotel').sum()
hotel_book_per = city_confirmed / city_total * 100

resort_confirmed = (df_booked['hotel'] == 'Resort').sum()
resort_total = (df['hotel'] == 'Resort').sum()
resort_book_per = resort_confirmed / resort_total * 100

# Two side-by-side pies; pie traces need subplot cells of type 'domain'.
fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
fig.add_trace(
    go.Pie(labels=["Booked Hotel", "Canceled Hotel"],
           values=[hotel_book_per, 100 - hotel_book_per]),
    row=1, col=1,
)
fig.add_trace(
    go.Pie(labels=["Booked Resorts", "Canceled Resorts"],
           values=[resort_book_per, 100 - resort_book_per]),
    row=1, col=2,
)

# A non-zero `hole` turns each pie into a donut.
fig.update_traces(hole=.4, hoverinfo="label+percent")

# Text placed in the centre of each donut.
center_labels = [
    dict(text='Hotel', x=0.18, y=0.5, font_size=20, showarrow=False),
    dict(text='Resort', x=0.82, y=0.5, font_size=20, showarrow=False),
]
fig.update_layout(
    title_text="Confirmed & Cancelled Bookings % by Hotels and Resorts",
    annotations=center_labels,
    template='plotly_dark',
    height=520,
    width=1000,
)
fig.show()

In [None]:
df.head(2)

In [None]:
# Cancelled bookings per arrival year.
fig = px.histogram(
    df_cancelled,
    x='arrival_date_year',
    color='arrival_date_year',
    template='plotly_dark',
    title='Cancellation by Year',
)
fig.update_layout(barmode='group', xaxis_title='Year', yaxis_title='Number of bookings',
                  height=520, width=1000)
# Years are labels, not a continuous quantity: a category axis gives one bar
# per year instead of numeric bins (same treatment as the is_canceled plots).
fig.update_xaxes(type='category')
fig.show()

In [None]:
# Cancelled bookings per arrival month, stacked by year.
# Without an explicit order the month axis follows the order in which month
# names first occur in the data, so the calendar order is pinned here.
month_order = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']
fig = px.histogram(df_cancelled, x='arrival_date_month', color='arrival_date_year',
                   category_orders={'arrival_date_month': month_order},
                   template='plotly_dark',
                   labels={'arrival_date_year': 'Years'})
fig.update_layout(title='Booking cancellation by Months (2015 - 2017)',
                  xaxis_title='Months',
                  yaxis_title='Number of Bookings',
                  barmode='stack',
                  height=520, width=1000)
fig.show()

In [None]:
# Confirmed bookings per arrival month, stacked by year.
# Without an explicit order the month axis follows the order in which month
# names first occur in the data, so the calendar order is pinned here.
month_order = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']
fig = px.histogram(df_booked, x='arrival_date_month', color='arrival_date_year',
                   category_orders={'arrival_date_month': month_order},
                   template='plotly_dark',
                   opacity=0.8,
                   labels={'arrival_date_year': 'Years'})
fig.update_layout(title='Confirmed bookings by Months (2015 - 2017)',
                  xaxis_title='Months',
                  yaxis_title='Number of Bookings',
                  barmode='stack',
                  height=520, width=1080)
fig.show()

In [None]:
# Agents Confirmed Bookings: number of confirmed bookings per (hotel, agent).
# size() counts the rows of each group directly, so the result no longer
# depends on `is_canceled` happening to be the third column after reset_index().
df_agentby = (
    df_booked.groupby(['hotel', 'agent'])
    .size()
    .reset_index(name='Number_of_bookings')
    .rename(columns={'agent': 'agent_id'})
)
df_agentby['agent_id'] = df_agentby['agent_id'].astype(int)
# NOTE(review): missing values were filled with 0 earlier in the notebook, so
# agent_id 0 presumably means "no agent" rather than a real agent -- confirm
# whether it should be excluded from the ranking.
# top 10 agents
top_10_agents = df_agentby.sort_values(by='Number_of_bookings', ascending=False).head(10)
top_10_agents.head()


In [None]:
# Bar chart of the ten (hotel, agent) rows with the most confirmed bookings.
fig = px.histogram(top_10_agents, x='agent_id', y='Number_of_bookings', color='agent_id',
                   labels={'agent_id': 'Agent ID Number'})
fig.update_layout(title='Top 10 Agents (Agents whose Bookings got Confirmed)',
                  yaxis_title='Number_of_bookings_placed',
                  template='ggplot2', barmode='group', height=520, width=1000)
# agent_id is an identifier, not a quantity: a category axis gives each agent
# its own bar instead of letting the histogram bin the ids numerically.
fig.update_xaxes(type='category')
fig.show()

In [None]:
# Agents whose Bookings got cancelled: cancelled bookings per (hotel, agent).
# size() counts the rows of each group directly, so the result no longer
# depends on `is_canceled` happening to be the third column after reset_index().
df_agentby = (
    df_cancelled.groupby(['hotel', 'agent'])
    .size()
    .reset_index(name='Number_of_bookings')
    .rename(columns={'agent': 'agent_id'})
)
df_agentby['agent_id'] = df_agentby['agent_id'].astype(int)
# NOTE(review): missing values were filled with 0 earlier in the notebook, so
# agent_id 0 presumably means "no agent" rather than a real agent -- confirm
# whether it should be excluded from the ranking.
# top 10 agents
top_10_agents = df_agentby.sort_values(by='Number_of_bookings', ascending=False).head(10)
top_10_agents.head()


In [None]:
# Bar chart of the ten (hotel, agent) rows with the most cancelled bookings.
fig = px.histogram(top_10_agents, x='agent_id', y='Number_of_bookings', color='agent_id',
                   labels={'agent_id': 'Top 10 Agents'})
fig.update_layout(title='Top 10 Agents (Agents whose Bookings got Cancelled)',
                  xaxis_title='AGENT IDs Number', yaxis_title='Number_of_bookings_placed',
                  template='ggplot2', barmode='group', height=520, width=1000)
# agent_id is an identifier, not a quantity: a category axis gives each agent
# its own bar instead of letting the histogram bin the ids numerically.
fig.update_xaxes(type='category')
fig.show()

## Customer type <br>
At most hotels, guests fall into two main categories: group and non-group. Amongst the non-group category are Transient Travellers: these are guests who are predominantly on-the-move and seek short (and often urgent!) hotel-stays.

*Transient* travellers can include:

- Walk-in guests
- Guests with a last-minute booking, and/or
- Simply individual guests requiring a short stay at the hotel

In [None]:
df_cancelled.customer_type.value_counts()

In [None]:
# Cancelled bookings per customer type.
# Labels and values both come from the same value_counts() result so they are
# guaranteed to line up; unique() lists categories in order of first appearance
# while value_counts() sorts by frequency, so mixing the two can mislabel slices.
cancelled_by_type = df_cancelled['customer_type'].value_counts()
fig = go.Figure()
fig.add_trace(go.Pie(labels=cancelled_by_type.index, values=cancelled_by_type.values, name='Cancel'))
fig.update_layout(height=400, width=720, template='plotly_dark',
                  title_text='Cancelled Bookings: by Customer Types')
fig.show()

In [None]:
df_booked.customer_type.value_counts()

In [None]:
# Confirmed bookings per customer type.
# Names and values both come from the same value_counts() result so they are
# guaranteed to line up; unique() lists categories in order of first appearance
# while value_counts() sorts by frequency, so mixing the two can mislabel slices.
# The per-category arrays are passed on their own (no data_frame argument),
# since they are not columns of df_booked.
booked_by_type = df_booked['customer_type'].value_counts()
fig = px.pie(names=booked_by_type.index, values=booked_by_type.values)
fig.update_layout(height=400, width=720, template='plotly_dark',
                  title_text='Confirmed Bookings:by Customer Types')
fig.show()

In [None]:
# Customer-type mix of confirmed bookings, resorts (left) vs city hotels (right).
# Each pie takes labels and values from one value_counts() result so they line
# up; unique() (order of first appearance) and value_counts() (sorted by
# frequency) are ordered differently and can mislabel slices when combined.
resort_by_type = df_resort['customer_type'].value_counts()
city_by_type = df_city_hotel['customer_type'].value_counts()

fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
fig.add_trace(go.Pie(labels=resort_by_type.index, values=resort_by_type.values, name='Resorts'), 1, 1)
fig.add_trace(go.Pie(labels=city_by_type.index, values=city_by_type.values, name='City Hotels'), 1, 2)
fig.update_layout(height=520, width=1000, template='plotly_dark',
                  title_text='Confirmed Bookings: Customer Types for Resorts and City Hotel')

fig.show()

## Stays Analysis

In [None]:
df_booked.head(2)

In [None]:
# Remove bookings that list no guests at all (0 adults, 0 children, 0 babies).
# Keeping the complement of the mask and copying yields an independent frame,
# instead of dropping rows in place from a frame that was filtered out of df.
no_guests = (df_booked['adults'] == 0) & (df_booked['children'] == 0) & (df_booked['babies'] == 0)
df_booked = df_booked[~no_guests].copy()

In [None]:
import warnings
warnings.filterwarnings('ignore')

# Total nights = weekend nights + week nights, moved to column position 2.
df_booked['total_nights'] = df_booked['stays_in_weekend_nights'] + df_booked['stays_in_week_nights']
df_booked.insert(2, 'total_nights', df_booked.pop('total_nights'))

# Total guests = adults + children + babies as an integer, moved to position 3.
df_booked['total_guests'] = (
    df_booked['adults'] + df_booked['children'] + df_booked['babies']
).astype(int)
df_booked.insert(3, 'total_guests', df_booked.pop('total_guests'))

# The first four columns are all the stay analysis needs.
df_stays = df_booked.iloc[:, :4]
df_stays.head()

In [None]:
# Count of bookings for every (hotel, total_nights) combination.
df_stay = (
    df_stays.groupby(['hotel', 'total_nights'])
    .agg('count')
    .reset_index()
    .iloc[:, :3]
    .rename(columns={'is_canceled': 'Number of stays'})
)
df_stay.head()

In [None]:
# Bookings per property type, one bar per length of stay.
fig = px.histogram(df_stays, x='hotel',
                   title='For how many days, Properties are booked',
                   template='plotly_dark',
                   opacity=0.8,
                   color='total_nights',
                   labels={'total_nights': 'Number of Nights'})
fig.update_layout(barmode='group', height=520, width=1000,
                  yaxis_title='Number of Bookings', xaxis_title='Type Of Property')
fig.show()

# Bookings per length of stay, one bar per property type.
# The label mapping is keyed on 'total_nights', the column actually plotted on
# x; the previous key ('stays_in_weekend_nights') matched nothing in this
# figure, so the axis fell back to the raw column name.
fig = px.histogram(df_booked, x='total_nights',
                   title='For how much days, Hotels are booked',
                   template='plotly_dark',
                   opacity=0.8,
                   color='hotel',
                   labels={'total_nights': 'Number of days'})
fig.update_layout(barmode='group', height=700, width=1200, yaxis_title='Number of Bookings')
fig.show()

## Analysis on Guests

In [None]:

# Count of confirmed bookings for every (hotel, party size) combination.
df_guest = (
    df_booked.groupby(['hotel', 'total_guests'])
    .agg('count')
    .reset_index()
    .iloc[:, :3]
    .rename(columns={'is_canceled': 'total_guests_count', 'total_guests': 'Num_guest'})
)
df_guest.head(30)

In [None]:
df_booked.head(2)

In [None]:
## Important step
import sort_dataframeby_monthorweek as st

def sort(df, column):
    """Return the result of ``st.Sort_Dataframeby_Month(df, column)``.

    Thin wrapper around the ``sort_dataframeby_monthorweek`` package
    (imported as ``st``). Presumably this orders the rows of ``df`` by the
    calendar position of the month names in ``column`` -- see the package
    documentation to confirm.
    """
    ordered = st.Sort_Dataframeby_Month(df, column)
    return ordered

In [None]:
# Confirmed bookings per arrival year for each property type, ordered by year.
year_resort_guest = (
    df_resort['arrival_date_year'].value_counts()
    .rename_axis('year')
    .reset_index(name='resort_guests')
    .sort_values(by=['year'])
)
year_hotel_guest = (
    df_city_hotel['arrival_date_year'].value_counts()
    .rename_axis('year')
    .reset_index(name='hotel_guests')
    .sort_values(by=['year'])
)

# One row per year that occurs in both frames.
yearwise_guests = year_resort_guest.merge(year_hotel_guest, on='year')
yearwise_guests


In [None]:
# One line per property type: bookings per arrival year.
fig = px.line(
    yearwise_guests,
    x='year',
    y=['resort_guests', 'hotel_guests'],
    labels={'variable': 'Guests'},
)
fig.update_layout(
    title='Guests Over-year',
    yaxis_title='Number of Guests',
    template='plotly_dark',
    height=520,
    width=1080,
)
fig.show()

In [None]:
df_booked.head(2)

In [None]:
# One DataFrame of confirmed bookings per arrival year, keyed by the year.
# Built in a single comprehension: no placeholder values (the old version
# stored the pd.DataFrame class itself before overwriting it) and no
# redundant full-frame slice before filtering.
year_wise = {
    year: df_booked[df_booked['arrival_date_year'] == year]
    for year in df_booked['arrival_date_year'].unique()
}

In [None]:
# Pull the per-year frames out of year_wise under their own names.
# A year with no confirmed bookings has no key and raises KeyError here.
df_2015 = year_wise[2015]
df_2016 = year_wise[2016]
df_2017 = year_wise[2017]

In [None]:
# Confirmed bookings per arrival month in 2015, passed through sort().
guest_2015 = sort(
    df_2015['arrival_date_month'].value_counts()
    .rename_axis('month')
    .reset_index(name='guests'),
    'month',
)
guest_2015

In [None]:
# Confirmed bookings per arrival month in 2016, passed through sort().
guest_2016 = sort(
    df_2016['arrival_date_month'].value_counts()
    .rename_axis('month')
    .reset_index(name='guests'),
    'month',
)
guest_2016

In [None]:
# Confirmed bookings per arrival month in 2017, passed through sort().
guest_2017 = sort(
    df_2017['arrival_date_month'].value_counts()
    .rename_axis('month')
    .reset_index(name='guests'),
    'month',
)
guest_2017

In [None]:
#Year wise months analysis
# Each year's counts are plotted against that year's OWN month column.
# Previously all three lines used guest_2016.month as x, so a year that does
# not cover the same months as 2016 had its values paired positionally with
# the wrong month names.
month_order = ['January', 'February', 'March', 'April', 'May', 'June',
               'July', 'August', 'September', 'October', 'November', 'December']

fig = go.Figure()
for year_label, monthly in (('2015', guest_2015), ('2016', guest_2016), ('2017', guest_2017)):
    fig.add_trace(
        go.Scatter(
            x=monthly.month,
            y=monthly.guests,
            mode='lines', name=year_label
        ))

# Pin the axis to calendar order; otherwise categories appear in the order the
# traces introduce them.
fig.update_xaxes(categoryorder='array', categoryarray=month_order)
fig.update_layout(template='plotly_dark', height=520, width=1000,
                  yaxis_title='Number of Guests',
                  title='Year-wise Number of Guests Analysis (Granularity:Months)')
fig.show()


In [None]:
# Property-wise analysis: confirmed bookings per arrival month for each
# property type, joined on the month name and passed through sort().
month_resort_guest = (
    df_resort['arrival_date_month'].value_counts()
    .rename_axis('month')
    .reset_index(name='resort_guests')
)
month_hotel_guest = (
    df_city_hotel['arrival_date_month'].value_counts()
    .rename_axis('month')
    .reset_index(name='hotel_guests')
)
monthwise_guests = sort(month_resort_guest.merge(month_hotel_guest, on='month'), 'month')
monthwise_guests


In [None]:
# One line per property type: bookings per arrival month.
fig = px.line(
    monthwise_guests,
    x='month',
    y=['resort_guests', 'hotel_guests'],
    labels={'variable': 'Guests'},
)
fig.update_layout(
    title='Number of Guests Month Wise',
    yaxis_title='Number of Guests',
    template='plotly_dark',
    height=520,
    width=1000,
)

## Price Analysis  <br>
(In hotel data, ADR stands for Average Daily Rate: the average price paid per occupied room per night, i.e. the lodging revenue of a booking divided by the number of nights stayed. It should not be confused with an American Depository Receipt, an unrelated financial instrument that shares the abbreviation.)

In [None]:
# Rename `adr` to `price_adr` and move it to column position 6.
df_booked.rename(columns={'adr': 'price_adr'}, inplace=True)
df_booked.insert(6, 'price_adr', df_booked.pop('price_adr'))

# Keep only confirmed bookings whose price is not zero; price_df_booked is the
# same frame under a second name.
df_booked = df_booked[df_booked['price_adr'] != 0]
price_df_booked = df_booked

# Priced confirmed bookings split by property type.
price_resort = price_df_booked[price_df_booked['hotel'] == 'Resort']
price_hotel = price_df_booked[price_df_booked['hotel'] == 'City Hotel']



In [None]:
# City hotel: price against number of nights, one point per booking.
fig = px.scatter(
    price_hotel,
    x='total_nights',
    y='price_adr',
    color='price_adr',
    title='Payments made in City Hotel',
)
fig.update_traces(marker={'size': 10})
fig.update_layout(
    xaxis_title='Number of nights stayed',
    yaxis_title='Price paid in ADR',
    template='plotly_dark',
    height=600,
    width=1080,
)
fig.show()

In [None]:
# Resort: price against number of nights, one point per booking.
fig = px.scatter(
    price_resort,
    x='total_nights',
    y='price_adr',
    color='price_adr',
    title='Payments in Resort',
)
fig.update_traces(marker={'size': 10})
fig.update_layout(
    xaxis_title='Number of nights stayed',
    yaxis_title='Price paid in ADR',
    template='plotly_dark',
    height=600,
    width=1080,
)
fig.show()

In [None]:
# Mean resort price per arrival year.
year_price_resort = (
    price_resort.groupby(['arrival_date_year'])['price_adr']
    .mean()
    .reset_index()
    .rename(columns={'arrival_date_year': 'years', 'price_adr': 'mean_price'})
)
year_price_resort

In [None]:
# Line of the mean resort price per arrival year.
yearly_price_line = go.Scatter(x=year_price_resort.years, y=year_price_resort.mean_price)
fig = go.Figure()
fig.add_trace(yearly_price_line)
fig.update_layout(
    title='Price variying overyears (2015 - 2017)',
    xaxis_title='Year',
    yaxis_title='Price (ADR)',
    template='presentation',
    height=500,
    width=900,
)
fig.show()

In [None]:
# Mean price per arrival month (RESORTS), passed through sort().
month_price_resort = sort(
    price_resort.groupby(['arrival_date_month'])['price_adr'].mean().reset_index(),
    'arrival_date_month',
).rename(columns={'arrival_date_month': 'months', 'price_adr': 'mean_price_resort'})


# Mean price per arrival month (CITY HOTELS), passed through sort().
month_price_hotel = sort(
    price_hotel.groupby(['arrival_date_month'])['price_adr'].mean().reset_index(),
    'arrival_date_month',
).rename(columns={'arrival_date_month': 'months', 'price_adr': 'mean_price_hotel'})

In [None]:
# Mean monthly price, one line per property type.
resort_line = go.Scatter(
    x=month_price_resort.months,
    y=month_price_resort.mean_price_resort,
    name='For Resort',
)
city_line = go.Scatter(
    x=month_price_hotel.months,
    y=month_price_hotel.mean_price_hotel,
    name='For City Hotel',
)

fig = go.Figure()
fig.add_trace(resort_line)
fig.add_trace(city_line)
fig.update_layout(
    title='Price variying overtime (by: months)',
    xaxis_title='Months',
    yaxis_title='Price (ADR)',
    template='plotly_dark',
    height=600,
    width=1080,
)
fig.show()


In [None]:
df_booked

### Revenue 

In [None]:
# Revenue over months (RESORTS): sum of price_adr per arrival month,
# passed through sort() and renamed to months / net_revenue_res.
# NOTE(review): this adds up price_adr once per booking and does not multiply
# by total_nights. If price_adr is a per-night rate, the figure understates
# revenue for multi-night stays -- confirm the intended definition.
month_revenue_resort = price_resort.groupby(['arrival_date_month'])['price_adr'].sum().reset_index()
month_revenue_resort = sort(month_revenue_resort,'arrival_date_month')
month_revenue_resort = month_revenue_resort.rename(columns={'arrival_date_month':'months','price_adr':'net_revenue_res'})


# Revenue over months (CITY HOTELS): same aggregation on the city-hotel rows.
month_revenue_hotel =price_hotel.groupby(['arrival_date_month'])['price_adr'].sum().reset_index()
month_revenue_hotel = sort(month_revenue_hotel,'arrival_date_month')
month_revenue_hotel = month_revenue_hotel.rename(columns={'arrival_date_month':'months','price_adr':'net_revenue_hotel'}) 



In [None]:
# Resort and city-hotel monthly sums side by side, joined on the month.
total_rev = month_revenue_resort.merge(month_revenue_hotel, on='months')
total_rev

In [None]:
# Monthly summed price, one line per property type.
resort_line = go.Scatter(
    x=month_revenue_resort.months,
    y=month_revenue_resort.net_revenue_res,
    name='For Resort',
)
city_line = go.Scatter(
    x=month_revenue_hotel.months,
    y=month_revenue_hotel.net_revenue_hotel,
    name='For City Hotel',
)

fig = go.Figure()
fig.add_trace(resort_line)
fig.add_trace(city_line)
fig.update_layout(
    title='Revenue Overmonths',
    xaxis_title='Months',
    yaxis_title='Price (ADR)',
    template='plotly_dark',
    height=600,
    width=1080,
)
fig.show()

### Loss due to cancellations of Bookings:

In [None]:
df_cancelled.head()

In [None]:
# Rename `adr` to `price_adr` and move it to column position 6.
df_cancelled.rename(columns={'adr': 'price_adr'}, inplace=True)
df_cancelled.insert(6, 'price_adr', df_cancelled.pop('price_adr'))

# Keep only cancelled bookings whose price is not zero; price_df_can is the
# same frame under a second name.
df_cancelled = df_cancelled[df_cancelled['price_adr'] != 0]
price_df_can = df_cancelled

# Priced cancelled bookings split by property type.
price_resort_can = price_df_can[price_df_can['hotel'] == 'Resort']
price_hotel_can = price_df_can[price_df_can['hotel'] == 'City Hotel']

In [None]:
# Summed price of cancelled bookings per arrival month (RESORTS),
# passed through sort().
month_price_resort_can = sort(
    price_resort_can.groupby(['arrival_date_month'])['price_adr'].sum().reset_index(),
    'arrival_date_month',
).rename(columns={'arrival_date_month': 'months', 'price_adr': 'revenue_resort'})


# Summed price of cancelled bookings per arrival month (CITY HOTELS),
# passed through sort().
month_price_hotel_can = sort(
    price_hotel_can.groupby(['arrival_date_month'])['price_adr'].sum().reset_index(),
    'arrival_date_month',
).rename(columns={'arrival_date_month': 'months', 'price_adr': 'revenue_hotel'})

In [None]:
# Resort and city-hotel monthly cancelled sums side by side, joined on the month.
total_loss = month_price_resort_can.merge(month_price_hotel_can, on='months')
total_loss

In [None]:
# Monthly summed price of cancelled bookings, one line per property type.
# Both lines are go.Scatter traces: go.Line is a deprecated alias in
# plotly.graph_objs and is not a trace type.
fig = go.Figure()
fig.add_trace(go.Scatter(x=month_price_resort_can.months, y=month_price_resort_can.revenue_resort,
                         name='For Resort'))
fig.add_trace(go.Scatter(x=month_price_hotel_can.months, y=month_price_hotel_can.revenue_hotel,
                         name='For City Hotel'))
fig.update_layout(title='Loss overtime due to cancellations (by: months)',
                  xaxis_title='Months', yaxis_title='Price (ADR)',
                  height=600, width=1080, template='plotly_dark')
fig.show()


## Market segment

In [None]:
df_booked.head(2)

In [None]:
df_booked['market_segment'].value_counts()

In [None]:
# Cancelled bookings per market segment.
# Labels and values both come from the same value_counts() result so they are
# guaranteed to line up; unique() lists segments in order of first appearance
# while value_counts() sorts by frequency, so mixing the two can mislabel slices.
cancelled_by_segment = df_cancelled['market_segment'].value_counts()
fig = go.Figure()
fig.add_trace(go.Pie(labels=cancelled_by_segment.index, values=cancelled_by_segment.values))
fig.update_layout(title='Cancelled Booking: by Market Segment', height=420, width=720,
                  template='presentation')
fig.show()

In [None]:
# Confirmed bookings per market segment.
# Labels and values both come from the same value_counts() result so they are
# guaranteed to line up; unique() lists segments in order of first appearance
# while value_counts() sorts by frequency, so mixing the two can mislabel slices.
booked_by_segment = df_booked['market_segment'].value_counts()
fig = go.Figure()
fig.add_trace(go.Pie(labels=booked_by_segment.index, values=booked_by_segment.values))
fig.update_layout(title='Confirmed Booking: by Market Segment', height=420, width=720,
                  template='presentation')
fig.show()

In [None]:
# Confirmed bookings per (arrival year, market segment).
# size() counts the rows of each group and names the result directly, so the
# table no longer depends on `hotel` happening to be the third column after
# reset_index().
df_market_y = (
    df_booked.groupby(['arrival_date_year', 'market_segment'])
    .size()
    .reset_index(name='number_of_bookings')
    .rename(columns={'arrival_date_year': 'year'})
)
df_market_y

In [None]:

# Confirmed bookings per year, one bar per market segment.
fig = px.histogram(
    df_market_y,
    x='year',
    y='number_of_bookings',
    color='market_segment',
)
fig.update_layout(
    title='Total Bookings via Market Segments( by: Years)',
    xaxis_title='Year',
    yaxis_title='Confirmed Bookings',
    barmode='group',
    template='plotly_dark',
    height=600,
    width=1080,
)
fig.show()

In [None]:
# Confirmed bookings per (arrival month, market segment).
# size() counts the rows of each group and names the result directly, so the
# table no longer depends on `hotel` happening to be the third column after
# reset_index().
df_market = (
    df_booked.groupby(['arrival_date_month', 'market_segment'])
    .size()
    .reset_index(name='number_of_bookings')
    .rename(columns={'arrival_date_month': 'months'})
)
df_market = sort(df_market, 'months')

fig = px.histogram(df_market, x='months', y='number_of_bookings', color='market_segment')
# The x axis carries months (market segment is the colour), so label it as such.
fig.update_layout(title='Total Bookings via Market Segments (by:Months)',
                  xaxis_title='Months', yaxis_title='Confirmed Bookings',
                  barmode='group', height=600, width=1080, template='plotly_dark')
fig.show()

In [None]:
# Preview of the summed resort price per market segment.
# `revenue_resort` is only assigned further down, so referencing it at this
# point raised NameError on a clean top-to-bottom run; show the same aggregate
# straight from price_resort instead.
price_resort.groupby(['market_segment'])['price_adr'].sum().reset_index()

In [None]:
# Summed price per market segment for each property type.
revenue_resort = (
    price_resort.groupby(['market_segment'])['price_adr']
    .sum()
    .reset_index()
    .rename(columns={'price_adr': 'net_revenue_resort'})
)
revenue_hotel = (
    price_hotel.groupby(['market_segment'])['price_adr']
    .sum()
    .reset_index()
    .rename(columns={'price_adr': 'net_revenue_hotel'})
)

# Both lines are go.Scatter traces: go.Line is a deprecated alias in
# plotly.graph_objs and is not a trace type.
fig = go.Figure()
fig.add_trace(go.Scatter(x=revenue_resort.market_segment, y=revenue_resort.net_revenue_resort,
                         name='For Resort'))
fig.add_trace(go.Scatter(x=revenue_hotel.market_segment, y=revenue_hotel.net_revenue_hotel,
                         name='For City Hotel'))
fig.update_layout(title='Total Revenue via Market Segments',
                  xaxis_title='Market Segment', yaxis_title='Price (ADR)',
                  height=600, width=1080, template='plotly_dark')
fig.show()

In [None]:
# revenue by coustomer type
df_resort.head()


In [None]:
df_resort

In [None]:
# Summed price per customer type for each property type.
# The grouped frame already has a `customer_type` column, so only the value
# column needs renaming (the old mapping from 'arrival_date_month' matched no
# column). The throwaway placeholders are gone too; one of them bound the
# pd.DataFrame class itself rather than an instance.
cust_revenue_resort = (
    price_resort.groupby(['customer_type'])['price_adr']
    .sum()
    .reset_index()
    .rename(columns={'price_adr': 'net_revenue'})
)
cust_revenue_hotel = (
    price_hotel.groupby(['customer_type'])['price_adr']
    .sum()
    .reset_index()
    .rename(columns={'price_adr': 'net_revenue'})
)



In [None]:
cust_revenue_resort

In [None]:
# Share of summed price by customer type: resort (left) and city hotel (right).
resort_pie = go.Pie(
    labels=cust_revenue_resort.customer_type,
    values=cust_revenue_resort.net_revenue,
    name='Resort',
)
city_pie = go.Pie(
    labels=cust_revenue_hotel.customer_type,
    values=cust_revenue_hotel.net_revenue,
    name="hotel",
)

fig = make_subplots(rows=1, cols=2, specs=[[{'type': 'domain'}, {'type': 'domain'}]])
fig.add_trace(resort_pie, row=1, col=1)
fig.add_trace(city_pie, row=1, col=2)
fig.update_layout(
    title='Revenue from Resorts and City Hotel: by Customer Types',
    template='presentation',
    width=1000,
)
fig.show()

In [None]:
# Build one histogram per property type with plotly express and collect their
# trace dictionaries into a single grouped figure.
figure_data = []
for revenue_frame in (cust_revenue_resort, cust_revenue_hotel):
    traces = px.histogram(revenue_frame, x='customer_type', y='net_revenue').to_dict()['data']
    figure_data.extend(traces)

fig = go.Figure(figure_data)
fig.update_layout(
    title='Resort vs Hotel Revenue by Customer Type',
    barmode='group',
    template='plotly_dark',
    width=720,
)
fig.update_traces(overwrite=True, marker={"opacity": 0.8})
fig.show()

In [None]:
# Summed price per (customer type, arrival month) for resorts,
# passed through sort().
cust_revenue_res_mon = sort(
    price_resort.groupby(['customer_type', 'arrival_date_month'])['price_adr']
    .sum()
    .reset_index()
    .rename(columns={'arrival_date_month': 'months', 'price_adr': 'net_revenue'}),
    'months',
)
# Same aggregation for city hotels.
cust_revenue_hot_mon = sort(
    price_hotel.groupby(['customer_type', 'arrival_date_month'])['price_adr']
    .sum()
    .reset_index()
    .rename(columns={'arrival_date_month': 'months', 'price_adr': 'net_revenue'}),
    'months',
)



In [None]:
# Resort: monthly summed price, one line per customer type.
fig = px.line(cust_revenue_res_mon, x='months', y='net_revenue', color='customer_type')
# Title spelling corrected ("Restro" -> "Resort") to match the city-hotel chart.
fig.update_layout(title='Resort Overtime Revenue by Customer Types',
                  yaxis_title='Revenue (ADR)', xaxis_title='Months')


In [None]:
# City hotel: monthly summed price, one line per customer type.
fig = px.line(
    cust_revenue_hot_mon,
    x='months',
    y='net_revenue',
    color='customer_type',
)
fig.update_layout(
    title='City Hotel Overtime Revenue by Customer Types',
    xaxis_title='Months',
    yaxis_title='Revenue (ADR)',
)


In [None]:
df_booked.required_car_parking_spaces.value_counts()

In [None]:
df_booked.deposit_type.value_counts()

Thank you

by : **Kumar Shivam**