## Visualization 

In this notebook I created visualizations to compare the Airbnb market in three European cities: Rome, Barcelona and Stockholm.

In [None]:
# importing packages

import pandas as pd
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

import warnings
# Silence every warning for the rest of the kernel session.
# NOTE(review): this blanket filter presumably also hides pandas warnings
# (e.g. about assigning to a slice of a dataframe) raised by the
# preprocessing cells below - consider narrowing it to specific categories.
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

In [None]:
# Shared five-colour palette; the plotting cells hand it to seaborn as `palette=c`.
c = [
    '#F2CC8F',
    '#E07A5F',
    '#81B29A',
    '#F4F1DE',
    '#3D405B',
]

In [None]:
# load the Barcelona listings (the path is relative to the notebook's working directory)

df_bcn = pd.read_csv('data/listings_Barcelona.csv')

In [None]:
# preview the first two rows to confirm the file was parsed as expected

df_bcn.head(2)

In [None]:
# list the available columns; the preprocessing below picks a subset of them

df_bcn.columns

#### Categorizing the hosts

Hosts will be categorized based on the number of accommodations they offer:
- 1 listing
- 2-3 listings
- 4-10 listings
- more than 10 listings

In [None]:
# One boolean mask per host bucket, built from the number of listings a host has:
# exactly 1, more than 1 but fewer than 4, more than 3 but fewer than 11, more than 10.
# The masks are consumed positionally, so their order has to match `values`.
conditions = [
    df_bcn['calculated_host_listings_count'].eq(1),
    df_bcn['calculated_host_listings_count'].gt(1) & df_bcn['calculated_host_listings_count'].lt(4),
    df_bcn['calculated_host_listings_count'].gt(3) & df_bcn['calculated_host_listings_count'].lt(11),
    df_bcn['calculated_host_listings_count'].gt(10),
]

In [None]:
# Host-type labels, one per mask in `conditions` and in the same order.
values = [
    'single_listing',
    '2-3_listings',
    '4-10_listings',
    'above_10',
]

#### Preprocessing of the dataframes

#### Barcelona

In [None]:
# recode hosts in Barcelona

df_bcn['host_type'] = np.select(conditions, values)

In [None]:
df_bcn['city'] = 'BCN'

In [None]:
# select relevant columns

df_bcn_part = df_bcn[['id', 'host_id', 'room_type', 'price', 'minimum_nights', 'host_type', 'city']]

In [None]:
df_bcn_part.head()

In [None]:
# remove currency from the price column

df_bcn_part['price'] = df_bcn_part['price'].str[1:]

In [None]:
# remove comma from the price column

df_bcn_part['price'] = df_bcn_part['price'].str.replace(',', '')

In [None]:
# change price into a numeric variable

df_bcn_part['price'] = pd.to_numeric(df_bcn_part['price'])

#### Stockholm

In [None]:
# read data into a pandas dataframe

df_sthlm = pd.read_csv('data/listings_Stockholm.csv')

In [None]:
# check the first lines

df_sthlm.head(2)

In [None]:
conditions = [
    (df_sthlm['calculated_host_listings_count'] == 1),
    (df_sthlm['calculated_host_listings_count'] > 1) & (df_sthlm['calculated_host_listings_count'] < 4),
    (df_sthlm['calculated_host_listings_count'] > 3) & (df_sthlm['calculated_host_listings_count'] < 11),
    (df_sthlm['calculated_host_listings_count'] > 10)
    ]

In [None]:
df_sthlm['host_type'] = np.select(conditions, values)

In [None]:
df_sthlm['city'] = 'STHLM'

In [None]:
# select relevant columns

df_sthlm_part = df_sthlm[['id', 'host_id', 'room_type', 'price', 'minimum_nights', 'host_type', 'city']]

In [None]:
df_sthlm_part.head()

In [None]:
# remove currency symbol from the price column

df_sthlm_part['price'] = df_sthlm_part['price'].str[1:]


In [None]:
# remove the comme from price

df_sthlm_part['price'] = df_sthlm_part['price'].str.replace(',', '')

In [None]:
# change price into a numeric variable

df_sthlm_part['price'] = pd.to_numeric(df_sthlm_part['price'])

In [None]:
# recalculate prices from crone to dollars

df_sthlm_part['price'] = df_sthlm_part['price']* 0.0883

In [None]:
df_sthlm_part.head()

In [None]:
#### Rome

In [None]:
# read data into a pandas dataframe

df_rome = pd.read_csv('data/listings_Rome.csv')

In [None]:
# check the first some lines

df_rome.head(2)

In [None]:
conditions = [
    (df_rome['calculated_host_listings_count'] == 1),
    (df_rome['calculated_host_listings_count'] > 1) & (df_rome['calculated_host_listings_count'] < 4),
    (df_rome['calculated_host_listings_count'] > 3) & (df_rome['calculated_host_listings_count'] < 11),
    (df_rome['calculated_host_listings_count'] > 10)
    ]

In [None]:
df_rome['host_type'] = np.select(conditions, values)

In [None]:
# add city column

df_rome['city'] = 'ROM'

In [None]:
#select relevant columns

df_rome_part = df_rome[['id', 'host_id', 'room_type', 'price', 'minimum_nights', 'host_type', 'city']]

In [None]:
# chack the data

df_rome_part.head()

In [None]:
# remove the currecny symbol from the price

df_rome_part['price'] = df_rome_part['price'].str[1:]

In [None]:
# remove comma

df_rome_part['price'] = df_rome_part['price'].str.replace(',', '')

In [None]:
# turn price into a numeric variable

df_rome_part['price'] = pd.to_numeric(df_rome_part['price'])

In [None]:
# concatenate the 3 cities into one dataframe
# ignore_index=True gives the combined frame a fresh 0..n-1 index; without it
# every city keeps its own row labels, so the same label occurs up to three
# times in df_3.

df_3 = pd.concat([df_bcn_part, df_sthlm_part, df_rome_part], axis=0, ignore_index=True)

In [None]:
# check on the city codes: the three preprocessing sections assign 'BCN', 'STHLM' and 'ROM'

df_3.city.unique()

In [None]:
# list the distinct room types; the figures below hard-code these names in hue_order

df_3.room_type.unique()

In [None]:
# check the number and type of rooms in each city

df_3.groupby(['city', 'room_type']).room_type.count() 

### Figure 1: Number of different types of accommodation offered in the three cities

In [None]:

# Figure 1: number of listings per room type in each city. Every bar is
# annotated with its share of ALL listings in df_3 (the three cities combined).

# set the background color to #242424
sns.set(rc={'axes.facecolor': '#242424', 'figure.facecolor': '#242424'})

# The tick and legend labels below are applied by position, so the bar order is
# fixed explicitly instead of depending on the row order of df_3.
city_order = ['BCN', 'STHLM', 'ROM']
room_order = ['Entire home/apt', 'Private room', 'Hotel room', 'Shared room']

# plot the data with a categorical plot
g = sns.catplot(data=df_3, kind='count', x='city', hue='room_type', order=city_order,
                hue_order=room_order, legend='', palette=c, edgecolor="#242424")
ax = g.ax

titel = ax.set_title('Number of accomodation in the different cities', color='white')
legend = ax.legend(room_order, loc=2, frameon=False)
for text in legend.get_texts():
    text.set_color("white")

# The x axis shows the cities (the room types are encoded by colour), so it is
# labelled accordingly.
xlabel = ax.set_xlabel('City', color='white')
ylabel = ax.set_ylabel('Number of offers', color='white')

# readable city names, in the same order as city_order
g.set_xticklabels(['Barcelona', 'Stockholm', 'Rome'])
ax.tick_params(axis='both', labelcolor='white')

# denominator of the percentage labels, computed once instead of once per bar
total_listings = df_3.room_type.count()

for ax in g.axes.ravel():
    # add annotations
    for bars in ax.containers:
        # bars without a positive height get an empty label instead of a number
        labels = [f'{h / total_listings * 100:0.1f}%' if (h := bar.get_height()) > 0 else ''
                  for bar in bars]
        ax.bar_label(bars, labels=labels, label_type='edge', color="white")

    # turn off the grid on this axes
    ax.grid(False)

# set figure size in inches
g.fig.set_size_inches(8, 5);

### Figure 1b: Number of different types of accommodation (stacked plot)

In [None]:
# Per-city share of each room type, expressed in percent, as a flat table with
# the columns city / room_type / percentage (input of the stacked plot).
plot = (
    df_3.groupby(['city'])['room_type']
    .value_counts(normalize=True)
    .mul(100)
    .reset_index(name='percentage')
)

In [None]:
# Figure 1b: room-type mix of each city as a stacked bar (segments in percent).

# set the background color to #242424
sns.set(rc={'axes.facecolor': '#242424', 'figure.facecolor': '#242424'})

city_names = {'BCN': 'Barcelona', 'STHLM': 'Stockholm', 'ROM': 'Rome'}
room_order = ['Entire home/apt', 'Private room', 'Hotel room', 'Shared room']

# `plot` comes out of a groupby and is therefore sorted by city code
# (BCN, ROM, STHLM). Re-order the rows to Barcelona / Stockholm / Rome and
# replace the codes by the city names, so the tick labels are taken from the
# data itself and cannot end up under the wrong bar.
stacked = pd.concat([plot[plot['city'] == code] for code in city_names], ignore_index=True)
stacked['city'] = stacked['city'].map(city_names)

# The percentages are passed as weights, so every segment's height is already
# a percentage. hue_order is fixed and the legend is drawn by seaborn so that
# colours and legend entries always agree.
g = sns.histplot(data=stacked, x='city', hue='room_type', hue_order=room_order, stat='count',
                 weights='percentage', multiple='stack', legend=True, palette=c, shrink=0.5,
                 edgecolor="none")
titel = g.set_title('Percentage of accomodation in the different cities', color='white')

# restyle seaborn's legend for the dark background
legend = g.get_legend()
legend.set_title('')
legend.set_frame_on(False)
for text in legend.get_texts():
    text.set_color("white")

# Annotate the bars of THIS plot. Heights are percentages already, so they are
# not multiplied by 100; segments below 0.1 % stay unlabelled.
for bars in g.containers:
    heights = [b.get_height() for b in bars]
    labels = [f'{h:.1f}%' if h > 0.1 else '' for h in heights]
    g.bar_label(bars, labels=labels, label_type='center')

# The x axis shows the cities (the room types are encoded by colour).
xlabel = g.set_xlabel('City', color='white')
ylabel = g.set_ylabel('Percentage of offers', color='white')

# change the tick labels to white
g.tick_params(axis='both', labelcolor='white')

# turn off the grid
g.grid(False)

# set figure size in inches (same size as the other figures)
g.figure.set_size_inches(8, 5)
sns.despine();


### Figure 1c - with % on axis y

In [None]:
new_df = df_3.groupby('city')['room_type'].value_counts(normalize=True)
new_df = new_df.mul(100).rename('Percent').reset_index()

In [None]:
new_df

In [None]:

# Figure 1c: share of each room type within every city (grouped bars, % on the y axis).

# set the background color to #242424
sns.set(rc={'axes.facecolor': '#242424', 'figure.facecolor': '#242424'})

# new_df is built with a groupby and is therefore sorted by city code
# (BCN, ROM, STHLM). The tick labels below are applied by position, so the bar
# order is fixed explicitly; otherwise Rome and Stockholm would be swapped.
city_order = ['BCN', 'STHLM', 'ROM']
room_order = ['Entire home/apt', 'Private room', 'Hotel room', 'Shared room']

# plot the data with a categorical plot
g = sns.catplot(data=new_df, kind='bar', x='city', y='Percent', hue='room_type', order=city_order,
                hue_order=room_order, legend=False, palette=c, edgecolor="#242424")

g.ax.set_ylim(0, 100)

# legend entries are assigned to the bar groups in drawing order, i.e. room_order
g.ax.legend(room_order, labelcolor='white', edgecolor='none', loc='upper left')

# Colour the labels of THIS figure; the earlier version recoloured the label
# and title objects that were left over from a previous figure.
ylabel = g.ax.set_ylabel('Percentage of offers', color='white')
g.ax.xaxis.label.set_color('white')

# readable city names, in the same order as city_order
g.set_xticklabels(['Barcelona', 'Stockholm', 'Rome'])
g.ax.tick_params(axis='both', labelcolor='white')

# format the y ticks as percentages, whatever tick positions matplotlib picks
g.ax.yaxis.set_major_formatter(matplotlib.ticker.PercentFormatter(decimals=0))

# turn off the grid
g.ax.grid(False)
sns.despine()

# set figure size in inches
g.fig.set_size_inches(8, 5);

### Figure 2: Host types in the three cities

In [None]:
# Figure 2: number of listings per host type in each city.
# NOTE(review): the bars count rows of df_3 (listings), not distinct hosts -
# the title may need rewording.

# set the background color to #242424
sns.set(rc={'axes.facecolor': '#242424', 'figure.facecolor': '#242424'})

# The legend and tick labels below are applied by position, so both orders are
# fixed explicitly; without hue_order the bars follow the order in which the
# host types happen to appear in df_3 and the legend can name the wrong bucket.
city_order = ['BCN', 'STHLM', 'ROM']
host_order = ['single_listing', '2-3_listings', '4-10_listings', 'above_10']

# plot the data with a categorical plot
g = sns.catplot(data=df_3, kind='count', x='city', hue='host_type', order=city_order,
                hue_order=host_order, legend='', palette=c, edgecolor="#242424")
ax = g.ax

titel = ax.set_title('Number of hosts in the different cities', color='white')
# same order as host_order; the second bucket is 2-3 listings (it used to be labelled '2-4')
legend = ax.legend(['single_listing', '2-3 listings', '4-10 listings', 'above 10 listings'],
                   loc=2, frameon=False)
for text in legend.get_texts():
    text.set_color("white")

# The x axis shows the cities (the host types are encoded by colour).
xlabel = ax.set_xlabel('City', color='white')
ylabel = ax.set_ylabel('Number of offers', color='white')

# readable city names, in the same order as city_order
g.set_xticklabels(['Barcelona', 'Stockholm', 'Rome'])
ax.tick_params(axis='both', labelcolor='white')

# iterate through axes
for ax in g.axes.ravel():

    # add the count above every bar
    for bars in ax.containers:
        ax.bar_label(bars, label_type='edge', color="white")

    # head-room so the labels of the tallest bars are not cut off
    ax.margins(y=0.2)

    # turn off the grid on this axes
    ax.grid(False)

# set figure size in inches
g.fig.set_size_inches(8, 5);


### Figure 2b - with % on axis y

In [None]:
new_df2 = df_3.groupby('city')['host_type'].value_counts(normalize=True)
new_df2 = new_df2.mul(100).rename('Percent').reset_index()

In [None]:
new_df2

In [None]:
# Figure 2b: share of each host type within every city (grouped bars, % on the y axis).

# set the background color to #242424
sns.set(rc={'axes.facecolor': '#242424', 'figure.facecolor': '#242424'})

# new_df2 is built with a groupby and is therefore sorted by city code
# (BCN, ROM, STHLM). The tick labels below are applied by position, so the bar
# order is fixed explicitly; otherwise Rome and Stockholm would be swapped.
city_order = ['BCN', 'STHLM', 'ROM']
host_order = ['single_listing', '2-3_listings', '4-10_listings', 'above_10']

# plot the data with a categorical plot
g = sns.catplot(data=new_df2, kind='bar', x='city', y='Percent', hue='host_type', order=city_order,
                hue_order=host_order, legend=False, palette=c, edgecolor="#242424")

g.ax.set_ylim(0, 100)

# legend entries are assigned to the bar groups in drawing order, i.e. host_order
g.ax.legend(host_order, labelcolor='white', edgecolor='none', loc='upper left')

# Colour the labels of THIS figure; the earlier version recoloured the label
# and title objects that were left over from a previous figure.
ylabel = g.ax.set_ylabel('Percentage of offers', color='white')
g.ax.xaxis.label.set_color('white')

# readable city names, in the same order as city_order
g.set_xticklabels(['Barcelona', 'Stockholm', 'Rome'])
g.ax.tick_params(axis='both', labelcolor='white')

# format the y ticks as percentages, whatever tick positions matplotlib picks
g.ax.yaxis.set_major_formatter(matplotlib.ticker.PercentFormatter(decimals=0))

# turn off the grid
g.ax.grid(False)
sns.despine()

# set figure size in inches
g.fig.set_size_inches(8, 5);

### Figure 3: Length of stay

In Barcelona, a license is needed to offer short-term accommodation (stays of up to 31 days). Long-term offers, however, do not need a license.

In [None]:
df_3.columns

In [None]:
# add a variable (stay) to code the short (shorter than 31 days) and long stay offers

conditions = [
    (df_3['minimum_nights'] <= 31),
    (df_3['minimum_nights'] > 31),
    ]

In [None]:
values = ['short_stay', 'long_stay']

In [None]:
df_3['stay'] = np.select(conditions, values)

In [None]:
df_3.head()

In [None]:
# Figure 3: number of short-stay and long-stay listings in each city.

# set the background color to #242424
sns.set(rc={'axes.facecolor': '#242424', 'figure.facecolor': '#242424'})

# The legend labels below are applied by position, so the hue order is fixed
# explicitly instead of depending on the row order of df_3.
stay_order = ['short_stay', 'long_stay']
city_order = ['BCN', 'STHLM', 'ROM']

# plot the data with a categorical plot
g = sns.catplot(data=df_3, kind='count', x='stay', hue='city', order=stay_order,
                hue_order=city_order, legend='', palette=c, edgecolor='none')
ax = g.ax

titel = ax.set_title('Length of stay offered in the different cities', color='white')
# same order as city_order
legend = ax.legend(['Barcelona', 'Stockholm', 'Rome'], loc=1, frameon=False)
for text in legend.get_texts():
    text.set_color("white")

# change axes labels and ticks to white
xlabel = ax.set_xlabel('Length of stay', color='white')
ylabel = ax.set_ylabel('Number of offers', color='white')

ax.tick_params(axis='x', labelrotation=45, labelcolor='white')
ax.tick_params(axis='y', labelcolor='white')

# iterate through axes
for ax in g.axes.ravel():

    # add the count above every bar
    for bars in ax.containers:
        ax.bar_label(bars, label_type='edge', color="white")

    # head-room so the labels of the tallest bars are not cut off
    ax.margins(y=0.2)

    # turn off the grid on this axes
    ax.grid(False)

# set figure size in inches
g.fig.set_size_inches(8, 5);

### Figure 3b - with % on the y axis

In [None]:
new_df3 = df_3.groupby('city')['stay'].value_counts(normalize=True)
new_df3 = new_df3.mul(100).rename('Percent').reset_index()

In [None]:
new_df3

In [None]:
# Figure 3b: share of short-stay and long-stay listings within every city (% on the y axis).

# set the background color to #242424
sns.set(rc={'axes.facecolor': '#242424', 'figure.facecolor': '#242424'})

# new_df3 is built with a groupby and is therefore sorted by city code
# (BCN, ROM, STHLM). The tick labels below are applied by position, so the bar
# order is fixed explicitly; otherwise Rome and Stockholm would be swapped.
city_order = ['BCN', 'STHLM', 'ROM']
stay_order = ['short_stay', 'long_stay']

# plot the data with a categorical plot
g = sns.catplot(data=new_df3, kind='bar', x='city', y='Percent', hue='stay', order=city_order,
                hue_order=stay_order, legend=False, palette=c, edgecolor="#242424")

g.ax.set_ylim(0, 100)

# legend entries are assigned to the bar groups in drawing order, i.e. stay_order
g.ax.legend(stay_order, labelcolor='white', edgecolor='none', loc='upper left')

# Colour the labels of THIS figure; the earlier version recoloured the label
# and title objects that were left over from a previous figure.
ylabel = g.ax.set_ylabel('Percentage of offers', color='white')
g.ax.xaxis.label.set_color('white')

# readable city names, in the same order as city_order
g.set_xticklabels(['Barcelona', 'Stockholm', 'Rome'])
g.ax.tick_params(axis='both', labelcolor='white')

# format the y ticks as percentages, whatever tick positions matplotlib picks
g.ax.yaxis.set_major_formatter(matplotlib.ticker.PercentFormatter(decimals=0))

# turn off the grid
g.ax.grid(False)
sns.despine()

# set figure size in inches
g.fig.set_size_inches(8, 5);

### Figure 4: Average price/accommodation type in the 3 cities

In [None]:
# check the column types (price should be numeric after the preprocessing above)

df_3.info()

In [None]:
# summary statistics of the price per room type, all cities pooled

df_3.groupby('room_type').price.describe()

In [None]:
# check the average price per room type in the different cities

df_3.groupby(['city','room_type']).price.mean()


In [None]:
# create a new dataset without the price outliers, assuming that one night of accommodation costs less than 2000
# NOTE(review): this comment used to say "Euro", while the Stockholm conversion
# comment and the Figure 4 axis label say dollars - confirm the currency of `price`.

df_price = df_3.loc[df_3['price'] < 2000]

In [None]:
# same summary as above, now without the rows priced at 2000 or more

df_price.groupby('room_type').price.describe()

In [None]:
# Figure 4: average nightly price per room type in each city (outliers removed, see df_price).

# set the background color to #242424
sns.set(rc={'axes.facecolor': '#242424', 'figure.facecolor': '#242424'})

# The legend labels below are applied by position, so the hue order is fixed
# explicitly instead of depending on the row order of df_price.
city_order = ['BCN', 'STHLM', 'ROM']

# create the figure at its final size and draw on an explicit axes
fig, ax = plt.subplots(figsize=(8, 5))

# bar height = mean price of the group; error bars are switched off
g = sns.barplot(data=df_price, x='room_type', y='price', hue='city', hue_order=city_order,
                errorbar=None, estimator=np.mean, palette=c, edgecolor="#242424", ax=ax)

# same order as city_order
legend = ax.legend(['Barcelona', 'Stockholm', 'Rome'], loc=1, frameon=False)
titel = ax.set_title('Average price / accomodation type in the different cities', color='white')
for text in legend.get_texts():
    text.set_color("white")

# change axes labels and ticks to white
# NOTE(review): the label says $, the outlier filter comment said Euro - confirm the currency.
xlabel = ax.set_xlabel('Type of accomodation', color='white')
ylabel = ax.set_ylabel('Average price/ night ($)', color='white')

ax.tick_params(axis='x', labelrotation=45, labelcolor='white')
ax.tick_params(axis='y', labelcolor='white')

# turn off the horizontal grid lines
g.yaxis.grid(False)
sns.despine();
