In [1]:
import math
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.gridspec as gridspec
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio

In [2]:
# Mount Google Drive into the Colab filesystem; the CSV files read later in
# this notebook are addressed through paths under this mount point.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [3]:
# Import all datasets: one CSV per (city, weekdays/weekends) combination.

cities = ['amsterdam', 'athens', 'barcelona', 'berlin',
          'budapest', 'lisbon', 'london', 'paris',
          'rome', 'vienna']

days = ['weekdays', 'weekends']

# Folder on the mounted Drive holding the "<city>_<days>.csv" files.
data_dir = "/content/drive/MyDrive/Career/Data_Analyst/Projects/Personal"

# Read every file the same way and concatenate once at the end. Calling
# pd.concat inside the loop would re-copy the accumulated frame on every
# iteration, and special-casing the first file is unnecessary.
city_day_frames = []
for city in cities:
  for day in days:
    import_string = "{}/{}_{}.csv".format(data_dir, city, day)
    placeholder_df = pd.read_csv(import_string, index_col=[0])
    placeholder_df['city'] = city.capitalize()       # 'amsterdam' -> 'Amsterdam'
    placeholder_df['weekday'] = (day == "weekdays")  # True for weekday files, False for weekend files
    city_day_frames.append(placeholder_df)

# ignore_index=True gives the combined frame a fresh 0..n-1 index.
all_airbnb_prices = pd.concat(city_day_frames, ignore_index=True)

# Total number of rows loaded across all files.
count = all_airbnb_prices.shape[0]

In [4]:
# Drop redundant/not useful columns and change True/False columns to 0/1 columns.
#
# Written as a non-inplace chain so the cell can be re-run safely:
# errors='ignore' keeps drop() from raising KeyError once the columns are
# already gone, and re-casting integer columns to int is a no-op.

redundant_columns = ['attr_index', 'attr_index_norm', 'rest_index', 'rest_index_norm']

flag_columns = ['room_shared', 'room_private', 'host_is_superhost', 'weekday']

all_airbnb_prices = (
    all_airbnb_prices
    .drop(columns=redundant_columns, errors='ignore')
    .astype({column: int for column in flag_columns})
)

In [5]:
# Flag rows whose (lat, lng) pair appears again further down the frame.
# With keep='last', the final occurrence of each coordinate pair stays False
# and every earlier occurrence is marked True.
coordinate_columns = ["lat", "lng"]
repeated_location = all_airbnb_prices.duplicated(subset=coordinate_columns, keep='last')
all_airbnb_prices['duplicated'] = repeated_location

In [6]:
def find_outliers(city, column, day, data=None):
  """Flag the high-side outliers of `column` within one city/day group.

  The threshold is the upper Tukey fence, Q3 + 1.5 * IQR, computed only from
  the rows where `city` and `weekday` match the arguments. Only the upper
  fence is applied: unusually low values are never flagged.

  Parameters:
    city: value compared against the 'city' column.
    column: name of the numeric column to test.
    day: value compared against the 'weekday' column.
    data: DataFrame to analyse; defaults to the notebook-level
      `all_airbnb_prices` frame when omitted.

  Returns:
    A boolean Series aligned with `data`'s index. It is True only for rows
    that belong to the requested group and exceed the group's upper fence.
    If the group has no rows, the mask is all False.
  """
  if data is None:
    data = all_airbnb_prices

  in_group = (data['city'] == city) & (data['weekday'] == day)

  # Nothing to measure: avoid taking quantiles of an empty selection.
  if not in_group.any():
    return in_group

  # Select the group once and take both quartiles in a single call.
  q1, q3 = np.quantile(data.loc[in_group, column], [0.25, 0.75])
  upper_bound = q3 + 1.5 * (q3 - q1)

  # Rows from other groups may exceed this group's fence, so the group
  # condition is part of the mask.
  return (data[column] > upper_bound) & in_group

# City names exactly as stored in the 'city' column of the frame.
cities = all_airbnb_prices['city'].unique().tolist()

# One mask per (city, weekday flag) group; the weekday flag takes the values 0 and 1.
mask_list = [find_outliers(city=city_name, column='realSum', day=day_flag)
             for city_name in cities
             for day_flag in (0, 1)]

# A row counts as an outlier if any of the per-group masks flags it.
full_outlier_mask = mask_list[0]
for group_mask in mask_list[1:]:
  full_outlier_mask = full_outlier_mask | group_mask

all_airbnb_prices['all_outliers'] = full_outlier_mask

In [7]:
pio.renderers.default = "notebook_connected"

# Hover tooltip fields shared by every map: a format string sets the number
# format, True shows the field with default formatting.
MAP_HOVER_DATA = {"realSum": ':.3f',
                  "room_type": True,
                  "metro_dist": ':.3f',
                  "dist": ':.3f',
                  "lat": ':.5f',
                  "lng": ':.5f'}

# Per-city map settings, listed in the order the maps are drawn.
#   bounds               -- longitude/latitude box the map is restricted to;
#                           None means "derive it from the city's listings".
#   size_max             -- largest marker size on the map of all listings.
#   size_max_no_outliers -- largest marker size on the map without outliers.
#   height / center      -- optional overrides (only London uses them).
CITY_MAP_SETTINGS = {
    "Athens": {"bounds": {"west": 23.61120, "east": 23.86352,
                          "south": 37.91931, "north": 38.05856},
               "size_max": 20, "size_max_no_outliers": 12},
    "Budapest": {"bounds": {"west": 18.92, "east": 19.31,
                            "south": 47.38, "north": 47.61},
                 "size_max": 30, "size_max_no_outliers": 10},
    "Rome": {"bounds": {"west": 12.39096, "east": 12.59352,
                        "south": 41.80827, "north": 41.96393},
             "size_max": 25, "size_max_no_outliers": 12},
    "Lisbon": {"bounds": {"west": -9.23730, "east": -9.07463,
                          "south": 38.68726, "north": 38.80534},
               "size_max": 20, "size_max_no_outliers": 12},
    "Vienna": {"bounds": {"west": 16.19, "east": 16.57,
                          "south": 48.1, "north": 48.31},
               "size_max": 30, "size_max_no_outliers": 10},
    "Berlin": {"bounds": {"west": 13.10, "east": 13.73,
                          "south": 52.36, "north": 52.66},
               "size_max": 30, "size_max_no_outliers": 12},
    "Barcelona": {"bounds": {"west": 2.08, "east": 2.25,
                             "south": 41.32, "north": 41.48},
                  "size_max": 30, "size_max_no_outliers": 12},
    "London": {"bounds": None,
               "size_max": 40, "size_max_no_outliers": 12,
               "height": 1100,
               "center": {"lat": 51.505, "lon": -0.105}},
    "Paris": {"bounds": {"west": 2.24, "east": 2.445,
                         "south": 48.79, "north": 48.92},
              "size_max": 30, "size_max_no_outliers": 15},
    "Amsterdam": {"bounds": {"west": 4.73, "east": 5.04,
                             "south": 52.275, "north": 52.435},
                  "size_max": 30, "size_max_no_outliers": 13},
}


def plot_city_map(city, bounds, size_max, exclude_outliers=False, height=1300, center=None):
  """Build the listing map for one city, sized by price and coloured by metro distance.

  Rows are taken from the notebook-level `all_airbnb_prices` frame, keeping
  only those with `city == city` and `duplicated == False`.

  Parameters:
    city: city name as stored in the 'city' column (e.g. "Athens").
    bounds: dict with "west", "east", "south", "north" keys limiting the map,
      or None to derive the box from the city's listings by rounding the
      min/max coordinates outward to one decimal place.
    size_max: maximum marker size passed to plotly.
    exclude_outliers: when True, rows with `all_outliers == True` are left
      out and " Without Outliers" is appended to the title.
    height: figure height passed to plotly.
    center: optional dict with "lat" and "lon" keys used as the map centre.

  Returns:
    The plotly figure; the caller decides when to show it.
  """
  unique_listings = all_airbnb_prices.query('city == @city and duplicated == False')
  if exclude_outliers:
    plotted_listings = unique_listings.query('all_outliers == False')
  else:
    plotted_listings = unique_listings

  if bounds is None:
    # Always derived from the listings *including* outliers, so both maps
    # of a city share the same box.
    bounds = {"west": math.floor(unique_listings['lng'].min() * 10) / 10,
              "east": math.ceil(unique_listings['lng'].max() * 10) / 10,
              "south": math.floor(unique_listings['lat'].min() * 10) / 10,
              "north": math.ceil(unique_listings['lat'].max() * 10) / 10}

  title_text = "Map of {} AirBnBs Sized on Price and Colored on Distance from Nearest Metro".format(city)
  if exclude_outliers:
    title_text += " Without Outliers"

  city_fig = px.scatter_mapbox(
      plotted_listings,
      lat="lat", lon="lng",
      color="metro_dist",
      size="realSum",
      mapbox_style="open-street-map",
      hover_data=dict(MAP_HOVER_DATA),  # copy, so the shared template is never modified
      color_continuous_scale=px.colors.sequential.Bluered,
      size_max=size_max, zoom=0, height=height)

  city_fig.update_layout(title={"text": title_text,
                                "xref": "paper",
                                "xanchor": "auto",
                                "x": 0.5})

  city_fig.update_layout(mapbox_bounds=bounds)

  if center is not None:
    city_fig.update_layout(
        mapbox=dict(center=go.layout.mapbox.Center(lat=center["lat"], lon=center["lon"])))

  return city_fig


# For every city draw the map of all listings, then the map without outliers.
for city_name, city_settings in CITY_MAP_SETTINGS.items():
  for without_outliers in (False, True):
    if without_outliers:
      marker_size_max = city_settings["size_max_no_outliers"]
    else:
      marker_size_max = city_settings["size_max"]

    fig = plot_city_map(city_name,
                        bounds=city_settings["bounds"],
                        size_max=marker_size_max,
                        exclude_outliers=without_outliers,
                        height=city_settings.get("height", 1300),
                        center=city_settings.get("center"))
    fig.show()