In [324]:
# Data manipulation
import pandas as pd # data manipulation and dataframes

# Data visualization
import plotly.express as px
import matplotlib.pyplot as plt
import seaborn as sns

import numpy as np

import calendar

In [325]:
# Pandas configuration
pd.set_option('display.max_columns', None)  # shows all columns
pd.set_option('display.max_colwidth', None)  # shows all cell content

In [326]:
# Reads csv
airbnb_madrid = pd.read_csv('../raw/eda-result/airbnb_madrid.csv', encoding='utf-8')
listings = pd.read_csv('../raw/airbnb/madrid/listings.csv', encoding='utf-8')

In [327]:
listings.head(1)

Unnamed: 0,id,name,host_id,host_name,neighbourhood_group,neighbourhood,latitude,longitude,room_type,price,minimum_nights,number_of_reviews,last_review,reviews_per_month,calculated_host_listings_count,availability_365,number_of_reviews_ltm,license
0,6369,Rental unit in Madrid · ★4.87 · 1 bedroom · 1 bed · 1 private bath,13660,Simon,Chamartín,Hispanoamérica,40.45724,-3.67688,Private room,90,4,104,2023-05-31,0.65,1,98,9,


In [328]:
len(listings['name'].unique().tolist())

6826

In [329]:
airbnb_madrid.head(1)

Unnamed: 0,Description,Score,Facilities,District,Neighbourhood,Room type,Price,Minimum nights,Host name,Number of reviews,Last review,Latitude,Longitude
0,Rental unit in madrid,★4.87,"1 bedroom, 1 bed, 1 private bath",Chamartín,Hispanoamérica,Private room,90,4,Simon,104,2023-05-31,40.45724,-3.67688


In [330]:
#airbnb_madrid['Last review'] = airbnb_madrid['Last review'].fillna('2023-07-26')
airbnb_madrid = airbnb_madrid.dropna()

In [331]:
airbnb_madrid["Last review"] = pd.to_datetime(airbnb_madrid["Last review"])
airbnb_madrid['Year'] = airbnb_madrid['Last review'].dt.strftime('%Y')


In [332]:
airbnb_madrid[airbnb_madrid['Last review'].dt.year == 2018].value_counts().sum()

445

In [333]:
airbnb_madrid['Last review'].dt.year.unique().tolist()

[2023, 2018, 2022, 2017, 2021, 2019, 2016, 2012, 2015, 2020, 2014, 2013]

In [334]:
months = airbnb_madrid['Last review'].dt.month.unique().tolist()
months = [5, 6]
months

[5, 6]

In [335]:
airbnb_madrid['Last review'].dt.month[airbnb_madrid['Last review'].dt.year == 2022].value_counts().sum()

2072

In [336]:
districts = airbnb_madrid['District'].unique().tolist()

In [337]:
total_airbnbs_by_month_2023 = {'Month': [], 'Airbnb reviews': [], 'District': []}

for d in districts:
    for m in months:
        total_airbnbs_by_month_2023['Month'].append(m)
        total_airbnbs_by_month_2023['Airbnb reviews'].append(airbnb_madrid[(airbnb_madrid['Last review'].dt.year == 2023) & (airbnb_madrid['Last review'].dt.month == m) & (airbnb_madrid['District'] == d)].value_counts().sum())
        total_airbnbs_by_month_2023['District'].append(d)

In [338]:
df2023 = pd.DataFrame.from_dict(total_airbnbs_by_month_2023)
df2023 = df2023.sort_values(by=['Month'])

In [339]:
list_months_2023 = ['May 2023', 'June 2023']
dict_months_2023 = {5: 'May 2023', 6: 'June 2023'}
dict_months_2023

{5: 'May 2023', 6: 'June 2023'}

In [344]:
df2023['Month'] = df2023['Month'].map(dict_months_2023)
df2023.to_csv('../raw/eda-result/reviews_may_june_2023_airbnb.csv', index=False, sep=',')
df2023

Unnamed: 0,Month,District
0,,Chamartín
38,,Moratalaz
36,,Villa de Vallecas
34,,Puente de Vallecas
32,,Usera
30,,Barajas
28,,Tetuán
26,,Hortaleza
24,,Villaverde
22,,Fuencarral - El Pardo


In [341]:
fig = px.histogram(df2023, x=df2023['District'][df2023['Month'] == 'May 2023'], y=df2023['Airbnb reviews'][df2023['Month'] == 'May 2023'], text_auto='.2s', color=df2023['District'][df2023['Month'] == 'May 2023'])
fig.update_traces(textfont_size=12, textangle=0, textposition='outside', cliponaxis=False)
fig.update_xaxes(categoryorder = 'total descending')
fig.show()

In [342]:
fig = px.histogram(df2023, x=df2023['District'][df2023['Month'] == 'June 2023'], y=df2023['Airbnb reviews'][df2023['Month'] == 'June 2023'], text_auto='.2s', color=df2023['District'][df2023['Month'] == 'May 2023'])
fig.update_traces(textfont_size=12, textangle=0, textposition='outside', cliponaxis=False)
fig.update_xaxes(categoryorder = 'total descending')
fig.show()