
![idealista logo](https://s14-eu5.ixquick.com/cgi-bin/serveimage?url=http%3A%2F%2Ft0.gstatic.com%2Fimages%3Fq%3Dtbn%3AANd9GcQqjeQLlcGYvzPz1B-a2NfDRLNNTjYlpUHJgC-kvYG-jUNehiOA&sp=c82e503b2dd362c5857b2b227c055500&anticache=491873)

## Goals of this notebook:
Idealista dataset will be the sample of the residential rental market in Madrid.
   - Retrive all properties for rent listed in idealista.com through the Idealista API.
   - Visualization of Idealista listings in Madrid map.
   - Find out average rents in Madrid broken down into neighborhood and apartment size.
   - Combine Airbnb and Idealista datasets to create a data frame to compare revenues generated in Airbnb and 
      Idealista. 
    

In [2]:
import numpy as np
import pandas as pd
import requests
import json
from pandas.io.json import json_normalize
import folium

## IDEALISTA API

#### To gain access to the Idealista API we must follow the next steps:

1) A formal request on their [site](http://developers.idealista.com/access-request)

2) Once your request is accepted (several weeks) an email is sent with an API key and a secret.

3) Idealista API is secured with OAuth, so to call the API we first need to have an access token.

 #### ACCESS TOKEN PROCEDURE: To get the access token we follow the instructions provided by Idealista:
    
  - API key and secret must be encoded in Basic 64. To do so, click  [here](https://www.base64encode.org)
  - Input-->APIkey:secret (remember to enter colon)
  - Authorization header: aGRzNGNlNnR6N3dnbmttN3ozcGd0c2l1MHVhMXBwcTI6V3ZCNzJJUEJEQWZu
 -  Request the token: We use the terminal rather than notebook to request our token:
       


       curl -X POST -H "Authorization: Basic aGRzNGNlNnR6N3dnbmttN3ozcGd0c2l1MHVhMXBwcTI6V3ZCNzJJUEJEQWZu" -H "Content-Type:application/x-www-form-urlencoded" -d 'grant_type=client_credentials&scope=read' 'https://api.idealista.com/oauth/token' -k
       

##### NOTE: 
- API access is free for 100 requests per month
- Token expires in 12 hours.            

#### API CALL
 - Once the token is available, we may call the API to request Idealista data.
 - Idealista API returns paginated results. As far as Madrid is concerned, there are 114 pages. 
 - Given that we only have 100 calls per month we call the API twice, to make sure it works properly. 
 - Please bear in mind that  Airbnb data was downloaded in april 2017 whereas Idealista dataset has been 
   retrieved in October 2017.


In [77]:
# API call
# Each call returns 1 page that contains only 50 listings. So we build a loop to request 100 pages.  
# Warning: to run this script, a fresh token is required.
ideal = []
token = 'eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzY29wZSI6WyJyZWFkIl0sImV4cCI6MTUwNzA2NjQzMSwiYXV0aG9yaXRpZXMiOlsiUk9MRV9QVUJMSUMiXSwianRpIjoiMmNlZDBhM2EtNDM2MC00NGZkLWE1ZjQtMGM3MDIxNWM3N2Y4IiwiY2xpZW50X2lkIjoiaGRzNGNlNnR6N3dnbmttN3ozcGd0c2l1MHVhMXBwcTIifQ.1FNr6xQnvgEK3PKBksfte168E7XumRqTahawb4TM9ZQ'

for page in range(1,60):
    page = str(page)
    url = "http://api.idealista.com/3.5/es/search?country=es&maxItems=50&&sinceDate=M&hasMultimedia=True&propertyType=homes&operation=rent&locationId=0-EU-ES-28&numPage="+page
    headers = {'Authorization' : 'Bearer ' + token}
    content = requests.post(url,headers=headers)
    resp = json.loads(content.content)
    ideal +=[resp]
    
len(ideal)

55

In [4]:
ideab = []
token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzY29wZSI6WyJyZWFkIl0sImV4cCI6MTUwNzkzNTMzNSwiYXV0aG9yaXRpZXMiOlsiUk9MRV9QVUJMSUMiXSwianRpIjoiODY1MTBhMzktZTgzYS00MWUyLWJmN2ItMDQ2M2I3NWQwZDhkIiwiY2xpZW50X2lkIjoiaGRzNGNlNnR6N3dnbmttN3ozcGd0c2l1MHVhMXBwcTIifQ.DGmMlqUK1tF3YINJSW9k7iud9_mJhXFVTppOWzp7IcI"
for page in range(60,112):
    page = str(page)
    url = "http://api.idealista.com/3.5/es/search?country=es&maxItems=50&&sinceDate=M&hasMultimedia=True&propertyType=homes&operation=rent&locationId=0-EU-ES-28&numPage="+page
    headers = {'Authorization' : 'Bearer ' + token}
    content = requests.post(url,headers=headers)
    resp = json.loads(content.content)
    ideab +=[resp]
    
len(ideab)

52

In [5]:
ideab = json_normalize(ideab, 'elementList')
ideab.shape

(2476, 33)

In [152]:
# Save info in a csv,
ideab.to_csv('idealista2.csv',encoding='utf8')

In [30]:
# Join both csv and create a dataset with all listings of Idealista in Madrid.
# Datasets on github, TFM/datasets folder
idi1 = pd.read_csv('idealista1.csv')
idi2 = pd.read_csv('idealista2.csv')
df = pd.concat([idi1,idi2])
df.shape

(5226, 34)

#### VISUALIZATION OF RESIDENTIAL MARKET OFFER
#### Idealista listings location in Madrid map

In [31]:
# Flats for rent in Madrid (red spot)
m = folium.Map(location=[40.421522, -3.6938], zoom_start=13)
for index,row in df.iterrows():
    folium.Circle([row['latitude'], row['longitude']],
                        radius=1,
                        color='crimson',
                        fill=True,
                        fill_color='crimson').add_to(m)

m.save('ideal_rents.png')
m


### 1) IDEALISTA  DATASET PREPARATION

- Features selection.
- NaNs
- Areas, neighbourhood filtering
- Outliers, duplicates check
- Get the dataset ready to be merged with the  Airbnb data frame

In [39]:
df.head()

Unnamed: 0.1,Unnamed: 0,address,bathrooms,country,detailedType,district,exterior,externalReference,floor,has3DTour,...,propertyCode,propertyType,province,rooms,showAddress,size,status,suggestedTexts,thumbnail,url
0,0,barrio Ciudad Universitaria,1,es,{u'typology': u'flat'},Moncloa,True,01495,5,False,...,38312706,flat,Madrid,2,False,100.0,good,"{u'subtitle': u'Ciudad Universitaria, Madrid',...",https://img3.idealista.com/blur/WEB_LISTING/0/...,http://www.idealista.com/38312706
1,1,"calle Conde de Duque, 6",1,es,"{u'subTypology': u'studio', u'typology': u'flat'}",Centro,False,,3,False,...,38312999,studio,Madrid,0,True,25.0,good,"{u'subtitle': u'Malasaña-Universidad, Madrid',...",https://img3.idealista.com/blur/WEB_LISTING/0/...,http://www.idealista.com/38312999
2,2,barrio Bernabéu-Hispanoamérica,3,es,{u'typology': u'flat'},Chamartín,True,01494,3,False,...,38302487,flat,Madrid,4,False,180.0,good,"{u'subtitle': u'Bernabéu-Hispanoamérica, Madri...",https://img3.idealista.com/blur/WEB_LISTING/0/...,http://www.idealista.com/38302487
3,3,"calle de la infanta mercedes, 62",1,es,{u'typology': u'flat'},Tetuán,False,,3,False,...,33858989,flat,Madrid,1,True,40.0,good,"{u'subtitle': u'Cuzco-Castillejos, Madrid', u'...",https://img3.idealista.com/blur/WEB_LISTING/0/...,http://www.idealista.com/33858989
4,4,calle Castelló,2,es,{u'typology': u'flat'},Salamanca,True,ACC20817,5,False,...,38312846,flat,Madrid,2,False,120.0,good,"{u'subtitle': u'Castellana, Madrid', u'title':...",https://img3.idealista.com/blur/WEB_LISTING/0/...,http://www.idealista.com/38312846


In [7]:
df.columns.tolist()

['Unnamed: 0',
 'address',
 'bathrooms',
 'country',
 'detailedType',
 'district',
 'exterior',
 'externalReference',
 'floor',
 'has3DTour',
 'hasLift',
 'hasPlan',
 'hasVideo',
 'latitude',
 'longitude',
 'municipality',
 'neighborhood',
 'newDevelopment',
 'newDevelopmentFinished',
 'numPhotos',
 'operation',
 'parkingSpace',
 'price',
 'priceByArea',
 'propertyCode',
 'propertyType',
 'province',
 'rooms',
 'showAddress',
 'size',
 'status',
 'suggestedTexts',
 'thumbnail',
 'url']

In [11]:
# Explore Idealista dataset. Variables of interest
df[['address','district','neighborhood','size','rooms','price','priceByArea','floor']]


Unnamed: 0,address,district,neighborhood,size,rooms,price,priceByArea,floor
0,barrio Ciudad Universitaria,Moncloa,Ciudad Universitaria,100.0,2,1600.0,16.0,5
1,"calle Conde de Duque, 6",Centro,Malasaña-Universidad,25.0,0,800.0,32.0,3
2,barrio Bernabéu-Hispanoamérica,Chamartín,Bernabéu-Hispanoamérica,180.0,4,2400.0,13.0,3
3,"calle de la infanta mercedes, 62",Tetuán,Cuzco-Castillejos,40.0,1,950.0,24.0,3
4,calle Castelló,Salamanca,Castellana,120.0,2,2250.0,19.0,5
5,barrio Chueca-Justicia,Centro,Chueca-Justicia,93.0,1,1500.0,16.0,1
6,paseo de la Castellana,Tetuán,Cuatro Caminos,90.0,2,1500.0,17.0,7
7,"calle Bernardino Obregón, 19",Arganzuela,Palos de Moguer,50.0,1,875.0,18.0,2
8,Centro,Centro,,70.0,2,650.0,9.0,1
9,barrio El Viso,Chamartín,El Viso,95.0,2,2500.0,26.0,2


In [12]:
# Duplicated listings check
df.duplicated().sum()

0

### DISTRICT SELECTION
- NaNs 
- Idealista API information includes properties located outside the metropolitan area of Madrid. We remove them.
- In order to compare properties we need to have the same districts with the same name in the idealista and Airbnb datasets.

In [35]:
# We get rid of rows with no neighbourhood and no address. As seen above, these are listings of nearby towns.
df = df.drop(df[df.district.isnull()].index)
df.shape

(4702, 34)

In [14]:
df.district.value_counts()

Salamanca                                455
Centro                                   358
Chamartín                                301
Chamberí                                 269
Tetuán                                   256
Fuencarral                               198
Moncloa                                  191
Hortaleza                                181
Retiro                                   127
Ciudad Lineal                            112
Arganzuela                               102
Carabanchel                               96
San Blas                                  89
Puente de Vallecas                        78
Latina                                    52
Usera                                     49
Barajas                                   44
Villa de Vallecas                         43
Zona Prado de Somosaguas - La Finca       40
Villaverde                                36
Moratalaz                                 25
Rozas Centro                              23
Vicálvaro 

In [36]:
# Selection of properties located in Madrid districts
areas = ['Centro',u'Arganzuela',u'Retiro',u'Salamanca','Chamartín','Moratalaz','Tetuán','Chamberí','Fuerncarral','El Pardo','Moncloa','Aravaca',
        'Carabanchel','Latina','Puente de Vallecas','Usera','Ciudad Lineal','Hortaleza','Villaverde','Villa de Vallecas','Vicálvaro','San Blas','Canillejas','Barajas']
df = df[df.district.isin(areas)]
df.shape

(3461, 34)

### OUTLIERS

In [37]:
# Prices over 10000
# Looks fine
df.groupby(['neighborhood','rooms'],as_index=False).price.max().sort_values(by='price',ascending=False)

Unnamed: 0,neighborhood,rooms,price
225,Goya,7,11000.0
145,Ciudad Universitaria,8,10500.0
194,El Viso,5,10000.0
161,Conde Orgaz-Piovera,6,10000.0
115,Castellana,6,9000.0
112,Castellana,3,9000.0
188,El Plantío,6,8000.0
249,Jerónimos,4,8000.0
113,Castellana,4,8000.0
458,Valdemarín,7,8000.0


### Summary dataset of Idealista listings

- Airbnb does not provide size (meters) of the properties. So we group by number of rooms as a measure of size instead of sq meters

In [38]:
# Group by neighbourhood, rooms and price.
# Again, we use the median instead of the mean to have an accurate measure of average rents
ide = df.groupby(['neighborhood','rooms'],as_index=False).price.agg(['median','count']).reset_index()
ide = pd.DataFrame(ide)
ide

Unnamed: 0,neighborhood,rooms,median,count
0,12 de Octubre-Orcasur,0,600.0,1
1,12 de Octubre-Orcasur,1,575.0,1
2,12 de Octubre-Orcasur,2,810.0,1
3,12 de Octubre-Orcasur,3,950.0,2
4,Abrantes,2,650.0,4
5,Abrantes,3,750.0,3
6,Abrantes,4,1000.0,4
7,Acacias,0,475.0,1
8,Acacias,1,825.0,6
9,Acacias,2,1012.5,4


#### Neighbourhood name replacement.
Several 'barrios' in idealista have different names than in Airbnb.Those area names are amended in order
to have neighborhoods in both datasets with equal names. 


In [18]:
# Name replacement

ide['neighborhood']=ide.neighborhood.str.replace('Conde Orgaz-Piovera','Piovera')
ide['neighborhood']=ide.neighborhood.str.replace('Bernabéu-Hispanoamérica','Hispanoamérica')
ide['neighborhood']=ide.neighborhood.str.replace('Cuzco-Castillejos','Castillejos')
ide['neighborhood']=ide.neighborhood.str.replace('Chueca-Justicia','Justicia')
ide['neighborhood']=ide.neighborhood.str.replace('Lavapiés-Embajadores','Embajadores')
ide['neighborhood']=ide.neighborhood.str.replace('Huertas-Cortes','Cortes')
ide['neighborhood']=ide.neighborhood.str.replace('Valdebebas - Valdefuentes','Valdefuentes')
ide['neighborhood']=ide.neighborhood.str.replace('Nuevos Ministerios-Ríos Rosas','Rios Rosas')
ide['neighborhood']=ide.neighborhood.str.replace('Malasaña-Universidad','Universidad')
ide['neighborhood']=ide.neighborhood.str.replace('Ventilla-Almenara','Almenara')
ide['neighborhood']=ide.neighborhood.str.replace('Sanchinarro','Valdefuentes')



![](http://rentmoreweeks.com/wp-content/uploads/2015/04/airbnb-logo.png)

### 2) AIRBNB Dataset preparation

- Variable selection
- Get the dataset ready to be merged with the Idealista data frame

In [4]:
# First we import airbnb dataset (already cleaned) and create a dataframe with the same variables than idealista`s table.
bnb = pd.read_csv('airbnb.csv') # file saved in Data_filtering_final notebook
bnb.shape

(10077, 98)

In [22]:
bnb.room_type.value_counts()

Entire home/apt    6269
Private room       3643
Shared room         161
Name: room_type, dtype: int64

In [5]:
# Shared rooms and Private rooms are out of interest. Focus on flats
# Private room and Shared room category removed
bnb = bnb.drop(bnb[(bnb.room_type=='Private room') | (bnb.room_type=='Shared room')].index)
bnb.shape

(6271, 98)

In [6]:
# Selection of variables of interest
bnb1 = bnb[['neighbourhood_group_cleansed','neighbourhood_cleansed','bedrooms','accommodates','price','availability_30']]

In [7]:
# Avg price (meadian), number of flats and occupancy in each neighbourhood
bnb2 = bnb1.groupby(['neighbourhood_cleansed','bedrooms'],as_index=False).price.agg(['median','count']).reset_index()
bnb3 = bnb1.groupby(['neighbourhood_cleansed','bedrooms'],as_index=False).availability_30.mean()

In [8]:
# Merge bnb2 and bnb3
bnbs = [bnb2,bnb3]
bnb4 = reduce(lambda left,right: pd.merge(left,right,on=['neighbourhood_cleansed','bedrooms']),bnbs)
bnb4

Unnamed: 0,neighbourhood_cleansed,bedrooms,median,count,availability_30
0,Abrantes,1.0,53.25500,1,9.000000
1,Abrantes,2.0,56.45030,1,13.000000
2,Acacias,0.0,36.74595,4,14.500000
3,Acacias,1.0,63.90600,21,7.571429
4,Acacias,2.0,83.61035,18,8.666667
5,Acacias,3.0,63.90600,1,13.000000
6,Acacias,4.0,117.16100,1,2.000000
7,Adelfas,0.0,53.25500,4,5.000000
8,Adelfas,1.0,69.23150,10,11.800000
9,Adelfas,2.0,83.07780,9,9.111111


In [9]:
# Addition of new variable: gross estimated income 
# Summary df with estimated income for each neighbourhood and apartment size
bnb4.rename(columns={'median':'Avg_Price_Night_Airbnb','neighbourhood_cleansed':'neighborhood','bedrooms':'rooms'},inplace=True)
bnb4['Estimated_income_Airbnb_EUR'] = bnb4.Avg_Price_Night_Airbnb*(30 - bnb4.availability_30)
bnb4

Unnamed: 0,neighborhood,rooms,Avg_Price_Night_Airbnb,count,availability_30,Estimated_income_Airbnb_EUR
0,Abrantes,1.0,53.25500,1,9.000000,1118.355000
1,Abrantes,2.0,56.45030,1,13.000000,959.655100
2,Acacias,0.0,36.74595,4,14.500000,569.562225
3,Acacias,1.0,63.90600,21,7.571429,1433.320286
4,Acacias,2.0,83.61035,18,8.666667,1783.687467
5,Acacias,3.0,63.90600,1,13.000000,1086.402000
6,Acacias,4.0,117.16100,1,2.000000,3280.508000
7,Adelfas,0.0,53.25500,4,5.000000,1331.375000
8,Adelfas,1.0,69.23150,10,11.800000,1260.013300
9,Adelfas,2.0,83.07780,9,9.111111,1735.402933


![](https://s14-eu5.ixquick.com/cgi-bin/serveimage?url=http%3A%2F%2Ft0.gstatic.com%2Fimages%3Fq%3Dtbn%3AANd9GcQqjeQLlcGYvzPz1B-a2NfDRLNNTjYlpUHJgC-kvYG-jUNehiOA&sp=c82e503b2dd362c5857b2b227c055500&anticache=491873)![](http://rentmoreweeks.com/wp-content/uploads/2015/04/airbnb-logo.png)

### 3) Merge both datasets to get final summary dataframe
In order to compare residential market to vacational market rents, we create a single data frame with the data of Idealista and Airbnb

In [19]:
fin = pd.merge(ide,bnb4,how='outer',on=['neighborhood','rooms'])

In [20]:
# Rename columns
fin.rename(columns={'median':'Monthly_rent_Idealista_EUR','count_x':'Number_flats_IDEALISTA','count_y':'Number_flats_AIRBNB'},inplace=True)

In [21]:
# Neighbourhoods with no Airbnb listings are deleted
afueras = ['Zona Renfe','Virgen del Cortijo - Manoteras','12 de Octubre-Orcasur','Ambroz','Atalaya','Campo de las Naciones-Corralejos',
          'Casco Antiguo','El Cañaveral - Los Berrocales','Descubridores-Escritores','Ensanche de Vallecas - La Gavia','Los Ángeles',
          'Orcasitas','Pau de Carabanchel','Valdebernardo - Valderribas','Valdemarín']
fin = fin.drop(fin[fin.neighborhood.isin(afueras)].index)

In [22]:
# NaNs 
# Missing values means no offer available so NaNs replaced by 0.
fin = fin.replace(np.nan,0)

In [23]:
# NEW FEATURE:
# Difference of income: premium over residential rents for flats rented on Airbnb
fin['Holyday_rental_premium_pct'] = ((fin.Estimated_income_Airbnb_EUR - fin.Monthly_rent_Idealista_EUR)/fin.Monthly_rent_Idealista_EUR)*100

fin.head()

Unnamed: 0,neighborhood,rooms,Monthly_rent_Idealista_EUR,Number_flats_IDEALISTA,Avg_Price_Night_Airbnb,Number_flats_AIRBNB,availability_30,Estimated_income_Airbnb_EUR,Holyday_rental_premium_pct
4,Abrantes,2,650.0,4.0,56.4503,1.0,13.0,959.6551,47.639246
5,Abrantes,3,750.0,3.0,0.0,0.0,0.0,0.0,-100.0
6,Abrantes,4,1000.0,4.0,0.0,0.0,0.0,0.0,-100.0
7,Acacias,0,475.0,1.0,36.74595,4.0,14.5,569.562225,19.907837
8,Acacias,1,825.0,6.0,63.906,21.0,7.571429,1433.320286,73.735792


In [24]:
# In areas or flat sizes with no Airbnb offer, premium column shows infinity so we replace it by 0. 
fin['Holyday_rental_premium_pct'] = fin.Holyday_rental_premium_pct.replace(np.inf,0)
fin.head(15)

Unnamed: 0,neighborhood,rooms,Monthly_rent_Idealista_EUR,Number_flats_IDEALISTA,Avg_Price_Night_Airbnb,Number_flats_AIRBNB,availability_30,Estimated_income_Airbnb_EUR,Holyday_rental_premium_pct
4,Abrantes,2,650.0,4.0,56.4503,1.0,13.0,959.6551,47.639246
5,Abrantes,3,750.0,3.0,0.0,0.0,0.0,0.0,-100.0
6,Abrantes,4,1000.0,4.0,0.0,0.0,0.0,0.0,-100.0
7,Acacias,0,475.0,1.0,36.74595,4.0,14.5,569.562225,19.907837
8,Acacias,1,825.0,6.0,63.906,21.0,7.571429,1433.320286,73.735792
9,Acacias,2,1012.5,4.0,83.61035,18.0,8.666667,1783.687467,76.166663
10,Acacias,3,1300.0,10.0,63.906,1.0,13.0,1086.402,-16.430615
11,Acacias,4,1350.0,1.0,117.161,1.0,2.0,3280.508,143.000593
12,Adelfas,1,975.0,5.0,69.2315,10.0,11.8,1260.0133,29.232133
13,Adelfas,2,1200.0,11.0,83.0778,9.0,9.111111,1735.402933,44.616911


### SELECTION OF POPULAR NEIGHBOURHOODS. FINAL DATASET
There are many areas where Airbnb listings are almost non existant. 
Figures related to areas with less than 5 listings are misleading and not reliable. We cannot get a grasp of a 
rental market just based on a few homes listings so we get rid of these neighbourhoods.



In [25]:
center = fin.drop(fin[(fin.Number_flats_AIRBNB<5)].index)
center.shape

(161, 9)

To sum up, we have the dataset with the estimated gross monthly incomes of properties listed in Airbnb and Idealista based on area and size(number of rooms). Moreover, we have the amount of listings in both platforms in each neighbourhood as well as the premium (percentage) landlors receive while renting on Airbnb. 

In [94]:
center.to_csv('center.csv')# File for Dashboard

In [26]:
center.head(25)

Unnamed: 0,neighborhood,rooms,Monthly_rent_Idealista_EUR,Number_flats_IDEALISTA,Avg_Price_Night_Airbnb,Number_flats_AIRBNB,availability_30,Estimated_income_Airbnb_EUR,Holyday_rental_premium_pct
8,Acacias,1,825.0,6.0,63.906,21.0,7.571429,1433.320286,73.735792
9,Acacias,2,1012.5,4.0,83.61035,18.0,8.666667,1783.687467,76.166663
12,Adelfas,1,975.0,5.0,69.2315,10.0,11.8,1260.0133,29.232133
13,Adelfas,2,1200.0,11.0,83.0778,9.0,9.111111,1735.402933,44.616911
20,Almagro,0,900.0,2.0,69.2315,9.0,10.0,1384.63,53.847778
21,Almagro,1,1200.0,10.0,80.9476,34.0,8.911765,1707.042035,42.253503
22,Almagro,2,2350.0,27.0,106.51,21.0,9.809524,2150.487619,-8.489889
23,Almagro,3,2475.0,16.0,176.8066,5.0,12.0,3182.5188,28.586618
26,Almendrales,0,660.0,7.0,34.0832,5.0,7.0,783.9136,18.774788
36,Arapiles,0,650.0,3.0,53.255,22.0,8.227273,1159.506591,78.385629


## RANKINGS


In [27]:
# Neighbourhood and type of flat with largest estimated gross income
center.sort_values(by='Estimated_income_Airbnb_EUR',ascending=False)

Unnamed: 0,neighborhood,rooms,Monthly_rent_Idealista_EUR,Number_flats_IDEALISTA,Avg_Price_Night_Airbnb,Number_flats_AIRBNB,availability_30,Estimated_income_Airbnb_EUR,Holyday_rental_premium_pct
380,Recoletos,4,4000.0,28.0,498.46680,6.0,16.666667,6646.224000,66.155600
379,Recoletos,3,2900.0,26.0,275.86090,13.0,10.307692,5432.337723,87.321990
447,Trafalgar,4,1900.0,2.0,242.31025,6.0,9.166667,5048.130208,165.691064
538,Embajadores,5,0.0,0.0,213.02000,7.0,7.285714,4838.597143,0.000000
254,Embajadores,4,2500.0,1.0,197.04350,8.0,7.000000,4532.000500,81.280020
602,Universidad,4,0.0,0.0,191.71800,10.0,6.400000,4524.544800,0.000000
438,Sol,3,1700.0,6.0,181.06700,50.0,7.740000,4030.551420,137.091260
597,Sol,4,0.0,0.0,223.67100,15.0,12.333333,3951.521000,0.000000
237,Cortes,4,2200.0,1.0,170.41600,11.0,7.545455,3826.613818,73.936992
319,Palacio,3,2400.0,8.0,170.41600,52.0,8.538462,3657.389538,52.391231


In [28]:
# Neighbourhood and types of flat with largest discrepancy between Airbnb and Idealista
prem = center.sort_values(by='Holyday_rental_premium_pct',ascending=False)
prem


Unnamed: 0,neighborhood,rooms,Monthly_rent_Idealista_EUR,Number_flats_IDEALISTA,Avg_Price_Night_Airbnb,Number_flats_AIRBNB,availability_30,Estimated_income_Airbnb_EUR,Holyday_rental_premium_pct
393,San Andrés,1,450.0,3.0,46.86440,6.0,4.333333,1202.852933,167.300652
447,Trafalgar,4,1900.0,2.0,242.31025,6.0,9.166667,5048.130208,165.691064
438,Sol,3,1700.0,6.0,181.06700,50.0,7.740000,4030.551420,137.091260
39,Arapiles,3,1500.0,8.0,154.43950,5.0,7.400000,3490.332700,132.688847
400,San Diego,2,657.5,18.0,63.90600,7.0,6.428571,1506.355714,129.103531
373,Quintana,2,750.0,3.0,73.49190,5.0,6.800000,1705.012080,127.334944
106,Casco Histórico de Vallecas,2,600.0,1.0,106.51000,5.0,17.200000,1363.328000,127.221333
363,Puerta Bonita,1,550.0,5.0,61.77580,5.0,11.800000,1124.319560,104.421738
134,Ciudad Jardín,1,720.0,7.0,74.55700,6.0,10.666667,1441.435333,100.199352
301,Numancia,2,800.0,3.0,65.50365,6.0,6.166667,1561.170325,95.146291


In [29]:
# Neighbourhoods with highest occupancy, most demanded
center.sort_values(by='availability_30',ascending=True)


Unnamed: 0,neighborhood,rooms,Monthly_rent_Idealista_EUR,Number_flats_IDEALISTA,Avg_Price_Night_Airbnb,Number_flats_AIRBNB,availability_30,Estimated_income_Airbnb_EUR,Holyday_rental_premium_pct
506,Atocha,1,0.0,0.0,57.51540,6.0,3.000000,1552.915800,0.000000
393,San Andrés,1,450.0,3.0,46.86440,6.0,4.333333,1202.852933,167.300652
592,Rios Rosas,0,0.0,0.0,63.90600,6.0,4.500000,1629.603000,0.000000
153,Concepción,2,820.0,2.0,63.90600,7.0,5.857143,1542.873429,88.155296
182,Delicias,3,1450.0,15.0,96.92410,5.0,6.000000,2326.178400,60.426097
301,Numancia,2,800.0,3.0,65.50365,6.0,6.166667,1561.170325,95.146291
333,Palos de Moguer,0,750.0,1.0,52.18990,9.0,6.333333,1235.160967,64.688129
602,Universidad,4,0.0,0.0,191.71800,10.0,6.400000,4524.544800,0.000000
400,San Diego,2,657.5,18.0,63.90600,7.0,6.428571,1506.355714,129.103531
250,Embajadores,0,900.0,15.0,53.25500,137.0,6.525547,1250.131971,38.903552
