In [1]:
import matplotlib
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pandas import *
%matplotlib inline 

import warnings
warnings.filterwarnings('ignore')

cmap = sns.diverging_palette(220, 10, as_cmap=True)

In [4]:
def read_data(location):
    location = location[['id', 'room_type', 'price', 'minimum_nights', 'availability_365', 'number_of_reviews', 
                         'reviews_per_month', 'calculated_host_listings_count']]
    return location

def get_stats(location):
    x = ['price', 'minimum_nights', 'availability_365', 'number_of_reviews', 'reviews_per_month', 'calculated_host_listings_count']
    location = location.loc[:, x]
    location_stats = location.describe()
    location_stats = concat([location_stats.ix[0:4], location_stats.ix[7:]])
    return location_stats

def reorder(location):
    new = location.set_index('location', append = True).unstack(0)
    return new

# Airbnb Data Pre-Processing
## 1 - Reading & Cleaning
A look into the Airbnb listing features provided for each city by the data:

In [5]:
amsterdam = read_csv('Amsterdam-Data/listings.csv')
amsterdam = read_data(amsterdam)
amsterdam['price'] = amsterdam['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

athens = read_csv('Athens-Data/listings.csv')
athens = read_data(athens)
athens['price'] = athens['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

barcelona = read_csv('Barcelona-Data/listings.csv')
barcelona = read_data(barcelona)
barcelona['price'] = barcelona['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

berlin = read_csv('Berlin-Data/listings.csv')
berlin = read_data(berlin)
berlin['price'] = berlin['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

brussels = read_csv('Brussels-Data/listings.csv')
brussels = read_data(brussels)
brussels['price'] = brussels['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

copenhagen = read_csv('Copenhagen-Data/listings.csv')
copenhagen = read_data(copenhagen)
copenhagen['price'] = copenhagen['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

dublin = read_csv('Dublin-Data/listings.csv')
dublin = read_data(dublin)
dublin['price'] = dublin['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

edinburgh = read_csv('Edinburgh-Data/listings.csv')
edinburgh = read_data(edinburgh)
edinburgh['price'] = edinburgh['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

florence = read_csv('Florence-Data/listings.csv')
florence = read_data(florence)
florence['price'] = florence['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

geneva = read_csv('Geneva-Data/listings.csv')
geneva = read_data(geneva)
geneva['price'] = geneva['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

istanbul = read_csv('Istanbul-Data/listings.csv')
istanbul = read_data(istanbul)
istanbul['price'] = istanbul['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

lisbon = read_csv('Lisbon-Data/listings.csv')
lisbon = read_data(lisbon)
lisbon['price'] = lisbon['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

london = read_csv('London-Data/listings.csv')
london = read_data(london)
london['price'] = london['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

lyon = read_csv('Lyon-Data/listings.csv')
lyon = read_data(lyon)
lyon['price'] = lyon['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

madrid = read_csv('Madrid-Data/listings.csv')
madrid = read_data(madrid)
madrid['price'] = madrid['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

manchester = read_csv('Manchester-Data/listings.csv')
manchester = read_data(manchester)
manchester['price'] = manchester['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

milan = read_csv('Milan-Data/listings.csv')
milan = read_data(milan)
milan['price'] = milan['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

oslo = read_csv('Oslo-Data/listings.csv')
oslo = read_data(oslo)
oslo['price'] = oslo['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

paris = read_csv('Paris-Data/listings.csv')
paris = read_data(paris)
paris['price'] = paris['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

prague = read_csv('Prague-Data/listings.csv')
prague = read_data(prague)
prague['price'] = prague['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

rome = read_csv('Rome-Data/listings.csv')
rome = read_data(rome)
rome['price'] = rome['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

stockholm = read_csv('Stockholm-Data/listings.csv')
stockholm = read_data(stockholm)
stockholm['price'] = stockholm['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

venice = read_csv('Venice-Data/listings.csv')
venice = read_data(venice)
venice['price'] = venice['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

vienna = read_csv('Vienna-Data/listings.csv')
vienna = read_data(vienna)
vienna['price'] = vienna['price'].map(lambda x: str(x)[1:]).convert_objects(convert_numeric=True)

## 2 - Getting Statistics on Airbnb Data:
Creating a dataframe to house statistical summaries of the listing data so that we can analyze listing trends.

In [6]:
amsterdam_s = get_stats(amsterdam)
amsterdam_s['location'] = 'Amsterdam, NED'
amsterdam_s = reorder(amsterdam_s)

athens_s = get_stats(athens)
athens_s['location'] = 'Athens, GRE'
athens_s = reorder(athens_s)

barcelona_s = get_stats(barcelona)
barcelona_s['location'] = 'Barcelona, ESP'
barcelona_s = reorder(barcelona_s)

berlin_s = get_stats(berlin)
berlin_s['location'] = 'Berlin, GER'
berlin_s = reorder(berlin_s)

brussels_s = get_stats(brussels)
brussels_s['location'] = 'Brussels, BEL'
brussels_s = reorder(brussels_s)

copenhagen_s = get_stats(copenhagen)
copenhagen_s['location'] = 'Copenhagen, DEN'
copenhagen_s = reorder(copenhagen_s)

dublin_s = get_stats(dublin)
dublin_s['location'] = 'Dublin, IRE'
dublin_s = reorder(dublin_s)

edinburgh_s = get_stats(edinburgh)
edinburgh_s['location'] = 'Edinburgh, SCO'
edinburgh_s = reorder(edinburgh_s)

florence_s = get_stats(florence)
florence_s['location'] = 'Florence, ITA'
florence_s = reorder(florence_s)

geneva_s = get_stats(geneva)
geneva_s['location'] = 'Geneva, SWI'
geneva_s = reorder(geneva_s)

istanbul_s = get_stats(istanbul)
istanbul_s['location'] = 'Istanbul, TUR'
istanbul_s = reorder(istanbul_s)

lisbon_s = get_stats(lisbon)
lisbon_s['location'] = 'Lisbon, POR'
lisbon_s = reorder(lisbon_s)

london_s = get_stats(london)
london_s['location'] = 'London, ENG'
london_s = reorder(london_s)

lyon_s = get_stats(lyon)
lyon_s['location'] = 'Lyon, FRA'
lyon_s = reorder(lyon_s)

madrid_s = get_stats(madrid)
madrid_s['location'] = 'Madrid, ESP'
madrid_s = reorder(madrid_s)

manchester_s = get_stats(manchester)
manchester_s['location'] = 'Manchester, ENG'
manchester_s = reorder(manchester_s)

milan_s = get_stats(milan)
milan_s['location'] = 'Milan, ITA'
milan_s = reorder(milan_s)

oslo_s = get_stats(oslo)
oslo_s['location'] = 'Oslo, NOR'
oslo_s = reorder(oslo_s)

paris_s = get_stats(paris)
paris_s['location'] = 'Paris, FRA'
paris_s = reorder(paris_s)

prague_s = get_stats(prague)
prague_s['location'] = 'Prague, CZE'
prague_s = reorder(prague_s)

rome_s = get_stats(rome)
rome_s['location'] = 'Rome, ITA'
rome_s = reorder(rome_s)

stockholm_s = get_stats(stockholm)
stockholm_s['location'] = 'Stockholm, SWE'
stockholm_s = reorder(stockholm_s)

venice_s = get_stats(venice)
venice_s['location'] = 'Venice, ITA'
venice_s = reorder(venice_s)

vienna_s = get_stats(vienna)
vienna_s['location'] = 'Vienna, AUS'
vienna_s = reorder(vienna_s)

In [8]:
statistics = concat([amsterdam_s, athens_s, barcelona_s, berlin_s, brussels_s, copenhagen_s, dublin_s, edinburgh_s, 
                     florence_s, geneva_s, istanbul_s, lisbon_s, london_s, lyon_s, madrid_s, manchester_s,
                    milan_s, oslo_s, paris_s, prague_s, rome_s, stockholm_s, venice_s, vienna_s])
statistics

Unnamed: 0_level_0,price,price,price,price,price,minimum_nights,minimum_nights,minimum_nights,minimum_nights,minimum_nights,...,reviews_per_month,reviews_per_month,reviews_per_month,reviews_per_month,reviews_per_month,calculated_host_listings_count,calculated_host_listings_count,calculated_host_listings_count,calculated_host_listings_count,calculated_host_listings_count
Unnamed: 0_level_1,count,max,mean,min,std,count,max,mean,min,std,...,count,max,mean,min,std,count,max,mean,min,std
location,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2
"Amsterdam, NED",19534.0,600.0,27.517303,0.0,28.755842,19538.0,1001.0,3.077388,1.0,10.986325,...,17108.0,72.46,1.111769,0.01,1.600833,19538.0,108.0,3.577336,1.0,13.082243
"Athens, GRE",7801.0,999.0,7.034867,0.0,19.873609,7828.0,700.0,2.750128,1.0,17.38861,...,5947.0,138.18,2.249939,0.01,3.02879,7828.0,77.0,6.458866,1.0,13.683493
"Barcelona, ESP",17754.0,980.0,12.488228,0.0,23.86203,17788.0,365.0,3.277209,1.0,7.961353,...,14748.0,65.84,1.798457,0.01,1.692814,17788.0,148.0,9.685518,1.0,22.32426
"Berlin, GER",26294.0,900.0,6.588385,0.0,14.924848,26323.0,5000.0,4.817156,1.0,34.39141,...,21490.0,31.34,1.061641,0.01,1.375658,26323.0,43.0,1.781142,1.0,3.409751
"Brussels, BEL",7042.0,944.0,7.242545,0.0,18.598432,7049.0,1000.0,4.996595,1.0,21.812299,...,5787.0,389.0,1.805073,0.01,6.07285,7049.0,68.0,4.817563,1.0,11.588365
"Copenhagen, DEN",25460.0,7877.0,98.998154,0.0,168.443335,25465.0,1000.0,3.266366,1.0,12.105597,...,20827.0,23.16,0.752716,0.01,0.967519,25465.0,45.0,1.300255,1.0,2.052257
"Dublin, IRE",9023.0,690.0,15.863238,0.0,24.875414,9028.0,1000.0,2.834515,1.0,13.582642,...,7532.0,73.27,2.059344,0.01,2.200524,9028.0,40.0,2.906513,1.0,4.622148
"Edinburgh, SCO",9628.0,999.0,12.651849,0.0,31.325715,9638.0,1000.0,3.064121,1.0,20.712068,...,8560.0,18.0,2.498798,0.01,2.352915,9638.0,80.0,3.142146,1.0,8.147756
"Florence, ITA",10833.0,800.0,12.70516,0.0,24.937478,10842.0,365.0,2.363771,1.0,6.148557,...,9123.0,28.2,2.088293,0.01,2.011556,10842.0,167.0,13.541229,1.0,30.970911
"Geneva, SWI",3103.0,900.0,18.604576,0.0,38.834444,3107.0,10000.0,7.028645,1.0,179.622182,...,2452.0,21.11,1.297863,0.02,1.730443,3107.0,76.0,4.448986,1.0,12.193475


Columns included in statistical summary:

In [9]:
statistics.columns.values

array([('price', 'count'), ('price', 'max'), ('price', 'mean'),
       ('price', 'min'), ('price', 'std'), ('minimum_nights', 'count'),
       ('minimum_nights', 'max'), ('minimum_nights', 'mean'),
       ('minimum_nights', 'min'), ('minimum_nights', 'std'),
       ('availability_365', 'count'), ('availability_365', 'max'),
       ('availability_365', 'mean'), ('availability_365', 'min'),
       ('availability_365', 'std'), ('number_of_reviews', 'count'),
       ('number_of_reviews', 'max'), ('number_of_reviews', 'mean'),
       ('number_of_reviews', 'min'), ('number_of_reviews', 'std'),
       ('reviews_per_month', 'count'), ('reviews_per_month', 'max'),
       ('reviews_per_month', 'mean'), ('reviews_per_month', 'min'),
       ('reviews_per_month', 'std'),
       ('calculated_host_listings_count', 'count'),
       ('calculated_host_listings_count', 'max'),
       ('calculated_host_listings_count', 'mean'),
       ('calculated_host_listings_count', 'min'),
       ('calculated_host_lis