# Creating a dataframe containing Airbnb listings data from 2015 to 2017

In [1]:
# Dependencies
import pandas as pd
import os
import re # for filtering list of files

In [2]:
# List the listings csv files found in the current working directory.
files = os.listdir()  # every entry in the current working directory
# Input files are named "<8-digit date>...listings.csv"; the first 8 characters
# are later used as the dataframe name.  Requiring the date prefix, escaping
# the dot and matching the whole name keeps out look-alikes -- in particular
# the "Airbnb_listings.csv" file that this notebook itself writes at the end,
# which the old pattern would pick up again on a re-run.
r = re.compile(r"\d{8}.*listings\.csv")
newlist = [name for name in files if r.fullmatch(name)]  # csv files to load

In [3]:
# load every listed csv file into its own dataframe, in the order of newlist
Airbnb_dflist = list(map(pd.read_csv, newlist))

  if self.run_code(code, result):


In [4]:
# name each dataframe after the first 8 characters of its file name
file_names = [csv_name[:8] for csv_name in newlist]
file_names

['20161001',
 '20170101',
 '20171207',
 '20151102',
 '20150504',
 '20150902',
 '20151202',
 '20171101',
 '20160602',
 '20170502',
 '20171002',
 '20170202',
 '20161203',
 '20160902',
 '20160802',
 '20170302',
 '20170402',
 '20160702',
 '20171108',
 '20160502',
 '20170602',
 '20170902',
 '20160403',
 '20160202',
 '20170802',
 '20170702',
 '20171202',
 '20161102']

In [5]:
# pair each name in file_names with the dataframe at the same position in
# Airbnb_dflist (keys: file name prefixes; values: the csv contents)
Airbnb_df_dict = {name: frame for name, frame in zip(file_names, Airbnb_dflist)}

In [6]:
# collect the "neighbourhood_cleansed" values from every loaded dataframe
# (all files in Airbnb_df_dict, not just those from 2015)
neighbourhoods = []
for listings in Airbnb_df_dict.values():
    # look the column up directly instead of scanning every column;
    # a dataframe without the column contributes nothing, as before
    if "neighbourhood_cleansed" in listings.columns:
        neighbourhoods.extend(listings["neighbourhood_cleansed"])

In [7]:
# collect the "property_type" values from every loaded dataframe
# (all files in Airbnb_df_dict, not just those from 2015)
property_type = []
for listings in Airbnb_df_dict.values():
    # look the column up directly instead of scanning every column;
    # a dataframe without the column contributes nothing, as before
    if "property_type" in listings.columns:
        property_type.extend(listings["property_type"])

In [8]:
# collect the "price" values (the daily rates) from every loaded dataframe
# (all files in Airbnb_df_dict, not just those from 2015)
daily_rates = []
for listings in Airbnb_df_dict.values():
    # look the column up directly instead of scanning every column;
    # a dataframe without the column contributes nothing, as before
    if "price" in listings.columns:
        daily_rates.extend(listings["price"])

In [9]:
# collect the "last_scraped" values from every loaded dataframe
# (all files in Airbnb_df_dict, not just those from 2015)
dates = []
for listings in Airbnb_df_dict.values():
    # look the column up directly instead of scanning every column;
    # a dataframe without the column contributes nothing, as before
    if "last_scraped" in listings.columns:
        dates.extend(listings["last_scraped"])

# convert dates to year-month format by keeping the first 7 characters
# of each date string (e.g. "2016-10")
dates2 = [scraped[0:7] for scraped in dates]

In [10]:
# combine the four parallel lists into a single dataframe, one column per
# list: dates, neighbourhoods, property types and daily rates
Airbnb_df = pd.DataFrame(
    data=dict(
        zip(
            ["date", "neighbourhood", "property type", "daily rate"],
            [dates2, neighbourhoods, property_type, daily_rates],
        )
    )
)

In [11]:
# Convert the price to annual rate from daily rate
# The "daily rate" column holds strings such as "$105.00".  Strip the currency
# sign and any "," separators before converting to float.  regex=False makes
# both replacements literal: "$" is otherwise a regex end-of-string anchor,
# and the default value of `regex` differs between pandas versions.
daily_rate = Airbnb_df["daily rate"].str.replace("$", "", regex=False)  # still string
daily_rate = daily_rate.str.replace(",", "", regex=False)  # still string
daily_rate = daily_rate.astype(float)
Airbnb_df["annual rate"] = daily_rate * 365  # daily rate scaled to 365 days

In [12]:
# preview the first five rows of the combined dataframe
Airbnb_df.head(n=5)

Unnamed: 0,date,neighbourhood,property type,daily rate,annual rate
0,2016-10,Seacliff,House,$105.00,38325.0
1,2016-10,Seacliff,House,$300.00,109500.0
2,2016-10,Seacliff,Apartment,$175.00,63875.0
3,2016-10,Seacliff,House,$90.00,32850.0
4,2016-10,Seacliff,Condominium,$400.00,146000.0


In [13]:
# write the combined dataframe to a comma-separated, UTF-8 encoded csv file
Airbnb_df.to_csv(path_or_buf="Airbnb_listings.csv", encoding="utf-8", sep=",")