# Airbnb Rate Analysis 

- Find trends in Airbnb rates across San Francisco neighbourhoods for the years 2015–2017

### Importing Dependencies

In [1]:
# Dependencies
import pandas as pd
import numpy as np

### Creating a DataFrame from the CSV file

In [2]:
# Load the Airbnb listings CSV into a DataFrame and preview the first rows
airbnb_df = pd.read_csv(filepath_or_buffer="Data/Airbnb_listings.csv")
airbnb_df.head(n=5)

Unnamed: 0.1,Unnamed: 0,date,neighbourhood,property type,daily rate,annual rate (USD)
0,0,2016-10,Seacliff,House,$105.00,38325.0
1,1,2016-10,Seacliff,House,$300.00,109500.0
2,2,2016-10,Seacliff,Apartment,$175.00,63875.0
3,3,2016-10,Seacliff,House,$90.00,32850.0
4,4,2016-10,Seacliff,Condominium,$400.00,146000.0


### Cleaning the Data and Creating a New DataFrame

In [3]:
# Build the working frame without the "Unnamed: 0" column, leaving the
# loaded frame (airbnb_df) untouched, then preview it.
data = airbnb_df.drop(columns="Unnamed: 0")
data.head(n=5)

Unnamed: 0,date,neighbourhood,property type,daily rate,annual rate (USD)
0,2016-10,Seacliff,House,$105.00,38325.0
1,2016-10,Seacliff,House,$300.00,109500.0
2,2016-10,Seacliff,Apartment,$175.00,63875.0
3,2016-10,Seacliff,House,$90.00,32850.0
4,2016-10,Seacliff,Condominium,$400.00,146000.0


In [4]:
# Keep only the year: "date" values look like "2016-10", so the first four
# characters are the year. The vectorised .str accessor leaves missing values
# as NaN instead of raising, which slicing each element by hand would do.
data["date"] = data["date"].str[:4]

In [5]:
# Keep only the 2015 rows ("date" holds the year as a string at this point).
data_year_2015 = data[data["date"] == "2015"]

# Group once and reuse the grouper for both aggregates below.
by_neighbourhood_2015 = data_year_2015.groupby("neighbourhood")

# Average annual rate per neighbourhood, rounded to two decimal places.
# The numeric column is selected explicitly: "date", "property type" and
# "daily rate" are strings, and recent pandas raises a TypeError when mean()
# is asked to aggregate them instead of silently dropping them.
ave_2015 = (
    by_neighbourhood_2015["annual rate (USD)"]
    .mean()
    .round(2)
    .rename("Average annual rate")
    .reset_index()
)

# Per-neighbourhood listing figure.
# NOTE(review): nunique() counts *distinct* "daily rate" values, so listings
# that share a price are counted once -- confirm this is the intended
# "number of listings" (size() would count rows instead).
listings_2015 = by_neighbourhood_2015["daily rate"].nunique()
no_listings_2015 = list(listings_2015)

# Attach the figure by neighbourhood name rather than by row position, so the
# values cannot drift out of line with the averages.
ave_2015["No. of listings"] = ave_2015["neighbourhood"].map(listings_2015)

ave_2015.head()

Unnamed: 0,neighbourhood,Average annual rate,No. of listings
0,Bayview,46374.21,84
1,Bernal Heights,68704.42,156
2,Castro/Upper Market,84268.13,191
3,Chinatown,91821.37,78
4,Crocker Amazon,47207.84,46


In [6]:
# Keep only the 2016 rows ("date" holds the year as a string at this point).
data_year_2016 = data[data["date"] == "2016"]

# Group once and reuse the grouper for both aggregates below.
by_neighbourhood_2016 = data_year_2016.groupby("neighbourhood")

# Average annual rate per neighbourhood, rounded to two decimal places.
# The numeric column is selected explicitly: "date", "property type" and
# "daily rate" are strings, and recent pandas raises a TypeError when mean()
# is asked to aggregate them instead of silently dropping them.
ave_2016 = (
    by_neighbourhood_2016["annual rate (USD)"]
    .mean()
    .round(2)
    .rename("Average annual rate")
    .reset_index()
)

# Per-neighbourhood listing figure.
# NOTE(review): nunique() counts *distinct* "daily rate" values, so listings
# that share a price are counted once -- confirm this is the intended
# "number of listings" (size() would count rows instead).
listings_2016 = by_neighbourhood_2016["daily rate"].nunique()
no_listings_2016 = list(listings_2016)

# Attach the figure by neighbourhood name rather than by row position, so the
# values cannot drift out of line with the averages.
ave_2016["No. of listings"] = ave_2016["neighbourhood"].map(listings_2016)

ave_2016.head()

Unnamed: 0,neighbourhood,Average annual rate,No. of listings
0,Bayview,58118.04,126
1,Bernal Heights,77447.15,191
2,Castro/Upper Market,97370.54,246
3,Chinatown,108057.51,123
4,Crocker Amazon,43735.51,51


In [7]:
# Keep only the 2017 rows ("date" holds the year as a string at this point).
data_year_2017 = data[data["date"] == "2017"]

# Group once and reuse the grouper for both aggregates below.
by_neighbourhood_2017 = data_year_2017.groupby("neighbourhood")

# Average annual rate per neighbourhood, rounded to two decimal places.
# The numeric column is selected explicitly: "date", "property type" and
# "daily rate" are strings, and recent pandas raises a TypeError when mean()
# is asked to aggregate them instead of silently dropping them.
ave_2017 = (
    by_neighbourhood_2017["annual rate (USD)"]
    .mean()
    .round(2)
    .rename("Average annual rate")
    .reset_index()
)

# Per-neighbourhood listing figure.
# NOTE(review): nunique() counts *distinct* "daily rate" values, so listings
# that share a price are counted once -- confirm this is the intended
# "number of listings" (size() would count rows instead).
listings_2017 = by_neighbourhood_2017["daily rate"].nunique()
no_listings_2017 = list(listings_2017)

# Attach the figure by neighbourhood name rather than by row position, so the
# values cannot drift out of line with the averages.
ave_2017["No. of listings"] = ave_2017["neighbourhood"].map(listings_2017)

ave_2017.head()

Unnamed: 0,neighbourhood,Average annual rate,No. of listings
0,Bayview,45354.45,117
1,Bernal Heights,72724.98,222
2,Castro/Upper Market,90733.81,231
3,Chinatown,99129.42,129
4,Crocker Amazon,35399.72,52


In [8]:
# Combine the three yearly summaries into one frame keyed on neighbourhood.
# merge() defaults to an inner join, so only neighbourhoods present in all
# three years survive. The first merge tags the clashing 2015/2016 columns
# with "_x"/"_y"; the 2017 columns no longer clash and keep their names.
Airbnb_avg = (
    ave_2015
    .merge(ave_2016, on="neighbourhood")
    .merge(ave_2017, on="neighbourhood")
)
Airbnb_avg.head()

Unnamed: 0,neighbourhood,Average annual rate_x,No. of listings_x,Average annual rate_y,No. of listings_y,Average annual rate,No. of listings
0,Bayview,46374.21,84,58118.04,126,45354.45,117
1,Bernal Heights,68704.42,156,77447.15,191,72724.98,222
2,Castro/Upper Market,84268.13,191,97370.54,246,90733.81,231
3,Chinatown,91821.37,78,108057.51,123,99129.42,129
4,Crocker Amazon,47207.84,46,43735.51,51,35399.72,52


In [9]:
# Replace the merge suffixes with explicit year labels:
# "_x" columns are 2015, "_y" columns are 2016, unsuffixed columns are 2017.
year_labels = {
    "Average annual rate_x": "Ave rate 2015",
    "No. of listings_x": "No. listings 2015",
    "Average annual rate_y": "Ave rate 2016",
    "No. of listings_y": "No. listings 2016",
    "Average annual rate": "Ave rate 2017",
    "No. of listings": "No. listings 2017",
}
Airbnb_avg = Airbnb_avg.rename(columns=year_labels)
Airbnb_avg.head()

Unnamed: 0,neighbourhood,Ave rate 2015,No. listings 2015,Ave rate 2016,No. listings 2016,Ave rate 2017,No. listings 2017
0,Bayview,46374.21,84,58118.04,126,45354.45,117
1,Bernal Heights,68704.42,156,77447.15,191,72724.98,222
2,Castro/Upper Market,84268.13,191,97370.54,246,90733.81,231
3,Chinatown,91821.37,78,108057.51,123,99129.42,129
4,Crocker Amazon,47207.84,46,43735.51,51,35399.72,52


In [10]:
# Neighbourhood names grouped by district, one list per district.
# Several names appear in more than one spelling (e.g. "Seacliff" /
# "Sea Cliff", "Croker Amazon" / "Crocker Amazon") -- presumably to match
# variants found in the source data; verify before removing any of them.
Downtown = [
    "Chinatown",
    "Civic Center",
    "Financial District",
    "French Quarter",
    "Mid-Market",
    "Nob Hill",
    "North Beach",
    "Mission Bay",
    "South of Market",
    "Telegraph Hill",
    "Tenderloin",
    "Union Square",
    "Downtown",
    "Van Ness/ Civic Center",
    "Downtown/Civic Center",
    "Financial District North",
    "Financial District South",
    "South Beach",
]

North_Downtown = [
    "Cow Hollow",
    "Fisherman's Wharf",
    "Marina",
    "Pacific Heights",
    "Presidio",
    "Russian Hill",
    "Treasure Island",
    "Yerba Buena Island",
    "Treasure Island/YBI",
    "Presidio Heights",
    "Lake --The Presidio",
    "Yerba Buena",
    "North Waterfront",
]

Outside_Lands = [
    "Forest Hill",
    "Ingleside",
    "Ingleside Terrace",
    "Ocean View",
    "Parkside",
    "Outer Richmond",
    "Seacliff",
    "Sea Cliff",
    "St. Francis Wood",
    "Inner Sunset",
    "West Portal",
    "Westwood Highlands",
    "Westwood Park",
    "Golden Gate Park",
    "Inner Richmond",
    "Oceanview",
    "Outer Sunset",
    "Balboa Terrace",
    "Central Richmond",
    "Lakeshore",
    "Lake Shore",
    "Monterey Heights",
    "Central Sunset",
    "Forest Hill Extension",
    "Golden Gate Heights",
    "Ingleside Heights",
    "Pine Lake Park",
    "Stonestown",
    "Lakeside",
    "Lake",
    "Inner Parkside",
    "Merced Heights",
    "Forest Knolls",
    "Clarendon Heights",
    "Merced Manor",
    "Mount Davidson Manor",
    "Outer Parkside",
    "Park North",
    "Sunnyside",
]

Western_Addition = [
    "Alamo Square",
    "Anza Vista",
    "Cathedral Hill",
    "Cole Valley",
    "Corona Heights",
    "Duboce Triangle",
    "Fillmore",
    "Haight Ashbury",
    "Hayes Valley",
    "Japantown",
    "Jordan Park/Laurel Heights",
    "Laurel Heights",
    "Lower Haight",
    "Western Addition",
    "Haight",
    "Parnassus/Ashbury Heights",
    "Lower Pacific Heights",
    "Lone Mountain",
    "Panhandle",
]

Southern = [
    "Bayview",
    "Hunters Point",
    "Bernal Heights",
    "Castro",
    "Croker Amazon",
    "Crocker Amazon",
    "Diamond Heights",
    "Dogpatch",
    "Eureka Valley",
    "Excelsior",
    "Glen Park",
    "Mission",
    "Noe Valley",
    "Mission Dolores",
    "Outer Mission",
    "Portola",
    "Potrero Hill",
    "Visitacion Valley",
    "Castro/Upper Market",
    "Upper Market",
    "Mission Terrace",
    "Bayview Heights",
    "Buena Vista",
    "Buena Vista Park",
    "Eureka Valley/Dolores Heights",
    "Inner Mission",
    "Twin Peaks",
    "West of Twin Peaks",
    "Little Hollywood",
    "Midtown Terrace",
    "Miraloma Park",
    "Sherwood Forest",
    "Silver Terrace",
]

In [11]:
# Add a "District" column by looking each neighbourhood up in the district
# lists. np.select takes the first condition that matches, so the lists are
# consulted in the order given here; a neighbourhood found in none of them
# gets an empty string.
neighbourhood_names = Airbnb_avg["neighbourhood"]
district_rules = [
    (Downtown, "Downtown"),
    (North_Downtown, "North Downtown"),
    (Outside_Lands, "Outside Lands"),
    (Western_Addition, "Western Addition"),
    (Southern, "Southern"),
]
Airbnb_avg["District"] = np.select(
    [neighbourhood_names.isin(members) for members, label in district_rules],
    [label for members, label in district_rules],
    default="",
)

In [12]:
# Write the merged summary to disk, leaving the row index out of the file
Airbnb_avg.to_csv(path_or_buf='Data/airbnbdataanalysis.csv', index=False)