# Part II: Where to live?

In [None]:
import gmaps  #heatmap
import os
import requests
from requests import RequestException  #automatic extraction

## Preparation
# Per-zipcode summary of the listings: number of non-null ids plus the mean price and
# mean review scores.
# Each column is selected *before* it is aggregated, so only that column is reduced.
# Calling .mean() on the whole grouped frame averages every column on every call and
# fails on non-numeric columns in recent pandas releases.
listings_by_zip = lists.groupby('zipcode')
listzipcode = pd.DataFrame({
    'count': listings_by_zip['id'].count(),
    'avgprice': listings_by_zip['price'].mean(),
    'avgscore': listings_by_zip['review_scores_rating'].mean(),
    'location': listings_by_zip['review_scores_location'].mean(),
    'value': listings_by_zip['review_scores_value'].mean(),
})
# Turn the zipcode index back into an ordinary 'zipcode' column (used as a merge key later).
listzipcode = listzipcode.reset_index()

In [None]:
## 1.Heat map of Airbnb listing density in Los Angeles
# link to get google map api_key: https://developers.google.com/maps/documentation/embed/get-api-key
# The key is read from the GOOGLE_MAPS_API_KEY environment variable when it is set, so a
# personal key can be supplied without editing the notebook.
# NOTE(review): the fallback literal is a credential committed to source control. It
# should be rotated and the fallback removed once every user sets the variable.
gmaps.configure(api_key = os.environ.get("GOOGLE_MAPS_API_KEY",
                                         "AIzaSyATnwHhxPqq3QN7q5DSIfcMQOTjsStXF30"))
# One (latitude, longitude) tuple per listing; this list is what the heatmap layer receives.
location_columns = lists[['latitude', 'longitude']]
location_tuples = [tuple(x) for x in location_columns.values]
fig = gmaps.figure()
fig.add_layer(gmaps.heatmap_layer(location_tuples))
# Bare expression as the last statement so the notebook renders the figure.
fig

In [None]:
## 2.Choropleth of Crime Rate by zip code in Los Angeles
# Load the ZIP Code Tabulation Area (ZCTA) shapefile, keep only the ZCTA code and its
# geometry, and expose the code under the column name 'zipcode' used by the other tables.
shapefile = (
    gpd.read_file("tl_2018_us_zcta510.shp")[['ZCTA5CE10','geometry']]
    .rename(columns = {'ZCTA5CE10': 'zipcode'})
)
# lazip.xlsx is read with every value cast to str. The inner join on 'zipcode' then keeps
# only the shapes whose code is listed in that sheet.
la_zip = pd.read_excel('lazip.xlsx').astype(str)
shapefile = shapefile.merge(la_zip, how = 'inner', on = 'zipcode')

# Scrape the violent crime figure per zip code from www.bestplaces.net and merge it
# into the listzipcode frame as the 'crimerate' column.
# For every zip code the page is fetched and the first whitespace-delimited token after
# "violent crime is" is stored (with a trailing '.<small>' removed). Zip codes whose page
# cannot be fetched, or does not contain that phrase, get the string 'NaN'.
# NOTE(review): values are kept as strings, exactly as before. Convert them (e.g. with
# pd.to_numeric) if a numeric column is needed downstream - confirm.
crime_url = 'https://www.bestplaces.net/crime/zip-code/*/*'
crime_marker = "violent crime is"
procrime = {}
for zipcode in listzipcode['zipcode']:
    # URLs always use '/', so build them with string formatting rather than
    # os.path.join, whose separator depends on the operating system.
    url = '{}/{}'.format(crime_url, zipcode)
    try:
        # The timeout keeps one unresponsive request from stalling the whole loop.
        html = requests.get(url, timeout = 10).text
    except RequestException:
        # A failed request is recorded as missing instead of aborting the scrape.
        procrime[zipcode] = 'NaN'
        continue
    # Test for the same phrase that is split on, so a page without it is recorded
    # as missing instead of raising IndexError.
    tokens = html.split(crime_marker, 1)[1].split(None, 1) if crime_marker in html else []
    if tokens:
        procrime[zipcode] = tokens[0].replace('.<small>','')
    else:
        procrime[zipcode] = 'NaN'
# The dict keys become the index; reset_index exposes them as a column named 'index',
# which is then renamed to 'zipcode' to serve as the merge key.
crime_rate = pd.DataFrame({'crimerate':procrime}).reset_index()
crime_rate.rename(columns = {'index': 'zipcode'}, inplace = True)
listzipcode = listzipcode.merge(crime_rate, on = 'zipcode', how = 'left')

# Merge geodata with cleaned crime rate dataset
# The right join keeps every row of listzipcode, including zip codes with no matching shape.
listchoropleth = shapefile.merge(listzipcode, on='zipcode', how='right')

# Create choropleth of Crime Rate by zip code in Los Angeles
variable_crime = 'crimerate'
vmin, vmax = 50, 100
fig, ax = plt.subplots(1, figsize = (20, 10))
# Fixed x/y window for the map (presumably degrees of longitude/latitude - confirm
# against the shapefile's coordinate reference system).
ax.set_xlim([-119.1, -117.4])
ax.set_ylim([33.6, 34.9])
ax.axis('off')
ax.set_title('Violent Crime by Zip Code in Los Angeles',
             fontdict = {'fontsize': '25', 'fontweight' : '3'})

ax.annotate('Source: www.bestplaces.net', xy = (0.1, .08),
            xycoords = 'figure fraction', horizontalalignment = 'left',
            verticalalignment = 'top', fontsize = 12, color = '#555555')

img_crime = listchoropleth.plot(column = variable_crime, scheme = 'fisher_jenks',
                                cmap = 'Reds', linewidth = 0.8, ax = ax, edgecolor = '0.8')
# Create colorbar as legend
# NOTE(review): the colorbar spans the fixed vmin..vmax range, while the map itself is
# drawn with scheme='fisher_jenks' and is not given vmin/vmax - confirm the two agree.
sm_crime = plt.cm.ScalarMappable(cmap = 'Reds', norm = plt.Normalize(vmin = vmin, vmax = vmax))
# Give the mappable an (empty) data array through the public setter instead of
# assigning the private _A attribute.
sm_crime.set_array([])
# This mappable is not attached to any axes, so name the axes the colorbar should take
# space from. Without `ax`, newer Matplotlib releases cannot determine it.
cbar_crime = fig.colorbar(sm_crime, ax = ax)


In [None]:
## 3.Choropleth of Average Price by zip code in Los Angeles
variable_price = 'avgprice'
vmin, vmax = 0, 2000
fig, ax = plt.subplots(1, figsize=(20, 10))
# Fixed x/y window for the map (presumably degrees of longitude/latitude - confirm
# against the shapefile's coordinate reference system).
ax.set_xlim([-119.1, -117.4])
ax.set_ylim([33.6, 34.9])
ax.axis('off')
ax.set_title('Average Price by Zip Code in Los Angeles',
             fontdict = {'fontsize': '25', 'fontweight' : '3'})

ax.annotate('Source: Inside Airbnb', xy = (0.1, .08),
            xycoords = 'figure fraction', horizontalalignment = 'left',
            verticalalignment = 'top', fontsize = 12, color = '#555555')

img_price = listchoropleth.plot(column = variable_price, scheme='fisher_jenks',
                                cmap = 'Blues', linewidth = 0.8, ax = ax, edgecolor = '0.8')

# Colorbar used as the legend.
# NOTE(review): the colorbar spans the fixed vmin..vmax range, while the map itself is
# drawn with scheme='fisher_jenks' and is not given vmin/vmax - confirm the two agree.
sm_price = plt.cm.ScalarMappable(cmap = 'Blues', norm = plt.Normalize(vmin = vmin, vmax = vmax))
# Give the mappable an (empty) data array through the public setter instead of
# assigning the private _A attribute.
sm_price.set_array([])
# This mappable is not attached to any axes, so name the axes the colorbar should take
# space from. Without `ax`, newer Matplotlib releases cannot determine it.
cbar_price = fig.colorbar(sm_price, ax = ax)


In [None]:
## 4.Line chart showing rating distribution of most expensive listings
# The zip codes below are presumably the ones returned by
# listzipcode.query('avgprice > 1000') - confirm before changing them.
priciest_zipcodes = ["90077", "90210", "90265"]

# Review-score columns mapped to the short labels used on the chart.
rating_labels = {
    'review_scores_accuracy': 'accuracy',
    'review_scores_cleanliness': 'cleanliness',
    'review_scores_checkin': 'checkin',
    'review_scores_communication': 'communication',
    'review_scores_location': 'location',
    'review_scores_value': 'value',
}

# Listings in the selected zip codes, restricted to zipcode plus the six score columns.
in_priciest_zipcodes = lists['zipcode'].isin(priciest_zipcodes)
df_price = lists.loc[in_priciest_zipcodes, ['zipcode'] + list(rating_labels)]
df_price = df_price.rename(columns = rating_labels)

# Mean of each score over those listings, drawn as a single line.
high_price = df_price[list(rating_labels.values())].mean()
high_price.plot(kind = 'line', title = 'Rating Distribution of Most Expensive Listings')


In [None]:
## 5.Horizontal bar chart for 20 neighbourhoods with highest score of value
# Per-neighbourhood number of non-null ids and mean value score.
# Each column is selected *before* it is aggregated, so only that column is reduced.
# Calling .mean() on the whole grouped frame averages every column and fails on
# non-numeric columns in recent pandas releases.
listings_by_neighbourhood = lists.groupby('neighbourhood_cleansed')
listvalue = pd.DataFrame({
    'count': listings_by_neighbourhood['id'].count(),
    'value': listings_by_neighbourhood['review_scores_value'].mean(),
})
# Keep neighbourhoods with more than 100 listings, sort ascending by mean value score so
# that tail(20) picks the 20 highest, and shorten the neighbourhood column name.
listvalue = (
    listvalue.reset_index()
    .query('count>100')
    .sort_values(by = 'value')
    .tail(20)
    .rename(columns = {'neighbourhood_cleansed':'neighbourhood'})
)

# xlim is narrowed to 9.4-10 so that the differences between the bars are visible.
listvalue.plot(kind = 'barh',
               figsize = (8, 8),
               y = 'value',
               x = 'neighbourhood',
               title = '20 Neighbourhoods with Highest Score of Value',
               xlim = (9.4,10))
