In [1]:
# Dependencies

import gmaps
import numpy as np
import pandas as pd
import requests
import time
from datetime import datetime
from scipy.stats import linregress
from matplotlib import pyplot as plt
from config_Laura import (gkey)

In [2]:
property_dataset = pd.read_csv('Resources_Laura/clean_property_value.csv')
property_dataset.head()

Unnamed: 0,RegionName,RegionType,StateName,State,City,Metro,CountyName,1/31/14,2/28/21,3/31/21,Bdrm,% Change,Lat,Lng
0,95412,Zip,CA,CA,Annapolis,Santa Rosa,Sonoma County,721046.0,526204,520369,1,-27.831373,38.713892,-123.326167
1,95412,Zip,CA,CA,Annapolis,Santa Rosa,Sonoma County,730825.0,653418,645435,2,-11.684056,38.713892,-123.326167
2,95412,Zip,CA,CA,Annapolis,Santa Rosa,Sonoma County,772083.0,935029,951287,3,23.210458,38.713892,-123.326167
3,95017,Zip,CA,CA,Davenport,Santa Cruz-Watsonville,Santa Cruz County,789701.0,609039,612187,1,-22.478634,37.091858,-122.231354
4,95017,Zip,CA,CA,Davenport,Santa Cruz-Watsonville,Santa Cruz County,669451.0,826864,847012,2,26.523375,37.091858,-122.231354


In [9]:
#sorting by % change and removing those that are negative, since we do not want to see those in the heatmap.
property_dataset = property_dataset.sort_values(by='% Change', ascending=False)
property_dataset = property_dataset.loc[property_dataset['% Change']>0]

In [10]:
# after cleaning, create back a dataframe for each of the houses with specific bedroom markets in case we want to separate
one_bedr = property_dataset.loc[property_dataset['Bdrm']==1]
two_bedr = property_dataset.loc[property_dataset['Bdrm']==2]
three_bedr = property_dataset.loc[property_dataset['Bdrm']==3]

In [11]:
min(three_bedr['% Change'])

2.541375318248342

In [12]:
#CONFIGURE GMAPS
gmaps.configure(api_key=gkey)

# Store 'Lat' and 'Lng' into  locations 
locations = three_bedr[["Lat", "Lng"]].astype(float)

# humity to plot heat map
CAGR_percent = three_bedr["% Change"].astype(float)

In [13]:
# location Heatmap layer (change depending on which one you'd like to plot)
fig = gmaps.figure()

heat_layer = gmaps.heatmap_layer(locations, weights=CAGR_percent, 
                                 dissipating=False, max_intensity=230,
                                 point_radius = 0.1)

fig.add_layer(heat_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [24]:
# rank each of the top 20 cities (for each bedroom) and find the common denominator amongst all 3 types to rank them to identify cities that have increased their value in all 3 types of residential properties.
count = 1

one_bedr_zip = one_bedr['RegionName'].to_list()

rank_one = []
new_zipcode = []

for zipcode in one_bedr_zip:
    rank_one.append(count)
    new_zipcode.append(zipcode)
    count = count + 1
    
one_bedr_df = pd.DataFrame({'RegionName':new_zipcode,'rank_one':rank_one})

In [28]:
# rank each of the top 20 cities (for each bedroom) and find the common denominator amongst all 3 types to rank them to identify cities that have increased their value in all 3 types of residential properties.
count = 1

two_bedr_zip = two_bedr['RegionName'].to_list()

rank_two = []
new_zipcode = []

for zipcode in two_bedr_zip:
    rank_two.append(count)
    new_zipcode.append(zipcode)
    count = count + 1
    
two_bedr_df = pd.DataFrame({'RegionName':new_zipcode,'rank_two':rank_two})

In [29]:
# rank each of the top 20 cities (for each bedroom) and find the common denominator amongst all 3 types to rank them to identify cities that have increased their value in all 3 types of residential properties.
count = 1

three_bedr_zip = three_bedr['RegionName'].to_list()

rank_three = []
new_zipcode = []

for zipcode in three_bedr_zip:
    rank_three.append(count)
    new_zipcode.append(zipcode)
    count = count + 1
    
three_bedr_df = pd.DataFrame({'RegionName':new_zipcode,'rank_three':rank_three})

In [31]:
# final summary table to rank the zip codes with the highest increases in property values. We chose rank because if we do an average of % change of al three types, one high increase may impact the overall average.
ranking = pd.merge(one_bedr_df, two_bedr_df, on="RegionName")
ranking = pd.merge(ranking, three_bedr_df, on="RegionName")
ranking = ranking.dropna()
ranking.head()


Unnamed: 0,RegionName,rank_one,rank_two,rank_three
0,95822,1,24,331
1,95828,2,117,240
2,90266,3,1,3
3,95816,4,139,9
4,95818,5,408,8


In [32]:
#add rankings together, to see which zipcode has best % increase across all home types

sum_column = ranking["rank_one"] + ranking["rank_two"] + ranking["rank_three"]
ranking["rank_sum"] = sum_column
ranking = ranking.sort_values(by='rank_sum', ascending=True)
ranking.head(10)

Unnamed: 0,RegionName,rank_one,rank_two,rank_three,rank_sum
2,90266,3,1,3,7
9,92252,10,16,47,73
12,90254,13,44,28,85
75,95205,78,3,13,94
41,94621,43,52,25,120
50,95215,52,32,41,125
6,92007,7,42,81,130
19,95824,20,29,94,143
71,95351,74,13,59,146
3,95816,4,139,9,152


In [88]:
#find city that has best property increase value in all 3 house types
winner_value = int(ranking['RegionName'].head(1))
finder_value = property_dataset.loc[property_dataset['RegionName']==winner_value]
answer = finder_value['City']
answer = answer.drop_duplicates()


print('The zipcode with best ranking in terms of property value increase for one, two and three bedrooms is Sacramento''')




The zipcode with best ranking in terms of property value increase for one, two and three bedrooms is Sacramento


In [101]:
# dataset to plot the top 10 cities with highest ranking property increase for all 3 bedrooms
most_favorable = ranking.head(10)
most_favorable = most_favorable['RegionName'].to_list()
most_favorable_group = property_dataset.loc[property_dataset['RegionName'].isin(most_favorable)]
most_favorable_group

Unnamed: 0,RegionName,RegionType,StateName,State,City,Metro,CountyName,1/31/14,2/28/21,3/31/21,Bdrm,% Change,Lat,Lng
3470,95822,Zip,CA,CA,Sacramento,Sacramento--Roseville--Arden-Arcade,Sacramento County,115210.0,390018,398346,1,245.756445,38.512871,-121.497029
3631,95828,Zip,CA,CA,Florin,Sacramento--Roseville--Arden-Arcade,Sacramento County,94743.0,293825,301028,1,217.731125,38.492988,-121.388467
3922,90266,Zip,CA,CA,Manhattan Beach,Los Angeles-Long Beach-Anaheim,Los Angeles County,804901.0,2540611,2511376,1,212.010545,33.890837,-118.398784
3876,95816,Zip,CA,CA,Sacramento,Sacramento--Roseville--Arden-Arcade,Sacramento County,179658.0,547554,551084,1,206.740585,38.580018,-121.462758
3921,90266,Zip,CA,CA,Manhattan Beach,Los Angeles-Long Beach-Anaheim,Los Angeles County,802363.0,2372472,2362929,3,194.496257,33.890837,-118.398784
3473,95818,Zip,CA,CA,Sacramento,Sacramento--Roseville--Arden-Arcade,Sacramento County,170088.0,492207,500003,1,193.96724,38.560306,-121.497029
3749,95815,Zip,CA,CA,Sacramento,Sacramento--Roseville--Arden-Arcade,Sacramento County,93202.0,266890,272679,1,192.567756,38.596316,-121.439904
3920,90266,Zip,CA,CA,Manhattan Beach,Los Angeles-Long Beach-Anaheim,Los Angeles County,684856.0,2005789,1993710,2,191.113752,33.890837,-118.398784
3861,92007,Zip,CA,CA,Encinitas,San Diego-Carlsbad,San Diego County,499139.0,1373708,1371843,1,174.841878,33.017025,-117.27349
3769,95358,Zip,CA,CA,Modesto,Modesto,Stanislaus County,97053.0,254763,260051,1,167.94741,37.610791,-121.102294


In [106]:
cities = most_favorable_group["City"].tolist()
marker_locations = most_favorable_group[['Lat', 'Lng']]

In [107]:
# Create a marker_layer using the poverty list to fill the info box and combine both maps together
fig = gmaps.figure()
markers = gmaps.marker_layer(marker_locations,
    info_box_content=[f"Profitable: {cities}" for city in cities])
fig.add_layer(markers)
fig

Figure(layout=FigureLayout(height='420px'))