In [19]:
# Dependencies

import gmaps
import numpy as np
import pandas as pd
import requests
import time
from datetime import datetime
from scipy.stats import linregress
from matplotlib import pyplot as plt
from config_Laura import (gkey)

In [24]:
property_dataset = pd.read_csv('Resources_Laura/clean_property_value.csv')
len(property_dataset)

3923

In [30]:
property_data_neg = property_dataset.loc[property_dataset['% Change']<3]
property_data_neg

# negproperty_dataset['% Change']<0:

Unnamed: 0,RegionName,RegionType,StateName,State,City,Metro,CountyName,1/31/14,2/28/21,3/31/21,Bdrm,% Change,Lat,Lng
0,95412,Zip,CA,CA,Annapolis,Santa Rosa,Sonoma County,721046.0,526204,520369,1,-27.831373,38.713892,-123.326167
1,95412,Zip,CA,CA,Annapolis,Santa Rosa,Sonoma County,730825.0,653418,645435,2,-11.684056,38.713892,-123.326167
3,95017,Zip,CA,CA,Davenport,Santa Cruz-Watsonville,Santa Cruz County,789701.0,609039,612187,1,-22.478634,37.091858,-122.231354
6,94020,Zip,CA,CA,La Honda,San Francisco-Oakland-Hayward,San Mateo County,1047622.0,848184,854623,2,-18.42258,37.291115,-122.208658
8,94060,Zip,CA,CA,Pescadero,San Francisco-Oakland-Hayward,San Mateo County,1425555.0,1185489,1185573,2,-16.834286,37.205116,-122.333426
10,93110,Zip,CA,CA,Santa Barbara,Santa Maria-Santa Barbara,Santa Barbara County,589390.0,508970,511314,1,-13.246916,34.508431,-119.731021
13,93920,Zip,CA,CA,Big Sur,Salinas,Monterey County,2695050.0,2419143,2379601,3,-11.704755,36.051496,-121.514162
14,95559,Zip,CA,CA,Myers Flat,Eureka-Arcata-Fortuna,Humboldt County,330723.0,298207,298834,2,-9.642208,40.196618,-123.800432
15,96142,Zip,CA,CA,Tahoma,Sacramento--Roseville--Arden-Arcade,El Dorado County,476696.0,436435,437163,1,-8.293126,39.05013,-120.141609
18,93224,Zip,CA,CA,Fellows,Bakersfield,Kern County,152653.0,143991,144003,3,-5.666446,35.203105,-119.58036


In [4]:
#sorting by % change and removing those that are negative, since we do not want to see those in the heatmap.
property_dataset = property_dataset.sort_values(by='% Change', ascending=False)
property_dataset = property_dataset.loc[property_dataset['% Change']>0]

In [5]:
# after cleaning, create back a dataframe for each of the houses with specific bedroom markets in case we want to separate
one_bedr = property_dataset.loc[property_dataset['Bdrm']==1]
two_bedr = property_dataset.loc[property_dataset['Bdrm']==2]
three_bedr = property_dataset.loc[property_dataset['Bdrm']==3]

In [33]:
three_bedr.head(10)

Unnamed: 0,RegionName,RegionType,StateName,State,City,Metro,CountyName,1/31/14,2/28/21,3/31/21,Bdrm,% Change,Lat,Lng
3109,90211,Zip,CA,CA,Beverly Hills,Los Angeles-Long Beach-Anaheim,Los Angeles County,789562.0,2575394,2551752,3,223.185766,34.066107,-118.384178
3919,91108,Zip,CA,CA,San Marino,Los Angeles-Long Beach-Anaheim,Los Angeles County,609176.0,1810066,1820804,3,198.896214,34.124792,-118.11812
3921,90266,Zip,CA,CA,Manhattan Beach,Los Angeles-Long Beach-Anaheim,Los Angeles County,802363.0,2372472,2362929,3,194.496257,33.890837,-118.398784
2743,90212,Zip,CA,CA,Beverly Hills,Los Angeles-Long Beach-Anaheim,Los Angeles County,994511.0,2904606,2876834,3,189.27121,34.061711,-118.401705
3581,90027,Zip,CA,CA,Los Angeles,Los Angeles-Long Beach-Anaheim,Los Angeles County,586644.0,1677238,1693712,3,188.712064,34.122054,-118.293589
3917,95817,Zip,CA,CA,Sacramento,Sacramento--Roseville--Arden-Arcade,Sacramento County,183057.0,492476,499768,3,173.012231,38.550043,-121.459901
3248,94610,Zip,CA,CA,Oakland,San Francisco-Oakland-Hayward,Alameda County,669975.0,1767567,1799871,3,168.647487,37.810448,-122.239864
3472,95818,Zip,CA,CA,Sacramento,Sacramento--Roseville--Arden-Arcade,Sacramento County,297671.0,764333,770494,3,158.840801,38.560306,-121.497029
3875,95816,Zip,CA,CA,Sacramento,Sacramento--Roseville--Arden-Arcade,Sacramento County,299029.0,767138,771831,3,158.112424,38.580018,-121.462758
3538,95202,Zip,CA,CA,Stockton,Stockton-Lodi,San Joaquin County,123896.0,312822,318038,3,156.697553,37.957384,-121.288386


In [6]:
min(one_bedr['% Change'])

1.8568174028749418

In [7]:
#CONFIGURE GMAPS
gmaps.configure(api_key=gkey)

# Store 'Lat' and 'Lng' into  locations 
locations = one_bedr[["Lat", "Lng"]].astype(float)

# humity to plot heat map
CAGR_percent = one_bedr["% Change"].astype(float)

In [8]:
# location Heatmap layer (change depending on which one you'd like to plot)
fig = gmaps.figure()

heat_layer = gmaps.heatmap_layer(locations, weights=CAGR_percent, 
                                 dissipating=False, max_intensity=180,
                                 point_radius = 0.1)
fig.add_layer(heat_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [9]:
# rank each of the top 20 cities (for each bedroom) and find the common denominator amongst all 3 types to rank them to identify cities that have increased their value in all 3 types of residential properties.
count = 1

one_bedr_zip = one_bedr['RegionName'].to_list()

rank_one = []
new_zipcode = []

for zipcode in one_bedr_zip:
    rank_one.append(count)
    new_zipcode.append(zipcode)
    count = count + 1
    
one_bedr_df = pd.DataFrame({'RegionName':new_zipcode,'rank_one':rank_one})

In [10]:
# rank each of the top 20 cities (for each bedroom) and find the common denominator amongst all 3 types to rank them to identify cities that have increased their value in all 3 types of residential properties.
count = 1

two_bedr_zip = two_bedr['RegionName'].to_list()

rank_two = []
new_zipcode = []

for zipcode in two_bedr_zip:
    rank_two.append(count)
    new_zipcode.append(zipcode)
    count = count + 1
    
two_bedr_df = pd.DataFrame({'RegionName':new_zipcode,'rank_two':rank_two})

In [11]:
# rank each of the top 20 cities (for each bedroom) and find the common denominator amongst all 3 types to rank them to identify cities that have increased their value in all 3 types of residential properties.
count = 1

three_bedr_zip = three_bedr['RegionName'].to_list()

rank_three = []
new_zipcode = []

for zipcode in three_bedr_zip:
    rank_three.append(count)
    new_zipcode.append(zipcode)
    count = count + 1
    
three_bedr_df = pd.DataFrame({'RegionName':new_zipcode,'rank_three':rank_three})

In [12]:
# final summary table to rank the zip codes with the highest increases in property values. We chose rank because if we do an average of % change of al three types, one high increase may impact the overall average.
ranking = pd.merge(one_bedr_df, two_bedr_df, on="RegionName")
ranking = pd.merge(ranking, three_bedr_df, on="RegionName")
ranking = ranking.dropna()
ranking.head()


Unnamed: 0,RegionName,rank_one,rank_two,rank_three
0,95822,1,24,331
1,95828,2,117,240
2,90266,3,1,3
3,95816,4,139,9
4,95818,5,408,8


In [13]:
#add rankings together, to see which zipcode has best % increase across all home types

sum_column = ranking["rank_one"] + ranking["rank_two"] + ranking["rank_three"]
ranking["rank_sum"] = sum_column
ranking = ranking.sort_values(by='rank_sum', ascending=True)
ranking.head(10)

Unnamed: 0,RegionName,rank_one,rank_two,rank_three,rank_sum
2,90266,3,1,3,7
9,92252,10,16,47,73
12,90254,13,44,28,85
75,95205,78,3,13,94
41,94621,43,52,25,120
50,95215,52,32,41,125
6,92007,7,42,81,130
19,95824,20,29,94,143
71,95351,74,13,59,146
3,95816,4,139,9,152


In [14]:
#find city that has best property increase value in all 3 house types
winner_value = int(ranking['RegionName'].head(1))
finder_value = property_dataset.loc[property_dataset['RegionName']==winner_value]
answer = finder_value['City']
answer = answer.drop_duplicates()


print('The zipcode with best ranking in terms of property value increase for one, two and three bedrooms is Sacramento''')




The zipcode with best ranking in terms of property value increase for one, two and three bedrooms is Sacramento


In [15]:
# dataset to plot the top 10 cities with highest ranking property increase for all 3 bedrooms
most_favorable = ranking.head(10)
most_favorable = most_favorable['RegionName'].to_list()
most_favorable_group = property_dataset.loc[property_dataset['RegionName'].isin(most_favorable)]
most_favorable_group

Unnamed: 0,RegionName,RegionType,StateName,State,City,Metro,CountyName,1/31/14,2/28/21,3/31/21,Bdrm,% Change,Lat,Lng
3922,90266,Zip,CA,CA,Manhattan Beach,Los Angeles-Long Beach-Anaheim,Los Angeles County,804901.0,2540611,2511376,1,212.010545,33.890837,-118.398784
3876,95816,Zip,CA,CA,Sacramento,Sacramento--Roseville--Arden-Arcade,Sacramento County,179658.0,547554,551084,1,206.740585,38.580018,-121.462758
3921,90266,Zip,CA,CA,Manhattan Beach,Los Angeles-Long Beach-Anaheim,Los Angeles County,802363.0,2372472,2362929,3,194.496257,33.890837,-118.398784
3920,90266,Zip,CA,CA,Manhattan Beach,Los Angeles-Long Beach-Anaheim,Los Angeles County,684856.0,2005789,1993710,2,191.113752,33.890837,-118.398784
3895,95205,Zip,CA,CA,Stockton,Stockton-Lodi,San Joaquin County,90785.0,248692,253963,2,179.741147,37.964259,-121.256914
3861,92007,Zip,CA,CA,Encinitas,San Diego-Carlsbad,San Diego County,499139.0,1373708,1371843,1,174.841878,33.017025,-117.27349
3900,92252,Zip,CA,CA,Joshua Tree,Riverside-San Bernardino-Ontario,San Bernardino County,96129.0,247008,255545,1,165.835492,34.193766,-116.254086
3875,95816,Zip,CA,CA,Sacramento,Sacramento--Roseville--Arden-Arcade,Sacramento County,299029.0,767138,771831,3,158.112424,38.580018,-121.462758
3891,95351,Zip,CA,CA,Modesto,Modesto,Stanislaus County,100196.0,251848,257090,2,156.587089,37.618393,-120.993368
3912,90254,Zip,CA,CA,Hermosa Beach,Los Angeles-Long Beach-Anaheim,Los Angeles County,465994.0,1163724,1170339,1,151.148942,33.860069,-118.398784


In [42]:
unique_cities = most_favorable_group['City']
unique_cities = unique_cities.unique()
unique_cities

array(['Manhattan Beach', 'Sacramento', 'Stockton', 'Encinitas',
       'Joshua Tree', 'Modesto', 'Hermosa Beach', 'Garden Acres',
       'Oakland'], dtype=object)

In [43]:
cities = most_favorable_group["City"].tolist()
marker_locations = most_favorable_group[['Lat', 'Lng']]

In [44]:
# Create a marker_layer using the poverty list to fill the info box and combine both maps together
fig = gmaps.figure()
markers = gmaps.marker_layer(marker_locations,
    info_box_content=[f"Profitable: {cities}" for city in cities])
fig.add_layer(markers)
fig

Figure(layout=FigureLayout(height='420px'))