In [3]:
# Dependencies

import gmaps
import numpy as np
import pandas as pd
import requests
import time
from datetime import datetime
from scipy.stats import linregress
from matplotlib import pyplot as plt
from config import (gkey2)

In [4]:
#import datasets (each dataset represents a different number of bedrooms - 1,2,3,4, or 5+)
zillow_value_1 = pd.read_csv('Resources/zillow_value_1.csv')
zillow_value_2 = pd.read_csv('Resources/zillow_value_2.csv')
zillow_value_3 = pd.read_csv('Resources/zillow_value_3.csv')

In [5]:
# differentiating number of bedrooms within the dataset, given that it does not come with it
zillow_value_1['Bdrm'] = "1"
zillow_value_2['Bdrm'] = "2"
zillow_value_3['Bdrm'] = "3"
len(zillow_value_1)+len(zillow_value_2)+len(zillow_value_3)

74047

In [6]:
# merging all datasets together to work out of one clean document
properties = pd.concat([zillow_value_1,zillow_value_2,zillow_value_3])
len(properties)

74047

In [7]:
# filter for california properties only:
properties_ca = properties.loc[properties['State']=="CA"]
len(properties_ca)

4544

In [8]:
# remove columns that have values prior to date 12/31/2013
properties_ca = properties_ca[['RegionName','RegionType','StateName','State','City','Metro','CountyName','1/31/14','2/28/21','3/31/21','Bdrm']]
properties_ca.head()

Unnamed: 0,RegionName,RegionType,StateName,State,City,Metro,CountyName,1/31/14,2/28/21,3/31/21,Bdrm
12,94109,Zip,CA,CA,San Francisco,San Francisco-Oakland-Hayward,San Francisco County,716780.0,824861,820867,1
19,90250,Zip,CA,CA,Hawthorne,Los Angeles-Long Beach-Anaheim,Los Angeles County,382463.0,608374,614481,1
37,94565,Zip,CA,CA,Pittsburg,San Francisco-Oakland-Hayward,Contra Costa County,,192942,189534,1
41,90046,Zip,CA,CA,Los Angeles,Los Angeles-Long Beach-Anaheim,Los Angeles County,452491.0,647755,650469,1
83,94501,Zip,CA,CA,Alameda,San Francisco-Oakland-Hayward,Alameda County,348254.0,562250,566397,1


In [9]:
# dropping nas
properties_ca = properties_ca.dropna()
len(properties_ca)

3923

In [10]:
# calculate annual growth rate since 2014
properties_ca['CAGR'] = ((properties_ca['3/31/21'] - properties_ca['1/31/14'])/properties_ca['1/31/14']*100)
properties_ca = pd.DataFrame(properties_ca)
properties_ca = properties_ca.sort_values(by='CAGR', ascending=True)
properties_ca = properties_ca.dropna()
len(properties_ca)

3923

In [11]:
properties_ca = properties_ca.loc[properties_ca['CAGR']>0]
properties_ca

Unnamed: 0,RegionName,RegionType,StateName,State,City,Metro,CountyName,1/31/14,2/28/21,3/31/21,Bdrm,CAGR
22990,94567,Zip,CA,CA,Pope Valley,Napa,Napa County,341561.0,343077,343655,2,0.613068
1421,95762,Zip,CA,CA,El Dorado Hills,Sacramento--Roseville--Arden-Arcade,El Dorado County,347207.0,346318,353654,1,1.856817
3824,92555,Zip,CA,CA,Moreno Valley,Riverside-San Bernardino-Ontario,Riverside County,364672.0,369914,372444,1,2.131230
25349,95554,Zip,CA,CA,Myers Flat,Eureka-Arcata-Fortuna,Humboldt County,412021.0,424761,422492,3,2.541375
3667,94549,Zip,CA,CA,Lafayette,San Francisco-Oakland-Hayward,Contra Costa County,1034925.0,1073843,1063895,1,2.799237
...,...,...,...,...,...,...,...,...,...,...,...,...
3577,95816,Zip,CA,CA,Sacramento,Sacramento--Roseville--Arden-Arcade,Sacramento County,179658.0,547554,551084,1,206.740585
2106,90266,Zip,CA,CA,Manhattan Beach,Los Angeles-Long Beach-Anaheim,Los Angeles County,804901.0,2540611,2511376,1,212.010545
1015,95828,Zip,CA,CA,Florin,Sacramento--Roseville--Arden-Arcade,Sacramento County,94743.0,293825,301028,1,217.731125
9950,90211,Zip,CA,CA,Beverly Hills,Los Angeles-Long Beach-Anaheim,Los Angeles County,789562.0,2575394,2551752,3,223.185766


In [14]:
# creating lat and long columns for final run
properties_ca['Lat'] = ""
properties_ca['Lng'] = ""
properties_ca

Unnamed: 0,RegionName,RegionType,StateName,State,City,Metro,CountyName,1/31/14,2/28/21,3/31/21,Bdrm,CAGR,Lat,Lng
22990,94567,Zip,CA,CA,Pope Valley,Napa,Napa County,341561.0,343077,343655,2,0.613068,,
1421,95762,Zip,CA,CA,El Dorado Hills,Sacramento--Roseville--Arden-Arcade,El Dorado County,347207.0,346318,353654,1,1.856817,,
3824,92555,Zip,CA,CA,Moreno Valley,Riverside-San Bernardino-Ontario,Riverside County,364672.0,369914,372444,1,2.131230,,
25349,95554,Zip,CA,CA,Myers Flat,Eureka-Arcata-Fortuna,Humboldt County,412021.0,424761,422492,3,2.541375,,
3667,94549,Zip,CA,CA,Lafayette,San Francisco-Oakland-Hayward,Contra Costa County,1034925.0,1073843,1063895,1,2.799237,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3577,95816,Zip,CA,CA,Sacramento,Sacramento--Roseville--Arden-Arcade,Sacramento County,179658.0,547554,551084,1,206.740585,,
2106,90266,Zip,CA,CA,Manhattan Beach,Los Angeles-Long Beach-Anaheim,Los Angeles County,804901.0,2540611,2511376,1,212.010545,,
1015,95828,Zip,CA,CA,Florin,Sacramento--Roseville--Arden-Arcade,Sacramento County,94743.0,293825,301028,1,217.731125,,
9950,90211,Zip,CA,CA,Beverly Hills,Los Angeles-Long Beach-Anaheim,Los Angeles County,789562.0,2575394,2551752,3,223.185766,,


In [12]:
# create a params dict that will be updated with new city each iteration
params = {"key": gkey2}

# Loop through the cities_pd and run a lat/long search for each city
for index, row in properties_ca.iterrows():
    base_url = "https://maps.googleapis.com/maps/api/geocode/json"

    zipcode = row['RegionName']

    # update address key value
    params['address'] = (zipcode)

    # make request
    cities_lat_lng = requests.get(base_url, params=params)
    
    # print the cities_lat_lng url, avoid doing for public github repos in order to avoid exposing key
    # print(cities_lat_lng.url)
    
    # convert to json
    cities_lat_lng = cities_lat_lng.json()

    properties_ca.loc[index, "Lat"] = cities_lat_lng["results"][0]["geometry"]["location"]["lat"]
    properties_ca.loc[index, "Lng"] = cities_lat_lng["results"][0]["geometry"]["location"]["lng"]

In [15]:
properties_ca.head()

Unnamed: 0,RegionName,RegionType,StateName,State,City,Metro,CountyName,1/31/14,2/28/21,3/31/21,Bdrm,CAGR,Lat,Lng
22990,94567,Zip,CA,CA,Pope Valley,Napa,Napa County,341561.0,343077,343655,2,0.613068,,
1421,95762,Zip,CA,CA,El Dorado Hills,Sacramento--Roseville--Arden-Arcade,El Dorado County,347207.0,346318,353654,1,1.856817,,
3824,92555,Zip,CA,CA,Moreno Valley,Riverside-San Bernardino-Ontario,Riverside County,364672.0,369914,372444,1,2.13123,,
25349,95554,Zip,CA,CA,Myers Flat,Eureka-Arcata-Fortuna,Humboldt County,412021.0,424761,422492,3,2.541375,,
3667,94549,Zip,CA,CA,Lafayette,San Francisco-Oakland-Hayward,Contra Costa County,1034925.0,1073843,1063895,1,2.799237,,


In [16]:
properties_ca.to_csv("Resources/clean_property_value.csv", index = False)