In [5]:
import pandas as pd
import numpy as np

In [6]:
df = pd.read_csv("./data/properties_2017.csv")

  interactivity=interactivity, compiler=compiler, result=result)


In [7]:
print(df.head)

<bound method NDFrame.head of          parcelid  airconditioningtypeid  architecturalstyletypeid  \
0        10754147                    NaN                       NaN   
1        10759547                    NaN                       NaN   
2        10843547                    NaN                       NaN   
3        10859147                    NaN                       NaN   
4        10879947                    NaN                       NaN   
5        10898347                    NaN                       NaN   
6        10933547                    NaN                       NaN   
7        10940747                    NaN                       NaN   
8        10954547                    NaN                       NaN   
9        10976347                    NaN                       NaN   
10       11070347                    1.0                       NaN   
11       11073947                    NaN                       NaN   
12       11114347                    NaN                    

In [8]:
print(df.dtypes)

parcelid                          int64
airconditioningtypeid           float64
architecturalstyletypeid        float64
basementsqft                    float64
bathroomcnt                     float64
bedroomcnt                      float64
buildingclasstypeid             float64
buildingqualitytypeid           float64
calculatedbathnbr               float64
decktypeid                      float64
finishedfloor1squarefeet        float64
calculatedfinishedsquarefeet    float64
finishedsquarefeet12            float64
finishedsquarefeet13            float64
finishedsquarefeet15            float64
finishedsquarefeet50            float64
finishedsquarefeet6             float64
fips                            float64
fireplacecnt                    float64
fullbathcnt                     float64
garagecarcnt                    float64
garagetotalsqft                 float64
hashottuborspa                   object
heatingorsystemtypeid           float64
latitude                        float64


In [31]:
# Minimum/Maximum Longitude/Latitude Coordinates of each entry
# Coordinates are provided are multipled by 10E6, so we must divide to receive proper coordinates.
lat_min = df['latitude'].min()/(10**6)
lat_max = df['latitude'].max()/(10**6)
long_min = df['longitude'].min()/(10**6)
long_max = df['longitude'].max()/(10**6)
# In order to reduce the number of API calls required to collect our data, we must create a map for making API calls.
# To get a better idea of how much area we are covering, we will check the distance we are covering using Geopy

!pip install geopy
from geopy.distance import vincenty

# By obtaining the minimum latitude and longitude coordinates from our data set, we can create a square that includes all properties within our dataset.
# The following coordinates represent the corners of that square.
# Naming convention for us is "[latitude][longitude]_coord"

minmin_coord = (lat_min, long_min)
minmax_coord = (lat_min, long_max)
maxmax_coord = (lat_max, long_max)
maxmin_coord = (lat_max, long_min)

# 2 options exist for determining distance using GeoPY - the Vincenty and great circle distance calculation methods.
# The Vincenty appears to be more accurate so we will be using that.

latitude_distance = vincenty(minmin_coord, maxmin_coord).miles
longitude_distance = vincenty(minmin_coord, minmax_coord).miles 
diagonal_distance = vincenty(minmin_coord, maxmax_coord).miles
print("Latitude Distance in Miles: {}".format(latitude_distance))
print("Longitude Distance in Miles: {}".format(longitude_distance))
print("Diagonal distance in Miles: {}".format(diagonal_distance))
print("Square Miles Covered: {}".format(latitude_distance * longitude_distance))

# Consider slicing to avoid the Pacific Ocean and minimize API calls

Latitude Distance in Miles: 103.06063258699005
Longitude Distance in Miles: 111.16608885291676
Diagonal distance in Miles: 150.88243723630603
Square Miles Covered: 11456.847439403144


In [42]:
# In order to cover ~11,500 square miles, we will be breaking our coordinates up into zones to make API calls.
# This greatly minimizes the number of API calls we will be required to make.
# The standard process for this will be to 1. divide into zones 2. make an API call for each zone coordinate 3. gather all data

# For practical purposes, we will create 1 coordinate for approximately every 10 miles covered. 
lat_coordinate_count = round(latitude_distance/10)
long_coordinate_count = round(longitude_distance/10)

print("Number of Latitudes: {0} \nNumber of Longitudes: {1}\nTotal Coordinates: {2}".format(lat_coordinate_count, long_coordinate_count, lat_coordinate_count * long_coordinate_count))

# Creating Coordinate List to be used for future API calls

coordinate_list = []
lat_list = []
long_list = []

# creating the lat_list
lat_interval = (lat_max - lat_min) / lat_coordinate_count
temp_lat = lat_min
while len(lat_list) < lat_coordinate_count:
    lat_list.append(temp_lat)
    temp_lat += lat_interval
    
# creating the long_list
long_interval = (long_max - long_min) / long_coordinate_count
temp_long = long_min
while len(long_list) < long_coordinate_count:
    long_list.append(temp_long)
    temp_long += long_interval

# using the lat_list and long_list to create the Coordinate List
for i in lat_list:
    for j in long_list:
        coordinate_list.append((i,j))

print("{} Coordinates assigned to the variable 'coordinate_list'")


Number of Latitudes: 10 
Number of Longitudes: 11
Total Coordinates: 110
{} Coordinates assigned to the variable 'coordinate_list'


In [10]:
# Dependencies required for querying the Yelp API

!pip install requests==2.18.4

# import argparse
import json
import pprint
import requests
import sys
import urllib
import os 

from urllib.error import HTTPError
from urllib.parse import quote
from urllib.parse import urlencode



In [102]:
# Yelp API key was exported to the environment in UNIX. In order to utilize this notebook, you will need to get your own API key and export it to the environment.
API_KEY = os.environ['YELP_API_KEY']
API_HOST = 'https://api.yelp.com'
SEARCH_PATH = '/v3/businesses/search'

SEARCH_LIMIT = 25

def request(host, path, api_key, url_params=None):
    url_params = url_params or {}
    url = '{0}{1}'.format(host, quote(path.encode('utf8')))
    headers = {
        'Authorization': 'Bearer %s' % api_key,
    }
    response = requests.request('GET', url, headers=headers, params=url_params)
    return response.json()

def search(api_key, term, latitude, longitude):
    url_params = {
        'term': term.replace(' ', '+'),
        'limit': SEARCH_LIMIT,
        'latitude': latitude,
        'longitude': longitude
    }
    return request(API_HOST, SEARCH_PATH, api_key, url_params=url_params)

def query_api(term, latitude, longitude):
    response = search(API_KEY, term, latitude, longitude)
    businesses = response.get('businesses')

#     if not businesses:
#         print(u'No businesses for {0} at latitude: {1} and longitude: {2} found.'.format(term, latitude, longitude))
#         return
    
    return businesses

In [66]:
# Medical Care Proximity
# Query Yelp API using coordinate list to obtain medical care facilities in range

medical_dict = {}

for coordinate in coordinate_list:
    response = query_api('medical', coordinate[0], coordinate[1])
    for business in response:
        if business['id'] not in medical_dict.keys():
            medical_dict[business['id']] = business

In [120]:
# Method for finding the closest Medical Care Center

def closest_medical(lat, long):
    min_distance = None
    for key, value in medical_dict.items():
        temp_lat = value['coordinates']['latitude']
        temp_long = value['coordinates']['longitude']
#         print("lat: {0}\n long: {1}".format(temp_lat, temp_long))
        current_distance = vincenty((lat, long), (temp_lat, temp_long)).miles
#         print("{0} : {1} miles".format(value['id'], current_distance))
        if min_distance is None:
            min_distance = current_distance
        if current_distance < min_distance:
            min_distance = current_distance
    return min_distance

In [89]:
# Method for finding the number of Medical Centers within a certain distance

def medical_proximity_count(lat, long, proximity):
    count = 0
    for key, value in medical_dict.items():
        temp_lat = value['coordinates']['latitude']
        temp_long = value['coordinates']['longitude']
        current_distance = vincenty((lat, long), (temp_lat, temp_long)).miles
        if current_distance < proximity:
            count += 1
    return count

In [107]:
# Grocery Store Proximity
# Query Yelp API using coordinate list to obtain grocery stores in range

grocery_dict = {}

for coordinate in coordinate_list:
    response = query_api('grocery store', coordinate[0], coordinate[1])
    if response != []:
        for business in response:
            if business['id'] not in grocery_dict.keys():
                grocery_dict[business['id']] = business

In [108]:
# Closest Grocery Store

# Method for finding the closest Medical Care Center

def closest_grocery(lat, long):
    min_distance = None
    for key, value in grocery_dict.items():
        temp_lat = value['coordinates']['latitude']
        temp_long = value['coordinates']['longitude']
        current_distance = vincenty((lat, long), (temp_lat, temp_long)).miles
        if min_distance is None:
            min_distance = current_distance
        if current_distance < min_distance:
            min_distance = current_distance
    return min_distance

In [111]:
# Method for finding the number of grocery stores within a certain distance

def grocery_proximity_count(lat, long, proximity):
    count = 0
    for key, value in grocery_dict.items():
        temp_lat = value['coordinates']['latitude']
        temp_long = value['coordinates']['longitude']
        current_distance = vincenty((lat, long), (temp_lat, temp_long)).miles
        if current_distance < proximity:
            count += 1
    return count

In [112]:
# Coffee Proximity
# Query Yelp API using coordinate list to obtain coffee shops in range

coffee_dict = {}

for coordinate in coordinate_list:
    response = query_api('coffee shop', coordinate[0], coordinate[1])
    if response != []:
        for business in response:
            if business['id'] not in coffee_dict.keys():
                coffee_dict[business['id']] = business

In [113]:
# Closest Coffee Shop

# Method for finding the closest coffee shop

def closest_coffee(lat, long):
    min_distance = None
    for key, value in coffee_dict.items():
        temp_lat = value['coordinates']['latitude']
        temp_long = value['coordinates']['longitude']
        current_distance = vincenty((lat, long), (temp_lat, temp_long)).miles
        if min_distance is None:
            min_distance = current_distance
        if current_distance < min_distance:
            min_distance = current_distance
    return min_distance

In [114]:
# Method for finding the number of coffee shops within a certain distance

def coffee_proximity_count(lat, long, proximity):
    count = 0
    for key, value in coffee_dict.items():
        temp_lat = value['coordinates']['latitude']
        temp_long = value['coordinates']['longitude']
        current_distance = vincenty((lat, long), (temp_lat, temp_long)).miles
        if current_distance < proximity:
            count += 1
    return count

In [None]:
# Creating new Pandas columns using methods
# df['medical_proximity'] = closest_medical((df['latitude']/(10**6)), (df['longitude']/(10**6)))
df['medical_proximity'] = df.apply(lambda row: closest_medical(row.latitude/(10**6),row.longitude/(10**6)), axis=1)

In [None]:
'medical_proximity' in df.keys()

In [None]:
df['medical_proximity'].head()