## Setup

In [1]:
import urllib.request
import zipfile
import pandas as pd
from urllib.parse import urlencode
import json
import gzip

### Define Data

In [2]:
# Local directory that holds the downloaded source files.
DATA_DIR = './data/'

# Census 2017 ZCTA gazetteer archive and the local path it is saved to.
ZIPS_URL = (
    'http://www2.census.gov/geo/docs/maps-data/data/gazetteer/'
    '2017_Gazetteer/2017_Gaz_zcta_national.zip'
)
ZIPS_FP = DATA_DIR + 'zips-2017.zip'

# Zillow single-family-residence ZHVI CSV and the local path it is saved to.
ZHVI_URL = (
    'http://files.zillowstatic.com/research/public/Zip/'
    'Zip_Zhvi_SingleFamilyResidence.csv'
)
ZHVI_FP = DATA_DIR + 'sfr.csv'

### (Optional) Refresh Data

In [3]:
# Download the gazetteer archive to ZIPS_FP (the target directory must already
# exist). The trailing ';' suppresses the cell's output.
urllib.request.urlretrieve(url=ZIPS_URL, filename=ZIPS_FP);

In [4]:
# Download the Zillow home-value CSV to ZHVI_FP (the target directory must
# already exist). The trailing ';' suppresses the cell's output.
urllib.request.urlretrieve(url=ZHVI_URL, filename=ZHVI_FP);

## Home Prices

In [5]:
# Load the Zillow home-value table and keep only rows whose CountyName is one
# of the names listed in `counties`.
# NOTE(review): 'San Jose' is a city name; if CountyName holds county names this
# entry may never match ('Santa Clara' may have been intended) — confirm.
counties = ['San Francisco', 'San Mateo', 'San Jose']
df = pd.read_csv(ZHVI_FP)
in_counties = df['CountyName'].isin(counties)
df = df[in_counties]
# RegionName of the retained rows; used later as the list of zips to look up.
zips = df['RegionName']

## Schools

In [6]:
# Read the tab-separated gazetteer file and keep only the rows whose GEOID
# appears among the zips selected from the home-price table.
gazetteer = pd.read_csv(ZIPS_FP, sep='\t')
geoid_selected = gazetteer['GEOID'].isin(zips.values)
df2 = gazetteer[geoid_selected]

In [7]:
# Build the GreatSchools search URL for a single location, plus the
# browser-like request headers that are sent with it.
base = 'https://www.greatschools.org/search/search.page?'
params = {
    'lat': '37.554298',
    'lon': '-122.496632',
    'zip': '94037',
    'state': 'CA'
}
url = base + urlencode(params)
headers = {
    "user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10.13; rv:58.0) Gecko/20100101 Firefox/58.0",
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8",
    # Advertise gzip only: the response body is decoded with gzip.decompress,
    # and the standard library cannot decode brotli ("br") if the server
    # chooses it.
    "accept-encoding": "gzip",
    "accept-language": "en-US,en;q=0.5",
}

In [8]:
# Fetch the search page. urllib does not undo Content-Encoding on its own, so
# decode according to what the server reports instead of assuming the body is
# always gzip-compressed.
req = urllib.request.Request(url, None, headers)
with urllib.request.urlopen(req) as res:
    body = res.read()
    content_encoding = (res.headers.get('Content-Encoding') or 'identity').strip().lower()

if content_encoding in ('gzip', 'x-gzip'):
    body = gzip.decompress(body)
elif content_encoding != 'identity':
    # e.g. "br" or "deflate": fail with a clear message rather than an opaque
    # gzip error.
    raise ValueError('Unsupported Content-Encoding: ' + content_encoding)
html = body.decode('utf-8')

In [9]:
# Pull the JSON array that follows `gon.map_points` out of the page source and
# keep the schools that have a positive GreatSchools rating.
raw = html.find('gon.map_points')
if raw == -1:
    raise ValueError("'gon.map_points' not found in the page")
start = html.find('[', raw)
if start == -1:
    raise ValueError("no JSON array found after 'gon.map_points'")
# Let the JSON parser locate the end of the array: cutting at the first ']'
# would truncate the payload if any value contains ']' or a nested list.
schools, consumed = json.JSONDecoder().raw_decode(html[start:])
end = start + consumed - 1
data = html[start:end + 1]
# A missing or null rating counts as unrated instead of raising.
rated = [school for school in schools if (school.get('gsRating') or 0) > 0]

In [10]:
# Display the rated schools (the entries with gsRating > 0) for inspection.
rated

[{'city': 'Pacifica',
  'communityRatingStars': 5,
  'fitScore': 0,
  'gradeRange': 'K-8',
  'gsRating': 9,
  'id': 6884,
  'lat': 37.59849548339844,
  'lng': -122.49832153320312,
  'maxFitScore': 0,
  'name': 'Cabrillo Elementary School',
  'numReviews': 129,
  'okFit': False,
  'on_page': True,
  'preschool': False,
  'profileUrl': '/california/pacifica/6884-Cabrillo-Elementary-School/',
  'reviewUrl': '/california/pacifica/6884-Cabrillo-Elementary-School/#Reviews',
  'schoolType': 'Public district',
  'state': 'ca',
  'street': '601 Crespi Drive',
  'strongFit': False,
  'zillowUrl': 'https://www.zillow.com/CA-94044?cbpartner=Great+Schools&utm_source=GreatSchools&utm_medium=referral&utm_campaign=schoolsearch',
  'zipcode': '94044'},
 {'city': 'Pacifica',
  'communityRatingStars': 5,
  'fitScore': 0,
  'gradeRange': 'K-8',
  'gsRating': 9,
  'id': 6892,
  'lat': 37.61370086669922,
  'lng': -122.4847412109375,
  'maxFitScore': 0,
  'name': 'Vallemar Elementary School',
  'numReviews':

In [11]:
# Average GreatSchools rating across all rated schools returned by the search,
# rounded to two decimals.
num_schools = len(rated)
total_rating = sum(school['gsRating'] for school in rated)
# With no rated schools there is nothing to average; report NaN rather than
# raising ZeroDivisionError.
avg_rating = round(total_rating / num_schools, 2) if num_schools else float('nan')
print(avg_rating)

6.33


In [12]:
# Grabbing the zipcode
# NOTE(review): this is the zipcode of the first rated school, not the zip that
# was searched for (params['zip'] is '94037' while the summary table shows
# 94044) — confirm which one the summary row should be keyed by.
zipcode = rated[0]['zipcode']

In [13]:
# Collect every rated school's grade range, then reduce to the distinct values
# in first-seen order.
grades = [school['gradeRange'] for school in rated]
grade_range = []
for grade in grades:
    if grade in grade_range:
        continue
    grade_range.append(grade)
print(grade_range)

['K-8', '9-12', 'K-5']


In [14]:
# Assemble a one-row summary table for this search, indexed by zipcode.
d = dict(
    zipcode=zipcode,
    avg_rating=avg_rating,
    number_of_schools=num_schools,
    # Wrapped in a list so the whole list of ranges lands in a single cell.
    grade_ranges=[grade_range],
)
schools_df = pd.DataFrame(data=d).set_index('zipcode')
schools_df.head()

Unnamed: 0_level_0,avg_rating,grade_ranges,number_of_schools
zipcode,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
94044,6.33,"[K-8, 9-12, K-5]",6


In [15]:
# Working on iterating through the zipcodes: trim the gazetteer rows down to
# three columns and give them short names.
# errors='ignore' keeps this cell re-runnable: on a second execution the area
# columns are already gone and a plain drop would raise KeyError.
df2 = df2.drop(["ALAND", "AWATER", "ALAND_SQMI", "AWATER_SQMI"], axis=1, errors='ignore')
# The remaining columns are renamed by position.
df2.columns = ["zipcode", "lat", "long"]
df2.head()

Unnamed: 0,zipcode,lat,long
30954,94002,37.514354,-122.298901
30955,94005,37.688826,-122.408935
30956,94010,37.57028,-122.365778
30957,94014,37.690884,-122.447441
30958,94015,37.681312,-122.480634
