In [11]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import openweathermapy as owm
from pprint import pprint
from census import Census
import pandas as pd
import numpy as np
import gmaps
import requests
import json
import time
import csv

# Import API key
# Import pyzillow 
from pyzillow.pyzillow import ZillowWrapper, GetDeepSearchResults


# Import Census and Gmaps API Key
from config import (census_key, gkey)
c = Census(census_key, year=2016)

In [12]:
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                        "B19301_001E", "B17001_002E", "B25077_001E",
                        "B08136_003E", "B08301_003E"), 
                        {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Column Reordering
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B25077_001E": "Median Home Value ($)",
                                      "B08136_003E": "Time spent commuting (in minutes)",
                                      "B08301_003E": "Means of Transportation to Work",
                                      "NAME": "Name", "zip code tabulation area": "Zipcode"})

# Add in Poverty Rate (Poverty Count / Population)
census_pd["Poverty Rate"] = 100 * \
    census_pd["Poverty Count"].astype(
        int) / census_pd["Population"].astype(int)

# Final DataFrame
census_pd = census_pd[["Zipcode", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rate","Median Home Value ($)",
                       "Time spent commuting (in minutes)","Means of Transportation to Work"]]

# Visualize
print(len(census_pd))
census_pd.head()

33120


Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Median Home Value ($),Time spent commuting (in minutes),Means of Transportation to Work
0,1001,17423.0,45.0,56714.0,30430.0,1462.0,8.391207,202800.0,,7496.0
1,1002,29970.0,23.2,48923.0,26072.0,8351.0,27.864531,344000.0,188755.0,8650.0
2,1003,11296.0,19.9,2499.0,3829.0,54.0,0.478045,-666666666.0,,618.0
3,1005,5228.0,44.1,70568.0,32169.0,230.0,4.399388,213700.0,,1832.0
4,1007,14888.0,42.5,80502.0,36359.0,1410.0,9.470715,258000.0,,6368.0


In [27]:
# Import the original data we analyzed earlier. Use dtype="object"
zipcode_data = pd.read_csv("bayarea_zipcodes.csv", dtype="object", encoding="utf-8")
print(len(census_data_complete))
zipcode_data.head(10)

203


Unnamed: 0,PO_NAME,Zipcode,STATE,Area__,Length__,Lng,Lat
0,NAPA,94558,CA,12313263537.0,995176.2253,-122.103292,38.513283
1,FAIRFIELD,94533,CA,991786103.4,200772.5566,-121.947475,38.301511
2,DIXON,95620,CA,7236949521.0,441860.2014,-121.653355,38.313387
3,SONOMA,95476,CA,3001414165.0,311318.5463,-122.406843,38.155682
4,NAPA,94559,CA,1194301745.0,359104.6466,-122.293685,38.155238
5,PETALUMA,94954,CA,2006544443.0,267474.4906,-122.457669,38.116895
6,RIO VISTA,94571,CA,4454445620.0,492056.7524,-121.862462,38.06603
7,TRAVIS AFB,94535,CA,302939707.7,95232.00842,-121.896539,38.286568
8,AMERICAN CANYON,94503,CA,693134128.3,136394.6951,-122.204187,38.209695
9,NOVATO,94949,CA,431605416.6,119395.6721,-122.486559,38.100527


In [14]:
# Merge the two data sets along zip code
census_data_complete = pd.merge(
    zipcode_data, census_pd, how="left", on=["Zipcode", "Zipcode"])
# Visualize
print(len(census_data_complete))
census_data_complete.head(10)


214


Unnamed: 0,PO_NAME,Zipcode,STATE,Area__,Length__,Lng,Lat,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Median Home Value ($),Time spent commuting (in minutes),Means of Transportation to Work
0,NAPA,94558,CA,12313263537.0,995176.2253,-122.103292,38.513283,68366.0,41.7,77299.0,38140.0,5198.0,7.603195,523500.0,,26276.0
1,FAIRFIELD,94533,CA,991786103.4,200772.5566,-121.947475,38.301511,72349.0,33.9,57316.0,24388.0,11523.0,15.926965,249400.0,651035.0,22404.0
2,DIXON,95620,CA,7236949521.0,441860.2014,-121.653355,38.313387,21685.0,34.4,72583.0,27274.0,2949.0,13.599262,310700.0,,7865.0
3,SONOMA,95476,CA,3001414165.0,311318.5463,-122.406843,38.155682,37534.0,47.3,68894.0,45910.0,4130.0,11.003357,547800.0,,12538.0
4,NAPA,94559,CA,1194301745.0,359104.6466,-122.293685,38.155238,28355.0,37.4,65891.0,34815.0,2813.0,9.920649,484900.0,,10484.0
5,PETALUMA,94954,CA,2006544443.0,267474.4906,-122.457669,38.116895,37943.0,40.6,83713.0,36188.0,3055.0,8.051551,475800.0,,13988.0
6,RIO VISTA,94571,CA,4454445620.0,492056.7524,-121.862462,38.06603,8802.0,60.5,54852.0,34173.0,882.0,10.02045,271700.0,78505.0,2165.0
7,TRAVIS AFB,94535,CA,302939707.7,95232.00842,-121.896539,38.286568,4269.0,23.7,54692.0,18755.0,359.0,8.409464,-666666666.0,13490.0,1399.0
8,AMERICAN CANYON,94503,CA,693134128.3,136394.6951,-122.204187,38.209695,20366.0,35.9,83627.0,30601.0,1972.0,9.682805,374400.0,,7416.0
9,NOVATO,94949,CA,431605416.6,119395.6721,-122.486559,38.100527,16905.0,45.0,78568.0,55946.0,2029.0,12.002366,644800.0,164700.0,5789.0


In [15]:
# Remove NaN and Negative Value for the Median Home Value Coulumn 
census_data_complete = census_data_complete[(census_data_complete[["Median Home Value ($)"]]>0).all(1)]
# Visualize
print(len(census_data_complete))
census_data_complete.head(10)

203


Unnamed: 0,PO_NAME,Zipcode,STATE,Area__,Length__,Lng,Lat,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Median Home Value ($),Time spent commuting (in minutes),Means of Transportation to Work
0,NAPA,94558,CA,12313263537.0,995176.2253,-122.103292,38.513283,68366.0,41.7,77299.0,38140.0,5198.0,7.603195,523500.0,,26276.0
1,FAIRFIELD,94533,CA,991786103.4,200772.5566,-121.947475,38.301511,72349.0,33.9,57316.0,24388.0,11523.0,15.926965,249400.0,651035.0,22404.0
2,DIXON,95620,CA,7236949521.0,441860.2014,-121.653355,38.313387,21685.0,34.4,72583.0,27274.0,2949.0,13.599262,310700.0,,7865.0
3,SONOMA,95476,CA,3001414165.0,311318.5463,-122.406843,38.155682,37534.0,47.3,68894.0,45910.0,4130.0,11.003357,547800.0,,12538.0
4,NAPA,94559,CA,1194301745.0,359104.6466,-122.293685,38.155238,28355.0,37.4,65891.0,34815.0,2813.0,9.920649,484900.0,,10484.0
5,PETALUMA,94954,CA,2006544443.0,267474.4906,-122.457669,38.116895,37943.0,40.6,83713.0,36188.0,3055.0,8.051551,475800.0,,13988.0
6,RIO VISTA,94571,CA,4454445620.0,492056.7524,-121.862462,38.06603,8802.0,60.5,54852.0,34173.0,882.0,10.02045,271700.0,78505.0,2165.0
8,AMERICAN CANYON,94503,CA,693134128.3,136394.6951,-122.204187,38.209695,20366.0,35.9,83627.0,30601.0,1972.0,9.682805,374400.0,,7416.0
9,NOVATO,94949,CA,431605416.6,119395.6721,-122.486559,38.100527,16905.0,45.0,78568.0,55946.0,2029.0,12.002366,644800.0,164700.0,5789.0
10,NOVATO,94945,CA,753717019.2,159439.8805,-122.486559,38.100527,19095.0,44.4,90685.0,45573.0,1531.0,8.017806,682900.0,,6127.0


In [16]:
# Configure gmaps with API key
gmaps.configure(api_key=gkey)

In [17]:
# Store 'Lat' and 'Lng' into  locations 
locations_1 = census_data_complete[["Lat", "Lng"]].astype(float)

# Convert Poverty Rate to float and store
# HINT: be sure to handle NaN values
median_home_value = census_data_complete["Median Home Value ($)"].astype(float)


In [18]:
# Create a poverty Heatmap layer
fig = gmaps.figure()

heat_layer = gmaps.heatmap_layer(locations_1, weights=median_home_value, dissipating=False, point_radius = 0.05)
#                               dissipating=False, max_intensity=1,
#                                  point_radius = 0.01)

# Adjust heat_layer setting to help with heatmap dissipating on zoom
# heat_layer.dissipating = False
# heat_layer.max_intensity = 1
# heat_layer.point_radius = 0.01

fig.add_layer(heat_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [19]:
def to_lat_lng(json):
  return '{0}, {1}'.format(json["lat"], json['lng'])

def get_cities_df(cities):
    points = []
    north_easts = []
    south_wests = []
    
    for target_city in cities: 
        # Build the endpoint URL
        target_url = ('https://maps.googleapis.com/maps/api/geocode/json?address={0}&key={1}').format(target_city, gkey)
        # Run a request to endpoint and convert result to json
        response = requests.get(target_url).json()
        geometry = response["results"][0]["geometry"]

        # Extract latitude and longitude.
        points.append(to_lat_lng(geometry["location"]))
        north_easts.append(to_lat_lng(geometry["bounds"]["northeast"]))
        south_wests.append(to_lat_lng(geometry["bounds"]["southwest"]))

    return pd.DataFrame({
        "City": cities,
        "Point": points,
        "Northeast": north_easts,
        "Southwest": south_wests,
    }) 

# cities = ["Palo Alto, California"]
cities = ["San Francisco, California", "Redwood City, California", "Berkeley, California", "Palo Alto, California", "Mountain View, California", "Sunnyvale, California", "Cupertino, California", "Santa Clara, California", "San Jose, California", "Fremont, California"]
# cities_df = get_cities_df(cities)
# cities_df.to_csv('cities.csv')


In [20]:
def get_response(url, params):
    response = requests.get(url, params=params).json()
    if response['status'] == 'OK':
        return response
    else:
        print('Not OK response: ' + response['status'])
        return None

# Returns DataFrame
def request_places(city, location, type, gkey):    
    base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
    # rankby:distance will result INVALID_REQUSET
    params = {
        "location": location,
        "radius": 10000, # hard-code distance
        "type": type,
        "key": gkey,
    }

    response = get_response(base_url, params)
    results= response['results']
    count = 1
    
    # If there is no page token, return result, otherwise concat result with next page.
    while (count < 3 and 'next_page_token' in response):
        count = count + 1
        token = response['next_page_token']
#         print(f'Getting next page: {count}, token: {token}')
        
        # Sleep a while so that token can be used.
        time.sleep(3)
        params = {
            "location": location,
            "radius": 10000, # hard-code distance
            "type": type,
            "key": gkey,
            "page_token": response['next_page_token'],
        }
        base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"
        response = get_response(base_url, params)
        results.extend(response['results'])
    
    print(f'Total page count: {count}')
    return results

def to_data_frame(city, type, results):
    cities = []
    types = []
    place_ids = []
    names = []
    poi = []
    vicnities = []
#     locations = []
    lat = []
    lng = []
    
    for result in results:
        # Skip locality place, e.g. Palo Alto.
        if ('locality' in result['types']):
            continue

        cities.append(city)
        types.append(type)
        place_ids.append(result['place_id'])
        names.append(result['name'])
        poi.append(result['types'])
        vicnities.append(result['vicinity'])
#         locations.append(to_lat_lng(result['geometry']['location']))
        lat.append(result['geometry']['location']['lat'])
        lng.append(result['geometry']['location']['lng'])
    
    return pd.DataFrame({
        'City': cities,
        'Type': types,
        'Place ID': place_ids,
        'Name': names,
        'Point of interests': poi,
        'Address': vicnities,
#         'LatLng': locations,
        'Lat': lat,
        'Lng': lng,
    })

def get_places_data_frame(city, location, type, gkey):
    print(f'Getting plcaes, city: "{city}", location: "{location}", type: "{type}"');
    results = request_places(city, location, type, gkey)
    return to_data_frame(city, type, results) 

In [22]:
cities_df = pd.read_csv("cities.csv")
type = 'school'

def get_all_places_data_frame(type, gkey):
    frames = [];
    for index, row in cities_df.iterrows():
        frames.append(get_places_data_frame(row['City'], row['Point'], type, gkey))
    return pd.concat(frames)

place_df = get_all_places_data_frame(type, gkey)
place_df.to_csv(type + '.csv')
place_df

Getting plcaes, city: "San Francisco, California", location: "37.7749295, -122.4194155", type: "school"
Total page count: 3
Getting plcaes, city: "Redwood City, California", location: "37.48521520000001, -122.2363548", type: "school"
Total page count: 3
Getting plcaes, city: "Berkeley, California", location: "37.8715926, -122.272747", type: "school"
Total page count: 3
Getting plcaes, city: "Palo Alto, California", location: "37.4418834, -122.1430195", type: "school"
Total page count: 3
Getting plcaes, city: "Mountain View, California", location: "37.3860517, -122.0838511", type: "school"
Total page count: 1
Getting plcaes, city: "Sunnyvale, California", location: "37.36883, -122.0363496", type: "school"
Total page count: 1
Getting plcaes, city: "Cupertino, California", location: "37.3229978, -122.0321823", type: "school"
Total page count: 1
Getting plcaes, city: "Santa Clara, California", location: "37.3541079, -121.9552356", type: "school"
Total page count: 1
Getting plcaes, city: "S

Unnamed: 0,City,Type,Place ID,Name,Point of interests,Address,Lat,Lng
0,"San Francisco, California",school,ChIJi_z_a6KAhYARHyEzLOKy6xQ,Yoga Tree Hayes,"[gym, health, school, point_of_interest, estab...","519 Hayes Street, San Francisco",37.776507,-122.425014
1,"San Francisco, California",school,ChIJu2l-LmWAhYARxmTpj4vb0I8,Bright Horizons at 221 Main,"[school, point_of_interest, establishment]","221 Main Street, San Francisco",37.790340,-122.392263
2,"San Francisco, California",school,ChIJEXlt4TJ-j4ARRpaWbbt3zco,Le Cordon Bleu College of Culinary Arts in San...,"[school, point_of_interest, establishment]","350 Rhode Island Street, San Francisco",37.765888,-122.402882
3,"San Francisco, California",school,ChIJU7vCd5F9j4AR9GT0sizLXwo,San Francisco Driving School,"[school, point_of_interest, establishment]","2409 19th Avenue, San Francisco",37.742789,-122.475891
4,"San Francisco, California",school,ChIJBc_rV-eAhYARxkitrz4kdj4,Galileo Academy of Science and Technology,"[school, point_of_interest, establishment]","1150 Francisco Street, San Francisco",37.803462,-122.424029
5,"San Francisco, California",school,ChIJgaTBe499j4ARxsYjNvqyYZY,Abraham Lincoln High School,"[school, point_of_interest, establishment]","2162 24th Avenue, San Francisco",37.746907,-122.479982
6,"San Francisco, California",school,ChIJXS6-VzOHhYARd7uawbWEhlM,Jewish Community Center of San Francisco,"[gym, health, school, point_of_interest, estab...","3200 California Street, San Francisco",37.787337,-122.447264
7,"San Francisco, California",school,ChIJLbVZeqN9j4AR_U7DEiiHSNY,Lowell High School,"[school, point_of_interest, establishment]","1101 Eucalyptus Drive, San Francisco",37.730442,-122.482576
8,"San Francisco, California",school,ChIJvXl2weCAhYARmuP3Xwe2a2s,Peekadoodle Kidsclub,"[school, point_of_interest, establishment]","900 North Point Street f100, San Francisco",37.805921,-122.423406
9,"San Francisco, California",school,ChIJ415kURh-j4ARPfBAwXpgEWk,Mission High School,"[school, point_of_interest, establishment]","3750 18th Street, San Francisco",37.761717,-122.427315


In [23]:
# Configure gmaps with API key
gmaps.configure(api_key=gkey)
# Store 'Lat' and 'Lng' into  locations 
locations_2 = place_df[["Lat", "Lng"]].astype(float)

In [24]:
# Create bank symbol layer
company_layer = gmaps.symbol_layer(
    locations_2, fill_color='rgba(0, 150, 0, 0.4)',
    stroke_color='rgba(0, 0, 150, 0.4)', scale=2,
)


fig = gmaps.figure()
fig.add_layer(company_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [25]:
# Create a combined map
fig = gmaps.figure()

fig.add_layer(heat_layer)
fig.add_layer(company_layer)

fig

Figure(layout=FigureLayout(height='420px'))

In [5]:
# url = "http://www.zillow.com/webservice/GetRegionChildren.htm?zws-id=<X1-ZWz1gocnjquzgr_23up5>&state=wa&city=seattle&childtype=neighborhood"
# response = requests.get(url)
# response

In [6]:
# response = requests.get(url).json()
# response

In [7]:
address = '4691 hampshire way, fremont,ca'
zipcode = '94538'
zillow_data = ZillowWrapper("X1-ZWz1gocnjquzgr_23up5")
deep_search_response = zillow_data.get_deep_search_results(address,zipcode)
result = GetDeepSearchResults(deep_search_response)
result.last_sold_price
#response = requests.get(deep_search_response).json()


'360000'

In [None]:
# List for holding lat_lngs and cities
lat_lngs = []
cities = []

# Create a set of random lat and lng combinations
lats = np.random.uniform(low=-90.000, high=90.000, size=1500)
lngs = np.random.uniform(low=-180.000, high=180.000, size=1500)
lat_lngs = zip(lats, lngs)
# Identify nearest city for each lat, lng combination
for lat_lng in lat_lngs:
    city = citipy.nearest_city(lat_lng[0], lat_lng[1]).city_name
    
    # If the city is unique, then add it to a our cities list
    if city not in cities:
        cities.append(city)

# Print the city count to confirm sufficient count
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it'sbeing processed (with the city number and city name).


In [None]:
weather_data = []
count = 0

settings = {"units": "metric", "appid": api_key}

print(f"Beginning Data Retrieval    ")
print(f"------------------------------")

for city in cities:
    count = count + 1    
    try:
        current_weather = owm.get_current(city, **settings)
        print(f"Processing Record {count} of Set {len(cities)} | {city}")
        weather_data.append(current_weather)
    except: 
        print(f"City not found. Skipping...")
        


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
summary = ["name","clouds.all","sys.country","dt","main.humidity", "coord.lat", "coord.lon", "main.temp_max", "wind.speed"]

data = [response(*summary) for response in weather_data]

weather_df = pd.DataFrame(data,columns=["City","Cloudiness","Country","Date","Humidity","Lat",
                                  "Lng","Max Temp","Wind Speed"])
weather_df.head()

In [None]:
weather_df.to_csv("Cities.csv", encoding='utf-8', index=False,header=True)

### Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

#### Latitude vs. Temperature Plot

In [None]:
plt.scatter(weather_df["Lat"], weather_df["Max Temp"], marker="o",edgecolors="black",)

plt.title("City Latitude vs. Max Temperature (09/21/2018)")
plt.xlabel("Latitude")
plt.ylabel("Temperature (Celsius)")
plt.grid()

plt.savefig("Max_Temperature.png")

plt.show()

#### Latitude vs. Humidity Plot

In [None]:
plt.scatter(weather_df["Lat"], weather_df["Humidity"], marker="o",edgecolors="black",)

plt.title("City Latitude vs. Humidity (09/21/2018)")
plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.grid()

plt.savefig("Humidity.png")

plt.show()

#### Latitude vs. Cloudiness Plot

In [None]:
plt.scatter(weather_df["Lat"], weather_df["Cloudiness"], marker="o",edgecolors="black",)

plt.title("City Latitude vs. Cloudiness (09/21/2018)")
plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.grid()

plt.savefig("Cloudiness.png")

plt.show()

#### Latitude vs. Wind Speed Plot

In [None]:
plt.scatter(weather_df["Lat"], weather_df["Wind Speed"], marker="o",edgecolors="black",)

plt.title("City Latitude vs. Wind Speed (09/21/2018)")
plt.xlabel("Latitude")
plt.ylabel("Wind Speed (mph))")
plt.grid()

plt.savefig("Wind_Speed.png")

plt.show()

## Three observable trends:
1. The city's temperature is higher as they close to the equator, and become lower as they further away from equator.
2. Most cities close to the equator have higher humidity(%). 
3. Cloudiness and Wind speed are not showing clear trend with the change of latitude. 
