In [1]:
# Dependencies and Setup
import matplotlib.pyplot as plt
import openweathermapy as owm
from pprint import pprint
from census import Census
import pandas as pd
import numpy as np
import gmaps
import requests
import json
import time
import csv

# Import API key
# Import pyzillow 
from pyzillow.pyzillow import ZillowWrapper, GetDeepSearchResults


# Census and gmaps API keys are read from the local `config` module rather than being hardcoded here
from config import (census_key, gkey)
# Census API client used by the data-retrieval cell below; `year=2016` is passed to the client
# (presumably selects the 2016 data release -- confirm against the `census` package docs)
c = Census(census_key, year=2016)

In [2]:
# Census variable codes to request for every zip code tabulation area.
acs_fields = (
    "NAME",
    "B19013_001E",
    "B01003_001E",
    "B01002_001E",
    "B19301_001E",
    "B17001_002E",
    "B25077_001E",
    "B08136_003E",
    "B08301_003E",
)
census_data = c.acs5.get(acs_fields, {'for': 'zip code tabulation area:*'})

# Map the raw Census variable codes to the readable column labels used in the
# rest of the notebook (this is a rename, not a reordering).
column_labels = {
    "NAME": "Name",
    "zip code tabulation area": "Zipcode",
    "B19013_001E": "Household Income",
    "B01003_001E": "Population",
    "B01002_001E": "Median Age",
    "B19301_001E": "Per Capita Income",
    "B17001_002E": "Poverty Count",
    "B25077_001E": "Median Home Value ($)",
    "B08136_003E": "Time spent commuting (in minutes)",
    "B08301_003E": "Means of Transportation to Work",
}
census_pd = pd.DataFrame(census_data).rename(columns=column_labels)

# Poverty Rate (%) = 100 * Poverty Count / Population, computed on integer casts.
poverty_count = census_pd["Poverty Count"].astype(int)
population = census_pd["Population"].astype(int)
census_pd["Poverty Rate"] = 100 * poverty_count / population

# Keep only the columns needed downstream, in display order ("Name" is dropped).
final_columns = [
    "Zipcode",
    "Population",
    "Median Age",
    "Household Income",
    "Per Capita Income",
    "Poverty Count",
    "Poverty Rate",
    "Median Home Value ($)",
    "Time spent commuting (in minutes)",
    "Means of Transportation to Work",
]
census_pd = census_pd[final_columns]

# Row count, then a preview of the cleaned table.
print(len(census_pd))
census_pd.head()

33120


Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Median Home Value ($),Time spent commuting (in minutes),Means of Transportation to Work
0,1001,17423.0,45.0,56714.0,30430.0,1462.0,8.391207,202800.0,,7496.0
1,1002,29970.0,23.2,48923.0,26072.0,8351.0,27.864531,344000.0,188755.0,8650.0
2,1003,11296.0,19.9,2499.0,3829.0,54.0,0.478045,-666666666.0,,618.0
3,1005,5228.0,44.1,70568.0,32169.0,230.0,4.399388,213700.0,,1832.0
4,1007,14888.0,42.5,80502.0,36359.0,1410.0,9.470715,258000.0,,6368.0


In [3]:
# Load the zip-code table prepared earlier. dtype="object" reads every column
# as-is (no numeric inference), so "Zipcode" values stay strings.
zipcode_data = pd.read_csv(
    "bayarea_zipcodes.csv",
    dtype="object",
    encoding="utf-8",
)
zipcode_data.head()

Unnamed: 0,PO_NAME,Zipcode,STATE,Area__,Length__,Lng,Lat
0,NAPA,94558,CA,12313263537.0,995176.2253,-122.103292,38.513283
1,FAIRFIELD,94533,CA,991786103.4,200772.5566,-121.947475,38.301511
2,DIXON,95620,CA,7236949521.0,441860.2014,-121.653355,38.313387
3,SONOMA,95476,CA,3001414165.0,311318.5463,-122.406843,38.155682
4,NAPA,94559,CA,1194301745.0,359104.6466,-122.293685,38.155238


In [12]:
# Attach the census columns to each zip-code row. how="left" keeps every row of
# `zipcode_data`; zip codes with no census match get NaN in the census columns.
# The join key is named once -- listing "Zipcode" twice was redundant.
census_data_complete = pd.merge(
    zipcode_data,
    census_pd,
    how="left",
    on="Zipcode",
)

# Row count, then a preview rather than the whole merged frame.
print(len(census_data_complete))
census_data_complete.head(10)


187


Unnamed: 0,PO_NAME,Zipcode,STATE,Area__,Length__,Lng,Lat,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Median Home Value ($),Time spent commuting (in minutes),Means of Transportation to Work
0,NAPA,94558,CA,12313263537,995176.2253,-122.103292,38.513283,68366.0,41.7,77299.0,38140.0,5198.0,7.603195,523500.0,,26276.0
1,FAIRFIELD,94533,CA,991786103.4,200772.5566,-121.947475,38.301511,72349.0,33.9,57316.0,24388.0,11523.0,15.926965,249400.0,651035.0,22404.0
2,DIXON,95620,CA,7236949521,441860.2014,-121.653355,38.313387,21685.0,34.4,72583.0,27274.0,2949.0,13.599262,310700.0,,7865.0
3,SONOMA,95476,CA,3001414165,311318.5463,-122.406843,38.155682,37534.0,47.3,68894.0,45910.0,4130.0,11.003357,547800.0,,12538.0
4,NAPA,94559,CA,1194301745,359104.6466,-122.293685,38.155238,28355.0,37.4,65891.0,34815.0,2813.0,9.920649,484900.0,,10484.0
5,PETALUMA,94954,CA,2006544443,267474.4906,-122.457669,38.116895,37943.0,40.6,83713.0,36188.0,3055.0,8.051551,475800.0,,13988.0
6,RIO VISTA,94571,CA,4454445620,492056.7524,-121.862462,38.06603,8802.0,60.5,54852.0,34173.0,882.0,10.020450,271700.0,78505.0,2165.0
7,TRAVIS AFB,94535,CA,302939707.7,95232.00842,-121.896539,38.286568,4269.0,23.7,54692.0,18755.0,359.0,8.409464,-666666666.0,13490.0,1399.0
8,AMERICAN CANYON,94503,CA,693134128.3,136394.6951,-122.204187,38.209695,20366.0,35.9,83627.0,30601.0,1972.0,9.682805,374400.0,,7416.0
9,NOVATO,94949,CA,431605416.6,119395.6721,-122.486559,38.100527,16905.0,45.0,78568.0,55946.0,2029.0,12.002366,644800.0,164700.0,5789.0


In [16]:
# Keep only rows with a strictly positive median home value. This removes the
# negative placeholder values seen in the data (e.g. -666666666.0) and rows
# where the value is missing (NaN > 0 is False).
has_valid_home_value = census_data_complete["Median Home Value ($)"].gt(0)
census_data_complete = census_data_complete.loc[has_valid_home_value]

print(len(census_data_complete))
census_data_complete

177


Unnamed: 0,PO_NAME,Zipcode,STATE,Area__,Length__,Lng,Lat,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Median Home Value ($),Time spent commuting (in minutes),Means of Transportation to Work
0,NAPA,94558,CA,12313263537,995176.2253,-122.103292,38.513283,68366.0,41.7,77299.0,38140.0,5198.0,7.603195,523500.0,,26276.0
1,FAIRFIELD,94533,CA,991786103.4,200772.5566,-121.947475,38.301511,72349.0,33.9,57316.0,24388.0,11523.0,15.926965,249400.0,651035.0,22404.0
2,DIXON,95620,CA,7236949521,441860.2014,-121.653355,38.313387,21685.0,34.4,72583.0,27274.0,2949.0,13.599262,310700.0,,7865.0
3,SONOMA,95476,CA,3001414165,311318.5463,-122.406843,38.155682,37534.0,47.3,68894.0,45910.0,4130.0,11.003357,547800.0,,12538.0
4,NAPA,94559,CA,1194301745,359104.6466,-122.293685,38.155238,28355.0,37.4,65891.0,34815.0,2813.0,9.920649,484900.0,,10484.0
5,PETALUMA,94954,CA,2006544443,267474.4906,-122.457669,38.116895,37943.0,40.6,83713.0,36188.0,3055.0,8.051551,475800.0,,13988.0
6,RIO VISTA,94571,CA,4454445620,492056.7524,-121.862462,38.06603,8802.0,60.5,54852.0,34173.0,882.0,10.020450,271700.0,78505.0,2165.0
8,AMERICAN CANYON,94503,CA,693134128.3,136394.6951,-122.204187,38.209695,20366.0,35.9,83627.0,30601.0,1972.0,9.682805,374400.0,,7416.0
9,NOVATO,94949,CA,431605416.6,119395.6721,-122.486559,38.100527,16905.0,45.0,78568.0,55946.0,2029.0,12.002366,644800.0,164700.0,5789.0
10,NOVATO,94945,CA,753717019.2,159439.8805,-122.486559,38.100527,19095.0,44.4,90685.0,45573.0,1531.0,8.017806,682900.0,,6127.0


In [17]:
# Configure gmaps with the Google API key (`gkey`, imported from `config` in the setup cell)
gmaps.configure(api_key=gkey)

In [31]:
# Heatmap point coordinates: the 'Lat' and 'Lng' columns cast to float
# (they were read from the CSV as text).
locations = census_data_complete.loc[:, ["Lat", "Lng"]].astype(float)

# Heatmap weights: the median home value of each zip code, as float.
# Rows with missing or non-positive values were already removed by the filter cell.
median_home_value = census_data_complete.loc[:, "Median Home Value ($)"].astype(float)


In [40]:
# Build a heatmap of median home value per zip-code location.
fig = gmaps.figure()

# dissipating / point_radius are set at construction time to help with the
# heatmap dissipating on zoom.
heatmap_options = dict(
    weights=median_home_value,
    dissipating=False,
    point_radius=0.05,
)
heat_layer = gmaps.heatmap_layer(locations, **heatmap_options)

fig.add_layer(heat_layer)

# Last expression of the cell: renders the map widget.
fig

Figure(layout=FigureLayout(height='420px'))

In [5]:
# url = "http://www.zillow.com/webservice/GetRegionChildren.htm?zws-id=<X1-ZWz1gocnjquzgr_23up5>&state=wa&city=seattle&childtype=neighborhood"
# response = requests.get(url)
# response

In [6]:
# response = requests.get(url).json()
# response

In [7]:
import os

# Look up a single property through pyzillow's deep-search call and display
# its last sold price.
address = '4691 hampshire way, fremont,ca'
zipcode = '94538'

# The Zillow Web Service ID is a credential: read it from the environment
# instead of committing it to the notebook. The key that used to be hardcoded
# here should be treated as leaked and rotated.
zws_id = os.environ.get("ZILLOW_ZWS_ID")
if not zws_id:
    raise RuntimeError(
        "Set the ZILLOW_ZWS_ID environment variable to your Zillow Web Service ID "
        "before running this cell."
    )

zillow_data = ZillowWrapper(zws_id)
deep_search_response = zillow_data.get_deep_search_results(address, zipcode)
result = GetDeepSearchResults(deep_search_response)

# Last expression of the cell: the last sold price reported for the property.
result.last_sold_price


'360000'

In [None]:
# Draw 1500 random (latitude, longitude) pairs covering the whole globe.
lats = np.random.uniform(low=-90.000, high=90.000, size=1500)
lngs = np.random.uniform(low=-180.000, high=180.000, size=1500)
lat_lngs = zip(lats, lngs)

# Resolve each coordinate pair to the name of its nearest city.
# NOTE(review): `citipy` is not imported anywhere in the visible notebook --
# confirm it is imported before this cell runs on a fresh kernel.
nearest_names = (
    citipy.nearest_city(lat, lng).city_name
    for lat, lng in lat_lngs
)

# Keep each city once, in order of first appearance.
cities = list(dict.fromkeys(nearest_names))

# City count, to confirm the sample is large enough.
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Fetch the current weather for every city in `cities`, logging progress and
# skipping any city whose lookup fails.
weather_data = []
count = 0  # stays 0 when `cities` is empty

# NOTE(review): `api_key` is not defined in the visible cells -- confirm it is
# imported (e.g. from the config module) before this cell runs.
settings = {"units": "metric", "appid": api_key}

print("Beginning Data Retrieval    ")
print("------------------------------")

for count, city in enumerate(cities, start=1):
    try:
        current_weather = owm.get_current(city, **settings)
    except Exception:
        # Catch Exception rather than using a bare `except:` so that
        # KeyboardInterrupt / SystemExit still stop a long-running loop.
        # Any lookup failure is reported as "not found", as before.
        print("City not found. Skipping...")
    else:
        print(f"Processing Record {count} of Set {len(cities)} | {city}")
        weather_data.append(current_weather)
        


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Field paths to extract from each weather response, in output-column order.
summary = [
    "name",
    "clouds.all",
    "sys.country",
    "dt",
    "main.humidity",
    "coord.lat",
    "coord.lon",
    "main.temp_max",
    "wind.speed",
]

# Column labels, position-for-position with `summary`.
column_names = [
    "City",
    "Cloudiness",
    "Country",
    "Date",
    "Humidity",
    "Lat",
    "Lng",
    "Max Temp",
    "Wind Speed",
]

# Each response object is called with the field paths to pull out those values.
data = []
for record in weather_data:
    data.append(record(*summary))

weather_df = pd.DataFrame(data, columns=column_names)
weather_df.head()

In [None]:
# Export the city weather table to CSV: header row included, index omitted.
weather_df.to_csv(
    "Cities.csv",
    encoding='utf-8',
    index=False,
    header=True,
)

### Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

#### Latitude vs. Temperature Plot

In [None]:
# Scatter of maximum temperature against latitude, saved to PNG and displayed.
fig, ax = plt.subplots()
ax.scatter(
    weather_df["Lat"],
    weather_df["Max Temp"],
    marker="o",
    edgecolors="black",
)
ax.set(
    title="City Latitude vs. Max Temperature (09/21/2018)",
    xlabel="Latitude",
    ylabel="Temperature (Celsius)",
)
ax.grid()

# Save before show() so the file is written from the populated figure.
fig.savefig("Max_Temperature.png")

plt.show()

#### Latitude vs. Humidity Plot

In [None]:
# Scatter of humidity against latitude, saved to PNG and displayed.
fig, ax = plt.subplots()
ax.scatter(
    weather_df["Lat"],
    weather_df["Humidity"],
    marker="o",
    edgecolors="black",
)
ax.set(
    title="City Latitude vs. Humidity (09/21/2018)",
    xlabel="Latitude",
    ylabel="Humidity (%)",
)
ax.grid()

# Save before show() so the file is written from the populated figure.
fig.savefig("Humidity.png")

plt.show()

#### Latitude vs. Cloudiness Plot

In [None]:
# Scatter of cloudiness against latitude, saved to PNG and displayed.
fig, ax = plt.subplots()
ax.scatter(
    weather_df["Lat"],
    weather_df["Cloudiness"],
    marker="o",
    edgecolors="black",
)
ax.set(
    title="City Latitude vs. Cloudiness (09/21/2018)",
    xlabel="Latitude",
    ylabel="Cloudiness (%)",
)
ax.grid()

# Save before show() so the file is written from the populated figure.
fig.savefig("Cloudiness.png")

plt.show()

#### Latitude vs. Wind Speed Plot

In [None]:
# Scatter of wind speed against latitude, saved to PNG and displayed.
fig, ax = plt.subplots()
ax.scatter(
    weather_df["Lat"],
    weather_df["Wind Speed"],
    marker="o",
    edgecolors="black",
)
# The weather data was requested with units="metric", for which OpenWeatherMap
# reports wind speed in metres per second. The previous label said "mph" and
# had a stray closing parenthesis.
ax.set(
    title="City Latitude vs. Wind Speed (09/21/2018)",
    xlabel="Latitude",
    ylabel="Wind Speed (m/s)",
)
ax.grid()

# Save before show() so the file is written from the populated figure.
fig.savefig("Wind_Speed.png")

plt.show()

## Three observable trends:
1. Temperatures are higher for cities closer to the equator and drop as cities get farther away from it.
2. Most cities close to the equator have higher humidity (%).
3. Cloudiness and wind speed show no clear trend with latitude.
