In [9]:
# Dependencies and Setup
import csv
import json
import os
import time
from pprint import pprint

import matplotlib.pyplot as plt
import numpy as np
import openweathermapy as owm
import pandas as pd
import requests
from census import Census

# Zillow API client classes (pyzillow), used by the deep-search lookup cell
from pyzillow.pyzillow import ZillowWrapper, GetDeepSearchResults


# Census API key, imported from the `config` module rather than written inline.
# NOTE(review): the weather-retrieval cell also passes this same `api_key` as
# the OpenWeatherMap `appid` -- confirm whether a separate key is required.
from config import api_key
# Census client configured for the 2013 data year
c = Census(api_key, year=2013)

In [14]:
# Request the selected ACS 5-year variables for every ZIP Code Tabulation Area.
census_data = c.acs5.get(("NAME", "B19013_001E", "B01003_001E", "B01002_001E",
                        "B19301_001E", "B17001_002E", "B25077_001E",
                        "B08136_003E", "B08301_003E"),
                        {'for': 'zip code tabulation area:*'})

# Convert to DataFrame
census_pd = pd.DataFrame(census_data)

# Rename the raw ACS variable codes to readable column names.
# NOTE(review): B08136_003E / B08301_003E look like the "drove alone" aggregate
# commute minutes and worker count respectively -- confirm against the ACS
# variable list; the column labels are kept unchanged so downstream code works.
census_pd = census_pd.rename(columns={"B01003_001E": "Population",
                                      "B01002_001E": "Median Age",
                                      "B19013_001E": "Household Income",
                                      "B19301_001E": "Per Capita Income",
                                      "B17001_002E": "Poverty Count",
                                      "B25077_001E": "Median Home Value ($)",
                                      "B08136_003E": "Time spent commuting (in minutes)",
                                      "B08301_003E": "Means of Transportation to Work",
                                      "NAME": "Name", "zip code tabulation area": "Zipcode"})

# Add in Poverty Rate (Poverty Count / Population), expressed as a percentage.
# pd.to_numeric(..., errors="coerce") turns missing or non-numeric API values
# into NaN, whereas astype(int) would raise on the first null and abort the cell.
census_pd["Poverty Rate"] = (
    100
    * pd.to_numeric(census_pd["Poverty Count"], errors="coerce")
    / pd.to_numeric(census_pd["Population"], errors="coerce")
)

# Final DataFrame: keep only the analysis columns, in display order
census_pd = census_pd[["Zipcode", "Population", "Median Age", "Household Income",
                       "Per Capita Income", "Poverty Count", "Poverty Rate","Median Home Value ($)",
                       "Time spent commuting (in minutes)","Means of Transportation to Work"]]

# Visualize
print(len(census_pd))
census_pd.head()

33120


Unnamed: 0,Zipcode,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Median Home Value ($),Time spent commuting (in minutes),Means of Transportation to Work
0,601,18450.0,36.6,12041.0,7380.0,10816.0,58.623306,104600.0,,3442.0
1,602,41302.0,38.6,15663.0,8463.0,22409.0,54.256452,90700.0,,8698.0
2,603,53683.0,38.9,15485.0,9176.0,26220.0,48.842278,120500.0,,9655.0
3,606,6591.0,37.3,15019.0,6383.0,3721.0,56.455773,97700.0,,1336.0
4,610,28963.0,39.2,16707.0,7892.0,14569.0,50.30211,109900.0,,6977.0


In [19]:
# Load the previously analyzed zip-code table. dtype="object" stops pandas
# from inferring numeric types, so every column is read in as-is.
zipcode_data = pd.read_csv(
    "bayarea_zipcodes.csv",
    encoding="utf-8",
    dtype="object",
)
zipcode_data.head()

Unnamed: 0,PO_NAME,the_geom,Zipcode,STATE,Area__,Length__
0,NAPA,MULTIPOLYGON (((-122.10329200180091 38.5132829...,94558,CA,12313263537.0,995176.2253
1,FAIRFIELD,MULTIPOLYGON (((-121.947475002335 38.301511000...,94533,CA,991786103.4,200772.5566
2,DIXON,MULTIPOLYGON (((-121.65335500334429 38.3133870...,95620,CA,7236949521.0,441860.2014
3,SONOMA,MULTIPOLYGON (((-122.406843003057 38.155681999...,95476,CA,3001414165.0,311318.5463
4,NAPA,MULTIPOLYGON (((-122.29368500225117 38.1552379...,94559,CA,1194301745.0,359104.6466


In [21]:
# Left-join the census metrics onto the zip-code table so that every row of
# zipcode_data is kept. The join key is the shared "Zipcode" column; it only
# needs to be named once.
census_data_complete = pd.merge(
    zipcode_data, census_pd, how="left", on="Zipcode")
# Visualize
print(len(census_data_complete))
census_data_complete.head()

187


Unnamed: 0,PO_NAME,the_geom,Zipcode,STATE,Area__,Length__,Population,Median Age,Household Income,Per Capita Income,Poverty Count,Poverty Rate,Median Home Value ($),Time spent commuting (in minutes),Means of Transportation to Work
0,NAPA,MULTIPOLYGON (((-122.10329200180091 38.5132829...,94558,CA,12313263537.0,995176.2253,66824.0,40.6,72286.0,35746.0,7000.0,10.475278,442500.0,,25096.0
1,FAIRFIELD,MULTIPOLYGON (((-121.947475002335 38.301511000...,94533,CA,991786103.4,200772.5566,69067.0,32.9,55413.0,22616.0,12074.0,17.481576,211100.0,551190.0,21477.0
2,DIXON,MULTIPOLYGON (((-121.65335500334429 38.3133870...,95620,CA,7236949521.0,441860.2014,20845.0,34.6,71261.0,27791.0,2316.0,11.110578,287700.0,,7479.0
3,SONOMA,MULTIPOLYGON (((-122.406843003057 38.155681999...,95476,CA,3001414165.0,311318.5463,38800.0,44.9,58785.0,40040.0,4702.0,12.118557,494800.0,,12390.0
4,NAPA,MULTIPOLYGON (((-122.29368500225117 38.1552379...,94559,CA,1194301745.0,359104.6466,27167.0,35.9,56895.0,29904.0,3100.0,11.410903,415200.0,,9625.0


In [5]:
# url = "http://www.zillow.com/webservice/GetRegionChildren.htm?zws-id=<YOUR_ZWSID>&state=wa&city=seattle&childtype=neighborhood"
# response = requests.get(url)
# response

In [6]:
# response = requests.get(url).json()
# response

In [7]:
# Look up a single property through Zillow's deep-search endpoint and display
# its last sold price.
address = '4691 hampshire way, fremont,ca'
zipcode = '94538'

# Read the Zillow Web Service ID from the environment instead of hardcoding
# the credential in the notebook, where it would be committed and shared.
zws_id = os.environ.get("ZILLOW_API_KEY")
if not zws_id:
    raise RuntimeError(
        "Set the ZILLOW_API_KEY environment variable to your Zillow Web Service ID")

zillow_data = ZillowWrapper(zws_id)
deep_search_response = zillow_data.get_deep_search_results(address, zipcode)
result = GetDeepSearchResults(deep_search_response)
result.last_sold_price


'360000'

In [None]:
# Build a list of unique city names nearest to randomly sampled coordinates.
# NOTE(review): `citipy` is not imported anywhere in the visible notebook; it
# must be in scope (presumably `from citipy import citipy` -- confirm) or this
# cell raises NameError on a fresh kernel.

# Create a set of random lat and lng combinations
lats = np.random.uniform(low=-90.000, high=90.000, size=1500)
lngs = np.random.uniform(low=-180.000, high=180.000, size=1500)
lat_lngs = zip(lats, lngs)

# Identify the nearest city for each lat, lng combination.
# dict.fromkeys drops repeated names while keeping first-seen order, which
# avoids rescanning the whole list for every sampled coordinate.
cities = list(dict.fromkeys(
    citipy.nearest_city(lat, lng).city_name for lat, lng in lat_lngs
))

# Print the city count to confirm sufficient count
len(cities)

### Perform API Calls
* Perform a weather check on each city using a series of successive API calls.
* Include a print log of each city as it's being processed (with the city number and city name).


In [None]:
# Fetch the current weather for every city, logging each one as it is processed.
weather_data = []
count = 0

# NOTE(review): `api_key` is the same name handed to the Census client in the
# setup cell -- confirm it holds an OpenWeatherMap key at this point.
settings = {"units": "metric", "appid": api_key}

print("Beginning Data Retrieval    ")
print("------------------------------")

for count, city in enumerate(cities, start=1):
    try:
        current_weather = owm.get_current(city, **settings)
    except Exception:
        # Best-effort retrieval: skip cities whose lookup fails. Unlike a bare
        # `except:`, this still lets KeyboardInterrupt / SystemExit stop the run.
        print("City not found. Skipping...")
    else:
        print(f"Processing Record {count} of Set {len(cities)} | {city}")
        weather_data.append(current_weather)
        


### Convert Raw Data to DataFrame
* Export the city data into a .csv.
* Display the DataFrame

In [None]:
# Dotted field paths requested from each weather response, listed in the same
# order as the DataFrame columns below.
summary = [
    "name",
    "clouds.all",
    "sys.country",
    "dt",
    "main.humidity",
    "coord.lat",
    "coord.lon",
    "main.temp_max",
    "wind.speed",
]

# Each stored response is called with the field paths to produce one row.
data = [record(*summary) for record in weather_data]

weather_df = pd.DataFrame(
    data,
    columns=[
        "City",
        "Cloudiness",
        "Country",
        "Date",
        "Humidity",
        "Lat",
        "Lng",
        "Max Temp",
        "Wind Speed",
    ],
)
weather_df.head()

In [None]:
# Write the city weather table to CSV with a header row and no index column.
weather_df.to_csv(
    "Cities.csv",
    index=False,
    header=True,
    encoding='utf-8',
)

### Plotting the Data
* Use proper labeling of the plots using plot titles (including date of analysis) and axes labels.
* Save the plotted figures as .pngs.

#### Latitude vs. Temperature Plot

In [None]:
# Scatter of each city's maximum temperature against its latitude.
plt.scatter(
    x=weather_df["Lat"],
    y=weather_df["Max Temp"],
    marker="o",
    edgecolors="black",
)

plt.xlabel("Latitude")
plt.ylabel("Temperature (Celsius)")
plt.title("City Latitude vs. Max Temperature (09/21/2018)")
plt.grid()

# Save before show() so the figure is written while it is still current.
plt.savefig("Max_Temperature.png")

plt.show()

#### Latitude vs. Humidity Plot

In [None]:
# Scatter of each city's humidity against its latitude.
plt.scatter(
    x=weather_df["Lat"],
    y=weather_df["Humidity"],
    marker="o",
    edgecolors="black",
)

plt.xlabel("Latitude")
plt.ylabel("Humidity (%)")
plt.title("City Latitude vs. Humidity (09/21/2018)")
plt.grid()

# Save before show() so the figure is written while it is still current.
plt.savefig("Humidity.png")

plt.show()

#### Latitude vs. Cloudiness Plot

In [None]:
# Scatter of each city's cloudiness against its latitude.
plt.scatter(
    x=weather_df["Lat"],
    y=weather_df["Cloudiness"],
    marker="o",
    edgecolors="black",
)

plt.xlabel("Latitude")
plt.ylabel("Cloudiness (%)")
plt.title("City Latitude vs. Cloudiness (09/21/2018)")
plt.grid()

# Save before show() so the figure is written while it is still current.
plt.savefig("Cloudiness.png")

plt.show()

#### Latitude vs. Wind Speed Plot

In [None]:
# Scatter of each city's wind speed against its latitude.
plt.scatter(weather_df["Lat"], weather_df["Wind Speed"], marker="o", edgecolors="black")

plt.title("City Latitude vs. Wind Speed (09/21/2018)")
plt.xlabel("Latitude")
# The weather data is requested with units="metric", for which OpenWeatherMap
# reports wind speed in metres per second, so the axis is labelled m/s rather
# than mph. The stray closing parenthesis in the old label is also removed.
plt.ylabel("Wind Speed (m/s)")
plt.grid()

# Save before show() so the figure is written while it is still current.
plt.savefig("Wind_Speed.png")

plt.show()

## Three observable trends:
1. City temperatures are higher closer to the equator and decrease as cities get farther from it.
2. Most cities close to the equator have higher humidity (%).
3. Cloudiness and wind speed show no clear trend with latitude.
