In [1]:
# Dependencies
import json
from pathlib import Path

import pandas as pd
import requests
from census import Census
# Incorporated citipy to determine city based on latitude and longitude
from citipy import citipy
from uszipcode import SearchEngine

from api_keys import api_key
from api_keys import census_key

In [2]:
# Query the ACS 5-year endpoint (year=2015) for every zip code tabulation
# area, then keep only the area identifier, renamed to "Zipcode". The value of
# the requested variable itself is not carried forward.
c = Census(census_key, year=2015)
census_data = c.acs5.get(
    "B19013_001E",
    {'for': 'zip code tabulation area:*'},
)
census_df = (
    pd.DataFrame(census_data)[['zip code tabulation area']]
    .rename(columns={'zip code tabulation area': 'Zipcode'})
)

In [3]:
# Preview the frame built from the census response; at this point it holds a
# single "Zipcode" column.
census_df

Unnamed: 0,Zipcode
0,12810
1,12811
2,12812
3,12814
4,12815
...,...
33115,71404
33116,28735
33117,34139
33118,40007


In [4]:
# Sample uszipcode lookup for a single zipcode, to inspect which fields
# (city, county, state, lat, lng, ...) the result object exposes before the
# lookup is run over every zipcode.
search = SearchEngine()
results = search.by_zipcode("92122")
# Last expression of the cell: displays the lookup result.
results

SimpleZipcode(zipcode='92122', zipcode_type='Standard', major_city='San Diego', post_office_city='San Diego, CA', common_city_list=['San Diego'], county='San Diego County', state='CA', lat=32.86, lng=-117.2, timezone='Pacific', radius_in_miles=2.0, area_code_list=['619', '760', '858'], population=43728, population_density=2745.0, land_area_in_sqmi=15.93, water_area_in_sqmi=0.01, housing_units=21058, occupied_housing_units=19725, median_home_value=576200, median_household_income=65871, bounds_west=-117.23664, bounds_east=-117.107508, bounds_north=32.880848, bounds_south=32.837185)

In [5]:
# Enrich census_df in place with City, County, Lat, Lng and State columns,
# looked up for each zipcode through uszipcode.
#
# A single SearchEngine is created up front and reused for every row; the
# earlier version constructed a new engine on each loop iteration.
search = SearchEngine()

# Output column -> attribute read from the lookup result (order = column order).
result_fields = {"City": "city", "County": "county", "Lat": "lat",
                 "Lng": "lng", "State": "state"}
looked_up = {column: [] for column in result_fields}

for zipcode in census_df["Zipcode"]:
    # Look up the zero-padded string form; converting through int() first
    # strips leading zeros (e.g. "00610" -> 610).
    results = search.by_zipcode(str(zipcode).zfill(5))
    try:
        row_values = [getattr(results, attr) for attr in result_fields.values()]
    except AttributeError:
        # A missing attribute or a None result raises AttributeError, which
        # `except (KeyError, IndexError)` never caught. The row is kept with
        # blank values, matching the '' defaults used previously.
        print(f"Missing field/result for {zipcode}... skipping.")
        row_values = [""] * len(result_fields)
    for column, value in zip(result_fields, row_values):
        looked_up[column].append(value)

# Assign whole columns at once rather than writing cell by cell with .loc.
for column, column_values in looked_up.items():
    census_df[column] = column_values

In [6]:
# Preview census_df now that the City, County, Lat, Lng and State columns
# have been filled in.
census_df

Unnamed: 0,Zipcode,City,County,Lat,Lng,State
0,12810,Athol,Warren County,43.48,-73.88,NY
1,12811,Bakers Mills,Warren County,43.622,-74.035,NY
2,12812,Blue Mountain Lake,Hamilton County,43.9,-74.3,NY
3,12814,Bolton Landing,Warren County,43.6,-73.6,NY
4,12815,Brant Lake,Warren County,43.69,-73.71,NY
...,...,...,...,...,...,...
33115,71404,Atlanta,Winn Parish,31.8,-92.7,LA
33116,28735,Gerton,Henderson County,35.48,-82.36,NC
33117,34139,Everglades City,Collier County,25.86,-81.39,FL
33118,40007,Bethlehem,Henry County,38.45,-85.02,KY


In [7]:
# Collapse to one row per (County, State) pair, keeping the first zipcode
# encountered for that pair, and turn the group keys back into columns.
groupby_data = (
    census_df
    .groupby(["County", "State"])
    .agg({'Zipcode': lambda zips: zips.iloc[0]})
    .reset_index()
)


In [8]:
# Preview the one-zipcode-per-(County, State) frame.
groupby_data

Unnamed: 0,County,State,Zipcode
0,,OK,73047
1,Aasco Municipio,PR,00610
2,Abbeville County,SC,29620
3,Acadia Parish,LA,70516
4,Accomack County,VA,23301
...,...,...,...
3217,Yuma County,AZ,85333
3218,Yuma County,CO,80727
3219,Zapata County,TX,78067
3220,Zavala County,TX,78829


In [9]:
# Base URL and unit system for the OpenWeatherMap current-weather endpoint.
# HTTPS is used so the API key passed as the `appid` query parameter is not
# sent in cleartext. The trailing "?" is kept for any code that concatenates
# query strings onto this value.
url = "https://api.openweathermap.org/data/2.5/weather?"
units = 'metric'


In [10]:
# Sample weather API call for a single zipcode, to inspect the response shape.
sdzip = '00610'
response = requests.get(url, params={'appid': api_key,
                                     'zip': f"{sdzip},us", 'units': units})
weather = response.json()
# Mask the API key before printing: the full request URL embeds it as the
# `appid` parameter, and printed output is saved with the notebook.
# NOTE(review): assumes the key contains only URL-safe characters, so it
# appears verbatim in response.url.
print(response.url.replace(api_key, "<API_KEY>"))
print(json.dumps(weather, indent=4))

http://api.openweathermap.org/data/2.5/weather?appid=<REDACTED>&zip=00610%2Cus&units=metric
{
    "coord": {
        "lon": -67.14,
        "lat": 18.28
    },
    "weather": [
        {
            "id": 802,
            "main": "Clouds",
            "description": "scattered clouds",
            "icon": "03n"
        }
    ],
    "base": "stations",
    "main": {
        "temp": 25.15,
        "feels_like": 26.02,
        "temp_min": 23.55,
        "temp_max": 26.77,
        "pressure": 1014,
        "humidity": 88
    },
    "visibility": 10000,
    "wind": {
        "speed": 0.89,
        "deg": 58,
        "gust": 2.24
    },
    "clouds": {
        "all": 31
    },
    "dt": 1637456538,
    "sys": {
        "type": 2,
        "id": 2037648,
        "country": "US",
        "sunrise": 1637404678,
        "sunset": 1637445053
    },
    "timezone": -14400,
    "id": 0,
    "name": "Anasco",
    "cod": 200
}


In [11]:
# Fetch current weather for the representative zipcode of each county and
# collect the fields of interest in parallel lists (one entry per zipcode
# that returned a complete record).
us_zipcodes = groupby_data["Zipcode"].to_list()

name = []
temp_max = []
temp_min = []
description = []
feels_like = []
lon = []
lat = []
sunrise = []
sunset = []
us_zip = []

for uszip in us_zipcodes:
    # The timeout keeps one stalled connection from hanging the whole loop.
    weather = requests.get(url, params={'appid': api_key,
                                        'zip': f"{uszip},us", 'units': units},
                           timeout=30).json()
    try:
        # Read every field BEFORE appending anything. Appending as we go left
        # the lists with different lengths whenever a field was missing, which
        # breaks building a DataFrame from them.
        main = weather['main']
        coord = weather['coord']
        sun = weather['sys']
        record = (
            weather['name'],
            main['temp_max'],
            main['temp_min'],
            weather['weather'][0]['description'],
            main['feels_like'],
            coord['lon'],
            coord['lat'],
            sun['sunrise'],
            sun['sunset'],
        )
    except (KeyError, IndexError, TypeError):
        # Report the zipcode that failed (not the whole list of successes).
        print(f'{uszip} does not exist')
        continue

    us_zip.append(uszip)
    name.append(record[0])
    temp_max.append(record[1])
    temp_min.append(record[2])
    description.append(record[3])
    feels_like.append(record[4])
    lon.append(record[5])
    lat.append(record[6])
    sunrise.append(record[7])
    sunset.append(record[8])


In [12]:
# Assemble the per-zipcode weather lists into a single DataFrame, one column
# per list, in the order given below.
weather_data = pd.DataFrame(dict([
    ("Zipcode", us_zip),
    ('Name', name),
    ('Max_temp', temp_max),
    ("Min_temp", temp_min),
    ("Description", description),
    ("Feels_like", feels_like),
    ('Sunrise', sunrise),
    ("Sunset", sunset),
    ("Lon", lon),
    ("Lat", lat),
]))

In [39]:
# Inner-join the county table with the weather table on their shared
# "Zipcode" column, then display the result.
final_weather = groupby_data.merge(weather_data, on='Zipcode', how='inner')
final_weather

Unnamed: 0,County,State,Zipcode,Name,Max_temp,Min_temp,Description,Feels_like,Sunrise,Sunset,Lon,Lat
0,,OK,73047,Hinton,14.89,11.07,overcast clouds,11.36,1637414050,1637450655,-98.3313,35.4675
1,Aasco Municipio,PR,00610,Anasco,26.77,23.55,scattered clouds,26.02,1637404678,1637445053,-67.1400,18.2800
2,Abbeville County,SC,29620,Abbeville,7.47,3.32,overcast clouds,2.19,1637410053,1637446993,-82.3785,34.1819
3,Acadia Parish,LA,70516,Branch,18.32,14.06,clear sky,15.74,1637411983,1637449848,-92.3459,30.3446
4,Accomack County,VA,23301,Accomac,7.93,2.97,overcast clouds,4.33,1637408913,1637444918,-75.6803,37.7159
...,...,...,...,...,...,...,...,...,...,...,...,...
3217,Yuma County,AZ,85333,Dateland,23.93,23.93,overcast clouds,22.90,1637417355,1637454614,-113.4631,32.8679
3218,Yuma County,CO,80727,Eckley,7.60,5.50,overcast clouds,3.19,1637415696,1637451002,-102.4828,40.1138
3219,Zapata County,TX,78067,San Ygnacio,23.94,22.08,clear sky,23.49,1637413302,1637451878,-99.3202,27.1583
3220,Zavala County,TX,78829,Batesville,18.79,17.43,clear sky,18.20,1637413567,1637451752,-99.6115,28.9286


In [40]:
# Drop rows whose county name is blank.
# Filtering on the value, rather than dropping whichever row is first, leaves
# valid rows alone when no blank county is present and makes re-running this
# cell a no-op. The original index labels are preserved.
has_county = final_weather["County"].fillna("").str.strip() != ""
# .copy() so later column assignments act on an independent frame.
final_weather = final_weather.loc[has_county].copy()

In [43]:
# Remove a trailing "County" word from the county names.
# The pattern is anchored to the end of the string and includes the
# whitespace before it, so "Abbeville County" becomes "Abbeville" (no trailing
# space) and a name that merely contains "County" elsewhere is not truncated.
# Names without the suffix (e.g. "Acadia Parish") are left unchanged.
final_weather["County"] = final_weather["County"].str.replace(
    r"\s+County\s*$", "", regex=True
)

In [44]:
# Preview final_weather after the blank-county row was dropped and the
# "County" suffix was stripped.
final_weather

Unnamed: 0,County,State,Zipcode,Name,Max_temp,Min_temp,Description,Feels_like,Sunrise,Sunset,Lon,Lat
1,Aasco Municipio,PR,00610,Anasco,26.77,23.55,scattered clouds,26.02,1637404678,1637445053,-67.1400,18.2800
2,Abbeville,SC,29620,Abbeville,7.47,3.32,overcast clouds,2.19,1637410053,1637446993,-82.3785,34.1819
3,Acadia Parish,LA,70516,Branch,18.32,14.06,clear sky,15.74,1637411983,1637449848,-92.3459,30.3446
4,Accomack,VA,23301,Accomac,7.93,2.97,overcast clouds,4.33,1637408913,1637444918,-75.6803,37.7159
5,Ada,ID,83616,Eagle,6.29,2.65,clear sky,3.59,1637419602,1637453759,-116.3620,43.7069
...,...,...,...,...,...,...,...,...,...,...,...,...
3217,Yuma,AZ,85333,Dateland,23.93,23.93,overcast clouds,22.90,1637417355,1637454614,-113.4631,32.8679
3218,Yuma,CO,80727,Eckley,7.60,5.50,overcast clouds,3.19,1637415696,1637451002,-102.4828,40.1138
3219,Zapata,TX,78067,San Ygnacio,23.94,22.08,clear sky,23.49,1637413302,1637451878,-99.3202,27.1583
3220,Zavala,TX,78829,Batesville,18.79,17.43,clear sky,18.20,1637413567,1637451752,-99.6115,28.9286


In [46]:
# Save the cleaned weather table as CSV in the cleaned_data folder.
# The folder is created first if needed, so the write does not fail on a
# fresh checkout where it does not exist yet.
output_path = Path("cleaned_data/final_weather_data.csv")
output_path.parent.mkdir(parents=True, exist_ok=True)
final_weather.to_csv(output_path, encoding="utf-8", index=False)