# Census Demographics & Restaurant Info

In [1]:
# Dependencies
import matplotlib.pyplot as plt
import gmaps
import numpy as np
import pandas as pd
import requests
import time
import json
from census import Census
from jconfig import (api_key, gkey)
from uszipcode import SearchEngine, SimpleZipcode, Zipcode


# Census API client, authenticated with the key imported from jconfig
# and created with year=2017.
c = Census(api_key, year=2017)

# Define target Market Areas and create DataFrame 

In [2]:
# Load the zip code reference table and discard every row that has a missing value
zip_pd = pd.read_csv("zip.csv").dropna()

# Keep only "Standard" zip codes (this excludes the other types, e.g. unique and P.O. box)
is_standard = zip_pd["Type"] == "Standard"
std_pd = zip_pd.loc[is_standard]

In [3]:
# Filter to the zip codes located in the four target counties
# (Dallas, Tarrant, Collin and Denton).
target_counties = ["Dallas", "Tarrant", "Collin", "Denton"]
county_zip_pd = std_pd.loc[std_pd["County"].isin(target_counties)]

# Extract the zip codes and their city names as two parallel lists
county_zips = county_zip_pd["Zip Code"].tolist()
city_zips = county_zip_pd["City"].tolist()

# Look up latitude and longitude for each zip code.
# The SearchEngine is created once and reused for every lookup instead of
# being re-created on each loop iteration.
search = SearchEngine()
lat = []
long = []
for z in county_zips:
    zipcode = search.by_zipcode(z)
    lat.append(zipcode.lat)
    long.append(zipcode.lng)

# Assemble the per-zip columns
county_zips_dict = {"City": city_zips, "Zip_Codes": county_zips, "Lat": lat, "Lon": long}

# Convert to a DataFrame so it can be merged with the Census data later
county_zip_df = pd.DataFrame(county_zips_dict)
county_zip_df.head()

Unnamed: 0,City,Zip_Codes,Lat,Lon
0,Addison,75001,32.96,-96.84
1,Allen,75002,33.08,-96.61
2,Carrollton,75006,32.95,-96.89
3,Carrollton,75007,33.0,-96.9
4,Celina,75009,33.3,-96.8


In [4]:
# Persist the zip/coordinate table to disk.
# The encoding is set to UTF-8 explicitly to avoid encoding issues later.
county_zip_df.to_csv(
    "CityCounty_zip_data.csv",
    index=False,
    encoding="utf-8",
)

# Create Census DataFrame for Market Areas

In [5]:
# Query the client's acs5 endpoint for every zip code tabulation area
census_fields = ("NAME", "B19013_001E", "B01003_001E", "B01002_001E", "B19301_001E")
census_data = c.acs5.get(census_fields, {'for': 'zip code tabulation area:*'})

# Map the Census variable codes to readable column names
readable_names = {
    "B01003_001E": "Population",
    "B01002_001E": "Median Age",
    "B19013_001E": "Household Income",
    "B19301_001E": "Per Capita Income",
    "NAME": "Name",
    "zip code tabulation area": "Zip_Codes",
}
census_pd = pd.DataFrame(census_data).rename(columns=readable_names)

# Zip codes are converted to numeric so they can be used as the merge key later
census_pd["Zip_Codes"] = pd.to_numeric(census_pd["Zip_Codes"])
census_pd.head()

Unnamed: 0,Median Age,Population,Household Income,Per Capita Income,Name,Zip_Codes
0,38.9,17599.0,11757.0,7041.0,ZCTA5 00601,601
1,40.9,39209.0,16190.0,8978.0,ZCTA5 00602,602
2,40.4,50135.0,16645.0,10897.0,ZCTA5 00603,603
3,42.8,6304.0,13387.0,5960.0,ZCTA5 00606,606
4,41.4,27590.0,18741.0,9266.0,ZCTA5 00610,610


In [6]:
# Persist the Census table to disk.
# The encoding is set to UTF-8 explicitly to avoid encoding issues later.
census_pd.to_csv(
    "census_data.csv",
    index=False,
    encoding="utf-8",
)

In [7]:
# Join the zip/coordinate table with the Census table on the shared zip code column
zip_census_merge = county_zip_df.merge(census_pd, on="Zip_Codes")
zip_census_merge.head()

Unnamed: 0,City,Zip_Codes,Lat,Lon,Median Age,Population,Household Income,Per Capita Income,Name
0,Addison,75001,32.96,-96.84,32.9,14617.0,73578.0,50313.0,ZCTA5 75001
1,Allen,75002,33.08,-96.61,35.7,69688.0,98652.0,39818.0,ZCTA5 75002
2,Carrollton,75006,32.95,-96.89,34.8,50788.0,57415.0,27810.0,ZCTA5 75006
3,Carrollton,75007,33.0,-96.9,39.5,53744.0,82079.0,36289.0,ZCTA5 75007
4,Celina,75009,33.3,-96.8,37.5,10557.0,90717.0,36058.0,ZCTA5 75009


# Population DataFrame 

In [None]:
# Both queries cover every zip code tabulation area
zcta_all = {'for': 'zip code tabulation area:*'}

# Male population data for 15-44yr olds: variables B01001_006E through B01001_014E
male_fields = ("NAME",) + tuple("B01001_{:03d}E".format(n) for n in range(6, 15))
male_age_data = c.acs5.get(male_fields, zcta_all)

# Female population data for 15-44yr olds: variables B01001_030E through B01001_038E
female_fields = ("NAME",) + tuple("B01001_{:03d}E".format(n) for n in range(30, 39))
female_age_data = c.acs5.get(female_fields, zcta_all)

# Convert both results to DataFrames that share the "Zip_Codes" column name
zip_column = {"zip code tabulation area": "Zip_Codes"}
male_pd = pd.DataFrame(male_age_data).rename(columns=zip_column)
female_pd = pd.DataFrame(female_age_data).rename(columns=zip_column)

In [None]:
# Columns holding the individual age brackets that make up the 15-44 range
male_cols = ["B01001_006E", "B01001_007E", "B01001_008E", "B01001_009E", "B01001_010E",
             "B01001_011E", "B01001_012E", "B01001_013E", "B01001_014E"]
female_cols = ["B01001_030E", "B01001_031E", "B01001_032E", "B01001_033E", "B01001_034E",
               "B01001_035E", "B01001_036E", "B01001_037E", "B01001_038E"]

# Total population by gender: add the bracket columns together, left to right
male_pd["Total_Male"] = sum(male_pd[col] for col in male_cols)
female_pd["Total_Female"] = sum(female_pd[col] for col in female_cols)

# Merge the male and female frames on zip code
gender_merge = pd.merge(male_pd, female_pd, on="Zip_Codes")

# New column with the total number of 15-44 yr olds
gender_merge["Pop_15to44"] = gender_merge["Total_Male"] + gender_merge["Total_Female"]

# Keep only the total and the zip code, converting the zip code to numeric.
# `assign` returns a new frame, so the conversion never writes into a slice
# of gender_merge (which is what triggers pandas' SettingWithCopyWarning).
gender_df = gender_merge[["Pop_15to44", "Zip_Codes"]].assign(
    Zip_Codes=lambda frame: pd.to_numeric(frame["Zip_Codes"])
)

# Merge gender_df with zip_census_merge
demographic_df = pd.merge(zip_census_merge, gender_df, on="Zip_Codes")
demographic_df.head(10)

In [None]:
# Build the frame used for mapping: location and population columns only,
# without the rows whose 15-44 population is 0.
# The explicit copy makes newdf independent of demographic_df, so the column
# that a later cell adds to it does not trigger a SettingWithCopyWarning.
map_columns = ['City', 'Lat', 'Lon', 'Pop_15to44', 'Population']
newdf = demographic_df.loc[demographic_df["Pop_15to44"] != 0, map_columns].copy()
newdf.head()

# Create a Heatmap of Population for Market Areas 

In [None]:
# Configure gmaps with the API key (gkey) imported from jconfig
gmaps.configure(api_key=gkey)

In [None]:
# Population scaled down by 10,000.
# NOTE(review): pop_rate is not used by the heatmap cell below, which passes the
# raw Population column as weights — confirm which one is intended.
pop_rate = newdf['Population'] / 10000

# Pair each latitude with its longitude as a (lat, lon) tuple
coord_pair = list(zip(newdf["Lat"], newdf["Lon"]))

# `assign` returns a new frame rather than writing into a filtered slice,
# which avoids pandas' SettingWithCopyWarning.
newdf = newdf.assign(Coordinates=coord_pair)

locations = newdf["Coordinates"]
newdf.head()

In [None]:
# Size and frame of the rendered map widget
figure_layout = dict(
    width='800px',
    height='800px',
    border='1px solid black',
    padding='1px',
)
fig = gmaps.figure(layout=figure_layout, map_type="TERRAIN")

# Heat layer placed at the zip code coordinates and weighted by total population
heat_layer = gmaps.heatmap_layer(
    locations,
    weights=newdf["Population"],
    max_intensity=0.001,
    point_radius=0.05,
    dissipating=False,
)

fig.add_layer(heat_layer)
fig

# Find the Top Zip Codes by Population Density & Household Income 

In [None]:
# Ten zip codes with the highest household income.
# NOTE(review): the name `max` shadows the Python builtin from here on; it is
# kept because the following cells reference it.
sorted_max = zip_census_merge.sort_values(by='Household Income', ascending=False)
max = sorted_max.iloc[:10]
max

In [None]:
# Sort from lowest to highest household income and preview the five lowest rows
sorted_min = zip_census_merge.sort_values(by='Household Income')
sorted_min.head()

In [None]:
# Skip the three lowest-income rows, then keep the next ten.
# NOTE(review): presumably the first three rows hold placeholder or invalid income
# values — confirm, since a fixed row count depends on the data that was fetched.
# NOTE(review): the name `min` shadows the Python builtin from here on.
new_sorted_min = sorted_min.iloc[3:]
min = new_sorted_min.head(10)
min

In [None]:
# Preview the first five of the top-income rows
max.head(n=5)

In [None]:
# Zip codes of the top-income rows, in ranking order
max.loc[:, 'Zip_Codes']

In [None]:
# Bar chart of household income for the top-income zip codes
users = max['Household Income']
x_axis = np.arange(len(users))
tick_locations = list(x_axis)

# Tick labels are read from the same frame that is plotted, so they always
# match the bars even when the underlying data changes.
tick_labels = max['Zip_Codes'].astype(int).astype(str).tolist()

plt.bar(x_axis, users, color='teal', alpha=0.5, align="center")
plt.xticks(tick_locations, tick_labels, rotation=45, ha='right')
plt.title("Top 10 Household Incomes By Zip Code", fontsize=18, fontweight='bold')
plt.xlabel("Zip Codes", fontsize=14)
plt.ylabel("Household Income", fontsize=14)
plt.show()

In [None]:
# Re-rank the top-income zip codes from most to least populated
s_max = max.sort_values(by='Population', ascending=False)
s_max.head(n=10)

In [None]:
# Zip codes of the top-income rows, ordered by population
s_max.loc[:, 'Zip_Codes']

In [None]:
# Bar chart of population for the top-income zip codes, ordered by population.
# NOTE(review): the plotted values are the raw 'Population' column, while the
# title and y-label say "Population Density" — confirm the intended wording.
users = s_max['Population']
x_axis = np.arange(len(users))
tick_locations = list(x_axis)

# Tick labels are read from the same frame that is plotted, so they always
# match the bars even when the underlying data changes.
tick_labels = s_max['Zip_Codes'].astype(int).astype(str).tolist()

plt.bar(x_axis, users, color='darkblue', alpha=0.5, align="center")
plt.xticks(tick_locations, tick_labels, rotation=45, ha='right')
plt.title("Population Density In Top Household Incomes", fontsize=18, fontweight='bold')
plt.xlabel("Zip Codes", fontsize=14)
plt.ylabel("Population Density", fontsize=14)
plt.show()

In [None]:
# Ten zip codes with the largest population (ten rows, matching the income ranking).
# NOTE(review): this rebinds `max`, which until here held the top household
# income rows and also shadows the Python builtin.
sorted_population = zip_census_merge.sort_values('Population', ascending=False)
max = sorted_population.head(10)
max

# Highest Rated Cuisine 

# Count of Nearby Restaurants for the Top Zip Codes 

In [None]:
# Geocoordinates to search around, as "lat,lng"
target_coordinates = "33.12,-96.69"  # top zip code by household income
# NOTE(review): the Places Nearby Search radius is expressed in meters, so a
# value of 1 looks too small to be intentional — confirm the intended distance.
target_radius = 1
target_type = "restaurant"

# Set up a parameters dictionary
params = {
    "location": target_coordinates,
    "radius": target_radius,
    "type": target_type,
    "key": gkey
}

# Base url
base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"

# Run a request using our params dictionary and fail loudly on an HTTP error
response = requests.get(base_url, params=params)
response.raise_for_status()

# Convert response to json
places_count = response.json()

# Number of places in this response: count the entries under "results" rather
# than the characters of the serialized JSON text.
# NOTE(review): Nearby Search responses are paginated, so this counts only the
# first page of results — confirm whether further pages should be followed.
len(places_count.get("results", []))

In [None]:
# Geocoordinates to search around, as "lat,lng"
target_coordinates = "32.7766642,-96.79698789999999"  # coordinates for Dallas, TX
# NOTE(review): the Places Nearby Search radius is expressed in meters, so a
# value of 5 looks too small to be intentional — confirm the intended distance.
target_radius = 5
target_type = "restaurant"

# Set up a parameters dictionary
params = {
    "location": target_coordinates,
    "radius": target_radius,
    "type": target_type,
    "key": gkey
}

# Base url
base_url = "https://maps.googleapis.com/maps/api/place/nearbysearch/json"

# Run a request using our params dictionary and fail loudly on an HTTP error
response = requests.get(base_url, params=params)
response.raise_for_status()

# Convert response to json
places_count = response.json()

# Number of places in this response. This reads `places_count`, the variable
# assigned above (the name `places` is never defined), and counts the entries
# under "results" rather than the characters of the serialized JSON text.
# NOTE(review): Nearby Search responses are paginated, so this counts only the
# first page of results — confirm whether further pages should be followed.
len(places_count.get("results", []))