In [1]:
# Dependencies
import pandas as pd
from sqlalchemy import create_engine
from census import Census

# Import Census API Key and postgresql database password
from config import (api_key, password)

### Use the Census API to get socioeconomic data

In [None]:
# Census API client authenticated with the key imported from config.
# NOTE(review): year=2018 presumably sets the default survey year for every
# query issued through `c` — confirm against the census library docs.
c = Census(api_key, year=2018)

In [None]:
# Query the ACS 5-year endpoint for every ZIP Code Tabulation Area through the
# client `c` (the survey year is whatever that client was constructed with).
# Library documentation: https://github.com/CommerceDataService/census-wrapper
# Variable labels: https://gist.github.com/afhaque/60558290d6efd892351c4b64e5c01e9b

# Census variable code -> readable column label.
# The key order is also the order in which the fields are requested.
census_fields = {
    "NAME": "Name",
    "B19013_001E": "Household Income",
    "B01003_001E": "Population",
    "B01002_001E": "Median Age",
    "B19301_001E": "Per Capita Income",
    "B17001_002E": "Poverty Count",
    "B23025_005E": "Unemployment Count",
}
zcta_query = {'for': 'zip code tabulation area:*'}

census_data = c.acs5.get(tuple(census_fields), zcta_query)

# Load the API response into a DataFrame
census_pd = pd.DataFrame(census_data)

# Relabel the variable codes, and the geography column, with readable names
column_labels = dict(census_fields)
column_labels["zip code tabulation area"] = "zip_code"
census_pd = census_pd.rename(columns=column_labels)

# TODO (not enabled yet): derive, after casting the counts to int,
#   "Poverty Rate"      = 100 * "Poverty Count" / "Population"
#   "Unemployment Rate" = 100 * "Unemployment Count" / "Population"
# then restrict census_pd to the final column set
#   zip_code, Name, Population, Median Age, Household Income,
#   Per Capita Income, Poverty Count, Poverty Rate, Unemployment Rate
# and print len(census_pd) as a sanity check.

census_pd.head()

In [None]:
# Persist the census table as census_data.csv in the working directory.
# encoding="utf-8" is passed explicitly to avoid encoding issues later;
# index=False leaves the DataFrame index out of the file.
census_pd.to_csv("census_data.csv", encoding="utf-8", index=False)

### Extract CSVs into DataFrames

In [2]:
# Restaurant source 1 of 3: load the first Datafiniti export and preview it.
csv_file_one = "data/datafiniti-fast-food-restaurants-across-america/Datafiniti_Fast_Food_Restaurants.csv"
df_one = pd.read_csv(filepath_or_buffer=csv_file_one)
df_one.head(n=5)

Unnamed: 0,id,dateAdded,dateUpdated,address,categories,city,country,keys,latitude,longitude,name,postalCode,province,sourceURLs,websites
0,AVwcmSyZIN2L1WUfmxyw,2015-10-19T23:47:58Z,2018-06-26T03:00:14Z,800 N Canal Blvd,American Restaurant and Fast Food Restaurant,Thibodaux,US,us/la/thibodaux/800ncanalblvd/1780593795,29.814697,-90.814742,SONIC Drive In,70301,LA,https://foursquare.com/v/sonic-drive-in/4b7361...,https://locations.sonicdrivein.com/la/thibodau...
1,AVwcmSyZIN2L1WUfmxyw,2015-10-19T23:47:58Z,2018-06-26T03:00:14Z,800 N Canal Blvd,Fast Food Restaurants,Thibodaux,US,us/la/thibodaux/800ncanalblvd/1780593795,29.814697,-90.814742,SONIC Drive In,70301,LA,https://foursquare.com/v/sonic-drive-in/4b7361...,https://locations.sonicdrivein.com/la/thibodau...
2,AVwcopQoByjofQCxgfVa,2016-03-29T05:06:36Z,2018-06-26T02:59:52Z,206 Wears Valley Rd,Fast Food Restaurant,Pigeon Forge,US,us/tn/pigeonforge/206wearsvalleyrd/-864103396,35.803788,-83.580553,Taco Bell,37863,TN,https://www.yellowpages.com/pigeon-forge-tn/mi...,"http://www.tacobell.com,https://locations.taco..."
3,AVweXN5RByjofQCxxilK,2017-01-03T07:46:11Z,2018-06-26T02:59:51Z,3652 Parkway,Fast Food,Pigeon Forge,US,us/tn/pigeonforge/3652parkway/93075755,35.782339,-83.551408,Arby's,37863,TN,http://www.yellowbook.com/profile/arbys_163389...,"http://www.arbys.com,https://locations.arbys.c..."
4,AWQ6MUvo3-Khe5l_j3SG,2018-06-26T02:59:43Z,2018-06-26T02:59:43Z,2118 Mt Zion Parkway,Fast Food Restaurant,Morrow,US,us/ga/morrow/2118mtzionparkway/1305117222,33.562738,-84.321143,Steak 'n Shake,30260,GA,https://foursquare.com/v/steak-n-shake/4bcf77a...,http://www.steaknshake.com/locations/23851-ste...


In [3]:
# Column dtypes and non-null counts for df_one
# (the saved output shows 15 columns, none with missing values).
df_one.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 15 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   id           10000 non-null  object 
 1   dateAdded    10000 non-null  object 
 2   dateUpdated  10000 non-null  object 
 3   address      10000 non-null  object 
 4   categories   10000 non-null  object 
 5   city         10000 non-null  object 
 6   country      10000 non-null  object 
 7   keys         10000 non-null  object 
 8   latitude     10000 non-null  float64
 9   longitude    10000 non-null  float64
 10  name         10000 non-null  object 
 11  postalCode   10000 non-null  object 
 12  province     10000 non-null  object 
 13  sourceURLs   10000 non-null  object 
 14  websites     10000 non-null  object 
dtypes: float64(2), object(13)
memory usage: 1.1+ MB


In [4]:
# Restaurant source 2 of 3: load the Jun19 Datafiniti export and preview it.
csv_file_two = "data/datafiniti-fast-food-restaurants-across-america/Datafiniti_Fast_Food_Restaurants_Jun19.csv"
df_two = pd.read_csv(filepath_or_buffer=csv_file_two)
df_two.head(n=5)

Unnamed: 0,id,dateAdded,dateUpdated,address,categories,primaryCategories,city,country,keys,latitude,longitude,name,postalCode,province,sourceURLs,websites
0,AWrSh_KgsVYjT2BJAzaH,2019-05-19T23:58:05Z,2019-05-19T23:58:05Z,2555 11th Avenue,"Fast Food Restaurants,Hamburgers and Hot Dogs,...",Accommodation & Food Services,Greeley,US,us/co/greeley/255511thavenue/554191587,40.39629,-104.69699,Carl's Jr.,80631,CO,https://www.yellowpages.com/greeley-co/mip/car...,https://www.carlsjr.com/?utm_source=Yextandutm...
1,AWEKlA-LIxWefVJwxG9B,2018-01-18T18:30:23Z,2019-05-19T23:45:05Z,2513 Highway 6 And 50,"Restaurant,Mexican Restaurants,Fast Food Resta...",Accommodation & Food Services,Grand Junction,US,us/co/grandjunction/2513highway6and50/1550891556,39.08135,-108.58689,Del Taco,81505,CO,http://www.citysearch.com/profile/772076870/gr...,http://www.deltaco.com
2,AWrSfAcYsVYjT2BJAzPt,2019-05-19T23:45:04Z,2019-05-19T23:45:04Z,1125 Patterson Road,"Sandwich Shops,Fast Food Restaurants,Restauran...",Accommodation & Food Services,Grand Junction,US,us/co/grandjunction/1125pattersonroad/-2137447852,39.09148,-108.55411,Which Wich,81506,CO,https://www.yellowpages.com/grand-junction-co/...,http://www.whichwich.com
3,AWrSa3NAQTFama1Xpkbz,2019-05-19T23:26:58Z,2019-05-19T23:26:58Z,3455 N Salida Court,"Fast Food Restaurants,Mexican Restaurants,Rest...",Accommodation & Food Services,Aurora,US,us/co/aurora/3455nsalidacourt/1143321601,39.76369,-104.77671,Chipotle Mexican Grill,80011,CO,https://www.yellowpages.com/aurora-co/mip/chip...,http://www.chipotle.com
4,AWrSaVGzZ4Yw-wtdgcaB,2019-05-19T23:24:38Z,2019-05-19T23:24:38Z,5225 E Colfax Avenue,"Fast Food Restaurants,Mexican Restaurants,Rest...",Accommodation & Food Services,Denver,US,us/co/denver/5225ecolfaxavenue/-864103396,39.74044,-104.92636,Taco Bell,80220,CO,https://www.yellowpages.com/denver-co/mip/taco...,https://locations.tacobell.com/co/denver/5225-...


In [5]:
# Column dtypes and non-null counts for df_two
# (the saved output shows an extra primaryCategories column compared with
# df_one, and websites is the only column with missing values).
df_two.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 16 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   id                 10000 non-null  object 
 1   dateAdded          10000 non-null  object 
 2   dateUpdated        10000 non-null  object 
 3   address            10000 non-null  object 
 4   categories         10000 non-null  object 
 5   primaryCategories  10000 non-null  object 
 6   city               10000 non-null  object 
 7   country            10000 non-null  object 
 8   keys               10000 non-null  object 
 9   latitude           10000 non-null  float64
 10  longitude          10000 non-null  float64
 11  name               10000 non-null  object 
 12  postalCode         10000 non-null  object 
 13  province           10000 non-null  object 
 14  sourceURLs         10000 non-null  object 
 15  websites           9918 non-null   object 
dtypes: float64(2), object(14)

In [6]:
# Restaurant source 3 of 3: load FastFoodRestaurants.csv and preview it.
csv_file_three = "data/datafiniti-fast-food-restaurants-across-america/FastFoodRestaurants.csv"
df_three = pd.read_csv(filepath_or_buffer=csv_file_three)
df_three.head(n=5)

Unnamed: 0,address,city,country,keys,latitude,longitude,name,postalCode,province,websites
0,324 Main St,Massena,US,us/ny/massena/324mainst/-1161002137,44.9213,-74.89021,McDonald's,13662,NY,"http://mcdonalds.com,http://www.mcdonalds.com/..."
1,530 Clinton Ave,Washington Court House,US,us/oh/washingtoncourthouse/530clintonave/-7914...,39.53255,-83.44526,Wendy's,43160,OH,http://www.wendys.com
2,408 Market Square Dr,Maysville,US,us/ky/maysville/408marketsquaredr/1051460804,38.62736,-83.79141,Frisch's Big Boy,41056,KY,"http://www.frischs.com,https://www.frischs.com..."
3,6098 State Highway 37,Massena,US,us/ny/massena/6098statehighway37/-1161002137,44.95008,-74.84553,McDonald's,13662,NY,"http://mcdonalds.com,http://www.mcdonalds.com/..."
4,139 Columbus Rd,Athens,US,us/oh/athens/139columbusrd/990890980,39.35155,-82.09728,OMG! Rotisserie,45701,OH,"http://www.omgrotisserie.com,http://omgrotisse..."


In [7]:
# Column dtypes and non-null counts for df_three
# (the saved output shows 10 columns — no id, date, categories or sourceURLs
# columns — and websites is the only column with missing values).
df_three.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 10000 entries, 0 to 9999
Data columns (total 10 columns):
 #   Column      Non-Null Count  Dtype  
---  ------      --------------  -----  
 0   address     10000 non-null  object 
 1   city        10000 non-null  object 
 2   country     10000 non-null  object 
 3   keys        10000 non-null  object 
 4   latitude    10000 non-null  float64
 5   longitude   10000 non-null  float64
 6   name        10000 non-null  object 
 7   postalCode  10000 non-null  object 
 8   province    10000 non-null  object 
 9   websites    9535 non-null   object 
dtypes: float64(2), object(8)
memory usage: 781.4+ KB


In [8]:
# Row count per province value.
# NOTE(review): the saved output includes the non-state value "Co Spgs"
# (5 rows); it will need cleaning before province is used as a state code.
df_three["province"].value_counts()

CA         676
TX         634
OH         543
FL         471
IN         379
IL         363
NC         358
GA         347
MO         334
KY         332
VA         327
PA         283
NY         269
MI         251
TN         245
SC         238
LA         237
AL         236
WA         209
OK         208
AZ         208
WI         186
IA         166
UT         159
MD         159
CO         158
AR         151
NJ         151
NM         149
MN         148
NV         135
MA         131
OR         114
SD         105
KS         103
ID          99
CT          96
WV          93
NE          92
MS          82
ND          50
ME          44
VT          43
WY          41
HI          40
NH          36
DE          32
MT          25
RI          24
DC          21
AK          14
Co Spgs      5
Name: province, dtype: int64

In [9]:
# Number of distinct province values (the count includes the stray
# "Co Spgs" entry seen in the value counts).
df_three["province"].nunique()

52

In [10]:
# Number of distinct postalCode strings in df_three.
df_three["postalCode"].nunique()

5289

In [11]:
# Row count per postalCode.
# NOTE(review): the saved output mixes 5-digit codes with ZIP+4 values such as
# "75402-7208"; presumably these must be normalised to 5 digits before joining
# to the census zip_code column — confirm in the transform step.
df_three["postalCode"].value_counts()

57701         23
41042         19
45069         16
99336         15
23185         15
              ..
68005          1
75402-7208     1
73134-8021     1
38916          1
75657          1
Name: postalCode, Length: 5289, dtype: int64

In [12]:
# Spot-check one chain: every row whose name is exactly "Carl's Jr."
df_three[df_three["name"]=="Carl's Jr."]

Unnamed: 0,address,city,country,keys,latitude,longitude,name,postalCode,province,websites
623,4065 Cameron Park Dr,Shingle Springs,US,us/ca/shinglesprings/4065cameronparkdr/554191587,38.657300,-120.968900,Carl's Jr.,95682,CA,http://www.carlsjr.com
693,2300 Grass Valley Hwy,Auburn,US,us/ca/auburn/2300grassvalleyhwy/554191587,38.935046,-121.092638,Carl's Jr.,95603,CA,http://www.carlsjr.com
1961,2600 Main St NW,Los Lunas,US,us/nm/loslunas/2600mainstnw/554191587,34.814754,-106.762205,Carl's Jr.,87031,NM,http://www.carlsjr.com
2027,22667 NE Glisan St,Troutdale,US,us/or/troutdale/22667neglisanst/554191587,45.528200,-122.430300,Carl's Jr.,97060,OR,http://www.carlsjr.com
2081,1850 S Muskogee Ave,Tahlequah,US,us/ok/tahlequah/1850smuskogeeave/554191587,35.892136,-94.977680,Carl's Jr.,74464,OK,"http://www.carlsjr.com,http://api.citygridmedi..."
...,...,...,...,...,...,...,...,...,...,...
9728,1900 W Lincoln Ave,Anaheim,US,us/ca/anaheim/1900wlincolnave/554191587,33.832435,-117.950142,Carl's Jr.,92801,CA,"http://www.carlsjr.com,http://www.carlsjr.com/"
9741,5795 S Eastern Ave,Las Vegas,US,us/nv/lasvegas/5795seasternave/554191587,36.084460,-115.118790,Carl's Jr.,89119,NV,"http://carlsjr.com,http://carlsjr.com/"
9893,20550 Mission Blvd,Hayward,US,us/ca/hayward/20550missionblvd/554191587,37.685595,-122.100019,Carl's Jr.,94541,CA,"http://carlsjr.com,http://www.carlsjr.com/,htt..."
9909,805 W Broadway St,Idaho Falls,US,us/id/idahofalls/805wbroadwayst/554191587,43.494190,-112.048650,Carl's Jr.,83402-3356,ID,http://www.carlsjr.com


In [None]:
# Raw Census API URL for the 2018 ACS 5-year B19013 group across all ZCTAs.
# NOTE(review): `url` is not used by any visible cell and carries no API key —
# presumably a scratch reference; confirm whether it is still needed.
url = "https://api.census.gov/data/2018/acs/acs5?get=NAME,group(B19013)&for=zip%20code%20tabulation%20area:*"

### Transform No 1 DataFrame

### Transform No 2 DataFrame

### Create database connection

### Load DataFrames into database