In [1]:
import pandas as pd

# SQL Alchemy
from sqlalchemy import create_engine

# Import password authentication for the postgresql database
from config import password

### Connect to local database

In [2]:
# Create Engine and connection
engine = create_engine(f"postgresql://postgres:{password}@localhost:5432/fast_food_census_db")

In [3]:
# Check for tables
engine.table_names()

['zip', 'zip_zcta', 'census', 'restaurant', 'restaurant_address']

### Use pandas to load csv-converted dataframes into database

In [4]:
census_df = pd.read_csv("../02_transform_census/clean_census.csv", dtype={'zcta': 'str'})
census_df.head()

Unnamed: 0,zcta,population,median_age,median_household_income,per_capita_income,poverty_count,unemployment_count
0,601,17242.0,40.5,13092.0,6999.0,10772.0,2316.0
1,602,38442.0,42.3,16358.0,9277.0,19611.0,1927.0
2,603,48814.0,41.1,16603.0,11307.0,24337.0,3124.0
3,606,6437.0,43.3,12832.0,5943.0,4163.0,230.0
4,610,27073.0,42.1,19309.0,10220.0,11724.0,1290.0


In [5]:
census_df.to_sql(name='census', con=engine, if_exists="append", index=False)

In [6]:
zip_code_df = pd.read_csv("../02_transform_restaurant/final_zip_code.csv", dtype='str')
zip_code_df.head()

Unnamed: 0,zip_code,city,state
0,501,Holtsville,NY
1,544,Holtsville,NY
2,601,Adjuntas,PR
3,602,Aguada,PR
4,603,Aguadilla,PR


In [7]:
zip_code_df.to_sql(name='zip', con=engine, if_exists="append", index=False)

In [8]:
zcta_df = pd.read_csv("../02_transform_zip_zcta/zcta.csv", dtype='str')
zcta_df.head()

Unnamed: 0,zip_code,zcta
0,501,11742
1,544,11742
2,601,601
3,602,602
4,603,603


In [9]:
zcta_df.to_sql(name="zip_zcta", con=engine, if_exists="append", index=False)

In [10]:
restaurant_df = pd.read_csv("../02_transform_restaurant/restaurant_id.csv")
restaurant_df.head()

Unnamed: 0,restaurant_id,restaurant_name
0,1,SONIC Drive In
1,2,Taco Bell
2,3,Arby's
3,4,Steak 'n Shake
4,5,Wendy's


In [11]:
restaurant_df.to_sql(name='restaurant', con=engine, if_exists="append", index=False)

In [12]:
restaurant_address_df = pd.read_csv("../02_transform_restaurant/restaurant_address.csv", dtype={'zip_code': 'str'})
restaurant_address_df.head()

Unnamed: 0,restaurant_id,street_no,street_name,zip_code
0,1,800,N Canal Blvd,70301
1,1,124,John R Rd,48083
2,1,909,N Wood,75644
3,1,97,Gateway Blvd,82901
4,1,6557,S Staples St,78413


In [13]:
restaurant_address_df.to_sql(name='restaurant_address', con=engine, if_exists="append", index=False)

### Confirm data has been added by querying the tables

In [14]:
pd.read_sql_query('SELECT * FROM census', con=engine).head()

Unnamed: 0,zcta,population,median_age,median_household_income,per_capita_income,poverty_count,unemployment_count
0,601,17242.0,40.5,13092.0,6999.0,10772.0,2316.0
1,602,38442.0,42.3,16358.0,9277.0,19611.0,1927.0
2,603,48814.0,41.1,16603.0,11307.0,24337.0,3124.0
3,606,6437.0,43.3,12832.0,5943.0,4163.0,230.0
4,610,27073.0,42.1,19309.0,10220.0,11724.0,1290.0


In [15]:
pd.read_sql_query('SELECT * FROM zip_zcta', con=engine).head()

Unnamed: 0,zip_code,zcta
0,501,11742
1,544,11742
2,601,601
3,602,602
4,603,603


In [16]:
pd.read_sql_query('SELECT * FROM zip', con=engine).head()

Unnamed: 0,zip_code,city,state
0,501,Holtsville,NY
1,544,Holtsville,NY
2,601,Adjuntas,PR
3,602,Aguada,PR
4,603,Aguadilla,PR


In [17]:
pd.read_sql_query('SELECT * FROM restaurant_address', con=engine).head()

Unnamed: 0,restaurant_address_id,restaurant_id,street_no,street_name,zip_code
0,1,1,800,N Canal Blvd,70301
1,2,1,124,John R Rd,48083
2,3,1,909,N Wood,75644
3,4,1,97,Gateway Blvd,82901
4,5,1,6557,S Staples St,78413


In [18]:
pd.read_sql_query('SELECT * FROM restaurant', con=engine).head()

Unnamed: 0,restaurant_id,restaurant_name
0,1,SONIC Drive In
1,2,Taco Bell
2,3,Arby's
3,4,Steak 'n Shake
4,5,Wendy's


#### Top zip_code with the most number of fast food restaurants and related census

In [19]:
most_restaurants = pd.read_sql_query(\
                       'SELECT\
                            COUNT(restaurant_id) AS num_restaurants,\
                            z.zip_code, z.city, z.state,\
                            cs.zcta, cs.population, cs.median_age, cs.median_household_income,\
                            cs.per_capita_income, cs.poverty_count, cs.unemployment_count\
                        FROM restaurant_address AS ra\
                        INNER JOIN zip AS z\
                            ON z.zip_code = ra.zip_code\
                        INNER JOIN zip_zcta AS zz\
                            ON zz.zip_code = z.zip_code\
                        INNER JOIN census AS cs\
                            ON cs.zcta = zz.zcta\
                        GROUP BY z.zip_code, z.city, z.state,\
                            cs.zcta, cs.population, cs.median_age,\
                            cs.median_household_income, cs.per_capita_income,\
                            cs.poverty_count, cs.unemployment_count\
                        ORDER BY num_restaurants DESC\
                        LIMIT 10',\
                       con=engine)

#### Top zip_code with the least number of fast food restaurants and related census

In [20]:
least_restaurant = pd.read_sql_query(\
                       'SELECT\
                            COUNT(restaurant_id) AS num_restaurants,\
                            z.zip_code, z.city, z.state,\
                            cs.zcta, cs.population, cs.median_age, cs.median_household_income,\
                            cs.per_capita_income, cs.poverty_count, cs.unemployment_count\
                        FROM restaurant_address AS ra\
                        INNER JOIN zip AS z\
                            ON z.zip_code = ra.zip_code\
                        INNER JOIN zip_zcta AS zz\
                            ON zz.zip_code = z.zip_code\
                        INNER JOIN census AS cs\
                            ON cs.zcta = zz.zcta\
                        GROUP BY z.zip_code, z.city, z.state,\
                            cs.zcta, cs.population, cs.median_age,\
                            cs.median_household_income, cs.per_capita_income,\
                            cs.poverty_count, cs.unemployment_count\
                        ORDER BY num_restaurants ASC\
                        LIMIT 10',\
                    con=engine)

> Comparing some census data at the zcta with the most and least number fast food restaurants

In [21]:
most_restaurants

Unnamed: 0,num_restaurants,zip_code,city,state,zcta,population,median_age,median_household_income,per_capita_income,poverty_count,unemployment_count
0,9,32809,Orlando,FL,32809,28258.0,35.4,43919.0,21304.0,6416.0,1126.0
1,8,32810,Orlando,FL,32810,39384.0,32.8,48727.0,21728.0,7671.0,1576.0
2,8,75150,Mesquite,TX,75150,62452.0,32.2,52439.0,23139.0,8351.0,1793.0
3,8,43026,Hilliard,OH,43026,62169.0,35.3,86032.0,39083.0,4483.0,982.0
4,8,92335,Fontana,CA,92335,99284.0,29.0,50730.0,16138.0,19875.0,4565.0
5,8,30606,Athens,GA,30606,43716.0,33.0,48890.0,32344.0,10405.0,1572.0
6,8,89103,Las Vegas,NV,89103,52149.0,37.9,40261.0,23287.0,8294.0,1920.0
7,8,54701,Eau Claire,WI,54701,40130.0,34.6,56810.0,31100.0,4965.0,802.0
8,7,29621,Anderson,SC,29621,40931.0,44.0,56887.0,31854.0,4061.0,1115.0
9,7,44256,Medina,OH,44256,62916.0,42.2,78924.0,38406.0,4009.0,1006.0


In [22]:
least_restaurant

Unnamed: 0,num_restaurants,zip_code,city,state,zcta,population,median_age,median_household_income,per_capita_income,poverty_count,unemployment_count
0,1,1105,Springfield,MA,1105,11957.0,31.1,19921.0,13698.0,5996.0,645.0
1,1,1001,Agawam,MA,1001,17621.0,46.1,62144.0,35135.0,1513.0,314.0
2,1,1027,Easthampton,MA,1027,17848.0,45.3,62772.0,37036.0,1597.0,590.0
3,1,1013,Chicopee,MA,1013,23065.0,35.5,40594.0,23656.0,4603.0,825.0
4,1,1060,Northampton,MA,1060,15779.0,35.0,60020.0,36203.0,2280.0,498.0
5,1,1073,Southampton,MA,1073,6140.0,45.7,93854.0,46378.0,212.0,174.0
6,1,1085,Westfield,MA,1085,41942.0,39.5,64026.0,31563.0,2806.0,1223.0
7,1,1089,West Springfield,MA,1089,28666.0,38.9,51472.0,31135.0,3329.0,984.0
8,1,1040,Holyoke,MA,1040,40376.0,34.9,40656.0,23385.0,11630.0,1549.0
9,1,1107,Springfield,MA,1107,11582.0,27.0,27131.0,14816.0,5309.0,1046.0


#### Restaurant brands with the most number of stores in the dataset

In [23]:
pd.read_sql_query(\
                'SELECT \
                  r.restaurant_name,\
                  COUNT(ra.restaurant_id) AS num_restaurants\
                 FROM restaurant_address AS ra\
                 INNER JOIN restaurant AS r\
                 ON ra.restaurant_id = r.restaurant_id\
                 GROUP BY r.restaurant_name\
                 ORDER BY num_restaurants DESC\
                 LIMIT 10',\
                 con=engine)

Unnamed: 0,restaurant_name,num_restaurants
0,McDonald's,1729
1,Taco Bell,933
2,Burger King,768
3,Subway,766
4,Arby's,585
5,Wendy's,580
6,Jack in the Box,315
7,Pizza Hut,228
8,Domino's Pizza,213
9,Dairy Queen,207
