In [1]:
import pandas as pd
import sqlalchemy
from sqlalchemy import create_engine, MetaData
from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, Integer, String, Numeric, Text, Float, ForeignKey
from sqlalchemy.orm import sessionmaker, relationship

In [2]:
# SQLAlchemy URL of the SQLite database file used throughout this notebook.
DATABASE_URL = "sqlite:///census_zips.sqlite"

# Engine through which every later cell talks to that database.
engine = create_engine(DATABASE_URL)

In [3]:
# Declare the declarative base class. The ORM models in this notebook subclass
# it, and its shared `metadata` is what `create_all` is later called on.
Base = declarative_base()

In [4]:
# Define the ORM class for 'census_pop', which holds census population data
# for 2010 through 2016 across all US counties.
class Census_Pop(Base):
    """ORM mapping for the ``census_pop`` table.

    Each row carries a state name, a county name and one population column
    per year from 2010 to 2016.

    NOTE(review): the sample row printed later in the notebook has
    state == county == 'Alabama', so the data appears to contain state-level
    rows as well as county rows -- confirm against the source CSV.
    """
    __tablename__ = "census_pop"
    # Integer primary key.
    id = Column(Integer, primary_key=True)
    state = Column(Text)
    county = Column(Text)
    # Yearly population figures, stored as floats (the sample output shows
    # values such as 4785492.0).
    pop_2010 = Column(Float)
    pop_2011 = Column(Float)
    pop_2012 = Column(Float)
    pop_2013 = Column(Float)
    pop_2014 = Column(Float)
    pop_2015 = Column(Float)
    pop_2016 = Column(Float)
    
    def __repr__(self):
        """Return a short debugging string with the row id and county name."""
        return f"id={self.id}, name={self.county}"

In [5]:
# Define the ORM class for 'zip_to_lat', which maps US standard zip codes to
# their latitude/longitude.
class Zip_to_Lat(Base):
    """ORM mapping for the ``zip_to_lat`` table.

    Each row carries a zip code together with a latitude and a longitude.
    """
    __tablename__ = "zip_to_lat"
    # Integer primary key.
    id = Column(Integer, primary_key=True)
    # NOTE(review): zip codes are stored as Float (the sample output shows
    # 705.0), which cannot retain the leading zeros of a five-digit ZIP.
    # Consider a Text column if the codes are ever displayed or joined as
    # strings -- confirm with consumers before changing the schema.
    zip_code = Column(Float)
    lat = Column(Float)
    lon = Column(Float)
    
    def __repr__(self):
        """Return a short debugging string with the row id and zip code.

        NOTE(review): the zip code is printed under the label ``name=``.
        """
        return f"id={self.id}, name={self.zip_code}"

In [6]:
# Create the tables for every model registered on Base (census_pop and
# zip_to_lat) in the database behind `engine`.
# NOTE(review): create_all is expected to skip tables that already exist, so
# it will not alter the schema of an existing census_zips.sqlite -- confirm.
Base.metadata.create_all(engine)

In [7]:
# Verify that the tables were created.
# Uses the inspector API instead of Engine.table_names(), which is deprecated
# in SQLAlchemy 1.4 and removed in 2.0; the result is the same list of names.
sqlalchemy.inspect(engine).get_table_names()

['census_pop', 'zip_to_lat']

In [8]:
# Function to bulk-insert the contents of a CSV file into a SQL table.
def populate_table(engine, table, csvfile):
    """Replace every row of ``table`` with the records read from ``csvfile``.

    The delete and the insert run inside one transaction on one connection:
    on success the change is committed, on any error it is rolled back so the
    table keeps its previous contents, and the connection is always released.

    Args:
        engine: SQLAlchemy engine for the target database.
        table: Table object to refill (e.g. ``Model.__table__``); its
            ``delete()`` and ``insert()`` constructs are executed.
        csvfile: Path or file-like object accepted by ``pandas.read_csv``.
            The CSV column headers become the insert parameter names, so
            they presumably have to match the table's column names --
            verify against the CSV files.

    Returns:
        None.
    """
    # Parse the CSV before touching the database, so an unreadable file can
    # neither leave a connection open nor empty the table.
    records = pd.read_csv(csvfile).to_dict(orient="records")

    # engine.begin() yields a connection inside a transaction; leaving the
    # block commits (or rolls back on an exception) and closes the connection.
    with engine.begin() as conn:
        # Remove rows left over from a previous run so the load is repeatable.
        conn.execute(table.delete())
        # Skip the insert when the CSV has no data rows: there is nothing to
        # write, and an empty parameter list should not be sent to execute().
        if records:
            # Push the whole dataset in a single executemany-style call.
            conn.execute(table.insert(), records)

In [9]:
# Load each ORM table from its CSV file, in this order.
csv_sources = [
    (Census_Pop.__table__, 'Resources/census_pop_2010_2016_county.csv'),
    (Zip_to_Lat.__table__, 'Resources/zip_codes_to_latlon.csv'),
]
for orm_table, csv_path in csv_sources:
    populate_table(engine, orm_table, csv_path)

In [11]:
# Basic query to validate that the census data is loaded in the db.
# Runs on an explicit connection with a text() construct, because
# Engine.execute() and raw-string SQL are deprecated in SQLAlchemy 1.4 and
# removed in 2.0; the `with` block also closes the connection afterwards.
with engine.connect() as conn:
    census_sample = conn.execute(
        sqlalchemy.text("SELECT * FROM census_pop LIMIT 1")
    ).fetchall()
census_sample

[(1, 'Alabama', 'Alabama', 4785492.0, 4799918.0, 4815960.0, 4829479.0, 4843214.0, 4853875.0, 4863300.0)]

In [12]:
# Basic query to validate that the zip code data is loaded in the db.
# Runs on an explicit connection with a text() construct, because
# Engine.execute() and raw-string SQL are deprecated in SQLAlchemy 1.4 and
# removed in 2.0; the `with` block also closes the connection afterwards.
with engine.connect() as conn:
    zip_sample = conn.execute(
        sqlalchemy.text("SELECT * FROM zip_to_lat LIMIT 1")
    ).fetchall()
zip_sample

[(1, 705.0, 18.14, -66.26)]