# Census Case Study 

https://www.datacamp.com/courses/introduction-to-relational-databases-in-python

## Setup the Engine and MetaData

In [1]:
# Import create_engine, MetaData
from sqlalchemy import MetaData, create_engine

# Define an engine to connect to chapter5.sqlite: engine
engine = create_engine('mysql+pymysql://root:password@localhost:8880/datacamp_cs')

# Initialize MetaData: metadata
metadata = MetaData()

# Use the .table_names() method on the engine to print the table names
print(engine.table_names())

[]


## Create the Table to the Database

In [2]:
# Import Table, Column, String, and Integer
from sqlalchemy import Table, Column, String, Integer

# Build a census table: census
census = Table('census', metadata,
               Column('state', String(30)),
               Column('sex', String(1)),
               Column('age', Integer()),
               Column('pop2000', Integer()),
               Column('pop2008', Integer()))

# Create the table in the database
metadata.create_all(engine)

In [4]:
# Use the .table_names() method on the engine to print the table names
print(engine.table_names())

['census']


As you see above, the census table is created.

## Reading the Data from the CSV

In [5]:
# Import the csv module
import csv

# Create the csvfile and csv_reader
csvfile = open('census.csv', 'r')
csv_reader = csv.reader(csvfile)


# Create an empty list: values_list
values_list = []

# Iterate over the rows
for row in csv_reader:
    # Create a dictionary with the values
    data = {'state': row[0], 'sex': row[1], 'age': row[2], 'pop2000': row[3],
            'pop2008': row[4]}
    # Append the dictionary to the values list
    values_list.append(data)

## Load Data from a list into the Table

In [8]:
# Import insert
from sqlalchemy import insert

connection = engine.connect()

# Build insert statement: stmt
stmt = insert(census)

# Use values_list to insert data: results
results = connection.execute(stmt, values_list)

# Print rowcount
print(results.rowcount)

8772


## Build a Query to Determine the Average Age by Population

In [10]:
# Import select
from sqlalchemy import select, func

# Calculate weighted average age: stmt
stmt = select([census.columns.sex,
               (func.sum(census.columns.pop2008 * census.columns.age) /
                func.sum(census.columns.pop2008)).label('average_age')
               ])

# Group by sex
stmt = stmt.group_by(census.columns.sex)

# Execute the query and store the results: results
results = connection.execute(stmt).fetchall()

# Print the average age by sex
for result in results:
    print(result.sex, result.average_age)

M 35.7836
F 38.0856


## Build a Query to Determine the Percentage of Population by Gender and State

In [11]:
# import case, cast and Float from sqlalchemy
from sqlalchemy import case, cast, Float

# Build a query to calculate the percentage of females in 2000: stmt
stmt = select([census.columns.state,
    (func.sum(
        case([
            (census.columns.sex == 'F', census.columns.pop2000)
        ], else_=0)) /
     cast(func.sum(census.columns.pop2000), Float) * 100).label('percent_female')
])

# Group By state
stmt = stmt.group_by(census.columns.state)

# Execute the query and store the results: results
results = connection.execute(stmt).fetchall()

# Print the percentage
for result in results:
    print(result.state, result.percent_female)


Illinois 51.1122
New Jersey 51.5171
District of Columbia 53.1296
North Dakota 50.5007
Florida 51.3649
Maryland 51.9358
Idaho 49.9897
Massachusetts 51.843
Oregon 50.4295
Nevada 49.3674
Michigan 50.9725
Wisconsin 50.6149
Missouri 51.4689
Washington 50.5186
North Carolina 51.4823
Arizona 50.2236
Arkansas 51.2699
Colorado 49.8477
Indiana 50.9548
Pennsylvania 51.7404
Hawaii 51.118
Kansas 50.8219
Louisiana 51.7535
Alabama 51.8324
Minnesota 50.4933
South Dakota 50.5258
New York 51.8345
California 50.3523
Connecticut 51.6682
Ohio 51.4655
Rhode Island 52.0734
Georgia 51.1141
South Carolina 51.7307
Alaska 49.3015
Delaware 51.6111
Tennessee 51.4307
Vermont 51.0186
Montana 50.322
Kentucky 51.3269
Utah 49.973
Nebraska 50.8585
West Virginia 51.4004
Iowa 50.9504
Wyoming 49.946
Maine 51.5057
New Hampshire 50.858
Mississippi 51.9223
Oklahoma 51.1136
New Mexico 51.0472
Virginia 51.6573
Texas 50.5157


  self.dialect.type_compiler.process(cast.typeclause.type))


## Build a Query to Determine the Difference by State from the 2000 and 2008 Censuses

In [13]:
from sqlalchemy import desc

# Build query to return state name and population difference from 2008 to 2000
stmt = select([census.columns.state,
     (census.columns.pop2008-census.columns.pop2000).label('pop_change')
])

# Group by State
stmt = stmt.group_by(census.columns.state)

# Order by Population Change
stmt = stmt.order_by(desc('pop_change'))

# Limit to top 10
stmt = stmt.limit(10)

# Use connection to execute the statement and fetch all results
results = connection.execute(stmt).fetchall()

# Print the state and population change for each record
for result in results:
    print('{}:{}'.format(result.state, result.pop_change))

Texas:40137
California:35406
Florida:21954
Arizona:14377
Georgia:13357
North Carolina:11574
Virginia:6639
Colorado:6425
Utah:5934
Illinois:5412
