## Setup the Engine and MetaData

In [16]:
# Import create_engine, MetaData
from sqlalchemy import create_engine,MetaData

# Define an engine to connect to chapter5.sqlite: engine
engine = create_engine('sqlite:///chapter5.sqlite')

# Initialize MetaData: metadata
metadata =MetaData()


In [17]:
# Import Table, Column, String, and Integer
from sqlalchemy import Table, Column, String, Integer

# Build a census table: census
census = Table('census', metadata,
               Column('state', String(30)),
               Column('sex', String(1)),
               Column('age', Integer()),
               Column('pop2000', Integer()),
               Column('pop2008', Integer()))

# Create the table in the database
metadata.create_all(engine)


## Reading the Data from the CSV

In [18]:
# Create an empty list: values_list
import csv
values_list =[]
f=open('census.csv')
csv_reader=csv.reader(f)
    # Iterate over the rows
for row in csv_reader:
        # Create a dictionary with the values
    data = {'state': row[0],'sex': row[1],'age':row[2],'pop2000':row[3],
                'pop2008':row[4]}
        # Append the dictionary to the values list
    values_list.append(data)
f.close()

## Load Data from a list into the Table

Using the multiple insert pattern, we will load the data from values_list into the table.

In [14]:
# Import insert
from sqlalchemy import insert

# Build insert statement: stmt
stmt=insert(census)

# Use values_list to insert data: results
results=engine.execute(stmt,values_list)

# Print rowcount
print(results.rowcount)


8772


## Build a Query to Determine the Average Age by Population

In [20]:
# Import select
from sqlalchemy import select,func

# Calculate weighted average age: stmt
stmt = select([census.columns.sex,
               (func.sum(census.columns.pop2008 * census.columns.age) /
                func.sum(census.columns.pop2008)).label('average_age')
               ])

# Group by sex
stmt = stmt.group_by(census.columns.sex)

# Execute the query and store the results: results
results = engine.execute(stmt).fetchall()

# Print the average age by sex
for result in results:
    print(result.sex, result.average_age)


F 38
M 35


## Build a Query to Determine the Percentage of Population by Gender and State

In [21]:
# import case, cast and Float from sqlalchemy
from sqlalchemy import case, cast, Float

# Build a query to calculate the percentage of females in 2000: stmt
stmt = select([census.columns.state,
    (func.sum(
        case([
            (census.columns.sex == 'F', census.columns.pop2000)
        ], else_=0)) /
     cast(func.sum(census.columns.pop2000), Float) * 100).label('percent_female')
])

# Group By state
stmt = stmt.group_by(census.columns.state)

# Execute the query and store the results: results
results = engine.execute(stmt).fetchall()

# Print the percentage
for result in results:
    print(result.state, result.percent_female)


Alabama 51.8324077702
Alaska 49.3014978935
Arizona 50.2236130306
Arkansas 51.2699284622
California 50.3523321490
Colorado 49.8476706030
Connecticut 51.6681650713
Delaware 51.6110973356
District of Columbia 53.1296261417
Florida 51.3648800117
Georgia 51.1140835034
Hawaii 51.1180118369
Idaho 49.9897262390
Illinois 51.1122423480
Indiana 50.9548031330
Iowa 50.9503983425
Kansas 50.8218641078
Kentucky 51.3268703693
Louisiana 51.7535159655
Maine 51.5057081342
Maryland 51.9357554997
Massachusetts 51.8430235713
Michigan 50.9724651832
Minnesota 50.4933294430
Mississippi 51.9222948179
Missouri 51.4688860264
Montana 50.3220269073
Nebraska 50.8584549336
Nevada 49.3673636138
New Hampshire 50.8580198450
New Jersey 51.5171395613
New Mexico 51.0471720798
New York 51.8345386515
North Carolina 51.4822623221
North Dakota 50.5006936323
Ohio 51.4655035002
Oklahoma 51.1136245708
Oregon 50.4294670362
Pennsylvania 51.7404347305
Rhode Island 52.0734339190
South Carolina 51.7307212977
South Dakota 50.5258358137


  'storage.' % (dialect.name, dialect.driver))


## Build a Query to Determine the Difference by State from the 2000 and 2008 Censuses

In [25]:
from sqlalchemy import desc
# Build query to return state name and population difference from 2008 to 2000
stmt = select([census.columns.state,
     (census.columns.pop2008-census.columns.pop2000).label('pop_change')
])

# Group by State
stmt = stmt.group_by(census.columns.state)

# Order by Population Change
stmt = stmt.order_by(desc('pop_change'))

# Limit to top 10
stmt = stmt.limit(10)

# Use connection to execute the statement and fetch all results
results = engine.execute(stmt).fetchall()

# Print the state an population change for each record
for result in results:
    print('{}:{}'.format(result.state, result.pop_change))


California:105705
Florida:100984
Texas:51901
New York:47098
Pennsylvania:42387
Arizona:29509
Ohio:29392
Illinois:26221
Michigan:25126
North Carolina:24108
