# Connecting, populating, querying

### Setup engine and metadata

In [1]:
from sqlalchemy import MetaData, create_engine

In [2]:
engine = create_engine('sqlite:///db_final.sqlite')

In [15]:
metadata = MetaData()

In [16]:
connection = engine.connect()

### Create the Table to the Database

In [4]:
from sqlalchemy import Table, Column, String, Integer

In [6]:
census = Table('census', metadata,
                Column('state', String(30)),
                Column('sex', String(1)),
                Column('age', Integer()),
                Column('pop2000', Integer()),
                Column('pop2008', Integer())
              )

In [7]:
metadata.create_all(engine)

### Reading the Data from the CSV

In [8]:
import csv

In [9]:
value_list = []

In [12]:
with open('census.csv', newline='') as csvfile:
    csv_reader = csv.reader(csvfile, delimiter=',')
    for row in csv_reader:
        data = {
            'state'  :row[0],
            'sex'    :row[1],
            'age'    :row[2],
            'pop2000':row[3],
            'pop2008':row[4]
        }
        value_list.append(data)

### Load Data from a list into the Table

In [14]:
from sqlalchemy import insert

In [17]:
stmt = insert(census)
results = connection.execute(stmt, value_list)

In [18]:
results.rowcount

8772

### Build a Query to Determine the Average Age by Population

In [21]:
from sqlalchemy import select, func

In [22]:
stmt = select([census.columns.sex, 
              (func.sum(census.columns.pop2008*census.columns.age) / func.sum(census.columns.pop2008)).label('average_age')])

In [23]:
stmt = stmt.group_by(census.columns.sex)

In [25]:
results = connection.execute(stmt).fetchall()

In [26]:
for record in results:
    print(record.sex, record.average_age)

F 38
M 35


### Build a Query to Determine the Percentage of Population by Gender and State

In [27]:
from sqlalchemy import case, cast, Float

In [29]:
stmt = select([census.columns.state,
               (func.sum(case([
                   (census.columns.sex == 'F', census.columns.pop2000)],
                   else_ = 0))/cast(func.sum(census.columns.pop2000), Float) * 100).label('percentage_female')
              ])

In [30]:
stmt = stmt.group_by(census.columns.state)

In [31]:
results = connection.execute(stmt).fetchall()

In [34]:
for row in results:
    print(row.state, row.percentage_female)

Alabama 51.832407770179465
Alaska 49.301497893484594
Arizona 50.22361303057914
Arkansas 51.26992846221834
California 50.35233214901979
Colorado 49.84767060299562
Connecticut 51.66816507130644
Delaware 51.61109733558627
District of Columbia 53.129626141738385
Florida 51.36488001165242
Georgia 51.11408350339436
Hawaii 51.118011836915514
Idaho 49.98972623903102
Illinois 51.11224234802867
Indiana 50.95480313297678
Iowa 50.950398342534264
Kansas 50.821864107754735
Kentucky 51.32687036927168
Louisiana 51.75351596554121
Maine 51.50570813418951
Maryland 51.93575549972231
Massachusetts 51.843023571316785
Michigan 50.97246518318712
Minnesota 50.49332944301148
Mississippi 51.92229481794672
Missouri 51.46888602639692
Montana 50.32202690728538
Nebraska 50.8584549336086
Nevada 49.36736361384359
New Hampshire 50.858019844961746
New Jersey 51.51713956125773
New Mexico 51.0471720798335
New York 51.83453865150073
North Carolina 51.482262322084594
North Dakota 50.50069363231332
Ohio 51.46550350015544
Okl

### Build a Query to Determine the Difference by State from the 2000 and 2008 Censuses

In [38]:
from sqlalchemy import desc

In [35]:
stmt = select([census.columns.state,
               (census.columns.pop2008 - census.columns.pop2000).label('pop_change')
              ])

In [36]:
stmt = stmt.group_by(census.columns.state)

In [39]:
stmt = stmt.order_by(desc('pop_change'))

In [40]:
stmt = stmt.limit(10)

In [41]:
results = connection.execute(stmt).fetchall()

In [42]:
for row in results:
    print(row.state, row.pop_change)

California 105705
Florida 100984
Texas 51901
New York 47098
Pennsylvania 42387
Arizona 29509
Ohio 29392
Illinois 26221
Michigan 25126
North Carolina 24108
