# Advanced SQLAlchemy Queries

### Connecting to a MySQL Database

In [46]:
from sqlalchemy import create_engine
from sqlalchemy import select
from sqlalchemy import MetaData, Table
from sqlalchemy import desc, func

In [2]:
dialect_driver = 'mysql+pymysql://'
user_pass = 'student:datacamp'
host_port = '@courses.csrrinzqubik.us-east-1.rds.amazonaws.com:3306/'
db_name = 'census'

In [3]:
engine = create_engine(dialect_driver+user_pass+host_port+db_name)
engine.table_names()

['census', 'state_fact']

### Calculating a Difference between Two Columns

In [52]:
# using local db
engine = create_engine('sqlite:///census.sqlite')
metadata = MetaData()
census = Table('census', metadata, autoload=True, autoload_with=engine)
state_fact = Table('state_fact', metadata, autoload=True, autoload_with=engine)
connection = engine.connect()

In [10]:
stmt = select([census.columns.state,
               (census.columns.pop2008-census.columns.pop2000).\
               label('pop_change')])

In [11]:
stmt = stmt.group_by(census.columns.state)

In [14]:
stmt = stmt.order_by(desc('pop_change'))

In [15]:
stmt = stmt.limit(5)

In [16]:
results = connection.execute(stmt).fetchall()
for result in results:
    print('{}:{}'.format(result.state, result.pop_change))

California:105705
Florida:100984
Texas:51901
New York:47098
Pennsylvania:42387


### Determining the Overall Percentage of Females

In [19]:
from sqlalchemy import case, cast, Float, func

In [21]:
female_pop2000 = func.sum(
                case([
                    (census.columns.sex == 'F', 
                     census.columns.pop2000)],
                    else_=0))

In [22]:
total_pop2000 = cast(func.sum(census.columns.pop2000), Float)

In [23]:
stmt = select([female_pop2000 / total_pop2000 * 100])

In [24]:
percent_female = connection.execute(stmt).scalar()
percent_female

51.09467432293413

### Automatic Joins with an Established Relationship

In [28]:
stmt = select([census.columns.pop2000, state_fact.columns.abbreviation])

In [29]:
result = connection.execute(stmt).first()

In [30]:
for key in result.keys():
    print(key, getattr(result, key))

pop2000 89600
abbreviation IL


### Joins

In [31]:
stmt = select([census, state_fact])

In [32]:
stmt = stmt.select_from(
                    census.join(
                        state_fact,
                        census.columns.state == state_fact.columns.name
                    ))

In [33]:
result = connection.execute(stmt).first()

In [34]:
for key in result.keys():
    print(key, getattr(result, key))

state Illinois
sex M
age 0
pop2000 89600
pop2008 95012
id 13
name Illinois
abbreviation IL
country USA
type state
sort 10
status current
occupied occupied
notes 
fips_state 17
assoc_press Ill.
standard_federal_region V
census_region 2
census_region_name Midwest
census_division 3
census_division_name East North Central
circuit_court 7


### More Practice with Joins

In [35]:
stmt = select([
    census.columns.state,
    func.sum(census.columns.pop2008),
    state_fact.columns.census_division_name
])

In [36]:
stmt = stmt.select_from(
    census.join(state_fact, census.columns.state == state_fact.columns.name)
)

In [37]:
stmt = stmt.group_by(state_fact.columns.name)

In [38]:
results = connection.execute(stmt).fetchall()

In [39]:
for record in results:
    print(record)

('Alabama', 4649367, 'East South Central')
('Alaska', 664546, 'Pacific')
('Arizona', 6480767, 'Mountain')
('Arkansas', 2848432, 'West South Central')
('California', 36609002, 'Pacific')
('Colorado', 4912947, 'Mountain')
('Connecticut', 3493783, 'New England')
('Delaware', 869221, 'South Atlantic')
('Florida', 18257662, 'South Atlantic')
('Georgia', 9622508, 'South Atlantic')
('Hawaii', 1250676, 'Pacific')
('Idaho', 1518914, 'Mountain')
('Illinois', 12867077, 'East North Central')
('Indiana', 6373299, 'East North Central')
('Iowa', 3000490, 'West North Central')
('Kansas', 2782245, 'West North Central')
('Kentucky', 4254964, 'East South Central')
('Louisiana', 4395797, 'West South Central')
('Maine', 1312972, 'New England')
('Maryland', 5604174, 'South Atlantic')
('Massachusetts', 6492024, 'New England')
('Michigan', 9998854, 'East North Central')
('Minnesota', 5215815, 'West North Central')
('Mississippi', 2922355, 'East South Central')
('Missouri', 5891974, 'West North Central')
('Mon

### Using alias to handle same table joined queries

In [40]:
engine = create_engine('sqlite:///employees.sqlite')
metadata = MetaData()
employees = Table('employees', metadata, autoload=True, autoload_with=engine)
connection = engine.connect()

In [41]:
managers = employees.alias('managers')

In [42]:
stmt = select([managers.columns.name.label('manager'),
               employees.columns.name.label('employee')
              ])

In [43]:
stmt = stmt.where(managers.columns.id == employees.columns.mgr)
stmt = stmt.order_by(managers.columns.name)

In [44]:
results = connection.execute(stmt).fetchall()

In [45]:
for record in results:
    print(record)

('FILLMORE', 'GRANT')
('FILLMORE', 'ADAMS')
('FILLMORE', 'MONROE')
('GARFIELD', 'JOHNSON')
('GARFIELD', 'LINCOLN')
('GARFIELD', 'POLK')
('GARFIELD', 'WASHINGTON')
('HARDING', 'TAFT')
('HARDING', 'HOOVER')
('JACKSON', 'HARDING')
('JACKSON', 'GARFIELD')
('JACKSON', 'FILLMORE')
('JACKSON', 'ROOSEVELT')


### Leveraging Functions and Group_bys with Hierarchical Data

In [47]:
stmt = select([managers.columns.name, func.count(employees.columns.id)])
stmt = stmt.where(managers.columns.id == employees.columns.mgr)
stmt = stmt.group_by(managers.columns.name)

In [49]:
results = connection.execute(stmt).fetchall()

In [50]:
for record in results:
    print(record)

('FILLMORE', 3)
('GARFIELD', 4)
('HARDING', 2)
('JACKSON', 4)


### Working on Blocks of Records

In [61]:
stmt = select([census.columns.state])
results_proxy = connection.execute(stmt)
results_proxy

<sqlalchemy.engine.result.ResultProxy at 0x12408cb67f0>

In [62]:
state_count = {}
more_results = True
while more_results:
    partial_results = results_proxy.fetchmany(10)
    if partial_results == []:
        more_results = False
    for row in partial_results:
        if row.state in state_count:
            state_count[row.state] += 1
        else:
            state_count[row.state] = 1
results_proxy.close()

In [63]:
state_count

{'Illinois': 172,
 'New Jersey': 172,
 'District of Columbia': 172,
 'North Dakota': 172,
 'Florida': 172,
 'Maryland': 172,
 'Idaho': 172,
 'Massachusetts': 172,
 'Oregon': 172,
 'Nevada': 172,
 'Michigan': 172,
 'Wisconsin': 172,
 'Missouri': 172,
 'Washington': 172,
 'North Carolina': 172,
 'Arizona': 172,
 'Arkansas': 172,
 'Colorado': 172,
 'Indiana': 172,
 'Pennsylvania': 172,
 'Hawaii': 172,
 'Kansas': 172,
 'Louisiana': 172,
 'Alabama': 172,
 'Minnesota': 172,
 'South Dakota': 172,
 'New York': 172,
 'California': 172,
 'Connecticut': 172,
 'Ohio': 172,
 'Rhode Island': 172,
 'Georgia': 172,
 'South Carolina': 172,
 'Alaska': 172,
 'Delaware': 172,
 'Tennessee': 172,
 'Vermont': 172,
 'Montana': 172,
 'Kentucky': 172,
 'Utah': 172,
 'Nebraska': 172,
 'West Virginia': 172,
 'Iowa': 172,
 'Wyoming': 172,
 'Maine': 172,
 'New Hampshire': 172,
 'Mississippi': 172,
 'Oklahoma': 172,
 'New Mexico': 172,
 'Virginia': 172,
 'Texas': 172}