### Connecting to DB and Viewing Table Details

In [36]:
import sqlalchemy as db
import pandas as pd

In [5]:
# Open the local SQLite census database and reflect the `census` table's
# schema from the database instead of declaring its columns by hand.
engine = db.create_engine('sqlite:///data/census.sqlite')
connection = engine.connect()
metadata = db.MetaData()
# `autoload_with` on its own triggers reflection; the redundant `autoload=True`
# flag is deprecated since SQLAlchemy 1.4 and no longer accepted in 2.0.
census = db.Table('census', metadata, autoload_with=engine)

In [6]:
# Show the column names that reflection picked up for the census table.
print(census.c.keys())

['state', 'sex', 'age', 'pop2000', 'pop2008']


In [16]:
# Dump the full reflected definition (column names and types) of the table
# registered in the metadata under the name 'census'.
print("{!r}".format(metadata.tables['census']))

Table('census', MetaData(bind=None), Column('state', VARCHAR(length=30), table=<census>), Column('sex', VARCHAR(length=1), table=<census>), Column('age', INTEGER(), table=<census>), Column('pop2000', INTEGER(), table=<census>), Column('pop2008', INTEGER(), table=<census>), schema=None)


### Querying

In [23]:
# Build a SELECT over the census table; nothing is sent to the database
# until the statement is executed on the connection.
query = db.select([census])

In [29]:
# Run the SELECT on the open connection; rows are pulled from the returned
# result object in a separate step.
ResultProxy = connection.execute(query)

In [25]:
# Fetch all rows of the executed query into memory.
ResultSet = ResultProxy.fetchall()

In [26]:
# Preview only the first three rows rather than dumping the whole result.
ResultSet[:3]

[('Illinois', 'M', 0, 89600, 95012),
 ('Illinois', 'M', 1, 88445, 91829),
 ('Illinois', 'M', 2, 88729, 89547)]

#### Dealing with a Large ResultSet

In [34]:
# Execute the query again, but pull only the first 50 rows instead of
# loading the entire result into memory.
ResultProxy = connection.execute(query)
partial_results = ResultProxy.fetchmany(50)
# The remaining rows are not needed, so release the result explicitly
# before reporting how many rows were fetched.
ResultProxy.close()
print(len(partial_results))

50


#### Converting to a DataFrame

In [39]:
# Build the DataFrame and label its columns in a single step. The labels come
# from the reflected table rather than from ResultSet[0], so an empty result
# produces an empty, correctly labelled frame instead of raising IndexError.
# NOTE(review): assumes ResultSet holds rows of a full `select([census])`, so
# the column order matches the table — confirm if the query selects a subset.
df = pd.DataFrame(ResultSet, columns=census.columns.keys())
df.head()

Unnamed: 0,state,sex,age,pop2000,pop2008
0,Illinois,M,0,89600,95012
1,Illinois,M,1,88445,91829
2,Illinois,M,2,88729,89547
3,Illinois,M,3,88868,90037
4,Illinois,M,4,91947,91111


### Filtering Data

#### where

In [47]:
# Restrict the SELECT to rows whose `sex` column equals 'F'.
query = (
    db.select([census])
    .where(census.c.sex == 'F')
)
# Run it, load every matching row, and release the result.
ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
ResultProxy.close()
# Display the first five matches.
ResultSet[:5]

[('Illinois', 'F', 0, 85910, 90286),
 ('Illinois', 'F', 1, 84396, 88126),
 ('Illinois', 'F', 2, 84764, 86291),
 ('Illinois', 'F', 3, 85598, 85170),
 ('Illinois', 'F', 4, 87729, 86959)]

#### in

In [48]:
# Restrict the SELECT to rows whose `state` is one of the listed values
# (the SQL IN operator).
query = db.select([census]).where(
    census.c.state.in_(['Texas', 'New York'])
)
# Run it, load every matching row, and release the result.
ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
ResultProxy.close()
# Display the first five matches.
ResultSet[:5]

[('New York', 'M', 0, 126237, 128088),
 ('New York', 'M', 1, 124008, 125649),
 ('New York', 'M', 2, 124725, 121615),
 ('New York', 'M', 3, 126697, 120580),
 ('New York', 'M', 4, 131357, 122482)]

#### and, or, not

In [49]:
# Combine two conditions with AND: state is 'California' and sex is not 'M'.
query = db.select([census]).where(
    db.and_(
        census.c.state == 'California',
        census.c.sex != 'M',
    )
)
# Run it, load every matching row, and release the result.
ResultProxy = connection.execute(query)
ResultSet = ResultProxy.fetchall()
ResultProxy.close()
# Display the first five matches.
ResultSet[:5]

[('California', 'F', 0, 239605, 274356),
 ('California', 'F', 1, 236543, 269140),
 ('California', 'F', 2, 240010, 262556),
 ('California', 'F', 3, 245739, 259061),
 ('California', 'F', 4, 254522, 255544)]