In [1]:
import sqlite3
from sqlite3 import OperationalError
from IPython.core.display import display, HTML

import pandas as pd
pd.set_option('display.width', 500)
pd.set_option('display.max_columns', 100)
pd.set_option('display.notebook_repr_html', True)


# Open/create sqlite database, reset so tables from exercise are dropped.
db = sqlite3.connect('L18DB.sqlite')
cursor = db.cursor()
cursor.execute("DROP TABLE IF EXISTS candidates")
cursor.execute("DROP TABLE IF EXISTS contributors")
cursor.execute("PRAGMA foreign_keys=1")

cursor.execute('''CREATE TABLE candidates (
               id INTEGER PRIMARY KEY NOT NULL, 
               first_name TEXT, 
               last_name TEXT, 
               middle_init TEXT, 
               party TEXT NOT NULL)''')

cursor.execute('''CREATE TABLE contributors (
          id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, 
          last_name TEXT, 
          first_name TEXT, 
          middle_name TEXT, 
          street_1 TEXT, 
          street_2 TEXT, 
          city TEXT, 
          state TEXT, 
          zip TEXT, 
          amount REAL, 
          date DATETIME, 
          candidate_id INTEGER NOT NULL, 
          FOREIGN KEY(candidate_id) REFERENCES candidates(id))''')



with open ("candidates.txt") as candidates:
    next(candidates) # jump over the header
    for line in candidates.readlines():
        cid, first_name, last_name, middle_name, party = line.strip().split('|')
        vals_to_insert = (int(cid), first_name, last_name, middle_name, party)
        cursor.execute('''INSERT INTO candidates 
                  (id, first_name, last_name, middle_init, party)
                  VALUES (?, ?, ?, ?, ?)''', vals_to_insert)
        
with open ("contributors.txt") as contributors:
    next(contributors)
    for line in contributors.readlines():
        cid, last_name, first_name, middle_name, street_1, street_2, \
            city, state, zip_code, amount, date, candidate_id = line.strip().split('|')
        vals_to_insert = (last_name, first_name, middle_name, street_1, street_2, 
                          city, state, int(zip_code), amount, date, candidate_id)
        cursor.execute('''INSERT INTO contributors (last_name, first_name, middle_name, 
                           street_1, street_2, city, state, zip, amount, date, candidate_id) 
                           VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)''', vals_to_insert) 


db.commit()


def viz_tables(cols, query, print_query=True):
    """Prints SQL query and displays DataFrame created from queried records."""
    q = cursor.execute(query).fetchall()
    framelist = []
    for i, col_name in enumerate(cols):
        framelist.append((col_name, [col[i] for col in q]))
    if print_query:
        print(query)
    display(pd.DataFrame.from_items(framelist))

    
def get_table_columns(table_name):
    """Returns list of column names in given table."""
    query = "PRAGMA table_info({0})".format(table_name)
    return [col[1] for col in cursor.execute(query)]


# Get list of column names for each table.
candidate_cols = get_table_columns('candidates')
contributor_cols = get_table_columns('contributors')


def print_section_break(section, style='h1'):
    print('='*40)
    display(HTML('<{0}>{1}</{0}>'.format(style, section)))
    print('='*40)
    
def print_update(update, style='h4'):
    print('')
    display(HTML('<{0}>{1}</{0}>'.format(style, update)))
    print('')
    
print_section_break('Step 2: Various Queries')

query1 = "SELECT * FROM contributors WHERE state = 'PA'"
viz_tables(contributor_cols, query1)

query2 = "SELECT * FROM contributors WHERE amount > 1000"
viz_tables(contributor_cols, query2)

query3 = "SELECT * FROM contributors WHERE state = 'UT' AND amount > 1000"
viz_tables(contributor_cols, query3)

query4 = "SELECT * FROM contributors WHERE state = ''"
viz_tables(contributor_cols, query4)

query5 = "SELECT * FROM contributors WHERE state IN ('WA', 'PA')"
viz_tables(contributor_cols, query5)

query6 = "SELECT * FROM contributors WHERE amount BETWEEN 100 AND 200"
viz_tables(contributor_cols, query6)

print_section_break('Step 3: Sorting')

query7 = "SELECT * FROM contributors ORDER BY last_name DESC"
viz_tables(candidate_cols, query7)

query8 = "SELECT * FROM contributors WHERE amount BETWEEN 1000 AND 5000 ORDER BY amount DESC"
viz_tables(contributor_cols, query8)

query9 = "SELECT * FROM contributors WHERE amount BETWEEN 1000 AND 5000 ORDER BY candidate_id DESC, amount DESC"
viz_tables(contributor_cols, query9)

print_section_break('Step 4: Selecting Columns')

# Selecting unique combinations of last_name and first_name i.e., some rows 
# may have duplicate values for either column but no rows will have the same 
# last_name, first_name pair.
query10 = "SELECT DISTINCT last_name, first_name FROM contributors"
viz_tables(['last_name', 'first_name'], query10)

print_section_break('Step 5: Altering Tables')

print_update('Adding column full_name to candidates table.')
# Alter tables.
try:
    cursor.execute('''ALTER TABLE candidates ADD COLUMN full_name TEXT''')
except OperationalError:
    print('Error adding full_name column to candidates. May already exist.')

candidate_cols = get_table_columns('candidates')

# Create list of tuples: (full_name, id)
query = '''SELECT id, last_name, first_name FROM candidates'''
full_name_and_id = [(attr[1] + ", " + attr[2], attr[0]) 
                    for attr in cursor.execute(query).fetchall()] 

# Update full_name column with tuples created above.
update = '''UPDATE candidates SET full_name = ? WHERE id = ?''' 
for rows in full_name_and_id:
    cursor.execute(update, rows)

viz_tables(candidate_cols, '''SELECT * FROM candidates''', False)

update = '''UPDATE candidates SET full_name = "Eventual Winner" WHERE last_name = "Obama"'''
cursor.execute(update)
update = '''UPDATE candidates SET full_name = "Eventual Loser" WHERE last_name = "McCain"'''
cursor.execute(update)

print_update('Changed full_name for last_name Obama and McCain.')
viz_tables(candidate_cols, "SELECT * FROM candidates")

print_update('Adding column full_name to contributors table.')
try:
    cursor.execute('''ALTER TABLE contributors ADD COLUMN full_name TEXT''')
except OperationalError:
    print('Error adding full_name column to contributors. May already exist.')

cursor.execute("UPDATE contributors SET full_name = 'Too Much' WHERE amount > 1000")
viz_tables(['full_name', 'amount'], "SELECT * FROM contributors")

contributor_cols = get_table_columns('contributors')

print_section_break('Step 6: Aggregation')

agg_query1 = 'SELECT COUNT(amount) FROM contributors WHERE amount > 1000'
viz_tables(['amount'], agg_query1)

agg_query2 = 'SELECT AVG(amount) FROM contributors'
viz_tables(['amount'], agg_query2)

agg_query3= 'SELECT state, AVG(amount) FROM contributors GROUP BY state'
viz_tables(['state', 'amount'], agg_query3)

print_section_break('Step 7: DELETE')

deletion1 = "DELETE FROM contributors WHERE last_name = 'Ahrens'"
viz_tables(contributor_cols, deletion1)

print_section_break('Step 8: LIMIT')
 
# 10 most generous donors.
limit1 = """
SELECT id, first_name, last_name, SUM(amount) AS total_donations 
FROM contributors 
GROUP BY id 
ORDER BY total_donations DESC 
LIMIT 10
"""
viz_tables(['id', 'first_name', 'last_name', 'total_donations'], limit1)

# 10 least generous donors (with positive amount).
limit2 = """
SELECT id, first_name, last_name, SUM(amount) AS total_donations 
FROM contributors 
GROUP BY id 
HAVING total_donations > 0 
ORDER BY total_donations ASC
LIMIT 10
"""
viz_tables(['id', 'first_name', 'last_name', 'total_donations'], limit2)

db.commit()
db.close()



SELECT * FROM contributors WHERE state = 'PA'


Unnamed: 0,id,last_name,first_name,middle_name,street_1,street_2,city,state,zip,amount,date,candidate_id
0,71,BUCKLEY,WALTER,W.,1635 COUNTRY ROAD,,BETHLEHEM,PA,180155718,-100.0,2008-03-05,22
1,72,BUCKLEY,MARJORIE,B.,1635 COUNTRY ROAD,,BETHLEHEM,PA,180155718,-100.0,2008-03-05,22
2,94,Raught,Philip,M,4714 Plum Way,,Pittsburgh,PA,15201,-1046.0,2008-04-21,32
3,95,Ferrara,Judith,D,1508 Waterford Road,,Yardley,PA,19067,-1100.0,2008-04-21,32
4,166,ABEL,JOHN,H.,422 THOMAS STREET,,BETHLEHEM,PA,180153316,200.0,2008-01-22,37


SELECT * FROM contributors WHERE amount > 1000


Unnamed: 0,id,last_name,first_name,middle_name,street_1,street_2,city,state,zip,amount,date,candidate_id
0,6,Akin,Mike,,181 Baywood Lane,,Monticello,AR,71655,1500.0,2007-05-18,16
1,10,Allen,John D.,,1052 Cannon Mill Drive,,North Augusta,SC,29860,1300.0,2007-06-29,16
2,14,Altes,R.D.,,8600 Moody Road,,Fort Smith,AR,72903,2300.0,2007-06-21,16
3,16,Anthony,John,,211 Long Island Drive,,Hot Springs,AR,71913,2300.0,2007-06-12,16
4,22,Baker,David,,2550 Adamsbrooke Drive,,Conway,AR,72034,2300.0,2007-04-11,16
5,29,Buckel,Linda,,PO Box 683130,,Park City,UT,840683130,2300.0,2007-08-14,20
6,31,Buckel,Linda,,PO Box 683130,,Park City,UT,840683130,4600.0,2007-08-14,20
7,34,Buck,Blaine,M,45 Eaton Ave,,Camden,ME,48431752,2300.0,2007-09-30,20
8,46,Buchanan,John,,2025 NW 29th Rd,,Boca Raton,FL,334316303,1300.0,2007-08-09,20
9,136,ABRAMOWITZ,NIRA,,411 HARBOR ROAD,,SOUTHPORT,CT,68901376,2300.0,2007-09-14,35


SELECT * FROM contributors WHERE state = 'UT' AND amount > 1000


Unnamed: 0,id,last_name,first_name,middle_name,street_1,street_2,city,state,zip,amount,date,candidate_id
0,29,Buckel,Linda,,PO Box 683130,,Park City,UT,840683130,2300.0,2007-08-14,20
1,31,Buckel,Linda,,PO Box 683130,,Park City,UT,840683130,4600.0,2007-08-14,20


SELECT * FROM contributors WHERE state = ''


Unnamed: 0,id,last_name,first_name,middle_name,street_1,street_2,city,state,zip,amount,date,candidate_id
0,126,BOURNE,TRAVIS,,LAGE KAART 77,,BRASSCHATT,,2930,-500.0,2008-11-20,35


SELECT * FROM contributors WHERE state IN ('WA', 'PA')


Unnamed: 0,id,last_name,first_name,middle_name,street_1,street_2,city,state,zip,amount,date,candidate_id
0,63,BURKE,SUZANNE,M.,3401 EVANSTON,,SEATTLE,WA,981038677,-700.0,2008-03-05,22
1,71,BUCKLEY,WALTER,W.,1635 COUNTRY ROAD,,BETHLEHEM,PA,180155718,-100.0,2008-03-05,22
2,72,BUCKLEY,MARJORIE,B.,1635 COUNTRY ROAD,,BETHLEHEM,PA,180155718,-100.0,2008-03-05,22
3,94,Raught,Philip,M,4714 Plum Way,,Pittsburgh,PA,15201,-1046.0,2008-04-21,32
4,95,Ferrara,Judith,D,1508 Waterford Road,,Yardley,PA,19067,-1100.0,2008-04-21,32
5,101,Aaronson,Rebecca,,2000 Village Green Dr Apt 12,,Mill Creek,WA,980125787,100.0,2008-02-08,34
6,107,Aaronson,Rebecca,,2000 Village Green Dr Apt 12,,Mill Creek,WA,980125787,100.0,2008-02-14,34
7,166,ABEL,JOHN,H.,422 THOMAS STREET,,BETHLEHEM,PA,180153316,200.0,2008-01-22,37


SELECT * FROM contributors WHERE amount BETWEEN 100 AND 200


Unnamed: 0,id,last_name,first_name,middle_name,street_1,street_2,city,state,zip,amount,date,candidate_id
0,4,Ahrens,Don,,4034 Rennellwood Way,,Pleasanton,CA,94566,100.0,2007-06-21,16
1,5,Akin,Charles,,10187 Sugar Creek Road,,Bentonville,AR,72712,100.0,2007-06-16,16
2,13,Allison,Rebecca,,3206 Summit Court,,Little Rock,AR,72227,200.0,2007-06-12,16
3,18,Arbogast,Robert,,12900 State Route 56 SE,,Mount Sterling,OH,43143,100.0,2007-06-22,16
4,28,Buckheit,Bruce,,8904 KAREN DR,,FAIRFAX,VA,220312731,100.0,2007-09-19,20
5,32,Buck,Thomas,,4206 Terrace Street,,Kansas City,MO,64111,100.0,2007-09-25,20
6,33,Buck,Jay,K.,1855 Old Willow Rd Unit 322,,Northfield,IL,600932918,200.0,2007-09-12,20
7,38,Bucher,Ida,M,1400 Warnall Ave,,Los Angeles,CA,900245333,100.0,2007-07-10,20
8,47,Buchanan,John,,2025 NW 29th Rd,,Boca Raton,FL,334316303,200.0,2007-08-14,20
9,101,Aaronson,Rebecca,,2000 Village Green Dr Apt 12,,Mill Creek,WA,980125787,100.0,2008-02-08,34




SELECT * FROM contributors ORDER BY last_name DESC


Unnamed: 0,id,first_name,last_name,middle_init,party
0,81,Waddell,James,L.,1823 Spel Lane SW
1,84,Verster,Jeanette,M.,7220 SW 61st St
2,85,Uihlein,Richard,,1396 N Waukegan Rd
3,128,TOLLESTRUP,TRAVIS,W.,16331 WINECREEK RD.
4,76,Schuff,Bryan,,1700 W Sweden Rd
5,97,Sanford,Bradley,,940 Post St #43
6,127,SECRIST,BRIAN,L.,3 MULE DEER TRAIL
7,90,Rozenfeld,Timur,,57 Herbert Road
8,111,Reid,Elizabeth,,73 W Patent Rd
9,112,Reich,Thomas,,499 Park Ave


SELECT * FROM contributors WHERE amount BETWEEN 1000 AND 5000 ORDER BY amount DESC


Unnamed: 0,id,last_name,first_name,middle_name,street_1,street_2,city,state,zip,amount,date,candidate_id
0,31,Buckel,Linda,,PO Box 683130,,Park City,UT,840683130,4600.0,2007-08-14,20
1,160,ABATE,MARIA,ELENA,1291 NIGHTINGALE AVENUE,,MIAMI SPRINGS,FL,331663832,2600.0,2008-01-25,37
2,14,Altes,R.D.,,8600 Moody Road,,Fort Smith,AR,72903,2300.0,2007-06-21,16
3,16,Anthony,John,,211 Long Island Drive,,Hot Springs,AR,71913,2300.0,2007-06-12,16
4,22,Baker,David,,2550 Adamsbrooke Drive,,Conway,AR,72034,2300.0,2007-04-11,16
5,29,Buckel,Linda,,PO Box 683130,,Park City,UT,840683130,2300.0,2007-08-14,20
6,34,Buck,Blaine,M,45 Eaton Ave,,Camden,ME,48431752,2300.0,2007-09-30,20
7,136,ABRAMOWITZ,NIRA,,411 HARBOR ROAD,,SOUTHPORT,CT,68901376,2300.0,2007-09-14,35
8,6,Akin,Mike,,181 Baywood Lane,,Monticello,AR,71655,1500.0,2007-05-18,16
9,10,Allen,John D.,,1052 Cannon Mill Drive,,North Augusta,SC,29860,1300.0,2007-06-29,16


SELECT * FROM contributors WHERE amount BETWEEN 1000 AND 5000 ORDER BY candidate_id DESC, amount DESC


Unnamed: 0,id,last_name,first_name,middle_name,street_1,street_2,city,state,zip,amount,date,candidate_id
0,160,ABATE,MARIA,ELENA,1291 NIGHTINGALE AVENUE,,MIAMI SPRINGS,FL,331663832,2600.0,2008-01-25,37
1,175,ABRAHAM,SALEM,A.,P.O. BOX 7,,CANADIAN,TX,790140007,1300.0,2008-01-30,37
2,157,ABBOTT,MIKE,E.,4516 OSPREY LNDG,,NICEVILLE,FL,325786810,1000.0,2008-01-15,37
3,174,ABRAHAM,SALEM,A.,P.O. BOX 7,,CANADIAN,TX,790140007,1000.0,2008-01-17,37
4,136,ABRAMOWITZ,NIRA,,411 HARBOR ROAD,,SOUTHPORT,CT,68901376,2300.0,2007-09-14,35
5,139,ABOUBAKARE,NASAR,,1400 SAN MIGUEL DRIVE,,CORONA DEL MAR,CA,926251300,1000.0,2007-07-09,35
6,123,Aaron,Barbara,,2298 Pacific Ave # 6,,San Francisco,CA,941151435,1000.0,2008-02-11,34
7,31,Buckel,Linda,,PO Box 683130,,Park City,UT,840683130,4600.0,2007-08-14,20
8,29,Buckel,Linda,,PO Box 683130,,Park City,UT,840683130,2300.0,2007-08-14,20
9,34,Buck,Blaine,M,45 Eaton Ave,,Camden,ME,48431752,2300.0,2007-09-30,20




SELECT DISTINCT last_name, first_name FROM contributors


Unnamed: 0,last_name,first_name
0,Agee,Steven
1,Ahrens,Don
2,Akin,Charles
3,Akin,Mike
4,Akin,Rebecca
5,Aldridge,Brittni
6,Allen,John D.
7,Allison,John W.
8,Allison,Rebecca
9,Altes,R.D.










Unnamed: 0,id,first_name,last_name,middle_init,party,full_name
0,16,Mike,Huckabee,,R,"Huckabee, Mike"
1,20,Barack,Obama,,D,"Obama, Barack"
2,22,Rudolph,Giuliani,,R,"Giuliani, Rudolph"
3,24,Mike,Gravel,,D,"Gravel, Mike"
4,26,John,Edwards,,D,"Edwards, John"
5,29,Bill,Richardson,,D,"Richardson, Bill"
6,30,Duncan,Hunter,,R,"Hunter, Duncan"
7,31,Dennis,Kucinich,,D,"Kucinich, Dennis"
8,32,Ron,Paul,,R,"Paul, Ron"
9,33,Joseph,Biden,,D,"Biden, Joseph"






SELECT * FROM candidates


Unnamed: 0,id,first_name,last_name,middle_init,party,full_name
0,16,Mike,Huckabee,,R,"Huckabee, Mike"
1,20,Barack,Obama,,D,Eventual Winner
2,22,Rudolph,Giuliani,,R,"Giuliani, Rudolph"
3,24,Mike,Gravel,,D,"Gravel, Mike"
4,26,John,Edwards,,D,"Edwards, John"
5,29,Bill,Richardson,,D,"Richardson, Bill"
6,30,Duncan,Hunter,,R,"Hunter, Duncan"
7,31,Dennis,Kucinich,,D,"Kucinich, Dennis"
8,32,Ron,Paul,,R,"Paul, Ron"
9,33,Joseph,Biden,,D,"Biden, Joseph"






SELECT * FROM contributors


Unnamed: 0,full_name,amount
0,1,Agee
1,2,Ahrens
2,3,Ahrens
3,4,Ahrens
4,5,Akin
5,6,Akin
6,7,Akin
7,8,Aldridge
8,9,Allen
9,10,Allen




SELECT COUNT(amount) FROM contributors WHERE amount > 1000


Unnamed: 0,amount
0,12


SELECT AVG(amount) FROM contributors


Unnamed: 0,amount
0,3.418114


SELECT state, AVG(amount) FROM contributors GROUP BY state


Unnamed: 0,state,amount
0,,-500.0
1,AK,403.333333
2,AR,1183.333333
3,AZ,120.0
4,CA,-217.988261
5,CO,-1455.75
6,CT,2300.0
7,DC,-309.982
8,FL,-135.0
9,IA,250.0




DELETE FROM contributors WHERE last_name = 'Ahrens'


Unnamed: 0,id,last_name,first_name,middle_name,street_1,street_2,city,state,zip,amount,date,candidate_id,full_name





SELECT id, first_name, last_name, SUM(amount) AS total_donations 
FROM contributors 
GROUP BY id 
ORDER BY total_donations DESC 
LIMIT 10



Unnamed: 0,id,first_name,last_name,total_donations
0,31,Linda,Buckel,4600.0
1,160,MARIA,ABATE,2600.0
2,14,R.D.,Altes,2300.0
3,16,John,Anthony,2300.0
4,22,David,Baker,2300.0
5,29,Linda,Buckel,2300.0
6,34,Blaine,Buck,2300.0
7,136,NIRA,ABRAMOWITZ,2300.0
8,6,Mike,Akin,1500.0
9,10,John D.,Allen,1300.0



SELECT id, first_name, last_name, SUM(amount) AS total_donations 
FROM contributors 
GROUP BY id 
HAVING total_donations > 0 
ORDER BY total_donations ASC
LIMIT 10



Unnamed: 0,id,first_name,last_name,total_donations
0,27,Steve,Buckler,25.0
1,50,Ryan,Harrison,25.0
2,102,Elaine,Aarons,25.0
3,141,PATRICIA,ABEGG,25.0
4,144,PATRICIA,ABEGG,25.0
5,145,PATRICIA,ABEGG,25.0
6,161,PETER,ABAIR,25.0
7,159,PAULINE,ABBO,35.0
8,19,William,Ardle,50.0
9,26,Steve,Buckler,50.0
