In [1]:
import sqlite3
import pprint
import time

db = sqlite3.connect("sqlite3_group10.db")
cur = db.cursor()

## Simple Queries

### Simple Query 1 - Unarmed Males under 18 killed by Police

In [2]:
query = '''
SELECT name, age 
FROM police_killings 
WHERE age < 18 
AND gender = "M" 
AND armed = "unarmed" 
ORDER BY age;'''

cur.execute(query)

pprint.pprint(cur.fetchall())

[('Jeremy Mardis', 6),
 ('Jordan Edwards', 15),
 ('Jose Raul Cruz', 16),
 ('Deven Guilford', 17),
 ('David Joseph', 17),
 ('Armando Garcia-Muro', 17)]


### Simple Query 2 - Women killed by Police while not fleeing and not armed in 2016

In [3]:
query = '''
SELECT name, age, date as this_date
FROM police_killings
WHERE gender = "F"
AND flee = "Not fleeing"
AND armed = "unarmed"
AND date >= "16-01-01"
AND date < "17-01-01"
ORDER BY age;
'''
cur.execute(query)

pprint.pprint(cur.fetchall())

[]


### Simple Query 3 - People with NULL values regarding age, race and armed

In [4]:
query = '''
SELECT name, gender, date
FROM police_killings
WHERE age IS NULL
AND race_id IS NULL
AND armed IS NULL;
'''

cur.execute(query)

pprint.pprint(cur.fetchall())

[('TK TK', 'M', '11/11/16')]


## Complex Queries

In [31]:
times_sqllite = {'complex1':0,
                'complex2':0}

times_sqllite_index = {'complex1':0,
                      'complex2':0}

iter_n = 100

def record_time_f(query_n,query,with_index):
    results = 0
    if with_index is False:
        for i in range(iter_n):
            time_i = time.time()
            cur.execute(query)
            results = cur.fetchall()
            time_f = time.time()
            times_sqllite[query_n] = times_sqllite[query_n] + time_f - time_i
        times_sqllite[query_n] = times_sqllite[query_n] / iter_n
    else:
        for i in range(iter_n):
            time_i = time.time()
            cur.execute(query)
            results = cur.fetchall()
            time_f = time.time()
            times_sqllite_index[query_n] = times_sqllite_index[query_n] + time_f - time_i
        times_sqllite_index[query_n] = times_sqllite_index[query_n] / iter_n
    return results


### Complex Query 1 - People armed with a knife shot by police in the state of Texas

In [32]:
query1 = '''
SELECT p.name, p.age, s.acronym
FROM police_killings p
INNER JOIN city c
ON c.id = p.city_id
INNER JOIN state s
ON c.state_id = s.id
WHERE s.acronym = "TX"
AND p.armed = "knife";
'''

results = record_time_f('complex1', query1, False)
    
pprint.pprint(results)

[('Emmett Edward Hall', 60, 'TX'),
 ('Tyler Hunkin', 29, 'TX'),
 ('Morgan London Rankins', 30, 'TX'),
 ('Kamal Dajani', 26, 'TX'),
 ('Medger Blake', 41, 'TX'),
 ('Henry Reyna', 49, 'TX'),
 ('Daniel Brumley', 27, 'TX'),
 ('Rodney Henderson', 48, 'TX'),
 ('Richard McClendon', 43, 'TX'),
 ('Michael Clyde Lynch', 37, 'TX'),
 ('Jose Angel Vallarta', 30, 'TX'),
 ('Jose Antonio Espinoza Ruiz', 56, 'TX'),
 ('Kristiana Coignard', 17, 'TX'),
 ('Epthen Lamont Johnson', 40, 'TX'),
 ('Martin Gomez', 46, 'TX'),
 ('TK TK', None, 'TX'),
 ('Roger Albrecht', None, 'TX'),
 ('Ray Valdez', 55, 'TX'),
 ('Randall Lance Hughes', 48, 'TX'),
 ('Randall Waddel', 49, 'TX'),
 ('Gregory Mathis', 36, 'TX')]


### Complex Query 2 - State with the most shootings

In [33]:
query2 = '''
SELECT state, MAX(mycount)
FROM (SELECT s.acronym as state, COUNT(p.name) mycount
    FROM police_killings p
    INNER JOIN city c
    ON c.id = p.city_id
    INNER JOIN state s
    ON s.id = c.state_id
    GROUP BY s.acronym);
'''

results = record_time_f('complex2', query2, False)

pprint.pprint(results)

[('CA', 390)]


### Insert Query - Insert new police killing

In [None]:
john = [
    (3000, "John Cena", "21-05-12", "slap", "fidget spinner", 100, "M", 1, 1, 1, "attack", "spaceship", 1)
]

query = '''INSERT INTO police_killings (id, name, date, manner_of_death, armed, age, gender, race_id, city_id, signs_of_mental_illness, threat_level, flee, body_camera) 
                VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? ,?);'''

try:
    cur.executemany(query, john)
    db.commit()
    print("Inserted successfully in police_killings")
except:
    print("Error inserting in police_killings")
    db.rollback()

### Update Query - Update the date of the police killing by the person's name

In [None]:
name = "John Cena"
date = "22-05-12"

query = "UPDATE police_killings SET date=? WHERE name=?;"

try:
    cur.execute(query, (date, name))
    db.commit()
    print("Successfully updated police_killings")
except:
    print("Error updating police_killings")
    db.rollback()

# Optimization and indexing

In [41]:
drop_index_0 = "DROP INDEX IF EXISTS police_killings_index;"
cur.execute(drop_index_0)

drop_index_1 = "DROP INDEX IF EXISTS city_index;"
cur.execute(drop_index_1)

drop_index_2 = "DROP INDEX IF EXISTS state_index;"
cur.execute(drop_index_2)

# drop_index_3 = "DROP INDEX IF EXISTS police_killings_index;"
# cur.execute(drop_index_3)

# drop_index_4 = "DROP INDEX IF EXISTS state_acronym_index;"
# cur.execute(drop_index_4)

<sqlite3.Cursor at 0x7ff431baf2d0>

In [42]:
index_2 = "CREATE INDEX IF NOT EXISTS police_killings_index ON police_killings(city_id, armed)"
cur.execute(index_2)

<sqlite3.Cursor at 0x7ff431baf2d0>

In [43]:
index_3 = "CREATE INDEX IF NOT EXISTS city_index ON city(state_id)"
cur.execute(index_3)

<sqlite3.Cursor at 0x7ff431baf2d0>

In [44]:
index_4 = "CREATE INDEX IF NOT EXISTS state_index ON state(acronym)"
cur.execute(index_4)

<sqlite3.Cursor at 0x7ff431baf2d0>

In [45]:
# print indexes that were created
created_indexes = '''
SELECT * FROM sqlite_master WHERE type = 'index';
'''
cur.execute(created_indexes)
records_query1 = cur.fetchall()
pprint.pprint(records_query1)

[('index', 'sqlite_autoindex_city_1', 'city', 5, None),
 ('index',
  'cluster_index2',
  'police_killings',
  1143,
  'CREATE INDEX cluster_index2 ON police_killings(city_id, armed)'),
 ('index',
  'police_killings_armed_index2',
  'city',
  1155,
  'CREATE INDEX police_killings_armed_index2 ON city(state_id)'),
 ('index',
  'state_acronym_index',
  'state',
  1221,
  'CREATE INDEX state_acronym_index ON state(acronym)'),
 ('index',
  'police_killings_index',
  'police_killings',
  1222,
  'CREATE INDEX police_killings_index ON police_killings(city_id, armed)'),
 ('index',
  'city_index',
  'city',
  1233,
  'CREATE INDEX city_index ON city(state_id)'),
 ('index',
  'state_index',
  'state',
  1298,
  'CREATE INDEX state_index ON state(acronym)')]


In [46]:
####### Performance testing for query 1 #############
optimized_query1 = '''
SELECT p.name, p.age
FROM police_killings p
INNER JOIN city c
ON c.id = p.city_id
INNER JOIN state s
ON c.state_id = s.id
WHERE s.acronym = "TX"
AND p.armed = "knife";
'''

#People armed with a knife shot by police in the state of Texas

results = record_time_f('complex1', optimized_query1, True)

pprint.pprint(results)

[('Emmett Edward Hall', 60),
 ('Tyler Hunkin', 29),
 ('Morgan London Rankins', 30),
 ('Kamal Dajani', 26),
 ('Medger Blake', 41),
 ('Henry Reyna', 49),
 ('Daniel Brumley', 27),
 ('Rodney Henderson', 48),
 ('Richard McClendon', 43),
 ('Michael Clyde Lynch', 37),
 ('Jose Angel Vallarta', 30),
 ('Jose Antonio Espinoza Ruiz', 56),
 ('Kristiana Coignard', 17),
 ('Epthen Lamont Johnson', 40),
 ('Martin Gomez', 46),
 ('TK TK', None),
 ('Roger Albrecht', None),
 ('Ray Valdez', 55),
 ('Randall Lance Hughes', 48),
 ('Randall Waddel', 49),
 ('Gregory Mathis', 36)]


In [47]:
####### Performance testing for query 2 #############
optimized_query2 = '''
SELECT state, MAX(mycount)
FROM (SELECT s.acronym as state, COUNT(p.name) mycount
    FROM police_killings p
    INNER JOIN city c
    ON c.id = p.city_id
    INNER JOIN state s
    ON s.id = c.state_id
    GROUP BY s.acronym);
'''

#State with the most shootings

results = record_time_f('complex2', optimized_query2, True)

pprint.pprint(results)

[('CA', 390)]


In [48]:
print('Times for complex queries without indexes and optimization:')
for t in times_sqllite:
     print(f'{t} - {times_sqllite[t]:7.6f}')

print('\nTimes for complex queries with indexes and optimization:')
for t in times_sqllite_index:
     print(f'{t} - {times_sqllite_index[t]:7.6f}')

Times for complex queries without indexes and optimization:
complex1 - 0.000815
complex2 - 0.002907

Times for complex queries with indexes and optimization:
complex1 - 0.000762
complex2 - 0.002889


In [None]:
db.close()