In [5]:
import sqlite3
import pprint
import time

# Open the project database file and keep one connection/cursor pair that
# every later cell reuses.
DB_PATH = "sqlite3_group10.db"
db = sqlite3.connect(DB_PATH)
cur = db.cursor()

In [110]:
# Keys under which record_time_f stores the mean execution time of each query.
QUERY_NAMES = ('simple1', 'simple2', 'simple3', 'complex1', 'complex2')

# Mean time per query measured before the indexes are created.
times_sqllite = dict.fromkeys(QUERY_NAMES, 0)

# Mean time per query measured after the indexes are created.
times_sqllite_index = dict.fromkeys(QUERY_NAMES, 0)

### Performance function

In [111]:
# Number of times each query is executed when measuring its mean run time.
iter_n = 100

def record_time_f(query_n, query, with_index):
    """Run ``query`` ``iter_n`` times and store its mean execution time.

    Each timed iteration covers both ``cur.execute`` and ``cur.fetchall``.
    The mean (in seconds) is written to ``times_sqllite[query_n]`` when
    ``with_index`` is ``False`` and to ``times_sqllite_index[query_n]``
    otherwise.

    The stored value is overwritten on every call, so re-running a timing
    cell replaces the previous measurement instead of folding the old mean
    into the new one.

    Args:
        query_n: Key of the query in the timing dictionaries.
        query: SQL text to execute on the shared cursor ``cur``.
        with_index: ``False`` for the baseline run; any other value records
            the result as an indexed run.

    Returns:
        The rows fetched by the last execution, or ``""`` if no iteration ran.
    """
    # Identity check kept on purpose: only a literal False selects the baseline.
    timings = times_sqllite if with_index is False else times_sqllite_index

    results = ""
    elapsed = 0.0
    for _ in range(iter_n):
        # perf_counter is monotonic and high resolution, unlike time.time().
        started = time.perf_counter()
        cur.execute(query)
        results = cur.fetchall()
        elapsed += time.perf_counter() - started

    timings[query_n] = elapsed / iter_n
    return results

## Simple Queries

### Simple Query 1 - Unarmed Males under 18 killed by Police

In [112]:
# Unarmed males younger than 18, youngest first.
# String values use single quotes: SQLite treats double-quoted tokens as
# identifiers and only falls back to string literals as a legacy quirk.
query = """
SELECT name, age
FROM police_killings
WHERE age < 18
AND gender = 'M'
AND armed = 'unarmed'
ORDER BY age;"""

# Baseline timing (with_index=False); returns the rows of the last execution.
results = record_time_f('simple1', query, False)

pprint.pprint(results)

[('Jeremy Mardis', 6),
 ('Jordan Edwards', 15),
 ('Jose Raul Cruz', 16),
 ('Deven Guilford', 17),
 ('David Joseph', 17),
 ('Armando Garcia-Muro', 17)]


### Simple Query 2 - Women killed by Police while not fleeing and not armed

In [113]:
# Unarmed women who were not fleeing, youngest first.
# String values use single quotes: SQLite treats double-quoted tokens as
# identifiers and only falls back to string literals as a legacy quirk.
query = """
SELECT name, age
FROM police_killings
WHERE gender = 'F'
AND flee = 'Not fleeing'
AND armed = 'unarmed'
ORDER BY age;
"""

# Baseline timing (with_index=False); returns the rows of the last execution.
results = record_time_f('simple2', query, False)

pprint.pprint(results)

[('Ciara Meyer', 12),
 ('Alteria Woods', 21),
 ('India Kager', 28),
 ('Autumn Steele', 34),
 ('Justine Damond', 40)]


### Simple Query 3 - Records with NULL age, race and armed values

In [114]:
# Select the records for which age, race_id and armed are all NULL.
query = '''
SELECT name, gender, date
FROM police_killings
WHERE age IS NULL
AND race_id IS NULL
AND armed IS NULL;
'''

# Baseline timing: with_index=False stores the mean time under
# times_sqllite['simple3']; the call returns the rows it fetched.
results = record_time_f('simple3', query, False)

pprint.pprint(results)

[('TK TK', 'M', '16-11-11')]


## Complex Queries

### Complex Query 1 - People over 20, armed with a knife, shot and Tasered by police in the state of Texas

In [115]:
# People older than 20, armed with a knife, who were shot and Tasered in
# Texas. The state is reached through police_killings -> city -> state.
# String values use single quotes: SQLite treats double-quoted tokens as
# identifiers and only falls back to string literals as a legacy quirk.
query1 = """
SELECT p.name, p.age, s.acronym, p.armed
FROM police_killings p
INNER JOIN city c
ON c.id = p.city_id
INNER JOIN state s
ON c.state_id = s.id
WHERE s.acronym = 'TX'
AND p.age > 20
AND p.manner_of_death = 'shot and Tasered'
AND p.armed = 'knife';
"""

# Baseline timing (with_index=False); returns the rows of the last execution.
results = record_time_f('complex1', query1, False)

for row in results:
    print(row)

('Henry Reyna', 49, 'TX', 'knife')
('Rodney Henderson', 48, 'TX', 'knife')
('Ray Valdez', 55, 'TX', 'knife')
('Randall Lance Hughes', 48, 'TX', 'knife')
('Gregory Mathis', 36, 'TX', 'knife')


### Complex Query 2 - State with the most shootings against males under the age of 18

In [116]:
# State with the highest number of records for males younger than 18.
# The inner query counts records per state acronym; the outer query keeps
# the maximum count. Selecting the bare `state` column next to MAX() relies
# on SQLite returning it from the row that holds the maximum.
# String values use single quotes: SQLite treats double-quoted tokens as
# identifiers and only falls back to string literals as a legacy quirk.
query2 = """
SELECT state, MAX(mycount)
FROM (SELECT s.acronym as state, COUNT(p.name) mycount
    FROM police_killings p
    INNER JOIN city c
    ON c.id = p.city_id
    INNER JOIN state s
    ON s.id = c.state_id
    WHERE p.gender = 'M'
    AND p.age < 18
    GROUP BY s.acronym);
"""

# Baseline timing (with_index=False); returns the rows of the last execution.
results = record_time_f('complex2', query2, False)

# Print each row as a dict-like line with '_id' and 'shootings' keys.
for state, count in results:
    print(f"{{'_id': '{state}', 'shootings': {count}}}")

{'_id': 'CA', 'shootings': 11}


### Insert Query - Insert new police killing

In [79]:
# One demo row, with values in the column order of the INSERT statement below.
john = [
    (3000, "John Cena", "21-05-12", "slap", "fidget spinner", 100, "M", 1, 1, 1, "attack", "spaceship", 1)
]

# Placeholders keep the values out of the SQL text.
query = '''INSERT INTO police_killings (id, name, date, manner_of_death, armed, age, gender, race_id, city_id, signs_of_mental_illness, threat_level, flee, body_camera) 
                VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? ,?);'''

try:
    cur.executemany(query, john)
    db.commit()
    print("Inserted successfully in police_killings")
except sqlite3.Error as exc:
    # Catch database errors only (a bare except would also trap
    # KeyboardInterrupt), undo the pending write and report the reason.
    db.rollback()
    print(f"Error inserting in police_killings: {exc}")

Inserted successfully in police_killings


### Update Query - Update the date of the police killing by the person's name

In [80]:
# Person whose record is updated, and the new date to store.
name = "John Cena"
date = "22-05-12"

query = "UPDATE police_killings SET date=? WHERE name=?;"

try:
    cur.execute(query, (date, name))
    db.commit()
    # rowcount is the number of rows changed by the UPDATE; zero means the
    # name matched nothing, which should not be reported as a success.
    if cur.rowcount:
        print("Successfully updated police_killings")
    else:
        print(f"No police_killings row found with name {name!r}")
except sqlite3.Error as exc:
    # Catch database errors only (a bare except would also trap
    # KeyboardInterrupt), undo the pending write and report the reason.
    db.rollback()
    print(f"Error updating police_killings: {exc}")

Successfully updated police_killings


# Optimization and indexing

### Dropping indexes (in case any already exist)

In [117]:
# Remove the three benchmark indexes if an earlier run left them in the
# database file, so the CREATE INDEX cells that follow start from scratch.
# NOTE(review): this cell comes after the baseline ("without index") timing
# cells. Indexes persist in the database file, so when the notebook is re-run
# against the same file the baseline is measured with the previous run's
# indexes still present - consider dropping them before the baseline cells.
drop_index_1 = "DROP INDEX IF EXISTS police_killings_index;"
cur.execute(drop_index_1)

drop_index_2 = "DROP INDEX IF EXISTS city_index;"
cur.execute(drop_index_2)

drop_index_3 = "DROP INDEX IF EXISTS state_index;"
cur.execute(drop_index_3)

<sqlite3.Cursor at 0x7f91206166c0>

### Creating relevant indexes

In [118]:
# Composite index on police_killings(city_id, armed): city_id is the column
# both complex queries join on (c.id = p.city_id) and armed is one of the
# filters of complex query 1.
index_1 = "CREATE INDEX IF NOT EXISTS police_killings_index ON police_killings(city_id, armed)"
cur.execute(index_1)

<sqlite3.Cursor at 0x7f91206166c0>

In [119]:
# Index on city(state_id), the column the complex queries use to join city
# to state.
index_2 = "CREATE INDEX IF NOT EXISTS city_index ON city(state_id)"
cur.execute(index_2)

<sqlite3.Cursor at 0x7f91206166c0>

In [120]:
# Index on state(acronym), the column complex query 1 filters on.
# It is declared UNIQUE because state acronyms cannot be repeated.
index_3 = "CREATE UNIQUE INDEX IF NOT EXISTS state_index ON state(acronym)"
cur.execute(index_3)

<sqlite3.Cursor at 0x7f91206166c0>

### Show indexes are created

In [121]:
# List every index recorded in sqlite_master to confirm the new ones exist.
created_indexes = "\nSELECT * FROM sqlite_master WHERE type = 'index';\n"
records_query1 = cur.execute(created_indexes).fetchall()
pprint.pprint(records_query1)

[('index', 'sqlite_autoindex_city_1', 'city', 5, None),
 ('index',
  'police_killings_index',
  'police_killings',
  1152,
  'CREATE INDEX police_killings_index ON police_killings(city_id, armed)'),
 ('index',
  'city_index',
  'city',
  1164,
  'CREATE INDEX city_index ON city(state_id)'),
 ('index',
  'state_index',
  'state',
  1154,
  'CREATE UNIQUE INDEX state_index ON state(acronym)')]


### Simple queries with indexes

In [122]:
####### Performance testing for simple query 1 #############
# Same statement as the baseline simple query 1, timed after the indexes
# were created.
# String values use single quotes: SQLite treats double-quoted tokens as
# identifiers and only falls back to string literals as a legacy quirk.
optimized_query1 = """
SELECT name, age
FROM police_killings
WHERE age < 18
AND gender = 'M'
AND armed = 'unarmed'
ORDER BY age;"""

# with_index=True stores the mean time in times_sqllite_index.
results = record_time_f('simple1', optimized_query1, True)

pprint.pprint(results)

[('Jeremy Mardis', 6),
 ('Jordan Edwards', 15),
 ('Jose Raul Cruz', 16),
 ('Deven Guilford', 17),
 ('David Joseph', 17),
 ('Armando Garcia-Muro', 17)]


In [123]:
####### Performance testing for simple query 2 #############
# Same statement as the baseline simple query 2, timed after the indexes
# were created.
# String values use single quotes: SQLite treats double-quoted tokens as
# identifiers and only falls back to string literals as a legacy quirk.
optimized_query2 = """
SELECT name, age
FROM police_killings
WHERE gender = 'F'
AND flee = 'Not fleeing'
AND armed = 'unarmed'
ORDER BY age;
"""

# with_index=True stores the mean time in times_sqllite_index.
results = record_time_f('simple2', optimized_query2, True)

pprint.pprint(results)

[('Ciara Meyer', 12),
 ('Alteria Woods', 21),
 ('India Kager', 28),
 ('Autumn Steele', 34),
 ('Justine Damond', 40)]


In [124]:
####### Performance testing for simple query 3 #############
# Same statement as the baseline simple query 3 (age, race_id and armed all
# NULL), timed after the indexes were created.
optimized_query3 = '''
SELECT name, gender, date
FROM police_killings
WHERE age IS NULL
AND race_id IS NULL
AND armed IS NULL;
'''

# with_index=True stores the mean time under times_sqllite_index['simple3'].
results = record_time_f('simple3', optimized_query3, True)

pprint.pprint(results)

[('TK TK', 'M', '16-11-11')]


### Complex queries with indexes

In [125]:
####### Performance testing for complex query 1 #############
# People older than 20, armed with a knife, who were shot and Tasered by
# police in the state of Texas. Same statement as the baseline complex
# query 1, timed after the indexes were created.
# String values use single quotes: SQLite treats double-quoted tokens as
# identifiers and only falls back to string literals as a legacy quirk.
optimized_query1 = """
SELECT p.name, p.age, s.acronym, p.armed
FROM police_killings p
INNER JOIN city c
ON c.id = p.city_id
INNER JOIN state s
ON c.state_id = s.id
WHERE s.acronym = 'TX'
AND p.age > 20
AND p.manner_of_death = 'shot and Tasered'
AND p.armed = 'knife';
"""

# with_index=True stores the mean time in times_sqllite_index.
results = record_time_f('complex1', optimized_query1, True)

pprint.pprint(results)

[('Henry Reyna', 49, 'TX', 'knife'),
 ('Rodney Henderson', 48, 'TX', 'knife'),
 ('Ray Valdez', 55, 'TX', 'knife'),
 ('Randall Lance Hughes', 48, 'TX', 'knife'),
 ('Gregory Mathis', 36, 'TX', 'knife')]


In [126]:
####### Performance testing for complex query 2 #############
# State with the highest number of records for males younger than 18. Same
# statement as the baseline complex query 2, timed after the indexes were
# created. Selecting the bare `state` column next to MAX() relies on SQLite
# returning it from the row that holds the maximum.
# String values use single quotes: SQLite treats double-quoted tokens as
# identifiers and only falls back to string literals as a legacy quirk.
optimized_query2 = """
SELECT state, MAX(mycount)
FROM (SELECT s.acronym as state, COUNT(p.name) mycount
    FROM police_killings p
    INNER JOIN city c
    ON c.id = p.city_id
    INNER JOIN state s
    ON s.id = c.state_id
    WHERE p.gender = 'M'
    AND p.age < 18
    GROUP BY s.acronym);
"""

# with_index=True stores the mean time in times_sqllite_index.
results = record_time_f('complex2', optimized_query2, True)

pprint.pprint(results)

[('CA', 11)]


### Performance of queries with and without indexes

In [127]:
# Report the mean per-execution time stored for every query: first the
# baseline run, then the run made after the indexes were created.
# The headers say "all queries" because both the simple and the complex
# timings are printed, not only the complex ones.
print('Times for all queries without indexes and optimization:')
for query_name, mean_seconds in times_sqllite.items():
    print(f'{query_name} - {mean_seconds:7.6f}')

print('\nTimes for all queries with indexes and optimization:')
for query_name, mean_seconds in times_sqllite_index.items():
    print(f'{query_name} - {mean_seconds:7.6f}')

Times for complex queries without indexes and optimization:
simple1 - 0.000378
simple2 - 0.000971
simple3 - 0.000311
complex1 - 0.000769
complex2 - 0.001060

Times for complex queries with indexes and optimization:
simple1 - 0.000350
simple2 - 0.000895
simple3 - 0.000282
complex1 - 0.000751
complex2 - 0.001042


In [254]:
# Close the database connection opened in the first cell; `cur` belongs to
# this connection and should not be used afterwards.
db.close()