### Imports

In [1]:
import sqlite3
import pprint
from time import time

### Connect to the DB

In [2]:
db = sqlite3.connect("sqlite3_group10.db")
cur = db.cursor()

## Simple Queries

### Simple Query 1 - Unarmed Males under 18 killed by Police

In [3]:
simple_query_1 = '''
    SELECT name, age 
    FROM police_killings 
    WHERE age < 18 
    AND gender = "M" 
    AND armed = "unarmed" 
    ORDER BY age;
'''

results = cur.execute(simple_query_1).fetchall()

pprint.pprint(results)

[('Jeremy Mardis', 6),
 ('Jordan Edwards', 15),
 ('Jose Raul Cruz', 16),
 ('Deven Guilford', 17),
 ('David Joseph', 17),
 ('Armando Garcia-Muro', 17)]


### Simple Query 2 - Women killed by Police while not fleeing and not armed

In [4]:
simple_query_2 = '''
    SELECT name, age
    FROM police_killings
    WHERE gender = "F"
    AND flee = "Not fleeing"
    AND armed = "unarmed"
    ORDER BY age;
'''

results = cur.execute(simple_query_2).fetchall()

pprint.pprint(results)

[('Ciara Meyer', 12),
 ('Alteria Woods', 21),
 ('India Kager', 28),
 ('Autumn Steele', 34),
 ('Justine Damond', 40)]


### Simple Query 3 - People with NULL values regarding age, race and armed

In [5]:
simple_query_3 = '''
    SELECT name, gender, date
    FROM police_killings
    WHERE age IS NULL
    AND race_id IS NULL
    AND armed IS NULL;
'''

results = cur.execute(simple_query_3).fetchall()

pprint.pprint(results)

[('TK TK', 'M', '16-11-11')]


## Complex Queries

#### Complex Query 1 - People above 20 armed with a knife shot and Tasered by police in the state of Texas

In [6]:
complex_query_1 = '''
    SELECT p.name, p.age, s.acronym, p.armed
    FROM police_killings p
    LEFT JOIN city c
    ON c.id = p.city_id
    LEFT JOIN state s
    ON c.state_id = s.id
    WHERE s.acronym = "TX"
    AND p.age > 20
    AND p.manner_of_death = "shot and Tasered"
    AND p.armed = "knife";
'''

results = cur.execute(complex_query_1).fetchall()
    
for x in results:
    print(x)

('Randall Lance Hughes', 48, 'TX', 'knife')
('Henry Reyna', 49, 'TX', 'knife')
('Gregory Mathis', 36, 'TX', 'knife')
('Ray Valdez', 55, 'TX', 'knife')
('Rodney Henderson', 48, 'TX', 'knife')


#### Complex Query 2 - State with the most shootings against males under the age of 18

In [7]:
complex_query_2 = '''
SELECT state, MAX(mycount)
FROM (SELECT s.acronym as state, COUNT(p.name) mycount
    FROM police_killings p
    INNER JOIN city c
    ON c.id = p.city_id
    INNER JOIN state s
    ON s.id = c.state_id
    WHERE p.gender = "M"
    AND p.age < 18
    GROUP BY s.acronym);
'''

results = cur.execute(complex_query_2).fetchall()

for x in results:
    print(f"{{\'_id\': \'{x[0]}\', \'shootings\': {x[1]}}}")

{'_id': 'CA', 'shootings': 11}


### Insert Query - Insert new police killing

In [8]:
john = ("John Smith", "20-02-25", "shot", "toy weapon", 34, "M", 1, 1, 1, "attack", None, 1)


query = '''
    INSERT INTO police_killings (name, date, manner_of_death, armed, age, gender, race_id, city_id, signs_of_mental_illness, threat_level, flee, body_camera) 
    VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ? ,?);
'''

try:
    cur.execute(query, john)
    db.commit()
    print("Inserted successfully in police_killings")
except:
    print("Error inserting in police_killings")
    db.rollback()

Inserted successfully in police_killings


### Update Query - Update the date of the police killing by the person's name

In [9]:
query = "UPDATE police_killings SET date=? WHERE id=?;"

try:
    cur.execute(query, ("22-02-25", cur.lastrowid))
    db.commit()
    print("Successfully updated police_killings")
except:
    print("Error updating police_killings")
    db.rollback()

Successfully updated police_killings


# Optimization and indexing

In [10]:
# Number of iterations used in performance
iterations = 200

# Get average performance
def performance(query):
    result = 0
    for _ in range(iterations):
        time_i = time()
        cur.execute(query)
        time_f = time()
        result += time_f-time_i
    return result / iterations

### Dropping indexes (in case there were some)

In [11]:
drop_index_1 = "DROP INDEX IF EXISTS police_killings_index;"
cur.execute(drop_index_1)

drop_index_2 = "DROP INDEX IF EXISTS police_killings_index_2;"
cur.execute(drop_index_2)

drop_index_3 = "DROP INDEX IF EXISTS state_index;"
cur.execute(drop_index_3)

<sqlite3.Cursor at 0x7f0d86d85f40>

### Performance without indexes

In [12]:
# Get performance for all queries
performance_no_index = {
    "simple_query_1" : performance(simple_query_1),
    "simple_query_2" : performance(simple_query_2),
    "complex_query_1" : performance(complex_query_1),
    "complex_query_2" : performance(complex_query_2)
}

### Creating relevant indexes

In [13]:
index_1 = '''
CREATE INDEX IF NOT EXISTS police_killings_index ON police_killings(armed, gender, age, flee, manner_of_death, name)
'''
cur.execute(index_1)

index_2 = '''
CREATE INDEX IF NOT EXISTS police_killings_index_2 ON police_killings(gender, age, city_id)
'''
cur.execute(index_2)

# # acronyms of states cannot be repeated
index_3 = "CREATE UNIQUE INDEX IF NOT EXISTS state_index ON state(acronym)"
cur.execute(index_3)

<sqlite3.Cursor at 0x7f0d86d85f40>

### Show indexes are created

In [14]:
# print indexes that were created
created_indexes = '''
SELECT * FROM sqlite_master WHERE type = 'index';
'''
cur.execute(created_indexes)
records_query1 = cur.fetchall()
pprint.pprint(records_query1)

[('index', 'sqlite_autoindex_city_1', 'city', 5, None),
 ('index',
  'police_killings_index',
  'police_killings',
  1147,
  'CREATE INDEX police_killings_index ON police_killings(armed, gender, age, '
  'flee, manner_of_death, name)\n'),
 ('index',
  'police_killings_index_2',
  'police_killings',
  1178,
  'CREATE INDEX police_killings_index_2 ON police_killings(gender, age, '
  'city_id)\n'),
 ('index',
  'state_index',
  'state',
  1188,
  'CREATE UNIQUE INDEX state_index ON state(acronym)')]


### Optimized complex queries

In [15]:
optimized_complex_query_1 = '''
    SELECT p.name, p.age, s.acronym, p.armed
    FROM police_killings p
    INNER JOIN city c
    ON c.id = p.city_id
    INNER JOIN state s
    ON c.state_id = s.id
    WHERE s.acronym = "TX"
    AND p.age > 20
    AND p.manner_of_death= "shot and Tasered"
    AND p.armed = "knife";
'''

results = cur.execute(optimized_complex_query_1).fetchall()

pprint.pprint(results)

[('Gregory Mathis', 36, 'TX', 'knife'),
 ('Randall Lance Hughes', 48, 'TX', 'knife'),
 ('Rodney Henderson', 48, 'TX', 'knife'),
 ('Henry Reyna', 49, 'TX', 'knife'),
 ('Ray Valdez', 55, 'TX', 'knife')]


In [16]:
optimized_complex_query_2 = '''
    SELECT s.acronym, (COUNT(p.name)) as mycount
    FROM police_killings p
    INNER JOIN city c
    ON c.id = p.city_id
    INNER JOIN state s
    ON s.id = c.state_id
    WHERE p.age < 18
    AND p.gender = "M"
    GROUP BY s.acronym
    ORDER BY (COUNT(p.name)) DESC
    LIMIT 1;
'''

results = cur.execute(optimized_complex_query_2).fetchall()

for x in results:
    print(f"{{\'_id\': \'{x[0]}\', \'shootings\': {x[1]}}}")

{'_id': 'CA', 'shootings': 11}


### Performance with indexes and optimization

In [17]:
# Get performance for all queries
performance_index_opt = {
    "simple_query_1" : performance(simple_query_1),
    "simple_query_2" : performance(simple_query_2),
    "complex_query_1" : performance(optimized_complex_query_1),
    "complex_query_2" : performance(optimized_complex_query_2)
}

In [18]:
# Run to see what indexes is the query using
pprint.pprint(cur.execute("EXPLAIN QUERY PLAN " + complex_query_1).fetchall())

[(5, 0, 0, 'SEARCH s USING COVERING INDEX state_index (acronym=?)'),
 (9, 0, 0, 'SEARCH p USING INDEX police_killings_index (armed=?)'),
 (18, 0, 0, 'SEARCH c USING INTEGER PRIMARY KEY (rowid=?)')]


### Performance of queries with and without indexes

In [19]:
print('Times without indexes and optimization:')
pprint.pprint(performance_no_index)

print('\nTimes with indexes and optimization:')
pprint.pprint(performance_index_opt)

Times without indexes and optimization:
{'complex_query_1': 0.00013120174407958983,
 'complex_query_2': 0.0005676460266113282,
 'simple_query_1': 0.00033306717872619627,
 'simple_query_2': 0.0005672252178192138}

Times with indexes and optimization:
{'complex_query_1': 0.00010605216026306152,
 'complex_query_2': 0.00010340332984924316,
 'simple_query_1': 2.8327703475952148e-05,
 'simple_query_2': 2.2132396697998046e-05}


### Close Connection

In [20]:
db.close()