In [1]:
import sys; sys.path.append('../')
import pandas as pd
import os

from lib.data import download, extract_zip
from lib.trr import (
    clean_trr, clean_action_response, clean_charge, clean_subject_weapon, clean_status, clean_weapon_discharge
)
from lib.sql import insert_stmts

In [2]:
# Fetch the TRR import bundle and unpack it. `trr_dir_path` is what the later
# cells join CSV file names onto.
TRR_ZIP_URL = 'https://www.dropbox.com/s/awry2r8e35qj4j9/2021-04-28_TRR%20data%20to%20import.zip?dl=1'
trr_zip_path = download(TRR_ZIP_URL)
trr_dir_path = extract_zip(trr_zip_path)

In [3]:
# Lower-cased POLAST value -> officer id for the two officers being imported.
officer_dict = {
    'encarnacion': 7939,
    'solano': 26969,
}

# One TRR export per officer; stack them into a single frame.
trr_csv_names = ('Encarnacion.TRR.csv', 'Solano.TRR.csv')
trr = pd.concat([
    pd.read_csv(os.path.join(trr_dir_path, csv_name))
    for csv_name in trr_csv_names
])

# Direct dict indexing (rather than passing the dict to `map`) so that an
# unexpected POLAST value raises KeyError instead of silently becoming NaN.
polast_lower = trr.POLAST.str.lower()
trr.loc[:, 'officer_id'] = polast_lower.map(lambda surname: officer_dict[surname])

trr = clean_trr(trr)
print(insert_stmts('trr_trr', trr))

INSERT INTO trr_trr (id, beat, block, direction, street, location, trr_datetime, indoor_or_outdoor, lighting_condition, weather_condition, "notify_OEMC", notify_district_sergeant, "notify_OP_command", "notify_DET_division", number_of_weapons_discharged, party_fired_first, officer_unit_id, officer_unit_detail_id, officer_assigned_beat, officer_on_duty, officer_injured, officer_in_uniform, subject_gender, subject_race, subject_birth_year, subject_armed, subject_injured, subject_alleged_injury, officer_id, created_at, updated_at) VALUES (810142, 1624, '59XX', 'West', 'MONTROSE AVE', 'GAS STATION', '2019-01-06 05:46:00', 'Indoor', 'ARTIFICIAL', 'CLEAR', 'T', 'T', 'T', 'F', NULL, NULL, 17, NULL, '1634R', 'T', 'F', 'T', 'M', 'HISPANIC', 1981, 'F', 'F', 'F', 7939, current_timestamp, current_timestamp);
INSERT INTO trr_trr (id, beat, block, direction, street, location, trr_datetime, indoor_or_outdoor, lighting_condition, weather_condition, "notify_OEMC", notify_district_sergeant, "notify_OP_co

In [4]:
# Ids of the TRRs loaded above; the later cells reuse `trr_ids` to filter the
# other "All.*" child tables down to these reports.
trr_ids = trr['id'].unique()

# Action/response rows: read, clean, keep only rows belonging to the selected
# TRRs, then print them as INSERT statements.
ar = pd.read_csv(os.path.join(trr_dir_path, 'All.ActionsResponses.TRR.csv'))
ar = clean_action_response(ar)
ar = ar[ar.trr_id.isin(trr_ids)].reset_index(drop=True)
# (A bare `ar` used to sit here; it was not the last expression of the cell,
# so it displayed nothing and has been removed.)
print(insert_stmts('trr_actionresponse', ar))

INSERT INTO trr_actionresponse (trr_id, person, resistance_type, action, created_at, updated_at, id) VALUES (778883, 'Member Action', 'Active Resister', 'OPEN HAND STRIKE', current_timestamp, current_timestamp, (select max(id) + 1 from trr_actionresponse));
INSERT INTO trr_actionresponse (trr_id, person, resistance_type, action, created_at, updated_at, id) VALUES (778883, 'Member Action', 'Active Resister', 'TAKE DOWN/EMERGENCY HANDCUFFING', current_timestamp, current_timestamp, (select max(id) + 1 from trr_actionresponse));
INSERT INTO trr_actionresponse (trr_id, person, resistance_type, action, created_at, updated_at, id) VALUES (778883, 'Member Action', 'Assailant Assault/Battery', 'CLOSED HAND STRIKE/PUNCH', current_timestamp, current_timestamp, (select max(id) + 1 from trr_actionresponse));
INSERT INTO trr_actionresponse (trr_id, person, resistance_type, action, created_at, updated_at, id) VALUES (778883, 'Member Action', 'Passive Resister', 'ESCORT HOLDS', current_timestamp, curr

In [5]:
# Charge rows: read, clean, keep only rows whose trr_id is in `trr_ids`, then
# print them as INSERT statements.
ch = pd.read_csv(os.path.join(trr_dir_path, 'All.Charges.TRR.csv'))
ch = clean_charge(ch)
ch = ch[ch.trr_id.isin(trr_ids)].reset_index(drop=True)
# (A bare `ch` used to sit here; it was not the last expression of the cell,
# so it displayed nothing and has been removed.)
print(insert_stmts('trr_charge', ch))

INSERT INTO trr_charge (trr_id, statute, description, created_at, updated_at, id) VALUES (778883, '720 ILCS 5.0/24-1-A-10', 'UUW -  WEAPON - PUBLIC STREET/ALLEY/LANDS', current_timestamp, current_timestamp, (select max(id) + 1 from trr_charge));
INSERT INTO trr_charge (trr_id, statute, description, created_at, updated_at, id) VALUES (778883, '8-24-020', 'SALE OR POSSESSION OF DEADLY WEAPON', current_timestamp, current_timestamp, (select max(id) + 1 from trr_charge));
INSERT INTO trr_charge (trr_id, statute, description, created_at, updated_at, id) VALUES (844519, '625 ILCS 5.0/3-707', 'INSURANCE - OPERATE MTR VEHICLE WITHOUT', current_timestamp, current_timestamp, (select max(id) + 1 from trr_charge));
INSERT INTO trr_charge (trr_id, statute, description, created_at, updated_at, id) VALUES (844519, '625 ILCS 5.0/6-303-A', 'DRIVING ON SUSPENDED LICENSE', current_timestamp, current_timestamp, (select max(id) + 1 from trr_charge));
INSERT INTO trr_charge (trr_id, statute, description, cre

In [6]:
# Subject-weapon rows: read, clean, keep only rows whose trr_id is in
# `trr_ids`, then print them as INSERT statements.
sw = pd.read_csv(os.path.join(trr_dir_path, 'All.SubjectWeapons.TRR.csv'))
sw = clean_subject_weapon(sw)
sw = sw[sw.trr_id.isin(trr_ids)].reset_index(drop=True)
# (A bare `sw` used to sit here; it was not the last expression of the cell,
# so it displayed nothing and has been removed.)
print(insert_stmts('trr_subjectweapon', sw))

INSERT INTO trr_subjectweapon (trr_id, weapon_type, weapon_description, created_at, updated_at, id) VALUES (778883, 'HANDS/FISTS', NULL, current_timestamp, current_timestamp, (select max(id) + 1 from trr_subjectweapon));
INSERT INTO trr_subjectweapon (trr_id, weapon_type, weapon_description, created_at, updated_at, id) VALUES (778883, 'HANDS/FISTS', NULL, current_timestamp, current_timestamp, (select max(id) + 1 from trr_subjectweapon));
INSERT INTO trr_subjectweapon (trr_id, weapon_type, weapon_description, created_at, updated_at, id) VALUES (778883, 'MOUTH (SPIT,BITE,ETC)', NULL, current_timestamp, current_timestamp, (select max(id) + 1 from trr_subjectweapon));
INSERT INTO trr_subjectweapon (trr_id, weapon_type, weapon_description, created_at, updated_at, id) VALUES (810159, 'HANDS/FISTS', NULL, current_timestamp, current_timestamp, (select max(id) + 1 from trr_subjectweapon));
INSERT INTO trr_subjectweapon (trr_id, weapon_type, weapon_description, created_at, updated_at, id) VALUES

In [7]:
from datamatch import ThresholdMatcher, JaroWinklerSimilarity, DateSimilarity, ColumnsIndex, StringSimilarity
from lib.refs import load_officers

# Status rows for the selected TRRs. Each row names an officer by identity
# fields rather than by id, so the officer id is resolved by fuzzy-matching
# against the officer reference table before the INSERT statements are printed.
st = pd.read_csv(os.path.join(trr_dir_path, 'All.Statuses_OtherMembers.TRR.csv'))
st = clean_status(st)
st = st[st.trr_id.isin(trr_ids)].reset_index(drop=True)
officers = load_officers()

# Normalise the text identity fields: no NaN, lower case.
for col in ['last_name', 'first_name', 'middle_initial', 'gender', 'race']:
    st.loc[:, col] = st[col].fillna('').str.lower()

# Key every status row by ALL identity fields, not just the last name.
# Keying on last name alone gives duplicate index labels as soon as two
# different people share a surname, and both would get the same officer id.
person_cols = ['last_name', 'first_name', 'middle_initial', 'gender', 'race', 'appointed_date']
person_key = st[person_cols[0]].astype(str).str.cat(
    [st[col].astype(str) for col in person_cols[1:]], sep='|'
)

# Left side of the match: one row per distinct person, uniquely indexed.
dfa = (
    st[person_cols]
    .assign(person_key=person_key)
    .drop_duplicates(subset='person_key')
    .set_index('person_key')
)
# `fc` = first character of the first name (already NaN-free after the loop above).
dfa.loc[:, 'fc'] = dfa.first_name.map(lambda x: x[:1])

# Right side of the match: reference officers, indexed by officer id.
dfb = officers[[
    'id', 'last_name', 'first_name', 'middle_initial', 'gender', 'race', 'appointed_date'
]].drop_duplicates().set_index('id', drop=True)
dfb.loc[:, 'fc'] = dfb.first_name.fillna('').map(lambda x: x[:1])

# NOTE(review): ColumnsIndex(['fc']) presumably limits comparisons to records
# sharing the same `fc` value -- confirm against the datamatch version in use.
matcher = ThresholdMatcher(dfa, dfb, ColumnsIndex(['fc']), {
    'first_name': JaroWinklerSimilarity(),
    'last_name': JaroWinklerSimilarity(),
    'middle_initial': StringSimilarity(),
    'gender': StringSimilarity(),
    'race': StringSimilarity(),
    'appointed_date': DateSimilarity()
})
pairs = matcher.get_index_pairs_within_thresholds()
# NOTE(review): if a person matches several officers, dict() keeps whichever
# pair comes last; confirm the ordering of `pairs` gives the intended winner.
match_dict = dict(pairs)

# Fail loudly, naming the people involved, instead of a bare KeyError.
unmatched = [key for key in dfa.index if key not in match_dict]
if unmatched:
    raise KeyError('no officer match within thresholds for: {}'.format(unmatched))

st.loc[:, 'officer_id'] = person_key.map(match_dict)
st = st.drop(columns=['last_name', 'first_name', 'middle_initial', 'gender', 'race', 'birth_year', 'appointed_date'])
print(insert_stmts('trr_trrstatus', st))

  if (await self.run_code(code, result,  async_=asy)):
INSERT INTO trr_trrstatus (trr_id, status, status_datetime, officer_id, created_at, updated_at, id) VALUES (778883, 'SUBMITTED', '2018-12-20 18:14:00', 26969, current_timestamp, current_timestamp, (select max(id) + 1 from trr_trrstatus));
INSERT INTO trr_trrstatus (trr_id, status, status_datetime, officer_id, created_at, updated_at, id) VALUES (778883, 'SUBMITTED', '2018-12-21 01:08:00', 14536, current_timestamp, current_timestamp, (select max(id) + 1 from trr_trrstatus));
INSERT INTO trr_trrstatus (trr_id, status, status_datetime, officer_id, created_at, updated_at, id) VALUES (778883, 'APPROVED', '2018-12-22 07:42:00', 7958, current_timestamp, current_timestamp, (select max(id) + 1 from trr_trrstatus));
INSERT INTO trr_trrstatus (trr_id, status, status_datetime, officer_id, created_at, updated_at, id) VALUES (778883, 'SUBMITTED', '2019-01-24 12:24:00', 24300, current_timestamp, current_timestamp, (select max(id) + 1 from trr_trrs

In [8]:
# Weapon-discharge rows: read and clean the export, keep only rows whose
# trr_id is in `trr_ids`, then print them as INSERT statements.
wd_csv_path = os.path.join(trr_dir_path, 'All.WeaponDischarges.TRR.csv')
wd = clean_weapon_discharge(pd.read_csv(wd_csv_path))
in_selected_trrs = wd.trr_id.isin(trr_ids)
wd = wd.loc[in_selected_trrs].reset_index(drop=True)
print(insert_stmts('trr_weapondischarge', wd))


