In [1]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
import sklearn.metrics as skm
import sklearn.preprocessing as skp
from sklearn.model_selection import train_test_split, GridSearchCV, KFold, ParameterGrid
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.linear_model import LinearRegression, LogisticRegressionCV
from sklearn.naive_bayes import GaussianNB
from sklearn.impute import SimpleImputer
from sklearn import set_config
from bs4 import BeautifulSoup

import pandas as pd
import numpy as np

pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', None)
set_config(display = 'diagram')

In [2]:
sheets = ['Denver Colorado', 
          'Los Angeles County California',
          'Jacksonville County Florida', 
          'Seattle County Washington', 
          'Tacoma Washington', 
          'Atlanta Georgia',
          'Bloomington Illinois',
          'Cincinnati Ohio',
          'Dallas Texas', 
          'Hampton',
          'Hartford Conneticut',
          'Indianapolis Indiana',
          'Orlando Florida',
          'Sparks Nevada',
          'Portland Oregon',
          'Louisville Kentucky',
          'Springfiled Missouri',
          'Charolette North Carolina 1',
          'Charolette North Carolina suspe',
          'Charolette North Carolina offic']

In [3]:
dictOfDfs = {}
for x in sheets:
    dictOfDfs[x] = pd.read_excel('officer_involved_shooting_all_info.xlsx', sheet_name=x)


In [4]:
# for key in dictOfDfs:
#     display(dictOfDfs[key].head())

In [5]:
finalColumns = [
    'city',
    'state',
    'id',
    'date',
    'time',
    'day_of_week',
    'incident_initiation',
    'officer_rank', 
    'suspect_sex',
    'suspect_age',
    'suspect_race',
    'suspect_ethnicity',
    'suspect_armed',
    'injury',
    'justified',
    'x_coord',
    'y_coord'
]

## General Mappings

In [176]:
race_map = {
    'White' : 'white',
    'WHITE' : 'white',
    'W' : 'white',
    'w' : 'white',
    'Black': 'black',
    'BLACK': 'black',
    'B' : 'black',
    'b' : 'black',
    'Hispanic': 'hispanic',
    'HISPANIC' : 'hispanic',
    'H' : 'hispanic',
    'h' : 'hispanic',
    'Unknown' : 'unknown',
    'UNK' : 'unknown',
    'UNKNOWN' : 'unknown'
}

sex_map = {
    'MALE': 'male',
    'Male' : 'male',
    'M' : 'male',
    'm' : 'male',
    'FEMALE' : 'female',
    'Female' : 'female',
    'F' : 'female',
    'f' : 'female'
}

## Denver

In [225]:
denver = dictOfDfs['Denver Colorado'].copy()

In [226]:
denver.columns = [
    'city', 
    'state', 
    'id', 
    'date', 
    'time', 
    'day_of_week', 
    'call_origination',
    'incident_initiation',
    'address', 
    'officer_first_last', 
    'role', 
    'officer_rank', 
    'officer_badge',  
    'officer_on_duty',
    'in_uniform', 
    'citizen_sex',
    'citizen_age',
    'citizen_race',
    'citizen_ethnicity',
    'citizen_armed',
    'citizen_shooting',
    'citizen_injury',
    'justified',
    'x_coord',
    'y_coord'
]

In [227]:
# for x in denver.columns:
#     display(denver[x].value_counts())

### Create categorical variable maps

In [228]:
suspect_armed_map = {
    'Firearm'                 : 'firearm',
    'Knife'                   : 'other',
    'None'                    :'unarmed',
    'Replica or Air Gun'      :'other',
    'Simulated Weapon'        :'unarmed',
    'Motor Vehicle'           :'other',
    'Blunt Object'            :'other',
    'Stun Gun'                :'other',
    'Shotgun'                 :'firearm'
}

In [229]:
incident_initiation_map = {
    'Weapon / Concealed Weapon'                 : 'weapon',                    
    'Warrant'                                   : 'warrant',                                    
    'Vehicle Stop'                              : 'traffic_stop',                                
    'Shots Fired'                               : 'shooting',                                  
    'Shooting'                                  : 'shooting',                                     
    'Street Robbery'                            : 'robbery',
    'Robbery - In Progress / Just Occurred'     : 'robbery'  ,       
    'Burglary - In Progess'                     : 'burglary',
    'Suspicious Vehicle'                        : 'suspicious_situation',                           
    'Surveillance'                              : 'police_surveillance' ,    
    'Domestic Violence - In Progress'           : 'dv'   ,           
    'Disturbance'                               : 'disturbance'  ,                                
    'Burglary - In Progress'                    : 'burglary' ,                      
    'Bank Robbery Suspect'                      : 'bolo',                         
    'Shots Heard/Fired'                         :'shooting'      ,                      
    'Suicidal Person / Suicide'                 : 'suicide' ,                   
    'Vehicle Check'                             : 'traffic_stop'  ,                              
    'Shot Spotter'                              : 'shooting'  ,                              
    'Robbery - Car Jacking '                    : 'robbery'  ,                      
    'Family Disturbance'                        : 'dv'   ,                        
    'Harassment in Progress Involving a Weapon' : 'weapon' ,
    'Burglary-In Progress'                      : 'burglary'     ,                    
    'Burglary/Suspicious Occurrence'            : 'burglary',
    'Man with a Gun'                            : 'weapon' ,                              
    'BOLO (Be on the lookout)'                  : 'bolo' ,                     
    'Business Robbery'                          : 'robbery'                             
}

In [230]:
denver['incident_initiation'] = denver['incident_initiation'].map(incident_initiation_map, na_action = 'ignore')
denver['citizen_armed'] = denver['citizen_armed'].map(suspect_armed_map, na_action = 'ignore')
denver['citizen_race'] = denver['citizen_race'].map(race_map)
denver['citizen_sex'] = denver['citizen_sex'].map(sex_map)


In [231]:
denver.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 127 entries, 0 to 126
Data columns (total 25 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   city                 127 non-null    object        
 1   state                127 non-null    object        
 2   id                   127 non-null    object        
 3   date                 127 non-null    datetime64[ns]
 4   time                 127 non-null    int64         
 5   day_of_week          127 non-null    object        
 6   call_origination     127 non-null    object        
 7   incident_initiation  125 non-null    object        
 8   address              127 non-null    object        
 9   officer_first_last   127 non-null    object        
 10  role                 127 non-null    object        
 11  officer_rank         78 non-null     object        
 12  officer_badge        78 non-null     object        
 13  officer_on_duty      78 non-null   

In [232]:
officers = pd.get_dummies(denver['officer_rank'])
suspects = pd.get_dummies(denver[['citizen_sex','citizen_race','citizen_ethnicity', 'citizen_armed']])
officerSuspectOneHot = pd.concat([denver['id'],officers,suspects],axis = 1)
dfDenver1 = officerSuspectOneHot.groupby(by = ['id'], axis = 0, dropna = False).sum()
dfDenver2 = denver.groupby(['id'], axis = 0, dropna = False).first()
denverEncoded = pd.merge(dfDenver2,dfDenver1, how = 'left', on = 'id').reset_index()
denverEncoded.head()
# dfDenver.drop(columns = [
#     'address', 
#     'officer_badge', 
#     'officer_on_duty', 
#     'in_uniform', 
#     'suspect_gender', 
#     'suspect_ethnicity', 
#     'suspect_armed',
#     'justified',
#     'role',
#     'officer_rank',
#     'suspect_race'
# ])


Unnamed: 0,id,city,state,date,time,day_of_week,call_origination,incident_initiation,address,officer_first_last,role,officer_rank,officer_badge,officer_on_duty,in_uniform,citizen_sex,citizen_age,citizen_race,citizen_ethnicity,citizen_armed,citizen_shooting,citizen_injury,justified,x_coord,y_coord,Corporal,Detective,Officer,Sergeant,Technician,citizen_sex_female,citizen_sex_male,citizen_race_black,citizen_race_white,citizen_ethnicity_H,citizen_ethnicity_NH,citizen_armed_firearm,citizen_armed_other,citizen_armed_unarmed
0,2020182,Denver,Colorado,2020-01-01,120,Wednesday,Citizen-Initiated,weapon,901 S Irving St,"Archuleta, Diego",Officer,Officer,P16061,On-Duty City Paid,Police Uniform,male,25,white,H,firearm,Yes,Not Injured,Investigation Pending,3132258,1680228,0,0,2,0,0,0,3,0,3,2,1,2,1,0
1,201516702,Denver,Colorado,2015-01-09,1940,Friday,Officer-Initiated,traffic_stop,5081 N Crown Blvd,"Kindell, Sharod",Subject,Officer,P08002,On-Duty City Paid,Police Uniform,male,24,black,NH,other,Yes,Injured,http://www.denverda.org/News_Release/Decision_...,3188620,1712709,0,0,1,0,0,0,2,1,1,0,2,1,1,0
2,201549266,Denver,Colorado,2015-01-26,700,Monday,Citizen-Initiated,suspicious_situation,2500 blk N Newport St / N Niagara St Alley,"Hernandez, Jessica",Subject,Officer,P05076,On-Duty City Paid,Police Uniform,female,17,white,H,other,Yes,Deceased,https://www.denverda.org/wp-content/uploads/de...,3165976,1700000,0,0,2,0,0,1,2,0,3,1,2,2,1,0
3,201621883,Denver,Colorado,2016-01-11,1529,Monday,Officer-Initiated,traffic_stop,2601 N Zuni St,"Lonergan, Ramone",Subject,Detective,P96014,On-Duty City Paid,Plain Clothes,male,32,white,NH,firearm,Yes,Deceased,https://www.denverda.org/wp-content/uploads/de...,3135850,1700412,0,1,1,0,0,0,3,0,3,1,2,3,0,0
4,201889020,Denver,Colorado,2018-02-06,1756,Tuesday,Citizen-Initiated,burglary,1873 S Alcott St,"Duran, Alexander",Subject,Corporal,P98027,On-Duty City Paid,Police Uniform,male,29,white,H,other,No,Deceased,https://www.denverda.org/wp-content/uploads/de...,3135881,1673982,1,0,0,0,0,0,2,0,2,1,1,1,1,0


## LA County

In [187]:
la_county = dictOfDfs['Los Angeles County California'].copy()
la_county.columns = [
    'id',
    'incident_initiation',
    'date',
    'address',
    'city',
    'state',
    'zip',
    'district',
    'geo_location',
    'unit_id',
    'unit_name',
    'num_officers_involved',
    'officer_race',
    'num_of_citizens',
    'citizen_race',
    'num_citizens_wounded',
    'num_citizens_killed',
    'citizen_armed',
    'citizen_armed_description',
    'x_coord',
    'y_coord'
]

In [188]:
la_county['citizen_race'] = la_county['citizen_race'].astype('str')

### Category Mappings to limit features

In [189]:
def func(x):
    if x in ['LOS ANGELES',                     
            'COMPTON'                  ,       
            'LANCASTER'                 ,      
            'PALMDALE'                   ,     
            'EAST LOS ANGELES'            ,    
            'PICO RIVERA'                  ,   
            'BELLFLOWER'                    ,   
            'LYNWOOD'                        ,  
            'NORWALK'                         , 
            'PARAMOUNT'                        ,
            'LAKEWOOD'                         ,
            'WHITTIER']:
        return x
    else:
        return 'other'

In [190]:
def func1(x):
    if x in [
        'CENTURY STN',                          
        'COMPTON STN',                          
        'EAST LA STN ',                         
        'LAKEWOOD STN' ,                        
        'SOUTH LOS ANGELES STATION',          
        'OPERATION SAFE STREETS BUREAU',      
        'PICO RIVERA STN'               ,       
        'INDUSTRY STN'                   ,      
        'LANCASTER STN'                   ,     
        'PALMDALE STN'                     ,    
        'SPECIAL ENFORCEMENT BUR'           ,   
        'TEMPLE CITY STN'                    ,  
        'SANTA CLARITA VALLEY STN'            , 
        'MAJOR CRIMES BUREAU'                  , 
        'CARSON STN'                            ,
        'NORWALK REGIONAL STN'                  ,
        'TRANSIT SERVICES BUREAU'               
    ]:
        return x
    else:
        return 'other'

In [191]:
weapon_map= {
    'A-1'   : 'firearm',
    'A-2'    :'firearm',
    'A-4'    :'unknown',
    'B-2'    : 'other',
    'B-1'    : 'other',
    'C-1'    : 'unarmed',
    'A-3'    : 'unknown',
    'D-1'    : 'unarmed',
    'B-4'    :  'unknown',
    'E-1'    :  'unarmed'
}

In [192]:
def get_specific_type(df, column, elementOfInterest,seperating_value = ','):
    def breakout_column(df,column,seperating_value):
        listOfDicts = []
        types = {}
        for x in df[column]:
            types[x] = types.get(x,0) + 1
        for x in df[column]:
            dictionary = {}
            broken_string = x.split(seperating_value)
            for y in broken_string:
                dictionary[y] = dictionary.get(y,0) + 1
            listOfDicts.append(dictionary)
        return listOfDicts
    breakoutColumn = breakout_column(df, column, seperating_value)
    return [x[elementOfInterest] if (elementOfInterest in list(x.keys())) else 0 for x in breakoutColumn]

### Officer encoding

In [193]:
la_county['city'] = la_county['city'].map(func, na_action = 'ignore')
la_county['unit'] = la_county['unit_name'].map(func1, na_action = 'ignore')
la_county['citizen_armed'] = la_county['citizen_race'].map(weapon_map, na_action = 'ignore')
la_county['citizen_armed_description'].map(weapon_map, na_action = 'ignore')
la_county['num_white_officers'] =  get_specific_type(la_county, 'officer_race', 'WHITE')
la_county['num_black_officers'] = get_specific_type(la_county, 'officer_race', 'BLACK')
la_county['num_hispanic_officers'] = get_specific_type(la_county, 'officer_race', 'HISPANIC')
la_county['num_filipino_officers'] = get_specific_type(la_county, 'officer_race', 'FILIPINO')
la_county['num_asian-pacific_officers'] = get_specific_type(la_county, 'officer_race', 'ASIAN-PACIFIC')
la_county['num_isl_officers'] = get_specific_type(la_county, 'officer_race', 'ISL')
la_county['num_asian-pacific-isl_officers'] = get_specific_type(la_county, 'officer_race', 'ASIAN-PACIFIC-ISL')
la_county['num_unknown_officers'] = get_specific_type(la_county, 'officer_race', 'UNKNOWN')

### Subject Encoding

In [194]:
la_county['num_white_persons'] = get_specific_type(la_county, 'citizen_race', 'WHITE')
la_county['num_black_persons'] = get_specific_type(la_county, 'citizen_race', 'BLACK')
la_county['num_hispanic_persons'] = get_specific_type(la_county, 'citizen_race', 'HISPANIC')
la_county['num_unknown_persons'] = get_specific_type(la_county, 'citizen_race', None)

In [195]:
la_county.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 360 entries, 0 to 359
Data columns (total 34 columns):
 #   Column                          Non-Null Count  Dtype         
---  ------                          --------------  -----         
 0   id                              360 non-null    int64         
 1   incident_initiation             360 non-null    object        
 2   date                            360 non-null    datetime64[ns]
 3   address                         360 non-null    object        
 4   city                            356 non-null    object        
 5   state                           360 non-null    object        
 6   zip                             355 non-null    float64       
 7   district                        358 non-null    float64       
 8   geo_location                    360 non-null    object        
 9   unit_id                         359 non-null    object        
 10  unit_name                       359 non-null    object        
 11  num_of

In [196]:
la_countyEncoded = la_county.copy()
la_countyEncoded.drop(columns = [
    'address',
    'zip',
    'geo_location',
    'unit_id',
    'officer_race',
    'num_of_citizens',
    'citizen_race',
    'citizen_armed_description',
    'unit'
])

Unnamed: 0,id,incident_initiation,date,city,state,district,unit_name,num_officers_involved,num_citizens_wounded,num_citizens_killed,citizen_armed,x_coord,y_coord,num_white_officers,num_black_officers,num_hispanic_officers,num_filipino_officers,num_asian-pacific_officers,num_isl_officers,num_asian-pacific-isl_officers,num_unknown_officers,num_white_persons,num_black_persons,num_hispanic_persons,num_unknown_persons
0,1000717,HIT SHOOTING INCIDENT,2016-03-30 15:25:00,other,CA,287.0,SPECIAL ENFORCEMENT BUR,1,0,1,,34.012368,-118.126934,0,1,0,0,0,0,0,0,0,0,1,0
1,1000462,HIT SHOOTING INCIDENT,2013-10-06 15:30:00,LOS ANGELES,CA,6893.0,TRANSIT SERVICES BUREAU,1,0,1,,34.006282,-118.428977,0,0,1,0,0,0,0,0,0,1,0,0
2,1000410,NON-HIT SHOOTING INCIDENT,2012-08-16 01:40:00,LOS ANGELES,CA,377.0,SOUTH LOS ANGELES STATION,1,0,0,,33.917363,-118.300408,0,0,1,0,0,0,0,0,0,1,0,0
3,1001747,HIT SHOOTING INCIDENT,2019-08-02 09:29:00,other,CA,1361.0,LAKEWOOD STN,1,1,0,,33.914619,-118.168995,1,0,0,0,0,0,0,0,0,0,0,0
4,1001627,NON-HIT SHOOTING INCIDENT,2018-11-25 16:03:00,other,CA,1127.0,LANCASTER STN,1,0,0,,,,1,0,0,0,0,0,0,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
355,1000399,HIT SHOOTING INCIDENT,2012-05-05 23:37:00,PALMDALE,CA,1197.0,LANCASTER STN,2,1,0,,34.601522,-117.832055,2,0,0,0,0,0,0,0,1,0,0,0
356,1000489,HIT SHOOTING INCIDENT,2014-06-24 21:51:00,COMPTON,CA,2824.0,COMPTON STN,2,1,1,,33.898359,-118.209500,1,0,1,0,0,0,0,0,0,2,0,0
357,1000519,NON-HIT SHOOTING INCIDENT,2015-03-20 12:44:00,LOS ANGELES,CA,375.0,SOUTH LOS ANGELES STATION,1,0,0,,33.940007,-118.291834,1,0,0,0,0,0,0,0,0,1,0,0
358,1000342,HIT SHOOTING INCIDENT,2011-02-26 03:53:00,LOS ANGELES,CA,283.0,EAST LA STN,1,1,0,,34.033240,-118.169147,0,0,1,0,0,0,0,0,0,0,1,0


## Jacksonville

### Into exploration

In [197]:
jacksonville = dictOfDfs['Jacksonville County Florida'].copy()
jacksonville.head()

Unnamed: 0,IncidentDate,IncidentNbr,OriginalIncidentType,RTRCaseNbr,StreetNbr,StreetName,StreetType,StreetDirection,ApartmentNbr,City,StateProvince,PostalCode,Zone,SubSector,IncidentLocation,OfficerName,OfficerRace,OfficerGender,OfficerAge,OfficerTenure,SubjectName,SubjectRace,SubjectGender,SubjectAge,Subjectshot,Fatal,SubjectWeapon,UOFWithinPolicy,FurtherSituationalTrainingRequired,ReferredToIA,SAOLetterLinks,RTRCaseStatus
0,2020-07-26,2020-0479170,Traffic Violations,RTR20-0023,11500,San Jose,Blvd,,,Jacksonville,Florida,,3.0,I3,"11500 San Jose Blvd\nJacksonville, Florida","Kampfe, Myers L.",W,M,25,1,"Paige, Darrell Lorenzo",B,M,21,Yes,No,Not Applicable (None),Pending,Pending,Pending,,Pending State Attorney Review
1,2020-07-04,2020-0434871,Assault/Battery,RTR20-0022,4600,Monroe Smith,Rd,,,Jacksonville,Florida,,4.0,L1,"4600 Monroe Smith Rd\nJacksonville, Florida","Gutcher, Robert O.",W,M,30,1,"Perez, Axal",W,M,17,Yes,Yes,Knife / Cutting Instr,Pending,Pending,Pending,,Pending State Attorney Review
2,2020-07-03,2020-0431344,All Other,RTR20-0021,1100,Kendall,Dr,,,Jacksonville,Florida,,2.0,D2,"1100 Kendall Dr\nJacksonville, Florida","Cross, Paul G.; Mccranie, Phillip L.",W; W,M; M,42; 44,10; 21,"Carter, Amante Tesean",B,M,28,No,No,Shotgun,Pending; Pending,Pending; Pending,Pending; Pending,,Pending State Attorney Review
3,2020-07-01,2020-0428924,Auto Theft,RTR20-0020,5200,Soutel,Dr,,,Jacksonville,Florida,,5.0,N3,"5200 Soutel Dr\nJacksonville, Florida","Mrakovich, Bryce M.",W,M,26,1,"Paul, Nalory Debaptiste",B,M,20,No,No,Not Applicable (None),Pending,Pending,Pending,,Pending RTR Review Board Hearing
4,2020-05-26,2020-0351630,Traffic Violations,RTR20-0015,13400,J. Turner Butler,Blvd,,,Jacksonville,Florida,,3.0,H1,"13400 J. Turner Butler Blvd\nJacksonville, Flo...","Ondriezek, Blaine J.",W,M,31,1,"Dunaway, John Allen",W,M,61,Yes,Yes,Personal Weapons (Hands/ Fist/ Feet/Teeth/Etc....,Pending,Pending,Pending,,Pending RTR Review Board Hearing


In [198]:
jacksonville.info()
jacksonville.head()
jacksonville.columns = [
    'date',
    'id',
    'incident_initiation',
    'id2',
    'street_number',
    'street_name',
    'street_type',
    'street_direction',
    'apartment_number',
    'city',
    'state',
    'zip',
    'zone',
    'sub_zone',
    'incident_location',
    'officer_name',
    'officer_race',
    'officer_sex',
    'officer_age',
    'officer_year_of_services',
    'citizen_name',
    'citizen_race',
    'citizen_sex',
    'citizen_age',
    'citizen_injured',
    'citizen_killed',
    'citizen_armed',
    'UOFpolicy',
    'training?',
    'IA?',
    'SAO',
    'RTR'
]

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 57 entries, 0 to 56
Data columns (total 32 columns):
 #   Column                              Non-Null Count  Dtype         
---  ------                              --------------  -----         
 0   IncidentDate                        57 non-null     datetime64[ns]
 1   IncidentNbr                         57 non-null     object        
 2   OriginalIncidentType                57 non-null     object        
 3   RTRCaseNbr                          57 non-null     object        
 4   StreetNbr                           57 non-null     int64         
 5   StreetName                          57 non-null     object        
 6   StreetType                          56 non-null     object        
 7   StreetDirection                     13 non-null     object        
 8   ApartmentNbr                        1 non-null      float64       
 9   City                                57 non-null     object        
 10  StateProvince               

### Combining encoding for categorical variables with too many types. 

In [199]:
original_incident_map = {
    'Traffic Violations'               : 'traffic_stop', 
    'All Other'                         :'other',
    'Robbery'                           :'robbery',
    'Assault/Battery'                   :'assault',
    'Armed Robbery'                     :'robbery',
    'Suspicious Person'                 :'suspicious_situation',
    'Traffic Stop'                      :'traffic_stop',
    'Narcotics Investigation'           :'police_surveillance',
    'Burglary'                          :'burglary',
    'Auto Theft'                        :'robbery',
    'Armed Dispute'                     :'weapon',
    'Person Shot'                       :'shooting',
    'Domestic Battery'                  :'dv',
    'Domestic Violence'                 :'dv',
    'Aggravated Battery'                :'assault',
    'Domestic'                          :'dv',
    'Intoxicated Person'                :'intoxication',
    'Suicide'                           :'suicide',
    'Armed Person'                      :'weapon',
    'Armed Prowler'                     :'weapon',
    'Suicide Threat / Armed Dispute'    :'suicide',
    'Murder'                            :'bolo',
    'Search Warrant'                    :'warrant',
    'Carjacking'                        :'robbery',
    'Shots fired'                       :'shooting',
    'Abduction/Kidnap'                  :'abduction',
    'Wanted Person'                     :'bolo',
    'Bomb Investigation'                :'police_surveillance'
}

In [200]:
subject_weapon_map = {
    'Handgun'                                :'firearm',
    'Knife / Cutting Instr'                  :'melee',
    'Vehicle'                                :'other',
    'Rifle'                                  :'firearm',
    'Shotgun'                                :'firearm',
    'Not Applicable (None)'                  :'None',
    'Firearm (Type Not Stated)'              :'firearm',
    'Taser'                                  :'other',
    'Knife / Cutting Instr; Poison (Includes Gas)' : 'melee',
    'Replica Handgun'                        :'other',
    'Knife'                                  :'melee',
    'Personal Weapons (Hands/ Fist/ Feet/Teeth/Etc.); Simulated Weapon' : 'None',
    'Sword'                                  :'melee',
    'BB Pistol'                              :'other',
    'None'                                   :'None'
}

In [201]:
jacksonville['citizen_armed'] = jacksonville['citizen_armed'].map(subject_weapon_map)
jacksonville['incident_initiation'] = jacksonville['incident_initiation'].map(original_incident_map)

In [202]:
def split_num_string(column):
    temp = []
    for x in column:
        try:
            thing = x.split('; ')
            newTemp = []
            for y in thing:
                newTemp.append(int(y))
            temp.append(newTemp)
        except:
            temp.append(x)
    return temp

def check_int_or_list(element):
    temp = []
    for x in element:
        try:
            temp.append(sum(x)/len(x))
        except:
            temp.append(x)
    return temp

### Encoding poorly formatted string data

#### Officer columns

In [32]:
jacksonville['officer_age'] = split_num_string(jacksonville['officer_age']) 
jacksonville['officer_year_of_services'] = split_num_string(jacksonville['officer_year_of_services'])
jacksonville['num_white_officers'] =  get_specific_type(jacksonville, 'officer_race', 'W', seperating_value='; ')
jacksonville['num_black_officers'] = get_specific_type(jacksonville, 'officer_race', 'B', seperating_value='; ')
jacksonville['num_hispanic_officers'] = get_specific_type(jacksonville, 'officer_race', 'H', seperating_value='; ')
jacksonville['num_asian_officers'] = get_specific_type(jacksonville, 'officer_race', 'A', seperating_value='; ')
jacksonville['average_officer_age'] = check_int_or_list(jacksonville['officer_race'])
jacksonville['average_officer_tenure'] = check_int_or_list(jacksonville['officer_year_of_services'])


#### Subject Columns

In [33]:
jacksonville['citizen_age'] = split_num_string(jacksonville['citizen_age']) 
jacksonville['num_white_citizens'] =  get_specific_type(jacksonville, 'citizen_race', 'W', seperating_value='; ')
jacksonville['num_black_citizens'] = get_specific_type(jacksonville, 'citizen_race', 'B', seperating_value='; ')
jacksonville['num_hispanic_citizens'] = get_specific_type(jacksonville, 'citizen_race', 'H', seperating_value='; ')
jacksonville['num_asian_citizens'] = get_specific_type(jacksonville, 'citizen_race', 'A', seperating_value='; ')
jacksonville['average_citizen_age'] = check_int_or_list(jacksonville['citizen_age'])

In [34]:
jacksonville.incident_initiation.value_counts()

traffic_stop            11
robbery                 10
other                    7
assault                  5
police_surveillance      4
weapon                   3
suspicious_situation     3
dv                       3
bolo                     2
burglary                 2
suicide                  2
shooting                 2
abduction                1
intoxication             1
warrant                  1
Name: incident_initiation, dtype: int64

In [35]:
# jacksonville['OriginalIncidentType'] = jacksonville['OriginalIncidentType'].map(original_incident_map, na_action = 'ignore')
# jacksonville.OriginalIncidentType.value_counts()

In [36]:
jacksonville['citizen_armed'].value_counts()

firearm    33
melee      11
other       7
None        4
Name: citizen_armed, dtype: int64

In [37]:
jacksonvilleEncoded  = jacksonville.copy()
jacksonvilleEncoded.drop(columns = [
    'date',
    'id',
    'id2',
    'incident_initiation',
    'street_number',
    'street_name',
    'street_type',
    'street_direction',
    'apartment_number',
    'officer_name',
    'UOFpolicy',
    'training?',
    'IA?',
    'SAO',
    'RTR'
])

Unnamed: 0,city,state,zip,zone,sub_zone,incident_location,officer_race,officer_sex,officer_age,officer_year_of_services,citizen_name,citizen_race,citizen_sex,citizen_age,citizen_injured,citizen_killed,citizen_armed,num_white_officers,num_black_officers,num_hispanic_officers,num_asian_officers,average_officer_age,average_officer_tenure,num_white_citizens,num_black_citizens,num_hispanic_citizens,num_asian_citizens,average_citizen_age
0,Jacksonville,Florida,,3.0,I3,"11500 San Jose Blvd\nJacksonville, Florida",W,M,25,1,"Paige, Darrell Lorenzo",B,M,21,Yes,No,,1,0,0,0,W,1.0,0,1,0,0,21
1,Jacksonville,Florida,,4.0,L1,"4600 Monroe Smith Rd\nJacksonville, Florida",W,M,30,1,"Perez, Axal",W,M,17,Yes,Yes,melee,1,0,0,0,W,1.0,1,0,0,0,17
2,Jacksonville,Florida,,2.0,D2,"1100 Kendall Dr\nJacksonville, Florida",W; W,M; M,"[42, 44]","[10, 21]","Carter, Amante Tesean",B,M,28,No,No,firearm,2,0,0,0,W; W,15.5,0,1,0,0,28
3,Jacksonville,Florida,,5.0,N3,"5200 Soutel Dr\nJacksonville, Florida",W,M,26,1,"Paul, Nalory Debaptiste",B,M,20,No,No,,1,0,0,0,W,1.0,0,1,0,0,20
4,Jacksonville,Florida,,3.0,H1,"13400 J. Turner Butler Blvd\nJacksonville, Flo...",W,M,31,1,"Dunaway, John Allen",W,M,61,Yes,Yes,,1,0,0,0,W,1.0,1,0,0,0,61
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
52,Jacksonville,Florida,32218.0,6.0,P1,"11050 Harts Rd\nJacksonville, Florida",W,M,28,2,"Kimbrough, Edward",B,M,14,Yes,No,other,1,0,0,0,W,2.0,0,1,0,0,14
53,Jacksonville,Florida,32211.0,2.0,E2,"212 Century St\nJacksonville, Florida",W,M,36,9,"Graham, Jerry",B,M,34,Yes,Yes,firearm,1,0,0,0,W,9.0,0,1,0,0,34
54,Jacksonville,Florida,32220.0,5.0,O2,"9452 Old Plank Rd\nJacksonville, Florida",W; W,M; M,"[43, 46]","[18, 22]","Compo, Richard",W,M,37,Yes,Yes,firearm,2,0,0,0,W; W,20.0,1,0,0,0,37
55,Baldwin,Florida,32234.0,5.0,O2,"100 Delmonte St\nBaldwin, Florida",W,M,54,12,"Brooks, Robert",B,M,26,No,No,,1,0,0,0,W,12.0,0,1,0,0,26


## Seattle Washington

### Intro Exploration

In [44]:
seattle = dictOfDfs['Seattle County Washington'].copy()
seattle.info()
seattle.columns = [
    'thing1',
    'id',
    'date/time',
    'address',
    'y_coord',
    'x_coord',
    'city',
    'state',
    'officer_rank',
    'officer_sex',
    'officer_race',
    'officer_year_of_service',
    'officer_injured',
    'num_of_rounds',
    'citizen_sex',
    'citizen_race',
    'citizen_DOB',
    'citizen_age',
    'citizen_armed',
    'citizen_weapon',
    'citizen_killed',
    'on_duty',
    'disposition',
    'officer_disciplined',
    'summary'
] 
seattle.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 25 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   FRB #                 75 non-null     object        
 1   Incident Number       156 non-null    int64         
 2   Date / Time           156 non-null    datetime64[ns]
 3   Blurred Address       156 non-null    object        
 4   Longitude             156 non-null    float64       
 5   Latitude              156 non-null    float64       
 6   City                  156 non-null    object        
 7   State                 156 non-null    object        
 8   Rank                  156 non-null    object        
 9   Officer Gender        156 non-null    object        
 10  Officer Race          156 non-null    object        
 11  Years of SPD Service  154 non-null    object        
 12  Officer Injured       145 non-null    object        
 13  Number of Rounds    

Unnamed: 0,thing1,id,date/time,address,y_coord,x_coord,city,state,officer_rank,officer_sex,officer_race,officer_year_of_service,officer_injured,num_of_rounds,citizen_sex,citizen_race,citizen_DOB,citizen_age,citizen_armed,citizen_weapon,citizen_killed,on_duty,disposition,officer_disciplined,summary
0,FRB 05-01,20050000118193,2005-03-21 18:28:00,65XX BLOCK OF RAINIER AV S,-122.273741,47.543815,Seattle,WA,Officer,Male,White,20,No,1,Male,Asian,1975-11-03T00:00:00.000,29,Yes,Knife,Yes,Yes,Missing,Missing,"On March 21st, 2005, at approximately 5:20 P.M..."
1,FRB 05-03,20050000174022,2005-04-29 03:30:00,65XX BLOCK OF 1 AV S,-122.334513,47.544177,Seattle,WA,Officer,Male,White,5,No,2,Male,White,1969-08-02T00:00:00.000,36,No,,Yes,Yes,Justified,No,"On April 29th, 2005, at approximately 3:35 A.M..."
2,FRB 05-04,20050000256303,2005-06-20 11:30:00,7XX BLOCK OF STEWART ST,-122.335725,47.61462,Seattle,WA,Officer,Male,White,15,No,1,Male,White,1952-11-15T00:00:00.000,53,Yes,"Grenade, backpack reported to have explosives",Yes,Yes,Justified,No,"On June 20th, 2005, at approximately 11:30 A.M..."
3,FRB 05-04,20050000256303,2005-06-20 12:30:00,7XX BLOCK OF STEWART ST,-122.335725,47.61462,Seattle,WA,Officer,Male,White,18,No,1,Male,White,1952-11-15T00:00:00.000,53,Yes,"Grenade, backpack reported to have explosives",Yes,Yes,Justified,No,"On June 20th, 2005, at approximately 11:30 A.M..."
4,FRB 05-05,20050000286240,2005-07-08 13:48:00,16 AV / E UNION ST,-122.311474,47.612908,Seattle,WA,Officer,Male,White,4,Yes,1,Male,Black or African American,1957-01-28T00:00:00.000,48,Yes,Screwdriver,No,Yes,Justified,No,"On July 8th, 2005, at approximately 1:45 P.M.,..."


### Collapsing string categories

In [45]:
type_of_weapon_map = {
    'Handgun'                                       :'firearm',
    'Knife'                                         :'melee',
    'Gun'                                           :'firearm',
    'Vehicle'                                       :'other',
    'Multiple Types'                                :'other',
    'Multiple Firearms'                             :'firearm',
    'Rifle'                                         :'firearm',
    'Metal Bar'                                     :'melee',
    'Grenade, backpack reported to have explosives' :'explosive',
    '.22 caliber pistol'                            :'firearm',
    'Colt Revolver'                                 :'firearm',
    '6 shot .357 revolver'                          :'firearm',
    'Rifle w/ bayonet'                              :'firearm',
    'broken bottle'                                 :'melee',
    'Board'                                         :'melee',
    'Screwdriver'                                   :'melee',
    '9mm semi-automatic'                            :'firearm',
    'Air soft rifle'                                :'other',
    '.357 revolver'                                 :'firearm',
    'Semil automatic .38 caliber handgun'           :'firearm',
    'Mac-10, 9 mm machine pistol'                   :'firearm'
}

In [46]:
rank_map = {
    'Officer'                  :'officer',
    'POLICE OFFICE'            :'officer',
    'Sergeant'                 :'sergeant',
    'Detective'                :'detective',
    'POLICE OFFIVER PROBATION' :'officer',
    'Student Officer'          :'student_officer',
    'POLICE OFFICER DETECTIVE' :'detective',
    'POLICE LIEUTENANT'        :'lieutenant',
    'FTO'                      :'officer',
    'POLICE SERGEANT'          :'sergeant'
}

In [47]:
subject_race_map = {
    'White'                        :'white',
    'Black or African American'    :'black',
    'Asian'                        :'asian',
    'Native American'              :'native_american',
    'Not Specified'                :'unknown',
    'Hispanic'                     :'hispanic',
    'Nat Hawaiian/Oth Pac Islander':'islander'
}

In [48]:
officer_race_map = {
    'White'                        :'white',
    'AI/AN'                        :'native_american',
    'Black or African American '   :'black',
    'Hispanic/Lation'              :'latino',
    'Asian/Pacific Islander'       :'pacific-islander',
    'Black'                        :'black',
    'Hispanic or Latino'           :'latino',
    'Multi-Racial'                 :'multi-racial',
    'Asian'                        :'asian',
    'American Indian/Alaska Native':'native_american',
    'Two or More Races'            :'multi-racial',
    'Nat Hawaiian/Oth Pac Islander':'pacific-islander',
}

In [49]:
seattle['citizen_weapon'] = seattle['citizen_weapon'].map(type_of_weapon_map)
seattle['officer_rank'] = seattle['officer_rank'].map(rank_map)
seattle['officer_race'] = seattle['officer_race'].map(officer_race_map)
seattle['citizen_race'] = seattle['citizen_race'].map(subject_race_map)

In [53]:
seattle.drop(columns = [
    'thing1',
    'citizen_DOB',
    'address',

   
    'num_of_rounds',
   
    'on_duty',
    'disposition',
    'officer_disciplined',
    'summary'
]).info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 156 entries, 0 to 155
Data columns (total 17 columns):
 #   Column                   Non-Null Count  Dtype         
---  ------                   --------------  -----         
 0   id                       156 non-null    int64         
 1   date/time                156 non-null    datetime64[ns]
 2   y_coord                  156 non-null    float64       
 3   x_coord                  156 non-null    float64       
 4   city                     156 non-null    object        
 5   state                    156 non-null    object        
 6   officer_rank             96 non-null     object        
 7   officer_sex              156 non-null    object        
 8   officer_race             147 non-null    object        
 9   officer_year_of_service  154 non-null    object        
 10  officer_injured          145 non-null    object        
 11  citizen_sex              156 non-null    object        
 12  citizen_race             156 non-nul

### Encoding Officer and Subject Information

In [54]:
officers = pd.get_dummies(seattle[
     [
         'id',
         'officer_rank', 
         'officer_sex', 
         'officer_race',  
         'officer_injured',
         'officer_disciplined'
     ]
])
subjects = pd.get_dummies(seattle[
    [
        'citizen_sex',
        'citizen_race',
        'citizen_weapon'
    ]
])
officersSubjects = pd.concat([ officers, subjects], axis = 1)
seattleEncoded = pd.concat([seattle.groupby(['id']).first(), officersSubjects.groupby(['id']).sum()], 
                           axis = 1).reset_index()
seattleEncoded

Unnamed: 0,id,thing1,date/time,address,y_coord,x_coord,city,state,officer_rank,officer_sex,officer_race,officer_year_of_service,officer_injured,num_of_rounds,citizen_sex,citizen_race,citizen_DOB,citizen_age,citizen_armed,citizen_weapon,citizen_killed,on_duty,disposition,officer_disciplined,summary,officer_rank_detective,officer_rank_lieutenant,officer_rank_officer,officer_rank_sergeant,officer_rank_student_officer,officer_sex_Female,officer_sex_Male,officer_race_asian,officer_race_black,officer_race_latino,officer_race_multi-racial,officer_race_native_american,officer_race_pacific-islander,officer_race_white,officer_injured_No,officer_injured_Yes,officer_disciplined_Missing,officer_disciplined_No,officer_disciplined_Yes,citizen_sex_Female,citizen_sex_Male,citizen_race_asian,citizen_race_black,citizen_race_hispanic,citizen_race_islander,citizen_race_native_american,citizen_race_unknown,citizen_race_white,citizen_weapon_explosive,citizen_weapon_firearm,citizen_weapon_melee,citizen_weapon_other
0,20050000118193,FRB 05-01,2005-03-21 18:28:00,65XX BLOCK OF RAINIER AV S,-122.273741,47.543815,Seattle,WA,officer,Male,white,20,No,1,Male,asian,1975-11-03T00:00:00.000,29,Yes,melee,Yes,Yes,Missing,Missing,"On March 21st, 2005, at approximately 5:20 P.M...",0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,1,0,0,0,1,1,0,0,0,0,0,0,0,0,1,0
1,20050000174022,FRB 05-03,2005-04-29 03:30:00,65XX BLOCK OF 1 AV S,-122.334513,47.544177,Seattle,WA,officer,Male,white,5,No,2,Male,white,1969-08-02T00:00:00.000,36,No,,Yes,Yes,Justified,No,"On April 29th, 2005, at approximately 3:35 A.M...",0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,1,0,0,0,0
2,20050000256303,FRB 05-04,2005-06-20 11:30:00,7XX BLOCK OF STEWART ST,-122.335725,47.614620,Seattle,WA,officer,Male,white,15,No,1,Male,white,1952-11-15T00:00:00.000,53,Yes,explosive,Yes,Yes,Justified,No,"On June 20th, 2005, at approximately 11:30 A.M...",0,0,2,0,0,0,2,0,0,0,0,0,0,2,2,0,0,2,0,0,2,0,0,0,0,0,0,2,2,0,0,0
3,20050000286240,FRB 05-05,2005-07-08 13:48:00,16 AV / E UNION ST,-122.311474,47.612908,Seattle,WA,officer,Male,white,4,Yes,1,Male,black,1957-01-28T00:00:00.000,48,Yes,melee,No,Yes,Justified,No,"On July 8th, 2005, at approximately 1:45 P.M.,...",0,0,1,0,0,0,1,0,0,0,0,0,0,1,0,1,0,1,0,0,1,0,1,0,0,0,0,0,0,0,1,0
4,20050000388203,FRB 05-06,2005-09-10 03:45:00,3XX BLOCK OF 9 AV,-122.323444,47.604128,Seattle,WA,officer,Male,white,5,No,1,Female,white,1962-02-18T00:00:00.000,44,No,,No,Yes,Not Justified,Yes,"On September 10th, 2005, at approximately 3:45...",0,0,1,0,0,0,1,0,0,0,0,0,0,1,1,0,0,0,1,1,0,0,0,0,0,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
72,20190000048393,,2019-02-07 03:22:00,105XX BLOCK MIDVALE AV N,-122.343340,47.705960,Seattle,WA,,Male,white,17,,3,Male,unknown,1900-01-01T00:00:00.000,117,Yes,melee,Yes,Yes,Within Policy,No,"On the 7th of February 2019, at 3:22 A.M., the...",0,0,0,0,0,0,2,0,0,0,0,0,0,1,0,0,0,2,0,0,2,0,0,0,0,0,2,0,0,0,2,0
73,20190000098802,,2019-03-19 21:20:00,1600 BLOCK MELROSE AV,-122.327899,47.615472,Seattle,WA,,Male,white,3,,Multiple,Male,asian,1982-02-28T00:00:00.000,37,Yes,firearm,No,Yes,Within Policy,No,"On the 19th of March 2019, at 9:21 P.M., Settl...",0,0,0,0,0,0,4,0,0,0,0,0,0,4,0,0,0,4,0,0,4,4,0,0,0,0,0,0,0,4,0,0
74,20190000160099,,2019-05-05 02:07:00,OCCIDENTAL AV / YESLER WY,-122.332874,47.601721,Seattle,WA,,Male,white,6,,2,Male,black,1900-01-01T00:00:00.000,118,Yes,firearm,No,Yes,Within Policy,No,"On the 5th of May 2019, at approximately 2:05 ...",0,0,0,0,0,0,1,0,0,0,0,0,0,1,0,0,0,1,0,0,1,0,1,0,0,0,0,0,0,1,0,0
75,20190000165328,,2019-05-08 19:24:00,600 BLOCK 3 AV W,-122.360624,47.625198,Seattle,WA,,Male,multi-racial,5,,2,Male,unknown,1900-01-01T00:00:00.000,118,Yes,melee,Yes,Yes,Within Policy,No,"On the 8th of May 2019, at approximately 7:15 ...",0,0,0,0,0,0,2,0,0,0,1,0,0,1,0,0,0,2,0,0,2,0,0,0,0,0,2,0,0,0,2,0


In [None]:
# seattle.drop(columns = ['FRB #', 'Incident Number','Blurred Address','Subject DOB','On-duty',
#        'Disposition', 'Officer Disciplined?', 'Summary'])

## Tacoma Washington

### Intro Exploration

In [114]:
tacoma = dictOfDfs['Tacoma Washington'].copy()
tacoma.columns = [
    'city',
    'state',
    'IA',
    'id',
    'address',
    'district',
    'sector',
    'date',
    'date2',
    'day_of_week',
    'time',
    'hour',
    'Inside/Outside',
    'officer_race',
    'officer_sex',
    'officer_age',
    'officer_years_of_service',
    'citizen_race', 
    'citizen_sex',
    'citizen_age',
    'Dipisition',
    'Action_Taken'
]
tacoma = tacoma.drop(columns = [
    'IA',
    'date2',
    'hour',
    'Inside/Outside',
   
    'Dipisition',
    'Action_Taken'
])
tacoma.info()
tacoma.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 36 entries, 0 to 35
Data columns (total 16 columns):
 #   Column                    Non-Null Count  Dtype         
---  ------                    --------------  -----         
 0   city                      36 non-null     object        
 1   state                     36 non-null     object        
 2   id                        36 non-null     int64         
 3   address                   36 non-null     object        
 4   district                  35 non-null     object        
 5   sector                    35 non-null     float64       
 6   date                      36 non-null     datetime64[ns]
 7   day_of_week               36 non-null     object        
 8   time                      36 non-null     object        
 9   officer_race              36 non-null     object        
 10  officer_sex               36 non-null     object        
 11  officer_age               36 non-null     int64         
 12  officer_years_of_service

Unnamed: 0,city,state,id,address,district,sector,date,day_of_week,time,officer_race,officer_sex,officer_age,officer_years_of_service,citizen_race,citizen_sex,citizen_age
0,Tacoma,Washington,1918302031,"3840 Pacific Ave\nTacoma, WA\n(47.221717, -122...",TA43,4.0,2019-07-03,Tues,1899-12-31T21:01:00.000,White,Male,23,1.0,Asian,Female,56
1,Tacoma,Washington,1920200728,"500 S 40th St\nTacoma, WA\n(47.220858, -122.43...",TA43,4.0,2019-07-21,Sun,1899-12-31T10:46:00.000,White,Male,39,10.0,Black,Male,28
2,Tacoma,Washington,1919400316,"1623 E J St\nTacoma, WA\n(47.249159, -122.422549)",TA14,1.0,2019-07-16,Sat,1899-12-31T04:05:00.000,White,Male,45,20.0,White,Male,69
3,Tacoma,Washington,1925100213,"3400 E Portland Ave\nTacoma, WA\n(47.23279, -1...",TA41,4.0,2019-09-24,Sun,1899-12-31T02:36:00.000,White,Male,33,4.0,Black,Male,24
4,Tacoma,Washington,1919400316,"1623 E J St\nTacoma, WA\n(47.249159, -122.422549)",TA14,1.0,2019-07-16,Sat,1899-12-31T04:05:00.000,White,Male,28,0.0,White,Male,69


In [115]:
for columns in tacoma.columns:
    print(tacoma[columns].value_counts())

Tacoma    36
Name: city, dtype: int64
Washington    36
Name: state, dtype: int64
1800701780    7
1633501301    4
1919400316    4
1725301029    3
1730901676    2
130581374     1
1621901280    1
152171144     1
133290122     1
1918302031    1
1920200728    1
122821213     1
1600300777    1
151420459     1
1602801965    1
122440754     1
131800756     1
1925100213    1
151301177     1
140571388     1
123571013     1
Name: id, dtype: int64
425 S 59th St\nTacoma, Washington\n(47.203686, -122.437797)           7
1623 E J St\nTacoma, WA\n(47.249159, -122.422549)                     4
413 E 52nd St\nTacoma, Washington\n(47.209948, -122.42594)            4
5620 S Lawrence St\nTacoma, Washington\n(47.205388, -122.479098)      3
300 S 84th St\nTacoma, Washington\n(47.18097, -122.435594)            2
2800 Portland Ave\nTacoma, Washington\n(47.2388, -122.41045)          1
3228 S Union Ave\nTacoma, Washington\n(47.230454, -122.48364)         1
600 E 82nd St\nTacoma, Washington\n(47.18272, -122.42357

### Fixing Data Switching between columns

In [116]:
def the_switchero(df, column1):
    newColumn1 = []
    for x in df[column1]:
        if (x == 'Male') | (x == 'Female'):
            newColumn1.append(1)
        else:
            newColumn1.append(0)
    df['bad_gender'] = newColumn1
    corrected_gender = []
    corrected_race = []
    for z,k in df.iterrows():
        if k['bad_gender'] == 1:
            corrected_gender.append(k['citizen_race'])
            corrected_race.append(k['citizen_sex'])
        else:
            corrected_gender.append(k['citizen_sex'])
            corrected_race.append(k['citizen_race'])
    return corrected_gender,corrected_race



In [120]:
tacoma['corrected_citizen_sex'], tacoma['corrected_citizen_race'] = the_switchero(tacoma, 'citizen_race')

### Officer and Citizen Encoding

In [121]:
officerCitizenCat = pd.get_dummies(tacoma[['id','officer_race', 'officer_sex','corrected_citizen_race', 'corrected_citizen_sex']])
tacomaEncoded = pd.concat([tacoma.groupby('id').first(), officerCitizenCat.groupby('id').sum()],axis = 1)


In [122]:
tacomaEncoded.head()

Unnamed: 0_level_0,city,state,address,district,sector,date,day_of_week,time,officer_race,officer_sex,officer_age,officer_years_of_service,citizen_race,citizen_sex,citizen_age,bad_gender,corrected_citizen_gender,corrected_citizen_race,corrected_citizen_sex,officer_race_Asian,officer_race_Black,officer_race_Hispanic,officer_race_White,officer_sex_Female,officer_sex_Male,corrected_citizen_race_Asian,corrected_citizen_race_Black,corrected_citizen_race_Hispanic,corrected_citizen_race_Nat Am,corrected_citizen_race_Native Amer,corrected_citizen_race_White,corrected_citizen_sex_Female,corrected_citizen_sex_Male
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1
122440754,Tacoma,Washington,"809 S M St\nTacoma, Washington\n(47.254333, -1...",TA12,1.0,2012-08-31,Fri,15:40:00,White,Male,40,6.0,Black,Male,29,0,Male,Black,Male,0,0,0,1,0,1,0,1,0,0,0,0,0,1
122821213,Tacoma,Washington,"600 E 82nd St\nTacoma, Washington\n(47.18272, ...",TA44,4.0,2012-10-09,Mon,21:41:00,White,Male,29,5.0,White,Male,30,0,Male,White,Male,0,0,0,1,0,1,0,0,0,0,0,1,0,1
123571013,Tacoma,Washington,"3567 S Fawcett Ave\nTacoma, Washington\n(47.22...",TA43,4.0,2012-12-24,Sat,21:49:00,White,Male,36,7.0,Native Amer,Female,22,0,Female,Native Amer,Female,0,0,0,1,0,1,0,0,0,0,1,0,1,0
130581374,Tacoma,Washington,"3529 McKinley Ave\nTacoma, Washington\n(47.229...",TA41,4.0,2013-02-27,Wed,22:41:00,White,Male,38,3.0,White,Male,35,0,Male,White,Male,0,0,0,1,0,1,0,0,0,0,0,1,0,1
131800756,Tacoma,Washington,"2800 Portland Ave\nTacoma, Washington\n(47.238...",TA41,4.0,2013-06-29,Sat,15:05:00,White,Female,35,6.0,Hispanic,Male,25,0,Male,Hispanic,Male,0,0,0,1,1,0,0,0,1,0,0,0,0,1


## Cincinnati Ohio

In [159]:
cincinnati = dictOfDfs['Cincinnati Ohio'].copy()
cincinnati = cincinnati.drop(columns = [
    'INCIDENT_LOCATION_X',
    'INCIDENT_NO',
    'CFS_NO',
    'FIREARM_MAKE',
    'FIREARM_MODEL',
    'SNA_NEIGHBORHOOD',
    'CPD_NEIGHBORHOOD',
    'COMMUNITY_COUNCIL_NEIGHBORHOOD'
])
cincinnati.info()
cincinnati.columns = [
    'city',
    'state',
    'district',
    'x_coord',
    'y_coord',
    'date',
    'id',
    'incident_description',
    'citizen_sex',
    'citizen_race',
    'officer_sex',
    'officer_race'
]
cincinnati.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 237 entries, 0 to 236
Data columns (total 12 columns):
 #   Column                Non-Null Count  Dtype         
---  ------                --------------  -----         
 0   City                  237 non-null    object        
 1   State                 237 non-null    object        
 2   DISTRICT              233 non-null    object        
 3   LATITUDE_X            139 non-null    float64       
 4   LONGITUDE_X           139 non-null    float64       
 5   INCIDENT_DATE         237 non-null    datetime64[ns]
 6   CASE_NO               237 non-null    object        
 7   INCIDENT_DESCRIPTION  237 non-null    object        
 8   SUBJECT_GENDER        132 non-null    object        
 9   SUBJECT_RACE          132 non-null    object        
 10  OFFICER_GENDER        237 non-null    object        
 11  OFFICER_RACE          237 non-null    object        
dtypes: datetime64[ns](1), float64(2), object(9)
memory usage: 22.3+ KB


Unnamed: 0,city,state,district,x_coord,y_coord,date,id,incident_description,citizen_sex,citizen_race,officer_sex,officer_race
0,Cincinnati,Ohio,DISTRICT 2,,,2006-01-29 12:57:00,2006-78254,WEAPON DISCHARGE AT AN ANIMAL,,,MALE,BLACK
1,Cincinnati,Ohio,DISTRICT 3,,,2016-01-11 20:00:00,2016-209015,USE OF FORCE INVESTIGATION,MALE,WHITE,MALE,WHITE
2,Cincinnati,Ohio,TRAINING SECTION,,,2013-06-03 07:40:00,2013-181381,ACCIDENTAL DISCHARGE,OTHER,UNKNOWN,MALE,WHITE
3,Cincinnati,Ohio,CENTRAL BUSINESS DISTRICT,,,2016-08-07 07:41:00,2016-215911,USE OF FORCE INVESTIGATION,MALE,BLACK,MALE,WHITE
4,Cincinnati,Ohio,DISTRICT 5,,,2018-05-03 00:09:00,2018-235541,USE OF FORCE INVESTIGATION,MALE,BLACK,MALE,WHITE


In [160]:
for x in cincinnati.columns:
    print(cincinnati[x].value_counts())

Cincinnati    237
Name: city, dtype: int64
Ohio    237
Name: state, dtype: int64
DISTRICT 3                   71
DISTRICT 4                   60
DISTRICT 5                   43
DISTRICT 2                   26
DISTRICT 1                   24
CIS                           4
CENTRAL BUSINESS DISTRICT     2
TRAINING SECTION              1
OUTSIDE CITY                  1
SSS                           1
Name: district, dtype: int64
39.175490    1
39.203293    1
39.108806    1
39.101149    1
39.121180    1
            ..
39.108296    1
39.128227    1
39.128686    1
39.145237    1
39.163781    1
Name: x_coord, Length: 139, dtype: int64
-84.455997    2
-84.430147    1
-84.528735    1
-84.488670    1
-84.413466    1
             ..
-84.523426    1
-84.645105    1
-84.500494    1
-84.515738    1
-84.581834    1
Name: y_coord, Length: 138, dtype: int64
2010-09-18 19:02:00    6
2014-06-04 18:20:00    4
2007-05-27 16:13:00    4
2009-11-17 11:15:00    3
2016-02-17 17:05:00    3
                      

In [161]:
cincinnatiShooting = cincinnati.copy().loc[cincinnati['incident_description'] == 'USE OF FORCE INVESTIGATION'].groupby(['id']).first()
cincinnatiShooting

Unnamed: 0_level_0,city,state,district,x_coord,y_coord,date,incident_description,citizen_sex,citizen_race,officer_sex,officer_race
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
9600333,Cincinnati,Ohio,DISTRICT 3,,,1996-09-30 00:30:00,USE OF FORCE INVESTIGATION,MALE,WHITE,MALE,WHITE
9600518,Cincinnati,Ohio,DISTRICT 4,,,1996-10-26 02:40:00,USE OF FORCE INVESTIGATION,MALE,ASIAN,MALE,WHITE
9800189,Cincinnati,Ohio,DISTRICT 4,,,1998-03-04 23:32:00,USE OF FORCE INVESTIGATION,MALE,BLACK,MALE,WHITE
9800300,Cincinnati,Ohio,DISTRICT 1,,,1998-02-02 19:43:00,USE OF FORCE INVESTIGATION,MALE,BLACK,FEMALE,WHITE
9800443,Cincinnati,Ohio,,,,1998-05-05 04:45:00,USE OF FORCE INVESTIGATION,MALE,WHITE,MALE,WHITE
...,...,...,...,...,...,...,...,...,...,...,...
2019-246738,Cincinnati,Ohio,DISTRICT 4,39.128686,-84.484054,2018-08-24 14:43:00,USE OF FORCE INVESTIGATION,MALE,BLACK,MALE,WHITE
2019-248398,Cincinnati,Ohio,DISTRICT 2,39.150831,-84.388074,2019-07-04 10:21:00,USE OF FORCE INVESTIGATION,MALE,BLACK,MALE,BLACK
2019-248765,Cincinnati,Ohio,DISTRICT 3,39.096373,-84.568387,2019-07-16 15:47:00,USE OF FORCE INVESTIGATION,FEMALE,BLACK,MALE,WHITE
2019-249505,Cincinnati,Ohio,DISTRICT 3,39.119091,-84.581834,2018-08-22 12:59:00,USE OF FORCE INVESTIGATION,FEMALE,BLACK,FEMALE,WHITE


In [162]:
officersSubjects = pd.get_dummies(cincinnatiShooting[['officer_race', 'officer_sex', 'citizen_race', 'citizen_sex']])

In [163]:
officersSubjectsCat = officersSubjects.groupby(['id']).sum()
cincinnatiShootingFirst = cincinnatiShooting.groupby(['id']).first()
cincinnatiEncoded = pd.concat([cincinnatiShootingFirst, officersSubjectsCat], axis = 1)
cincinnatiEncoded

Unnamed: 0_level_0,city,state,district,x_coord,y_coord,date,incident_description,citizen_sex,citizen_race,officer_sex,officer_race,officer_race_BLACK,officer_race_HISPANIC/LATINO,officer_race_WHITE,officer_sex_FEMALE,officer_sex_MALE,citizen_race_ASIAN,citizen_race_BLACK,citizen_race_HISPANIC/LATINO,citizen_race_UNKNOWN,citizen_race_WHITE,citizen_sex_FEMALE,citizen_sex_MALE,citizen_sex_OTHER
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1
9600333,Cincinnati,Ohio,DISTRICT 3,,,1996-09-30 00:30:00,USE OF FORCE INVESTIGATION,MALE,WHITE,MALE,WHITE,0,0,1,0,1,0,0,0,0,1,0,1,0
9600518,Cincinnati,Ohio,DISTRICT 4,,,1996-10-26 02:40:00,USE OF FORCE INVESTIGATION,MALE,ASIAN,MALE,WHITE,0,0,1,0,1,1,0,0,0,0,0,1,0
9800189,Cincinnati,Ohio,DISTRICT 4,,,1998-03-04 23:32:00,USE OF FORCE INVESTIGATION,MALE,BLACK,MALE,WHITE,0,0,1,0,1,0,1,0,0,0,0,1,0
9800300,Cincinnati,Ohio,DISTRICT 1,,,1998-02-02 19:43:00,USE OF FORCE INVESTIGATION,MALE,BLACK,FEMALE,WHITE,0,0,1,1,0,0,1,0,0,0,0,1,0
9800443,Cincinnati,Ohio,,,,1998-05-05 04:45:00,USE OF FORCE INVESTIGATION,MALE,WHITE,MALE,WHITE,0,0,1,0,1,0,0,0,0,1,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2019-246738,Cincinnati,Ohio,DISTRICT 4,39.128686,-84.484054,2018-08-24 14:43:00,USE OF FORCE INVESTIGATION,MALE,BLACK,MALE,WHITE,0,0,1,0,1,0,1,0,0,0,0,1,0
2019-248398,Cincinnati,Ohio,DISTRICT 2,39.150831,-84.388074,2019-07-04 10:21:00,USE OF FORCE INVESTIGATION,MALE,BLACK,MALE,BLACK,1,0,0,0,1,0,1,0,0,0,0,1,0
2019-248765,Cincinnati,Ohio,DISTRICT 3,39.096373,-84.568387,2019-07-16 15:47:00,USE OF FORCE INVESTIGATION,FEMALE,BLACK,MALE,WHITE,0,0,1,0,1,0,1,0,0,0,1,0,0
2019-249505,Cincinnati,Ohio,DISTRICT 3,39.119091,-84.581834,2018-08-22 12:59:00,USE OF FORCE INVESTIGATION,FEMALE,BLACK,FEMALE,WHITE,0,0,1,1,0,0,1,0,0,0,1,0,0


## Dallas Texas

In [151]:
dallas = dictOfDfs['Dallas Texas'].copy()
dallas.info()
dallas.head(10)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 251 entries, 0 to 250
Data columns (total 13 columns):
 #   Column                                        Non-Null Count  Dtype         
---  ------                                        --------------  -----         
 0   City                                          251 non-null    object        
 1   State                                         251 non-null    object        
 2   Case #                                        251 non-null    object        
 3   Date                                          251 non-null    datetime64[ns]
 4   Location                                      251 non-null    object        
 5   Subject Deceased, Injured, or Shoot and Miss  251 non-null    object        
 6   Subject Weapon                                251 non-null    object        
 7   Subject(s)                                    251 non-null    object        
 8   Officer(s)                                    251 non-null    object  

Unnamed: 0,City,State,Case #,Date,Location,"Subject Deceased, Injured, or Shoot and Miss",Subject Weapon,Subject(s),Officer(s),Grand Jury Disposition,Attorney General Forms URL,Summary URL,GeoLocation
0,Dallas,Texas,60045A,2013-03-14,2208 W. Northwest Highway,Deceased,Handgun,"Johnson, Tyrique B/M","Timms, Christopher B/M",No Bill,,https://www.dallaspolice.net/reports/OIS/narra...,"2208 W Northwest Highway\nDallas, Texas\n(32.8..."
1,Dallas,Texas,165193-2016,2016-07-07,801 Main Street,Deceased,Assault Rifle,"Johnson, Micah B/M","Edwards, Henry W/M; Wells, Giovanni B/M; Junge...",No Bill,https://www.dallaspolice.net/reports/OIS/ag_fo...,https://www.dallaspolice.net/reports/OIS/narra...,"801 Main Street\nDallas, Texas\n(32.779453, -9..."
2,Dallas,Texas,254101-2014,2014-10-22,9770 Forest Lane,Shoot and Miss,Unarmed,"McGee, Gregory B/M","Johnson, Michael B/M",,,https://www.dallaspolice.net/reports/OIS/narra...,"9770 Forest Lane\nDallas, Texas\n(32.909415, -..."
3,Dallas,Texas,263673A,2013-10-14,9452 Crimnson Court,Injured,Knife,"Bennett, Bobby W/M","Spencer, Cardan B/M",True Bill,,https://www.dallaspolice.net/reports/OIS/narra...,"9452 Crimnson Court\nDallas, Texas\n(32.669013..."
4,Dallas,Texas,203639-2016,2016-08-25,414 N Kramer,Deceased,Handgun,"Portillo, Elias L/M","Wagner, Christopher W/M",No Bill,https://www.dallaspolice.net/reports/OIS/ag_fo...,https://www.dallaspolice.net/reports/OIS/narra...,"414 N Kramer\nDallas, Texas\n(32.748632, -96.8..."
5,Dallas,Texas,251177-2016,2016-10-19,3500 Virginia Boulevard,Deceased,BB Gun,"Garcia, Anthony L/M","Guzman, Jose L/M",No Bill,https://www.dallaspolice.net/reports/OIS/ag_fo...,https://www.dallaspolice.net/reports/OIS/narra...,"3500 Virginia Boulevard\nDallas, Texas\n(32.74..."
6,Dallas,Texas,316668X,2010-11-16,8059 L.B.J. Freeway,Injured,Pellet Gun,"Williams, Joseph B/M","Felini, Michael W/M; Curtis, Guy W/M",No Bill,,https://www.dallaspolice.net/reports/OIS/narra...,"8059 L B J Freeway\nDallas, Texas\n(32.924881,..."
7,Dallas,Texas,1004453N,2004-12-29,2400 Walnut Hill Lane,Shoot and Miss,Vehicle,"Evans, Jerry W/M","Nguyen, Buu A/M",,,https://www.dallaspolice.net/reports/OIS/narra...,"2400 Walnut Hill Lane\nDallas, Texas\n(32.8810..."
8,Dallas,Texas,884709M,2003-11-20,6300 C.F. Hawn Freeway,Shoot and Miss,Handgun,Unknown L/M,"Martin, Roger W/M",,,https://www.dallaspolice.net/reports/OIS/narra...,"6300 C F Hawn Freeway\nDallas, Texas\n(32.7336..."
9,Dallas,Texas,56628A,2013-03-10,3303 Southern Oaks Boulevard,Deceased,Hands,"Clinton, Allen B/M","Staller, Clark W/M",No Bill,,https://www.dallaspolice.net/reports/OIS/narra...,"3303 Southern Oaks Boulevard\nDallas, Texas\n(..."


In [143]:
dallas['Case #'].value_counts()

236584A        1
161616-2016    1
042227-2018    1
302639Y        1
8025N          1
              ..
986476P        1
203505V        1
456826T        1
100044-2020    1
60944A         1
Name: Case #, Length: 251, dtype: int64

In [155]:
def get_awful_string(df, column):
    breakout = []
    for y in df[column].map(lambda x: x.split(' ')):
        tempList = []
        for k in range(len(y)):
            if ((k % 3)+1)% 3 == 0:
                tempList.append(y[k])
            else:
                continue
        breakout.append(tempList)
    return breakout
for x in get_awful_string(dallas,'Officer(s)'):
    print(''.join(x).split('/'))

['B', 'M']
['W', 'M;B', 'M;W', 'M;L', 'M;W', 'M;A', 'M;W', 'M;W', 'M;W', 'M;W', 'M;W', 'M;B', 'M']
['B', 'M']
['B', 'M']
['W', 'M']
['L', 'M']
['W', 'M;W', 'M']
['A', 'M']
['W', 'M']
['W', 'M']
['L', 'M']
['A', 'M']
['L', 'M;W', 'M;W', 'M;B', 'M']
['W', 'M;W', 'M;W', 'M;W', 'M;W', 'M']
['L', 'M']
['W', 'M']
['W', 'M']
['W', 'M']
['L', 'M']
['W', 'M']
['W', 'M;W', 'F']
['L', 'M']
['B', 'M']
['L', 'M']
['L', 'M']
['W', 'M;B', 'M;W', 'M;L', 'M']
['L', 'M']
['L', 'M']
['W', 'M']
['L', 'M;L', 'M']
['W', 'M']
['W', 'M']
['W', 'M']
['W', 'M']
['L', 'M']
['L', 'M']
['W', 'M;L', 'M']
['W', 'M']
['NA', 'M']
['W', 'M']
['B', 'M']
['B', 'M;W', 'M']
['W', 'M']
['W', 'M;W', 'M;W', 'M;W', 'M']
['W', 'M']
['W', 'M']
['W', 'M']
['W', 'M;W', 'M']
['L', 'M']
['W', 'M;L', 'M']
[',']
['B', 'M;B', 'M']
['L', 'M']
['W', 'M;L', 'M;W', 'M;B', 'M;W', 'M;W', 'M;W', 'M;W', 'M;B', 'M;W', 'M;W', 'M;W', 'M;W', 'M']
['W', 'M']
['L', 'M']
['B', 'F']
['W', 'M']
['B', 'M']
['B', 'M']
['W', 'M;W', 'M']
['L', 'M']
['W', '

<!-- for x in get_awful_string(dallas,'Subject(s)'): -->

In [145]:
dallas['x_y_coord'] = dallas.GeoLocation.map(lambda x: x.replace('(', '').replace(',','').replace(')','').strip().split()[-2:])

In [146]:
dallas.columns

Index(['City', 'State', 'Case #', 'Date', 'Location',
       'Subject Deceased, Injured, or Shoot and Miss', 'Subject Weapon',
       'Subject(s)', 'Officer(s)', 'Grand Jury Disposition',
       'Attorney General Forms URL', 'Summary URL', 'GeoLocation',
       'x_y_coord'],
      dtype='object')

In [147]:
dallas.drop(columns = ['Case #', 'Grand Jury Disposition',
       'Attorney General Forms URL', 'Summary URL', 'GeoLocation'])

Unnamed: 0,City,State,Date,Location,"Subject Deceased, Injured, or Shoot and Miss",Subject Weapon,Subject(s),Officer(s),x_y_coord
0,Dallas,Texas,2013-03-14,2208 W. Northwest Highway,Deceased,Handgun,"Johnson, Tyrique B/M","Timms, Christopher B/M","[32.8641, -96.898998]"
1,Dallas,Texas,2016-07-07,801 Main Street,Deceased,Assault Rifle,"Johnson, Micah B/M","Edwards, Henry W/M; Wells, Giovanni B/M; Junge...","[32.779453, -96.804915]"
2,Dallas,Texas,2014-10-22,9770 Forest Lane,Shoot and Miss,Unarmed,"McGee, Gregory B/M","Johnson, Michael B/M","[32.909415, -96.718739]"
3,Dallas,Texas,2013-10-14,9452 Crimnson Court,Injured,Knife,"Bennett, Bobby W/M","Spencer, Cardan B/M","[32.669013, -96.662659]"
4,Dallas,Texas,2016-08-25,414 N Kramer,Deceased,Handgun,"Portillo, Elias L/M","Wagner, Christopher W/M","[32.748632, -96.878279]"
...,...,...,...,...,...,...,...,...,...
246,Dallas,Texas,2019-04-06,2500 S. Vernon Ave.,Shoot and Miss,Vehicle,"Garcia, Abraham L/M","Rojo, Aldo L/M","[32.721806, -96.836167]"
247,Dallas,Texas,2003-03-18,14655 Preston Road,Deceased,Shotgun,"Bannister, James W/M","Guynn, Byron W/M","[32.947548, -96.803614]"
248,Dallas,Texas,2013-03-15,9147 Skillman Street,Shoot and Miss,BB Gun,"Jones, Christopher B/M","Tholl, Kyle W/M","[32.902101, -96.717073]"
249,Dallas,Texas,2019-03-22,10666 E. Northwest Hwy.,Injured,Handgun,"Jasso, Santiago L/M; Portillo, Oscar L/M","Amaya, Juan L/M","[32.864037, -96.701106]"


## Hartford Conneticut

In [74]:
hartford = dictOfDfs['Hartford Conneticut'].copy()
hartford.info()
hartford.drop(columns = [
    'Address',
    'UCR_1_Category',
    'UCR_2_Category',
    'UCR_1_Code',
    'UCR_2_Code'
])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 89 entries, 0 to 88
Data columns (total 14 columns):
 #   Column             Non-Null Count  Dtype         
---  ------             --------------  -----         
 0   City               89 non-null     object        
 1   State              89 non-null     object        
 2   Case_Number        89 non-null     int64         
 3   Date               89 non-null     datetime64[ns]
 4   Time_24HR          89 non-null     int64         
 5   Address            89 non-null     object        
 6   UCR_1_Category     89 non-null     object        
 7   UCR_1_Description  89 non-null     object        
 8   UCR_1_Code         89 non-null     int64         
 9   UCR_2_Category     76 non-null     object        
 10  UCR_2_Description  76 non-null     object        
 11  UCR_2_Code         89 non-null     int64         
 12  Neighborhood       89 non-null     object        
 13  geom               89 non-null     object        
dtypes: datetime6

Unnamed: 0,City,State,Case_Number,Date,Time_24HR,UCR_1_Description,UCR_2_Description,Neighborhood,geom
0,Hartford,Connecticut,19037774,2019-11-21,1440,SHOOTING-INJ,RECKLESS END 1,FROG HOLLOW,"(41.76221240514027, -72.69518931015217)"
1,Hartford,Connecticut,19038163,2019-11-25,1504,PD-SHOOTING,ANIMAL COMPLNT,SOUTHWEST,"(41.72656736192673, -72.69877001050818)"
2,Hartford,Connecticut,19039337,2019-12-06,1121,PD-SHOOTING,ANIMAL COMPLNT,SOUTH GREEN,"(41.757536822683626, -72.6775877244074)"
3,Hartford,Connecticut,19040389,2019-12-13,2326,PD-SHOOTING,ANIMAL BITE,CLAY-ARSENAL,"(41.777946086198725, -72.68211553566037)"
4,Hartford,Connecticut,20000239,2020-01-02,2036,PD-SHOOTING,ANIMAL COMPLNT,BEHIND THE ROCKS,"(41.739999911143904, -72.69672515790272)"
...,...,...,...,...,...,...,...,...,...
84,Hartford,Connecticut,18034803,2018-11-16,1055,PD-SHOOTING,,CLAY-ARSENAL,"(41.78077611289366, -72.68439426599812)"
85,Hartford,Connecticut,5019664,2005-05-07,1924,PD-SHOOTING,FATAL SHOOTING,NORTHEAST,"(41.786822387292226, -72.67355559756052)"
86,Hartford,Connecticut,7006629,2007-02-16,2210,RECKLESS END 1,SHOOTING-INJ,NORTHEAST,"(41.78568000046961, -72.68122999999389)"
87,Hartford,Connecticut,11002573,2011-01-23,151,PD-SHOOTING,OCC-INJ-POLICE,WESTEND,"(41.76680072076744, -72.70830864001476)"


In [75]:
hartford['UCR_2_Description'].value_counts()

ANIMAL COMPLNT    27
SHOOTING-INJ      21
SHOOTING-MISS      7
PD-SHOOTING        7
FATAL SHOOTING     5
RECKLESS END 1     3
ANIMAL BITE        3
BREACH-PEACE       1
RISK OF INJURY     1
OCC-INJ-POLICE     1
Name: UCR_2_Description, dtype: int64

### Getting rid of animal shootings

In [76]:
hartfordShooting = hartford.loc[(hartford['UCR_2_Description'] != 'ANIMAL COMPLNT') & 
                                (hartford['UCR_2_Description'] != 'ANIMAL BITE') & 
                                (hartford['UCR_1_Description'] != 'ANIMAL COMPLNT')]
hartfordShooting.loc[(hartfordShooting['UCR_1_Description'] == 'SHOOTING-INJ') | 
                     (hartfordShooting['UCR_2_Description'] == 'SHOOTING-INJ')]

hartfordShooting.loc[(hartfordShooting['UCR_1_Description'] == 'SHOOTING-MISS') | 
                     (hartfordShooting['UCR_2_Description'] == 'SHOOTING-MISS')]

hartfordShooting.loc[(hartfordShooting['UCR_1_Description'] == 'FATAL SHOOTING') | 
                     (hartfordShooting['UCR_2_Description'] == 'FATAL SHOOTING')]

Unnamed: 0,City,State,Case_Number,Date,Time_24HR,Address,UCR_1_Category,UCR_1_Description,UCR_1_Code,UCR_2_Category,UCR_2_Description,UCR_2_Code,Neighborhood,geom
13,Hartford,Connecticut,15017598,2015-06-09,1915,144 WESTMINSTER ST,04* - AGGRAVATED ASSAULT,ASSLT-PO-KNIFE,422,49* - SHOOTING,FATAL SHOOTING,4901,BLUE HILLS,"(41.79747742497788, -72.70259793482641)"
26,Hartford,Connecticut,13015945,2013-05-15,2201,24 HEATH ST,49* - SHOOTING,PD-SHOOTING,4900,49* - SHOOTING,FATAL SHOOTING,4901,PARKVILLE,"(41.75712035776425, -72.71072519222501)"
28,Hartford,Connecticut,12023809,2012-07-11,105,60 WADSWORTH ST,49* - SHOOTING,PD-SHOOTING,4900,49* - SHOOTING,FATAL SHOOTING,4901,SOUTH GREEN,"(41.75904174484983, -72.67915721701848)"
83,Hartford,Connecticut,19024187,2019-07-26,2154,I 84W-EXIT 48 ON RAMP,49* - SHOOTING,PD-SHOOTING,4900,49* - SHOOTING,FATAL SHOOTING,4901,ASYLUM HILL,"(41.7664585150635, -72.6871952543044)"
85,Hartford,Connecticut,5019664,2005-05-07,1924,2374 MAIN ST,49* - SHOOTING,PD-SHOOTING,4900,49* - SHOOTING,FATAL SHOOTING,4901,NORTHEAST,"(41.786822387292226, -72.67355559756052)"


### Creating injury column from ucr description

In [77]:
hartfordShooting['UCR_2_Description'].isin(['SHOOTING-INJ',])

0     False
6      True
7      True
9      True
10    False
      ...  
83    False
84    False
85    False
86     True
87    False
Name: UCR_2_Description, Length: 54, dtype: bool

### Reducing features

In [78]:
ucr1_description_map = {
    ''
}

In [79]:
hartfordShooting['UCR_1_Description'].value_counts()

RECKLESS END 1              18
PD-SHOOTING                 12
SHOOTING-INJ                 9
ASSLT-PO-WEAPON              5
SHOOTING-MISS                2
RISK OF INJURY               1
CRUELTY/ANIMAL               1
SUDDEN DEATH                 1
PD/VEH/ACC/MI*MULT-INJ*      1
RECOVERED STOLEN FIREARM     1
ROBBERY-GUN *STREET*         1
ASSLT-PO-GUN                 1
ASSLT-PO-KNIFE               1
Name: UCR_1_Description, dtype: int64

In [80]:
hartfordShooting['UCR_2_Description'].value_counts()

SHOOTING-INJ      21
PD-SHOOTING        6
FATAL SHOOTING     5
RECKLESS END 1     3
SHOOTING-MISS      3
BREACH-PEACE       1
OCC-INJ-POLICE     1
RISK OF INJURY     1
Name: UCR_2_Description, dtype: int64

In [81]:
hartford['UCR_1_Category'].map(lambda x: x.replace('-','').replace('*','').split()[-1])

0     SHOOTING
1     SHOOTING
2     SHOOTING
3     SHOOTING
4     SHOOTING
        ...   
84    SHOOTING
85    SHOOTING
86     ASSAULT
87    SHOOTING
88    SHOOTING
Name: UCR_1_Category, Length: 89, dtype: object

## Indianapolis Indiana

### Intro data exploration

In [169]:
indianapolis = dictOfDfs['Indianapolis Indiana']
indianapolis.info()
indianapolis.columns = [
    'city',
    'state',
    'id',
    'date',
    'division',
    'district',
    'shift',
    'beat',
    'disposition',
    'citizen_armed',
    'officer_armed',
    'incident_initiation',
    'citizen_injury',
    'officer_injury',
    'citizen_race',
    'citizen_sex',
    'citizent_age',
    'officer_race',
    'officer_sex',
    'officer_age',
    'officer_years_of_service',
    'officer_identifier'
]
# indianapolis = indianapolis.drop(columns = [
#     'officerWeaponUsed',
#     'beat', 
#     'disposition', 
#     'officerIdentifier'
# ])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 54 entries, 0 to 53
Data columns (total 22 columns):
 #   Column                 Non-Null Count  Dtype         
---  ------                 --------------  -----         
 0   City                   54 non-null     object        
 1   State                  54 non-null     object        
 2   id                     54 non-null     object        
 3   occurredDate           53 non-null     datetime64[ns]
 4   division               53 non-null     object        
 5   district               53 non-null     object        
 6   shift                  53 non-null     object        
 7   beat                   6 non-null      object        
 8   disposition            1 non-null      object        
 9   residentWeaponUsed     41 non-null     object        
 10  officerWeaponUsed      51 non-null     object        
 11  serviceType            33 non-null     object        
 12  residentCondition      49 non-null     object        
 13  officer

In [170]:
for x in indianapolis.columns:
    print(indianapolis[x].value_counts());

Indianapolis    54
Name: city, dtype: int64
Indiana    54
Name: state, dtype: int64
1b3c48c6f44d0af6944812bf740a9cce    6
ca175186383abf3ed448cb22df346753    3
5b724ceb1dfbb2bbcfa25102b30ba55a    3
bb5c59502f5d1b3834cc8e97638146e4    2
a1bf412e46bc7a9377813d6859b04979    2
dc503d13c8ff990a1edc0d1e69f0bf1a    2
03221d6cbd5478dc62ea6a0603d9d90d    2
658455897d58d7dff125447e51d83846    2
402e46b147ecbc9c6fa8d9b11e08714f    2
55ac7711fc0a8e42707070d3526d00bf    2
cd314cea50e8fa57d5d49eb1af53dbfd    2
da659547952913021f57cea0af21d540    1
52a0eb13a62ef588e55976df0537a01d    1
a5cd96363355d77224357c89a716483c    1
b055ef4be4958bea407cc0bbd2cf0c53    1
cfa745b4166916266a70fb1f3a95510d    1
2e6d0b4e9b9340cec135bec91da82b22    1
580df496c6c5bfde500e0b21ef2ce95b    1
41622a20fedd9cba24e690f246c687f4    1
a4aee5eed92ac486c08ecbf99511f1e7    1
66745f1f48d425a018634f5f38851add    1
c3d025a3d32bdc49ecc0efa0e0fb1fa3    1
f3923c0b16fbbcaef2231c95236ae35f    1
893f13656351c59dd0e2f9d5140754cc    1
3f1e

In [174]:
race_map = {
    'White' : 'white',
    'WHITE' : 'white',
    'W' : 'white',
    'w' : 'white',
    'Black': 'black',
    'BLACK': 'black',
    'B' : 'black',
    'b' : 'black',
    'Hispanic': 'hispanic',
    'HISPANIC' : 'hispanic',
    'H' : 'hispanic',
    'h' : 'hispanic',
    'Unknown' : 'unknown',
    'UNK' : 'unknown',
    'UNKNOWN' : 'unknown'
}

sex_map = {
    'MALE': 'male',
    'Male' : 'male',
    'M' : 'male',
    'm' : 'male',
    'FEMALE' : 'female',
    'Female' : 'female',
    'F' : 'female',
    'f' : 'female'
}

indianapolis['officer_race'] = indianapolis['officer_race'].map(race_map)
indianapolis['citizen_race'] = indianapolis['citizen_race'].map(race_map)
indianapolis['citizen_sex'] = indianapolis['citizen_sex'].map(sex_map)
indianapolis['officer_sex'] = indianapolis['officer_sex'].map(sex_map)

In [175]:

officers = pd.concat([indianapolis['id'],pd.get_dummies(indianapolis[['officer_race','officer_sex']])], axis = 1)
officersGrouped = officers.groupby(['id']).sum()
indianapolisThing = indianapolis.groupby(['id']).first()
indianapolisEncoded = pd.concat([indianapolisThing, officersGrouped], axis = 1).reset_index()
indianapolisEncoded.head()

Unnamed: 0,id,city,state,date,division,district,shift,beat,disposition,citizen_armed,officer_armed,incident_initiation,citizen_injury,officer_injury,citizen_race,citizen_sex,citizent_age,officer_race,officer_sex,officer_age,officer_years_of_service,officer_identifier,officer_sex_female,officer_sex_male
0,028a1f6a21dbb4856c41d1f0510db606,Indianapolis,Indiana,2015-10-05,Reserves,Platoon 4,Squad 2,,,Handgun,IMPD - Duty Handgun,Investigation,,No injuries noted or visible,,male,47.0,,male,34.0,11.0,7efe6cc87736398ca74644d46db534d2,0,1
1,03221d6cbd5478dc62ea6a0603d9d90d,Indianapolis,Indiana,2015-08-17,Operations Division,North District,ND Late Shift,,,Handgun,IMPD - Duty Handgun,Investigation,Gunshot Wound,No injuries noted or visible,,male,21.0,,male,51.0,13.0,c971e97de8fe33ecb13576dbd2f1a4c4,0,2
2,122aabf48ef2899f6ba743e252e2c2a9,Indianapolis,Indiana,2014-10-29,Operations Division,North District,ND Day Shift,ND Day Shift,,Handgun,IMPD - Duty Handgun,Call For Service,No injuries noted or visible,No injuries noted or visible,,male,29.0,,male,52.0,29.0,a4bff69d0bc8af0da01e62cdcc2f0d65,0,1
3,1b3c48c6f44d0af6944812bf740a9cce,Indianapolis,Indiana,2015-04-06,Operations Division,Southwest District,SW Late Shift,,,Handgun,IMPD - Duty Handgun,Call For Service,Gunshot Wound,,,male,23.0,,male,48.0,14.0,7d743deffb658ef13a3973bf14cb4638,2,4
4,25fbcadc17452a0562780752f151ac6a,Indianapolis,Indiana,2014-12-13,Operations Division,North District,ND Middle Shift,,,Unarmed,IMPD - Duty Handgun,Call For Service,Gunshot Wound,No injuries noted or visible,,male,42.0,,male,29.0,4.0,07c216294c07116ab8800137fb2f2cf8,0,1


## Orlando Florida

In [86]:
orlando = dictOfDfs['Orlando Florida'].copy()
orlando.info()
orlando.drop(columns = [
    'Review Letter',
    'Georeferenced Column'
])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 65 entries, 0 to 64
Data columns (total 25 columns):
 #   Column                       Non-Null Count  Dtype         
---  ------                       --------------  -----         
 0   City                         65 non-null     object        
 1   State                        65 non-null     object        
 2   Case #                       65 non-null     object        
 3   Date                         65 non-null     datetime64[ns]
 4   Address                      65 non-null     object        
 5   Incident Type                65 non-null     object        
 6   Number of Officers Involved  65 non-null     int64         
 7   Officer Name                 65 non-null     object        
 8   Officer Race                 65 non-null     object        
 9   Ethnicity                    61 non-null     object        
 10  Officer Gender               65 non-null     object        
 11  Officer Age                  65 non-null     ob

Unnamed: 0,City,State,Case #,Date,Address,Incident Type,Number of Officers Involved,Officer Name,Officer Race,Ethnicity,Officer Gender,Officer Age,Officer Tenure,Number of Suspects Involved,Suspect Name,Suspect Race,Suspect Gender,Suspect Age,Suspect's Weapon,Suspect Hit,Fatal,Officer Hit,Fatal (Officer)
0,Orlando,Florida,11-257484,2011-06-10,400 S. Parramore,Drug Violation,1,"Ochiuzzo, Matt",W,N,M,28,5,1,"Fell,Ronnery",Black,M,27,Hands/reaching,Yes,No,No,No
1,Orlando,Florida,18-228375,2018-06-10,4832 Eaglesmere Drive,Battery,1,"Gonzalez, Emanuel",W,H,M,30,1,1,"Lindsey, Gary",White,M,35,Handgun,Undetermined,Self-Inflicted,No,No
2,Orlando,Florida,20-60687,2020-02-18,"4567 S. Kirkman Rd. Orlando, FL 32811",Occupied Stolen Vehicle,2,"Chisari, Ben; Madison, Jeffery","W, B",,"M, M","38, 39","9, 1",1,"Aldophe, Kevin",Black,M,22,Handgun,Yes,Yes,No,No
3,Orlando,Florida,13-237300,2013-06-10,I4/Livingston,Armed Subject,4,"D'Amato, Greg; Broadhurst, Blake; Edwards, Mic...","W, B, B, W","N, N, N, N","M, M, F, M","41, 30, 29, 33","11, 5, 0.25, 4",1,"Paige, Joseph",Black,M,24,Handgun,Yes,Yes,"No, No, No, No","No, No, No, No"
4,Orlando,Florida,18-373194,2018-09-25,5055 City Street,Unknown Trouble,4,"Goncalves, Gustavo; Barber, Alex; Gonzalez, Em...","W,W,W,W","H,W,H,W","M,M,M,M",36283028,1111,1,"Burch, Dravious Lavon Sr.",Black,M,34,Handgun (2),Yes,Yes,No,No
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
60,Orlando,Florida,18-414226,2018-10-26,3936 W D Judge Drive,Violent Mentally Ill Person,1,"Wright, Norman; Costa, Robert","B,W","N,H","M,M",4857,4651,1,"Counts,Tymeke",Black,M,18,Handgun,No,No,No,No
61,Orlando,Florida,13-495877,2013-11-25,Thorpe/Wentwood,Traffic Stop,1,"Briscoe, Derek",W,N,M,30,4,1,"Leggitt, Brian",White,M,31,Motor Vehicle,Yes,Yes,No,No
62,Orlando,Florida,09-212879,2009-05-06,25 W Church Street,Traffic Crash,2,"Alban, David; Brady, Daniel","W, W","H, N","M, M","34, 35","4, 12",1,"Delices, Vales",Black,M,23,Motor Vehicle,Yes,Yes,"No, No","No, No"
63,Orlando,Florida,14-211161,2014-05-21,4449 Malibu Dr,Armed Robbery,2,"Mongeluzzo, Anthony; Goss, Aaron","W, W","N, N","M, M","35, 34","7, 5",1,"Rodgers, Jermassioun",Black,M,20,Handgun,Yes,Yes,"No, No","No, No"


In [87]:
for x in orlando.columns:
    print(orlando[x].value_counts())

Orlando    65
Name: City, dtype: int64
Florida    65
Name: State, dtype: int64
11-306389    1
17-075014    1
14-443123    1
13-301648    1
11-201994    1
            ..
14-345320    1
18-379906    1
12-073698    1
11-521034    1
13-022948    1
Name: Case #, Length: 65, dtype: int64
2009-05-06    2
2012-02-19    2
2018-10-01    1
2012-07-03    1
2014-10-24    1
             ..
2016-05-01    1
2014-05-22    1
2015-07-13    1
2018-09-25    1
2014-02-24    1
Name: Date, Length: 63, dtype: int64
390 N Orange                                       1
4400 Block Martins Way                             1
2124 W. Colonial Dr. Orlando, FL32804              1
118 S. Orange Ave                                  1
Hiawassee & SR 50                                  1
                                                  ..
2 S. Orange Ave                                    1
1657 Watauga Way                                   1
Jamil/Central                                      1
Bowen DR at Folkstone      

In [88]:
orlando['officers_white'] = get_specific_type(orlando, 'Officer Race', 'W', seperating_value=', ')
orlando['officers_black'] = get_specific_type(orlando, 'Officer Race', 'B', seperating_value=', ')
orlando['officers_other'] = get_specific_type(orlando, 'Officer Race', 'O', seperating_value=', ')
orlando['officers_race_exempt'] = get_specific_type(orlando, 'Officer Race', 'Exempt', seperating_value=', ')
orlando['officers_male'] = get_specific_type(orlando, 'Officer Gender', 'M', seperating_value=', ')
orlando['officers_female'] = get_specific_type(orlando, 'Officer Gender', 'F', seperating_value=', ')
orlando['officers_sex_unknown'] = get_specific_type(orlando, 'Officer Gender', 'Unknown', seperating_value=', ')
orlando['officers_injured'] = get_specific_type(orlando, 'Officer Hit', 'Yes', seperating_value=', ')
orlando['officers_killed'] = get_specific_type(orlando, 'Fatal (Officer)', 'Yes', seperating_value=', ')

In [89]:
orlando['suspects_white'] = get_specific_type(orlando, 'Suspect Race', 'White', seperating_value=', ')
orlando['suspects_black'] = get_specific_type(orlando, 'Suspect Race', 'Black', seperating_value=', ')
orlando['suspects_other'] = get_specific_type(orlando, 'Suspect Race', 'Other', seperating_value=', ')
orlando['suspects_race_unknown'] = get_specific_type(orlando, 'Suspect Race', 'Unknown', seperating_value=', ')
orlando['suspects_male'] = get_specific_type(orlando, 'Suspect Gender', 'M', seperating_value=', ')
orlando['suspects_female'] = get_specific_type(orlando, 'Suspect Gender', 'F', seperating_value=', ')
orlando['suspects_sex_unknown'] = get_specific_type(orlando, 'Suspect Gender', 'Unknown', seperating_value=', ')
orlando['suspects_injured'] = get_specific_type(orlando, 'Suspect Hit', 'Yes', seperating_value=', ')
orlando['suspects_killed'] = get_specific_type(orlando, 'Fatal', 'Yes', seperating_value=', ')

In [90]:
orlandoEncoded = orlando.copy()

## Sparks Nevada

In [91]:
sparks = dictOfDfs['Sparks Nevada'].copy()
sparks.info()
sparks.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 19 entries, 0 to 18
Data columns (total 15 columns):
 #   Column                       Non-Null Count  Dtype 
---  ------                       --------------  ----- 
 0   City                         19 non-null     object
 1   State                        19 non-null     object
 2   YEAR                         19 non-null     int64 
 3   Type of Incident             19 non-null     object
 4   Investigating Agency Case #  19 non-null     object
 5   Action Taken Against Oficer  19 non-null     object
 6   Subject Age                  19 non-null     object
 7   Subject Gender               19 non-null     object
 8   Subject Race                 19 non-null     object
 9   Subject Armed                19 non-null     object
 10  Subject Weapon               19 non-null     object
 11  #  of SPD Officers           19 non-null     object
 12  Officer's Race               19 non-null     object
 13  Subject Struck               19 non-n

Unnamed: 0,City,State,YEAR,Type of Incident,Investigating Agency Case #,Action Taken Against Oficer,Subject Age,Subject Gender,Subject Race,Subject Armed,Subject Weapon,# of SPD Officers,Officer's Race,Subject Struck,Injuries
0,Sparks,Nevada,2000,Wanted Subject,RPD 00-236154,BDW,46,M,W,Yes,Handgun,1,White,Yes,Non Fatal
1,Sparks,Nevada,2002,Wanted Subject,RPD 02-2220,ADW,29,M,W,Unknown,Vehicle,1,White,Unknown,Non Fatal
2,Sparks,Nevada,2003,Traffic Stop,SPD 03-12116,BDW,16,M,H,No,Vehicle,1,White,No,
3,Sparks,Nevada,2003,Wanted Subject,RPD 03-47722,ADW,23,M,W,No,Vehicle,1,White,Yes,Fatal
4,Sparks,Nevada,2005,Eluding/Warrant,SPD 05-14577,ADW,36,M,W,No,Vehicle,1,White,No,


## Portland Oregon

In [92]:
portland = dictOfDfs['Portland Oregon'].copy()
portland.info()
portland.drop(columns = [ 
    'Internal Affairs Investigation Link',
    'Internal Affairs Investigation Link 2 (as needed)',
    'Online PDF Link',
    'Was Subject Percieved as Armed?',
    'Deadly Force Type',
    'Grand Jury Transcripts link'
])

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 55 entries, 0 to 54
Data columns (total 27 columns):
 #   Column                                             Non-Null Count  Dtype         
---  ------                                             --------------  -----         
 0   City                                               55 non-null     object        
 1   State                                              55 non-null     object        
 2   Any Officer Fatal Injuries?                        55 non-null     object        
 3   Any Officers Injured on Case?                      55 non-null     object        
 4   Case #                                             55 non-null     object        
 5   Day of Date Time                                   55 non-null     datetime64[ns]
 6   Deadly Force Type                                  55 non-null     object        
 7   Did Subject Fire Shots at Officers?                55 non-null     object        
 8   General Location      

Unnamed: 0,City,State,Any Officer Fatal Injuries?,Any Officers Injured on Case?,Case #,Day of Date Time,Did Subject Fire Shots at Officers?,General Location,Initial Call Type,Officer Race,Officer Sex,Subject Name,Subject Race,Subject Sex,Subject Weapon Type (NIBRS),Was Subject Actually Armed?,Was Subject Injured?,Was Subject Injury Fatal?,Number of Officers Injured on Case,Officer Tenure (yrs),Subject Age
0,Portland,Oregon,0,No,pending release (6),2020-06-28,pending release,7900 block of Southeast 6th Ave,Premise Check,Male,Male,"Stockton, Gray Tristan",White,Male,pending release,pending release,No,No,0,7,25
1,Portland,Oregon,No,No,8352-10-01 00:00:00,2010-01-29,No,Sandy Terrace Apartments,Disorder,White,Male,"Campbell, Aaron",Black,Male,,No,Yes,Yes,0,8,25
2,Portland,Oregon,No,No,10-23336,2010-03-22,No,Hoyt Arboretum,Disorder,White,Male,"Collins, Jack",White,Male,Knife /Cutting Instrument,Yes,Yes,Yes,0,13,58
3,Portland,Oregon,No,No,10-097006,2010-11-23,Yes,11000 block SE 106th & Cherry Blossom Ave,Shots Fired,White,Male,"Boehler, Craig",White,Male,Handgun,Yes,Yes,Yes,0,17,46
4,Portland,Oregon,No,No,10-103413,2010-12-17,No,12110 East Burnside St,Disorder,White,Male,"Ferguson, Darryel Dwayne",White,Male,Firearm-replica,Yes,Yes,Yes,0,8,90
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
50,Portland,Oregon,No,Yes,11-18048,2011-03-06,Yes,3300 block Southeast 10th Ave,Behaviorial Health,White,Male,"Turner, Ralph",White,Male,Rifle,Yes,No,No,3,9,61
51,Portland,Oregon,No,Yes,14-20245,2014-03-12,Yes,SW Cheltenham St,Traffic Stop,Hispanic,Male,"Swoboda, Kelly Vern",White,Male,Handgun,Yes,Yes,Yes,1,8,49
52,Portland,Oregon,No,Yes,14-30864,2014-04-16,Yes,SW Lobelia St and Capitol Hwy,Disorder,White,Male,"Ropp, Paul",White,Male,Rifle,Yes,Yes,No,2,25,40
53,Portland,Oregon,No,Yes,15-225588,2015-07-05,No,11036 NE Sandy Blvd,Person Contact,White,Male,"Ellis, David James",White,Male,Knife /Cutting Instrument,Yes,Yes,No,1,4,55


In [93]:
for x in portland.columns:
    print(portland[x].value_counts())

Portland    55
Name: City, dtype: int64
Oregon    55
Name: State, dtype: int64
No    54
0      1
Name: Any Officer Fatal Injuries?, dtype: int64
No     48
Yes     7
Name: Any Officers Injured on Case?, dtype: int64
11-18048                                   2
19-138195                                  2
15-216504                                  2
11-375                                     2
13-018128                                  2
17-165086                                  2
19-420530                                  1
Gresham PD case                            1
10-103413                                  1
12-85653                                   1
pending release (6)                        1
1968-11-01 00:00:00                        1
14-47574                                   1
18-114856                                  1
15-225588                                  1
17-143978                                  1
Washington County Sheriff's Office case    1
16-164536           

In [94]:
officersSubjectsCat = pd.get_dummies(portland[['Officer Race', 
                                               'Officer Sex', 
                                               'Subject Race', 
                                               'Subject Sex', 
                                               'Was Subject Injured?', 
                                               'Was Subject Injury Fatal?']])
officersSubjectsCat = pd.concat([portland['Case #'],officersSubjectsCat], axis = 1)
officersSubjectsCat = officersSubjectsCat.groupby(officersSubjectsCat['Case #']).sum().reset_index()
portlandEncoded = pd.merge(portland.groupby('Case #').first().reset_index(), officersSubjectsCat, how = 'left', on = 'Case #')

portlandEncoded.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 49 entries, 0 to 48
Data columns (total 44 columns):
 #   Column                                             Non-Null Count  Dtype         
---  ------                                             --------------  -----         
 0   Case #                                             49 non-null     object        
 1   City                                               49 non-null     object        
 2   State                                              49 non-null     object        
 3   Any Officer Fatal Injuries?                        49 non-null     object        
 4   Any Officers Injured on Case?                      49 non-null     object        
 5   Day of Date Time                                   49 non-null     datetime64[ns]
 6   Deadly Force Type                                  49 non-null     object        
 7   Did Subject Fire Shots at Officers?                49 non-null     object        
 8   General Location      

## Louisville Kentucky

In [95]:
louisville = dictOfDfs['Louisville Kentucky'].copy()
louisville.info()
louisville.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 95 entries, 0 to 94
Data columns (total 24 columns):
 #   Column                   Non-Null Count  Dtype  
---  ------                   --------------  -----  
 0   #                        82 non-null     object 
 1   Incident #               85 non-null     object 
 2   Month                    66 non-null     object 
 3   Day                      65 non-null     object 
 4   Time of Occurrence       65 non-null     object 
 5   Address of incident      65 non-null     object 
 6   Division                 65 non-null     object 
 7   Beat                     43 non-null     float64
 8   Investigation Type       66 non-null     object 
 9   Case Status              67 non-null     object 
 10  suspect_Name             69 non-null     object 
 11  suspect_race             95 non-null     object 
 12  suspect_sex              95 non-null     object 
 13  suspect_age              66 non-null     object 
 14  suspect_ethnicity        59 

Unnamed: 0,#,Incident #,Month,Day,Time of Occurrence,Address of incident,Division,Beat,Investigation Type,Case Status,suspect_Name,suspect_race,suspect_sex,suspect_age,suspect_ethnicity,suspect_weapon,officer_name,officer_race,officer_sex,officer_age,officer_ethnicity,officer_year_of_service,Lethal Y/N,Narrative
0,20-019,80-20-017048,March,13,43,3003 Springfield Dr.,3,,OIS,Open,Kenneth Walker III,B,M,27,U,Firearm,"Jon Mattingly,Brett Hankison,Myles Cosgrove",W,M,47,U,19,Y,Officers attempted to make entry into the resi...
1,20-020,80-20-017278,March,13,2117,8015 Blake Ln,3,,OIS,Open,Justin Riggs,W,M,30,U,U,"Nathaniel Lovewell,Joseph Borst,Rebecca Davis",W,M,31,U,6,Y,Officers responded to a reported domestic viol...
2,20-024,80-20-019370,March,25,1630,3030 Talisman Road,6,,OIS,Open,Byron Johnson,B,M,31,U,Firearm,"Brian Evanoff,Chad Stewart,Michael Woodard",W,M,40,U,9,N,Officers attempted to take wanted felony suspe...
3,20-026,80-20-020215,March,30,1206,200 Block E. Grey Street,1,,OIS,Open,Jessie Stringfield,W,M,44,U,Firearm,Michael Johnson,W,M,41,U,11,Y,Suspect was a known fugitve with outstanding b...
4,20-030,80-20-023475,April,17,1631,2700 Block S. 7th Street,4,,OIS,Open,Kendrick Holloway,B,M,42,U,Firearm,Joseph Tapp,W,M,48,U,12,N,As part of an on-going narcotics investigation...


In [96]:
for x in louisville.columns:
    print(louisville[x].value_counts())

14-090    4
11-080    4
14-043    3
17-007    2
13-051    2
         ..
19-025    1
18-066    1
12-033    1
19-012    1
19-021    1
Name: #, Length: 67, dtype: int64
Continued       20
80-20-023475     1
80-12-104103     1
8012104102       1
80-20-019370     1
                ..
80-19-028008     1
80-20-017278     1
80-14-101100     1
80-19-035697     1
80-19-059529     1
Name: Incident #, Length: 66, dtype: int64
April         12
August        10
March         10
October        7
May            5
July           5
February       4
June           3
December       3
January        2
November       2
September      2
September      1
Name: Month, dtype: int64
13       5
8        4
1        4
5        3
28       3
15       2
17       2
28th     2
23       2
25       2
20       2
21       2
31       2
13th     2
20th     2
26th     2
14       2
7        1
19th     1
8th      1
10       1
3        1
11       1
12       1
9th      1
16       1
18       1
19       1
6        1
24       1
29   

In [97]:
louisville['num_of_officers'] = [len(str(x).split(',')) for x in louisville.officer_name]
louisville.head()

Unnamed: 0,#,Incident #,Month,Day,Time of Occurrence,Address of incident,Division,Beat,Investigation Type,Case Status,suspect_Name,suspect_race,suspect_sex,suspect_age,suspect_ethnicity,suspect_weapon,officer_name,officer_race,officer_sex,officer_age,officer_ethnicity,officer_year_of_service,Lethal Y/N,Narrative,num_of_officers
0,20-019,80-20-017048,March,13,43,3003 Springfield Dr.,3,,OIS,Open,Kenneth Walker III,B,M,27,U,Firearm,"Jon Mattingly,Brett Hankison,Myles Cosgrove",W,M,47,U,19,Y,Officers attempted to make entry into the resi...,3
1,20-020,80-20-017278,March,13,2117,8015 Blake Ln,3,,OIS,Open,Justin Riggs,W,M,30,U,U,"Nathaniel Lovewell,Joseph Borst,Rebecca Davis",W,M,31,U,6,Y,Officers responded to a reported domestic viol...,3
2,20-024,80-20-019370,March,25,1630,3030 Talisman Road,6,,OIS,Open,Byron Johnson,B,M,31,U,Firearm,"Brian Evanoff,Chad Stewart,Michael Woodard",W,M,40,U,9,N,Officers attempted to take wanted felony suspe...,3
3,20-026,80-20-020215,March,30,1206,200 Block E. Grey Street,1,,OIS,Open,Jessie Stringfield,W,M,44,U,Firearm,Michael Johnson,W,M,41,U,11,Y,Suspect was a known fugitve with outstanding b...,1
4,20-030,80-20-023475,April,17,1631,2700 Block S. 7th Street,4,,OIS,Open,Kendrick Holloway,B,M,42,U,Firearm,Joseph Tapp,W,M,48,U,12,N,As part of an on-going narcotics investigation...,1


In [98]:
louisville['suspect_weapon'].value_counts()

Handgun          26
Firearm          15
Vehicle           7
None              4
Rifle             3
U                 2
Shotgun           2
Knife             2
Metal Rod         1
Knives            1
Knife/Saw         1
Handgun/Rifle     1
Metal Pole        1
Screwdriver       1
BB Gun            1
Name: suspect_weapon, dtype: int64

In [99]:
investigation_map = {
    'OIS'                                        :'Unknown',
    'SHOOTING INVESTIGATION - LMPD INVOLVED'     :'shooting',
    'DEATH INVESTIGATION-LMPD INVOLVED'          :'homicide',
    'SHOOTING INVESTIGATION-LMPD INVOLVED'       :'shooting',
    'SHOOTING INVESTIGATION- LMPD INVOLVED'      :'shooting',
    'SHOOTING INVESTIGATIONS - LMPD INVOLVED'    :'shooting',
    'DEATH INVESTIGATION - LMPD INVOLVED'        :'homicide',
    'Assault 1st-Police'                         :'assault',
    'Wanton Endangrerment 1st'                   :'reckless-endanderment',
    'Assault 1st'                                :'assault'
}

In [100]:
suspect_weapon_map = {
    'Handgun'          :'firearm',
    'Firearm'          :'firearm',
    'Vehicle'           :'other',
    'None'              :'none',
    'Rifle'             :'firearm',
    'Knife'             :'melee',
    'Shotgun'           :'firearm',
    'U'                 :'unknown',
    'Metal Rod'         :'melee',
    'Handgun/Rifle'     :'firearm',
    'BB Gun'            :'other',
    'Metal Pole'        :'melee',
    'Screwdriver'       :'melee',
    'Knife/Saw'         :'melee',
    'Knives'            :'melee'
}

In [101]:
louisville['Investigation Type'] = louisville['Investigation Type'].map(investigation_map)
louisville['suspect_weapon'] = louisville['suspect_weapon'].map(suspect_weapon_map)

In [102]:
louisvilleEncoded = louisville.copy()

## Springfiled Missouri

In [103]:
springfield = dictOfDfs['Springfiled Missouri'].copy()
springfield.info()
springfieldEncoded = springfield.drop([0,3,6,8,10,13,15,18,20,22,24,26,28]).reset_index()
springfieldEncoded

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 31 entries, 0 to 30
Data columns (total 16 columns):
 #   Column                                                        Non-Null Count  Dtype  
---  ------                                                        --------------  -----  
 0   CASE                                                          18 non-null     object 
 1   DATE                                                          18 non-null     object 
 2   OFFICER: #/RACE/SEX                                           20 non-null     object 
 3                                                        SUMMARY  18 non-null     object 
 4   SUSPECT: RACE/GENDER                                          18 non-null     object 
 5   suspect_sex                                                   18 non-null     object 
 6   suspect_race                                                  18 non-null     object 
 7   GREENE  COUNTY PROSECUTING DISPOSITION                        18 non-null

Unnamed: 0,index,CASE,DATE,OFFICER: #/RACE/SEX,SUMMARY,SUSPECT: RACE/GENDER,suspect_sex,suspect_race,GREENE COUNTY PROSECUTING DISPOSITION,number_injured,number_killed,suspect_armed,incident_initiation,num_white_officers,num_+pacific_island_officers,num_male_officers,num_female_officers
0,1,08-33295,7/24/2008,1 W/M,Officers responded to an assault. Suspect runs...,White/Male,male,white,Justified,0.0,1.0,melee,assault,1.0,0.0,1.0,0.0
1,2,10-05301,2/8/2010,1 W/M,Suspect of an assault was fleeing in a vehicle...,White/Male,male,white,Justified,1.0,0.0,other,assault,3.0,0.0,1.0,2.0
2,4,10-17643,5/1/2010,1 W/M,Officer stops suspected DWI driver. The rear p...,Black/Male,male,black,Justified,0.0,1.0,firearm,traffic_stop,1.0,0.0,1.0,0.0
3,5,13-7501,2/22/2013,9 W/M,Suspect burglarizes businesses and steals a ve...,White/Male,male,white,Justified,1.0,1.0,firearm,burglary,10.0,0.0,9.0,1.0
4,7,14-17329,5/9/2014,1 W/M,Officer sent to an address due to a panhandler...,White/Male,male,white,Charges Filed,1.0,0.0,none,other,1.0,0.0,1.0,0.0
5,9,14-17912,2014-05-13 00:00:00,2 W/M,Officers respond to a storage facility in rega...,White/Female,female,white,Justified,0.0,1.0,firearm,shooting,2.0,0.0,2.0,0.0
6,11,15-6228,2/17/2015,1 W/M,Officers respond to a suspected burglary. An O...,White/Male,male,white,Justified,0.0,1.0,unknown,burglary,1.0,0.0,1.0,0.0
7,12,15-44203,11/4/2015,2 W/M,Officer stops at what he believes is a traffic...,White/Male,male,white,Justified,0.0,1.0,firearm,traffic_stop,2.0,0.0,2.0,0.0
8,14,15-47472,11/25/2015,1 W/M,Officer responds to a domestic disturbance. Wh...,White/Male,male,white,Self inflicted with officer's duty weapon,0.0,1.0,melee,dv,1.0,0.0,1.0,0.0
9,16,16-07945,2016-02-27 00:00:00,1 W/M,Officer noticed a suspicious vehicle while out...,White/Male,male,white,Justified,0.0,0.0,other,suspicious_situation,2.0,0.0,1.0,0.0


In [104]:
for x in springfield.columns:
    print(springfield[x].value_counts())

19-50753     1
16-07945     1
18-00613     1
15-6228      1
10-05301     1
19-13200     1
08-33295     1
17-00024     1
17-20253     1
13-7501      1
14-17912     1
18-21518     1
19-7404      1
15-47472     1
15-44203     1
14-17329     1
10-17643     1
17-35394     1
Name: CASE, dtype: int64
 6/4/2018              1
11/25/2015             1
2014-05-13 00:00:00    1
2/22/2013              1
2016-02-27 00:00:00    1
 2/8/2010              1
2019-12-24 00:00:00    1
2019-02-25 00:00:00    1
5/11/2017              1
5/1/2010               1
1/1/2017               1
2/17/2015              1
 1/5/2018              1
5/9/2014               1
2017-08-12 00:00:00    1
11/4/2015              1
 7/24/2008             1
2019-04-09 00:00:00    1
Name: DATE, dtype: int64
1 W/M                   4
2 W/M                   3
 1 W/M                  2
1 W/M                   2
2 W/M                   1
9 W/M                   1
1 W/M; 1 W/F            1
3 W/M                   1
1 W/F                 

## Charolette North Carolina

In [105]:
charolette1 = dictOfDfs['Charolette North Carolina 1']
charolette2 = dictOfDfs['Charolette North Carolina offic']
charolette3 = dictOfDfs['Charolette North Carolina suspe']

In [106]:
charolette1.head()

Unnamed: 0,X,Y,INCIDENT_ID,YEAR_MONTH,LOCATION,DA_LEGAL_REVIEW,NARRATIVE,ObjectID,YR,MN,Latitude,Longitude,individual_armed,incident_initiation
0,1447824.0,552036.475729,7307,2008-08,"ORVIS STREET, CHARLOTTE 28216",Legally Justified,Officers served a high risk search warrant. Up...,1,2008,8,35.252694,-80.849701,firearm,warrant_service
1,1467399.0,553624.475134,7699,2008-11,"N TRYON STREET, CHARLOTTE 28213",Legally Justified,"Officers attempted to make a traffic stop, but...",2,2008,11,35.258041,-80.784238,firearm,traffic_stop
2,1484298.0,531574.479675,9687,2010-03,"E W.T. HARRIS BOULEVARD, CHARLOTTE 28227",No DA review,An officer was circulating an apartment comple...,3,2010,3,35.198285,-80.726353,firearm,burglary
3,1479503.0,545554.476749,10386,2010-07,"BARRINGTON DRIVE, CHARLOTTE 28278",Legally Justified,Officers observed a large crowd of individuals...,4,2010,7,35.236462,-80.743223,firearm,suspicious_situation
4,1446813.0,523173.481947,10392,2010-07,"PARK ROAD, CHARLOTTE 28209",Legally Justified,Officers responded to a robbery from business ...,5,2010,7,35.17335,-80.851284,firearm,robbery


In [107]:
charolette2OneHot = pd.get_dummies(charolette2[['INCIDENT_ID','OFFICER_RACE', 'OFFICER_GENDER']]).groupby('INCIDENT_ID').sum().reset_index()
charolette3OneHot = pd.get_dummies(charolette3[['INCIDENT_ID','INDIVIDUAL_RACE', 'INDIVIDUAL_GENDER', 'INDIVIDUAL_INJURY_TYPE']]).groupby('INCIDENT_ID').sum().reset_index()

In [108]:
charoletteMerge1 = pd.merge(charolette1,charolette2OneHot,on = 'INCIDENT_ID')
charoletteEncoded = pd.merge(charoletteMerge1,charolette3OneHot, on  = 'INCIDENT_ID').copy()

In [109]:
charoletteEncoded

Unnamed: 0,X,Y,INCIDENT_ID,YEAR_MONTH,LOCATION,DA_LEGAL_REVIEW,NARRATIVE,ObjectID,YR,MN,Latitude,Longitude,individual_armed,incident_initiation,OFFICER_RACE_American Indian/Alaskan Native,OFFICER_RACE_Asian or Pacific islander,OFFICER_RACE_Black,OFFICER_RACE_Hispanic,OFFICER_RACE_Undisclosed,OFFICER_RACE_White,OFFICER_GENDER_Female,OFFICER_GENDER_Male,OFFICER_GENDER_Undisclose,INDIVIDUAL_RACE_Asian or Pacific islander,INDIVIDUAL_RACE_Black,INDIVIDUAL_RACE_Hispanic,INDIVIDUAL_RACE_Unknown,INDIVIDUAL_RACE_White,INDIVIDUAL_GENDER_Female,INDIVIDUAL_GENDER_Male,INDIVIDUAL_INJURY_TYPE_Fatal Injury,INDIVIDUAL_INJURY_TYPE_Miss,INDIVIDUAL_INJURY_TYPE_Non-Fatal Injury
0,1.447824e+06,552036.475729,7307,2008-08,"ORVIS STREET, CHARLOTTE 28216",Legally Justified,Officers served a high risk search warrant. Up...,1,2008,8,35.252694,-80.849701,firearm,warrant_service,0,0,0,0,0,2,0,2,0,0,1,0,0,0,0,1,0,0,1
1,1.467399e+06,553624.475134,7699,2008-11,"N TRYON STREET, CHARLOTTE 28213",Legally Justified,"Officers attempted to make a traffic stop, but...",2,2008,11,35.258041,-80.784238,firearm,traffic_stop,0,0,1,0,0,3,0,4,0,0,1,0,0,0,0,1,0,0,1
2,1.484298e+06,531574.479675,9687,2010-03,"E W.T. HARRIS BOULEVARD, CHARLOTTE 28227",No DA review,An officer was circulating an apartment comple...,3,2010,3,35.198285,-80.726353,firearm,burglary,0,1,0,0,0,0,0,1,0,0,0,1,0,0,0,1,0,1,0
3,1.479503e+06,545554.476749,10386,2010-07,"BARRINGTON DRIVE, CHARLOTTE 28278",Legally Justified,Officers observed a large crowd of individuals...,4,2010,7,35.236462,-80.743223,firearm,suspicious_situation,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1
4,1.446813e+06,523173.481947,10392,2010-07,"PARK ROAD, CHARLOTTE 28209",Legally Justified,Officers responded to a robbery from business ...,5,2010,7,35.173350,-80.851284,firearm,robbery,0,0,0,0,0,1,0,1,0,0,1,0,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78,1.446625e+06,558297.550629,24440,2019-03,"BEATTIES FORD ROAD, CHARLOTTE 28214",Legally Justified,"On Monday, March 25, 2019, at approximately 9 ...",79,2019,3,35.269833,-80.854107,firearm,weapon,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,1,0,0
79,1.454874e+06,575554.153696,25461,2019-11,"OAKBURN DRIVE, CHARLOTTE 28269",Pending Review,The State Bureau of Investigation (SBI) is con...,80,2019,11,35.317660,-80.827541,firearm,warrant_service,0,0,0,0,0,1,1,0,0,0,1,0,0,0,0,1,0,1,0
80,1.452927e+06,554755.475067,21414,2017-03,"JUSTICE AVENUE, CHARLOTTE 28206",Legally Justified,Metro division officers responded to a suicide...,81,2017,3,35.260424,-80.832779,firearm,suicide,0,0,0,0,0,1,0,1,0,0,0,0,0,1,0,1,1,0,0
81,1.421752e+06,545870.092961,23428,2018-07,"WILKINSON BOULEVARD, CHARLOTTE 28208",Legally Justified,Officers with the Airport Division were dispat...,82,2018,7,35.234388,-80.936614,vehicle,suspicious_situation,0,1,0,0,0,0,0,1,0,0,1,0,0,0,0,1,0,0,1


In [112]:
for x in charoletteEncoded.columns:
    print(charoletteEncoded[x].value_counts())

1.480102e+06    1
1.479570e+06    1
1.444824e+06    1
1.451438e+06    1
1.452927e+06    1
               ..
1.460048e+06    1
1.455166e+06    1
1.408026e+06    1
1.449903e+06    1
1.422246e+06    1
Name: X, Length: 82, dtype: int64
541986.478048    1
562061.473776    1
520535.482588    1
541119.306741    1
558297.550629    1
                ..
566306.472312    1
541980.477510    1
545554.476749    1
533403.479333    1
526257.480946    1
Name: Y, Length: 82, dtype: int64
22525    1
1953     1
3490     1
2596     1
10789    1
        ..
9687     1
20825    1
22580    1
3932     1
20736    1
Name: INCIDENT_ID, Length: 83, dtype: int64
2016-01    3
2010-07    2
2013-09    2
2008-05    2
2016-08    2
          ..
2019-03    1
2007-10    1
2015-03    1
2016-09    1
2010-03    1
Name: YEAR_MONTH, Length: 68, dtype: int64
WILKINSON BOULEVARD, CHARLOTTE 28208    4
ALBEMARLE ROAD, CHARLOTTE 28212         3
E TRADE STREET, CHARLOTTE 28202         2
BEATTIES FORD ROAD, CHARLOTTE 28214     1
JORDAN

## All Cities Combined

In [158]:
pd.concat([denverEncoded, 
           la_countyEncoded,
           jacksonvilleEncoded,
           seattleEncoded,
           tacomaEncoded,
           cincinnatiEncoded,
           indianapolisEncoded,
           orlandoEncoded,
           springfieldEncoded,
           portlandEncoded,
           louisvilleEncoded,
           springfieldEncoded,
           charoletteEncoded]
         )

Unnamed: 0,id,city,state,date,time,day_of_week,call_origination,incident_initiation,address,officer_first_last,role,officer_rank,officer_badge,officer_on_duty,in_uniform,suspect_gender,suspect_age,suspect_race,suspect_ethnicity,suspect_armed,suspect_shots_fired,injury,justified,x_coord,y_coord,Corporal,Detective,Officer,Sergeant,Technician,suspect_gender_F,suspect_gender_M,suspect_race_A,suspect_race_B,suspect_race_W,suspect_ethnicity_H,suspect_ethnicity_NH,suspect_armed_firearm,suspect_armed_other,suspect_armed_unarmed,zip,district,geo_location,unit_id,unit_name,num_officers_involved,officer_race,num_of_citizens,citizen_race,num_citizens_wounded,num_citizens_killed,citizen_armed,citizen_armed_description,unit,num_white_officers,num_black_officers,num_hispanic_officers,num_filipino_officers,num_asian-pacific_officers,num_isl_officers,num_asian-pacific-isl_officers,num_unknown_officers,num_white_persons,num_black_persons,num_hispanic_persons,num_unknown_persons,id2,street_number,street_name,street_type,street_direction,apartment_number,zone,sub_zone,incident_location,officer_name,officer_sex,officer_age,officer_year_of_services,citizen_name,citizen_sex,citizen_age,citizen_injured,citizen_killed,UOFpolicy,training?,IA?,SAO,RTR,num_asian_officers,average_officer_age,average_officer_tenure,num_white_citizens,num_black_citizens,num_hispanic_citizens,num_asian_citizens,average_citizen_age,thing1,date/time,officer_year_of_service,officer_injured,num_of_rounds,citizen_DOB,citizen_weapon,on_duty,disposition,officer_disciplined,summary,officer_rank_detective,officer_rank_lieutenant,officer_rank_officer,officer_rank_sergeant,officer_rank_student_officer,officer_sex_Female,officer_sex_Male,officer_race_asian,officer_race_black,officer_race_latino,officer_race_multi-racial,officer_race_native_american,officer_race_pacific-islander,officer_race_white,officer_injured_No,officer_injured_Yes,officer_disciplined_Missing,officer_disciplined_No,officer_disciplined_Yes,citizen_sex_Female,citizen_sex_Male,citizen_race_asian,citizen_race_black,citizen_race_hispanic,citizen_race_islander,citizen_race_native_american,citizen_race_unknown,citizen_race_white,citizen_weapon_explosive,citizen_weapon_firearm,citizen_weapon_melee,citizen_weapon_other,sector,officer_years_of_service,bad_gender,corrected_citizen_gender,corrected_citizen_race,corrected_citizen_sex,officer_race_Asian,officer_race_Black,officer_race_Hispanic,officer_race_White,corrected_citizen_race_Asian,corrected_citizen_race_Black,corrected_citizen_race_Hispanic,corrected_citizen_race_Nat Am,corrected_citizen_race_Native Amer,corrected_citizen_race_White,corrected_citizen_sex_Female,corrected_citizen_sex_Male,incident_description,officer_race_BLACK,officer_race_HISPANIC/LATINO,officer_race_WHITE,officer_sex_FEMALE,officer_sex_MALE,citizen_race_ASIAN,citizen_race_BLACK,citizen_race_HISPANIC/LATINO,citizen_race_UNKNOWN,citizen_race_WHITE,citizen_sex_FEMALE,citizen_sex_MALE,citizen_sex_OTHER,division,shift,citizen_injury,officer_injury,citizent_age,City,State,Case #,Date,Address,Incident Type,Number of Officers Involved,Officer Name,Officer Race,Ethnicity,Officer Gender,Officer Age,Officer Tenure,Number of Suspects Involved,Suspect Name,Suspect Race,Suspect Gender,Suspect Age,Suspect's Weapon,Suspect Hit,Fatal,Officer Hit,Fatal (Officer),Review Letter,Georeferenced Column,officers_white,officers_black,officers_other,officers_race_exempt,officers_male,officers_female,officers_sex_unknown,officers_injured,officers_killed,suspects_white,suspects_black,suspects_other,suspects_race_unknown,suspects_male,suspects_female,suspects_sex_unknown,suspects_injured,suspects_killed,index,CASE,DATE,OFFICER: #/RACE/SEX,SUMMARY,SUSPECT: RACE/GENDER,suspect_sex,GREENE COUNTY PROSECUTING DISPOSITION,number_injured,number_killed,num_+pacific_island_officers,num_male_officers,num_female_officers,Any Officer Fatal Injuries?,Any Officers Injured on Case?,Day of Date Time,Deadly Force Type,Did Subject Fire Shots at Officers?,General Location,Grand Jury Transcripts link,Initial Call Type,Internal Affairs Investigation Link,Internal Affairs Investigation Link 2 (as needed),Officer Sex,Online PDF Link,Subject Name,Subject Race,Subject Sex,Subject Weapon Type (NIBRS),Was Subject Actually Armed?,Was Subject Injured?,Was Subject Injury Fatal?,Was Subject Percieved as Armed?,Number of Officers Injured on Case,Officer Tenure (yrs),Subject Age,Officer Race_Asian,Officer Race_Black,Officer Race_Hispanic,Officer Race_Male,Officer Race_Undisclosed,Officer Race_White,Officer Sex_Female,Officer Sex_Male,Subject Race_Black,Subject Race_Hispanic,Subject Race_White,Subject Sex_Female,Subject Sex_Male,Was Subject Injured?_No,Was Subject Injured?_Yes,Was Subject Injury Fatal?_No,Was Subject Injury Fatal?_Yes,#,Incident #,Month,Day,Time of Occurrence,Address of incident,Division,Beat,Investigation Type,Case Status,suspect_Name,suspect_weapon,officer_ethnicity,Lethal Y/N,Narrative,num_of_officers,X,Y,INCIDENT_ID,YEAR_MONTH,LOCATION,DA_LEGAL_REVIEW,NARRATIVE,ObjectID,YR,MN,Latitude,Longitude,individual_armed,OFFICER_RACE_American Indian/Alaskan Native,OFFICER_RACE_Asian or Pacific islander,OFFICER_RACE_Black,OFFICER_RACE_Hispanic,OFFICER_RACE_Undisclosed,OFFICER_RACE_White,OFFICER_GENDER_Female,OFFICER_GENDER_Male,OFFICER_GENDER_Undisclose,INDIVIDUAL_RACE_Asian or Pacific islander,INDIVIDUAL_RACE_Black,INDIVIDUAL_RACE_Hispanic,INDIVIDUAL_RACE_Unknown,INDIVIDUAL_RACE_White,INDIVIDUAL_GENDER_Female,INDIVIDUAL_GENDER_Male,INDIVIDUAL_INJURY_TYPE_Fatal Injury,INDIVIDUAL_INJURY_TYPE_Miss,INDIVIDUAL_INJURY_TYPE_Non-Fatal Injury
0,2020182,Denver,Colorado,2020-01-01,120,Wednesday,Citizen-Initiated,weapon,901 S Irving St,"Archuleta, Diego",Officer,Officer,P16061,On-Duty City Paid,Police Uniform,M,25,W,H,firearm,Yes,Not Injured,Investigation Pending,3132258,1680228,0.0,0.0,2.0,0.0,0.0,0.0,3.0,0.0,0.0,3.0,2.0,1.0,2.0,1.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1,201516702,Denver,Colorado,2015-01-09,1940,Friday,Officer-Initiated,traffic_stop,5081 N Crown Blvd,"Kindell, Sharod",Subject,Officer,P08002,On-Duty City Paid,Police Uniform,M,24,B,NH,other,Yes,Injured,http://www.denverda.org/News_Release/Decision_...,3188620,1712709,0.0,0.0,1.0,0.0,0.0,0.0,2.0,0.0,1.0,1.0,0.0,2.0,1.0,1.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
2,201549266,Denver,Colorado,2015-01-26,700,Monday,Citizen-Initiated,suspicious_situation,2500 blk N Newport St / N Niagara St Alley,"Hernandez, Jessica",Subject,Officer,P05076,On-Duty City Paid,Police Uniform,F,17,W,H,other,Yes,Deceased,https://www.denverda.org/wp-content/uploads/de...,3165976,1700000,0.0,0.0,2.0,0.0,0.0,1.0,2.0,0.0,0.0,3.0,1.0,2.0,2.0,1.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
3,201621883,Denver,Colorado,2016-01-11,1529,Monday,Officer-Initiated,traffic_stop,2601 N Zuni St,"Lonergan, Ramone",Subject,Detective,P96014,On-Duty City Paid,Plain Clothes,M,32,W,NH,firearm,Yes,Deceased,https://www.denverda.org/wp-content/uploads/de...,3135850,1700412,0.0,1.0,1.0,0.0,0.0,0.0,3.0,0.0,0.0,3.0,1.0,2.0,3.0,0.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
4,201889020,Denver,Colorado,2018-02-06,1756,Tuesday,Citizen-Initiated,burglary,1873 S Alcott St,"Duran, Alexander",Subject,Corporal,P98027,On-Duty City Paid,Police Uniform,M,29,W,H,other,No,Deceased,https://www.denverda.org/wp-content/uploads/de...,3135881,1673982,1.0,0.0,0.0,0.0,0.0,0.0,2.0,0.0,0.0,2.0,1.0,1.0,1.0,1.0,0.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
78,,,,NaT,,,,weapon,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.446625e+06,558297.550629,24440.0,2019-03,"BEATTIES FORD ROAD, CHARLOTTE 28214",Legally Justified,"On Monday, March 25, 2019, at approximately 9 ...",79.0,2019.0,3.0,35.269833,-80.854107,firearm,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0
79,,,,NaT,,,,warrant_service,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.454874e+06,575554.153696,25461.0,2019-11,"OAKBURN DRIVE, CHARLOTTE 28269",Pending Review,The State Bureau of Investigation (SBI) is con...,80.0,2019.0,11.0,35.317660,-80.827541,firearm,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0
80,,,,NaT,,,,suicide,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.452927e+06,554755.475067,21414.0,2017-03,"JUSTICE AVENUE, CHARLOTTE 28206",Legally Justified,Metro division officers responded to a suicide...,81.0,2017.0,3.0,35.260424,-80.832779,firearm,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,1.0,0.0,0.0
81,,,,NaT,,,,suspicious_situation,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1.421752e+06,545870.092961,23428.0,2018-07,"WILKINSON BOULEVARD, CHARLOTTE 28208",Legally Justified,Officers with the Airport Division were dispat...,82.0,2018.0,7.0,35.234388,-80.936614,vehicle,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0
