In [None]:
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
import sklearn.metrics as skm
import sklearn.preprocessing as skp
from sklearn.model_selection import train_test_split, GridSearchCV, KFold, ParameterGrid
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
from sklearn.linear_model import LinearRegression, LogisticRegressionCV
from sklearn.naive_bayes import GaussianNB
from sklearn.impute import SimpleImputer
from sklearn import set_config
from bs4 import BeautifulSoup

import pandas as pd
import numpy as np

# Notebook display settings: cap printed rows at 50, never truncate columns,
# and render scikit-learn estimators as diagrams.
pd.set_option('display.max_rows', 50)
pd.set_option('display.max_columns', None)
set_config(display='diagram')

# Worksheet names used as lookup keys into the workbook. They are runtime
# strings -- presumably spelled exactly like the workbook's tabs -- so the
# unusual spellings must not be "corrected" here.
sheets = [
    'Denver Colorado',
    'Los Angeles County California',
    'Jacksonville County Florida',
    'Seattle County Washington',
    'Tacoma Washington',
    'Atlanta Georgia',
    'Bloomington Illinois',
    'Cincinnati Ohio',
    'Dallas Texas',
    'Hampton',
    'Hartford Conneticut',
    'Indianapolis Indiana',
    'Orlando Florida',
    'Sparks Nevada',
    'Portland Oregon',
    'Louisville Kentucky',
    'Springfiled Missouri',
    'Charolette North Carolina 1',
    'Charolette North Carolina suspe',
    'Charolette North Carolina offic',
]

# Passing the whole list to one read_excel call opens the workbook a single
# time and returns a {sheet name: DataFrame} dict, instead of re-opening and
# re-parsing the file once per sheet.
dictOfDfs = pd.read_excel('officer_involved_shooting_all_info.xlsx', sheet_name=sheets)


# for key in dictOfDfs:
#     display(dictOfDfs[key].head())

## Denver

# Work on a private copy so the frame cached in dictOfDfs stays untouched.
denver = dictOfDfs['Denver Colorado'].copy()

# Positional rename of the sheet's 25 columns to snake_case identifiers.
denver.columns = [
    'city', 'state', 'id', 'date', 'time', 'day_of_week',
    'call_origination', 'incident_initiation', 'address',
    'officer_first_last', 'role', 'officer_rank', 'officer_badge',
    'officer_on_duty', 'in_uniform',
    'suspect_gender', 'suspect_age', 'suspect_race', 'suspect_ethnicity',
    'suspect_armed', 'suspect_shots_fired',
    'injury', 'justified', 'x_coord', 'y_coord',
]

# for x in denver.columns:
#     display(denver[x].value_counts())

### Create categorical variable maps

# Raw Denver `suspect_armed` label -> coarse category
# ('firearm', 'other' or 'unarmed').
suspect_armed_map = {
    'Firearm': 'firearm',
    'Knife': 'other',
    'None': 'unarmed',
    'Replica or Air Gun': 'other',
    'Simulated Weapon': 'unarmed',
    'Motor Vehicle': 'other',
    'Blunt Object': 'other',
    'Stun Gun': 'other',
    'Shotgun': 'firearm',
}

# Raw Denver `incident_initiation` label -> coarse incident category.
# Keys are matched verbatim against the data, so spelling variants and the
# trailing space in 'Robbery - Car Jacking ' are intentional lookup keys.
incident_initiation_map = {
    'Weapon / Concealed Weapon': 'weapon',
    'Warrant': 'warrant',
    'Vehicle Stop': 'traffic_stop',
    'Shots Fired': 'shooting',
    'Shooting': 'shooting',
    'Street Robbery': 'robbery',
    'Robbery - In Progress / Just Occurred': 'robbery',
    'Burglary - In Progess': 'burglary',
    'Suspicious Vehicle': 'suspicious_situation',
    'Surveillance': 'police_surveillance',
    'Domestic Violence - In Progress': 'dv',
    'Disturbance': 'disturbance',
    'Burglary - In Progress': 'burglary',
    'Bank Robbery Suspect': 'bolo',
    'Shots Heard/Fired': 'shooting',
    'Suicidal Person / Suicide': 'suicide',
    'Vehicle Check': 'traffic_stop',
    'Shot Spotter': 'shooting',
    'Robbery - Car Jacking ': 'robbery',
    'Family Disturbance': 'dv',
    'Harassment in Progress Involving a Weapon': 'weapon',
    'Burglary-In Progress': 'burglary',
    'Burglary/Suspicious Occurrence': 'burglary',
    'Man with a Gun': 'weapon',
    'BOLO (Be on the lookout)': 'bolo',
    'Business Robbery': 'robbery',
}

# Collapse the raw labels into the coarse categories of the two maps defined
# earlier in the file; NaN cells are skipped (na_action='ignore') and labels
# missing from a map become NaN.
denver['incident_initiation'] = denver['incident_initiation'].map(incident_initiation_map, na_action='ignore')
denver['suspect_armed'] = denver['suspect_armed'].map(suspect_armed_map, na_action='ignore')

denver.info()

# One-hot encode the officer rank and the suspect attributes of every row,
# then sum the indicator columns per incident id.
officers = pd.get_dummies(denver['officer_rank'])
suspects = pd.get_dummies(denver[['suspect_gender', 'suspect_race', 'suspect_ethnicity', 'suspect_armed']])
officerSuspectOneHot = pd.concat([denver['id'], officers, suspects], axis=1)

# The `axis` keyword of DataFrame.groupby is deprecated; grouping rows is the
# default, so it is simply omitted. dropna=False keeps rows whose id is NaN.
dfDenver1 = officerSuspectOneHot.groupby(by=['id'], dropna=False).sum()
dfDenver2 = denver.groupby(['id'], dropna=False).first()

# First row of each incident joined with that incident's summed indicators.
dfDenver = pd.merge(dfDenver2, dfDenver1, how='left', on='id').reset_index()

# Preview only: the result is not assigned, so dfDenver itself keeps these
# columns.
dfDenver.drop(columns=[
    'address',
    'officer_badge',
    'officer_on_duty',
    'in_uniform',
    'suspect_gender',
    'suspect_ethnicity',
    'suspect_armed',
    'justified',
    'role',
    'officer_rank',
    'suspect_race',
])


## LA County

# Private copy of the LA County sheet; the bare name below is a notebook
# display expression.
la_county = dictOfDfs['Los Angeles County California'].copy()
la_county

# Cast PERSON RACE to str -- presumably so every cell supports the .split()
# call made by get_specific_type further down.
# NOTE(review): astype('str') should render missing values as the literal
# string 'nan' -- confirm against the data.
la_county['PERSON RACE'] = la_county['PERSON RACE'].astype('str')

### Category Mappings to limit features

def func(x):
    """Return ``x`` if it is one of twelve whitelisted city names, else 'other'.

    The comparison is exact (case- and whitespace-sensitive).
    """
    kept_cities = (
        'LOS ANGELES',
        'COMPTON',
        'LANCASTER',
        'PALMDALE',
        'EAST LOS ANGELES',
        'PICO RIVERA',
        'BELLFLOWER',
        'LYNWOOD',
        'NORWALK',
        'PARAMOUNT',
        'LAKEWOOD',
        'WHITTIER',
    )
    return x if x in kept_cities else 'other'

def func1(x):
    """Return ``x`` if it is one of seventeen whitelisted unit names, else 'other'.

    The comparison is exact; note that 'EAST LA STN ' is listed with a
    trailing space, so the un-padded spelling falls into 'other'.
    """
    kept_units = (
        'CENTURY STN',
        'COMPTON STN',
        'EAST LA STN ',
        'LAKEWOOD STN',
        'SOUTH LOS ANGELES STATION',
        'OPERATION SAFE STREETS BUREAU',
        'PICO RIVERA STN',
        'INDUSTRY STN',
        'LANCASTER STN',
        'PALMDALE STN',
        'SPECIAL ENFORCEMENT BUR',
        'TEMPLE CITY STN',
        'SANTA CLARITA VALLEY STN',
        'MAJOR CRIMES BUREAU',
        'CARSON STN',
        'NORWALK REGIONAL STN',
        'TRANSIT SERVICES BUREAU',
    )
    return x if x in kept_units else 'other'

# LA County weapon category code -> coarse category
# ('firearm', 'other', 'unarmed' or 'unknown').
weapon_map = {
    'A-1': 'firearm',
    'A-2': 'firearm',
    'A-4': 'unknown',
    'B-2': 'other',
    'B-1': 'other',
    'C-1': 'unarmed',
    'A-3': 'unknown',
    'D-1': 'unarmed',
    'B-4': 'unknown',
    'E-1': 'unarmed',
}

def get_specific_type(df, column, elementOfInterest, seperating_value=','):
    """Count, per row, how often a token occurs in a delimited string column.

    Each cell of ``df[column]`` is split on ``seperating_value`` and the
    number of pieces exactly equal to ``elementOfInterest`` is recorded.
    Pieces are compared verbatim (no stripping or case folding).

    Parameters:
        df: DataFrame holding the column.
        column: label of the column whose cells are delimited strings.
        elementOfInterest: token to count in each cell.
        seperating_value: delimiter passed to ``str.split`` (default ',').

    Returns:
        A list with one int per row of ``df``, in row order. Cells that are
        not strings (for example NaN for a missing value) count as 0
        instead of raising ``AttributeError`` on ``.split``.
    """
    counts = []
    for cell in df[column]:
        if isinstance(cell, str):
            counts.append(cell.split(seperating_value).count(elementOfInterest))
        else:
            # Missing / non-text cell: nothing to split, so no occurrences.
            counts.append(0)
    return counts

### Officer encoding

# Series.map returns a new Series, so the collapsed categories have to be
# assigned back to take effect (the results were previously discarded).
la_county['CITY'] = la_county['CITY'].map(func, na_action='ignore')
la_county['HANDLING UNIT NAME'] = la_county['HANDLING UNIT NAME'].map(func1, na_action='ignore')
la_county['WEAPON INVOLVED CATEGORY'] = la_county['WEAPON INVOLVED CATEGORY'].map(weapon_map, na_action='ignore')

# Per-incident head count of deputies by race, taken from the comma-separated
# DEPUTY RACE string.
la_county['num_white_officers'] = get_specific_type(la_county, 'DEPUTY RACE', 'WHITE')
la_county['num_black_officers'] = get_specific_type(la_county, 'DEPUTY RACE', 'BLACK')
la_county['num_hispanic_officers'] = get_specific_type(la_county, 'DEPUTY RACE', 'HISPANIC')
la_county['num_filipino_officers'] = get_specific_type(la_county, 'DEPUTY RACE', 'FILIPINO')
la_county['num_asian-pacific_officers'] = get_specific_type(la_county, 'DEPUTY RACE', 'ASIAN-PACIFIC')
la_county['num_isl_officers'] = get_specific_type(la_county, 'DEPUTY RACE', 'ISL')
la_county['num_asian-pacific-isl_officers'] = get_specific_type(la_county, 'DEPUTY RACE', 'ASIAN-PACIFIC-ISL')
la_county['num_unknown_officers'] = get_specific_type(la_county, 'DEPUTY RACE', 'UNKNOWN')

### Subject Encoding

la_county['PERSON RACE']

la_county['num_white_persons'] = get_specific_type(la_county, 'PERSON RACE', 'WHITE')
la_county['num_black_persons'] = get_specific_type(la_county, 'PERSON RACE', 'BLACK')
la_county['num_hispanic_persons'] = get_specific_type(la_county, 'PERSON RACE', 'HISPANIC')
# PERSON RACE was cast with astype('str') earlier in the file, so a missing
# race is the text 'nan'. Searching for None could never match a split token,
# which left this column at 0 for every row.
# NOTE(review): confirm 'nan' is the only "unknown" placeholder in the data.
la_county['num_unknown_persons'] = get_specific_type(la_county, 'PERSON RACE', 'nan')


la_county.info()

# Preview only: the result is not assigned, so la_county keeps these columns.
la_county.drop(columns=[
    'INCIDENT NUMBER',
    'INCIDENT LOCATION',
    'GEO_LOCATION',
    'DEPUTY RACE',
    'PERSON RACE',
    'WEAPON INVOLVED CATEGORY DESC',
])

## Jacksonville

### Intro exploration

# Private copy of the Jacksonville sheet so the frame cached in dictOfDfs is
# not modified by the column assignments that follow.
jacksonville = dictOfDfs['Jacksonville County Florida'].copy()

# Quick look at dtypes / non-null counts and the first rows.
jacksonville.info()
jacksonville.head()

### Combining encoding for categorical variables with too many types. 

# Raw Jacksonville `OriginalIncidentType` label -> coarse incident category.
original_incident_map = {
    'Traffic Violations': 'traffic_stop',
    'All Other': 'other',
    'Robbery': 'robbery',
    'Assault/Battery': 'assault',
    'Armed Robbery': 'robbery',
    'Suspicious Person': 'suspicious_situation',
    'Traffic Stop': 'traffic_stop',
    'Narcotics Investigation': 'police_surveillance',
    'Burglary': 'burglary',
    'Auto Theft': 'robbery',
    'Armed Dispute': 'weapon',
    'Person Shot': 'shooting',
    'Domestic Battery': 'dv',
    'Domestic Violence': 'dv',
    'Aggravated Battery': 'assault',
    'Domestic': 'dv',
    'Intoxicated Person': 'intoxication',
    'Suicide': 'suicide',
    'Armed Person': 'weapon',
    'Armed Prowler': 'weapon',
    'Suicide Threat / Armed Dispute': 'suicide',
    'Murder': 'bolo',
    'Search Warrant': 'warrant',
    'Carjacking': 'robbery',
    'Shots fired': 'shooting',
    'Abduction/Kidnap': 'abduction',
    'Wanted Person': 'bolo',
    'Bomb Investigation': 'police_surveillance',
}

# Raw Jacksonville `SubjectWeapon` label -> coarse weapon category.
# The "no weapon" category is the string 'None', not the None object.
subject_weapon_map = {
    'Handgun': 'firearm',
    'Knife / Cutting Instr': 'melee',
    'Vehicle': 'other',
    'Rifle': 'firearm',
    'Shotgun': 'firearm',
    'Not Applicable (None)': 'None',
    'Firearm (Type Not Stated)': 'firearm',
    'Taser': 'other',
    'Knife / Cutting Instr; Poison (Includes Gas)': 'melee',
    'Replica Handgun': 'other',
    'Knife': 'melee',
    'Personal Weapons (Hands/ Fist/ Feet/Teeth/Etc.); Simulated Weapon': 'None',
    'Sword': 'melee',
    'BB Pistol': 'other',
    'None': 'None',
}

# Overwrite both columns with their collapsed categories; labels that are not
# keys of the corresponding map come back from Series.map as NaN.
jacksonville['SubjectWeapon'] = jacksonville['SubjectWeapon'].map(subject_weapon_map)
jacksonville['OriginalIncidentType'] = jacksonville['OriginalIncidentType'].map(original_incident_map)

def split_num_string(column):
    """Parse '; '-delimited integer strings into lists of ints.

    Parameters:
        column: iterable of cell values (for example a pandas Series).

    Returns:
        A list with one entry per input value. A string whose every
        '; '-separated piece parses with ``int`` becomes a list of ints
        (a single number becomes a one-element list). Any value that cannot
        be handled that way -- a non-string such as an int or NaN, or a
        string with a non-numeric piece -- is passed through unchanged.
    """
    parsed = []
    for value in column:
        try:
            parsed.append([int(piece) for piece in value.split('; ')])
        except (AttributeError, TypeError, ValueError):
            # AttributeError/TypeError: value is not a str; ValueError: a
            # piece is not an integer. Only these expected failures fall
            # back to the raw value -- anything else should surface.
            parsed.append(value)
    return parsed

def check_int_or_list(element):
    """Average every list-like entry; pass every other entry through.

    Parameters:
        element: iterable of values, typically the output of
            ``split_num_string`` (lists of ints mixed with raw scalars).

    Returns:
        A list with one entry per input value: ``sum(v) / len(v)`` when that
        expression can be evaluated, otherwise the value itself (scalars,
        strings, None, NaN and empty lists are returned unchanged).
    """
    averaged = []
    for value in element:
        try:
            averaged.append(sum(value) / len(value))
        except (TypeError, ZeroDivisionError):
            # TypeError: value is not a summable, sized sequence;
            # ZeroDivisionError: empty sequence. Anything else propagates.
            averaged.append(value)
    return averaged

### Encoding poorly formatted string data

#### Officer columns

# --- Officer columns ---------------------------------------------------
# Parse the '; '-delimited age / tenure strings into lists of ints.
for officer_numeric_column in ('OfficerAge', 'OfficerTenure'):
    jacksonville[officer_numeric_column] = split_num_string(jacksonville[officer_numeric_column])

# Per-incident officer head count for each race code in OfficerRace.
for officer_count_column, officer_race_code in (
    ('num_white_officers', 'W'),
    ('num_black_officers', 'B'),
    ('num_hispanic_officers', 'H'),
    ('num_asian_officers', 'A'),
):
    jacksonville[officer_count_column] = get_specific_type(
        jacksonville, 'OfficerRace', officer_race_code, seperating_value='; ')

# Mean of the parsed lists; entries that were not parsed pass through as-is.
jacksonville['average_officer_age'] = check_int_or_list(jacksonville['OfficerAge'])
jacksonville['average_officer_tenure'] = check_int_or_list(jacksonville['OfficerTenure'])


# --- Subject columns ---------------------------------------------------
jacksonville['SubjectAge'] = split_num_string(jacksonville['SubjectAge'])

for subject_count_column, subject_race_code in (
    ('num_white_subjects', 'W'),
    ('num_black_subjects', 'B'),
    ('num_hispanic_subjects', 'H'),
    ('num_asian_subjects', 'A'),
):
    jacksonville[subject_count_column] = get_specific_type(
        jacksonville, 'SubjectRace', subject_race_code, seperating_value='; ')

jacksonville['average_subject_age'] = check_int_or_list(jacksonville['SubjectAge'])

# Notebook display expressions: category frequencies after the collapse maps
# were applied (earlier in the file), then the full frame.
jacksonville.OriginalIncidentType.value_counts()

jacksonville['SubjectWeapon'].value_counts()

jacksonville

# jacksonville.drop(columns = ['IncidentNbr','RTRCaseNbr','StreetNbr', 'StreetName', 'StreetType', 'StreetDirection',
#        'ApartmentNbr','PostalCode', 'Zone',
#        'SubSector', 'IncidentLocation', 'OfficerName','UOFWithinPolicy',
#        'FurtherSituationalTrainingRequired', 'ReferredToIA', 'SAOLetterLinks',
#        'RTRCaseStatus'])

## Seattle Washington

### Intro Exploration

# Private copy of the Seattle sheet, followed by a quick look at its
# dtypes / non-null counts and first rows.
seattle = dictOfDfs['Seattle County Washington'].copy()
seattle.info()
seattle.head()

### Collapsing string categories

# Raw Seattle `Type of Weapon` label -> coarse weapon category
# ('firearm', 'melee', 'other' or 'explosive').
# 'Metal Bar' previously mapped to 'Melee' (capital M), which produced a
# second, separate category next to 'melee' once the column was one-hot
# encoded.
type_of_weapon_map = {
    'Handgun': 'firearm',
    'Knife': 'melee',
    'Gun': 'firearm',
    'Vehicle': 'other',
    'Multiple Types': 'other',
    'Multiple Firearms': 'firearm',
    'Rifle': 'firearm',
    'Metal Bar': 'melee',
    'Grenade, backpack reported to have explosives': 'explosive',
    '.22 caliber pistol': 'firearm',
    'Colt Revolver': 'firearm',
    '6 shot .357 revolver': 'firearm',
    'Rifle w/ bayonet': 'firearm',
    'broken bottle': 'melee',
    'Board': 'melee',
    'Screwdriver': 'melee',
    '9mm semi-automatic': 'firearm',
    'Air soft rifle': 'other',
    '.357 revolver': 'firearm',
    'Semil automatic .38 caliber handgun': 'firearm',
    'Mac-10, 9 mm machine pistol': 'firearm',
}

# Raw Seattle `Rank` label -> normalised rank. Keys are matched verbatim
# against the data, including the data's own misspellings.
rank_map = {
    'Officer': 'officer',
    'POLICE OFFICE': 'officer',
    'Sergeant': 'sergeant',
    'Detective': 'detective',
    'POLICE OFFIVER PROBATION': 'officer',
    'Student Officer': 'student_officer',
    'POLICE OFFICER DETECTIVE': 'detective',
    'POLICE LIEUTENANT': 'lieutenant',
    'FTO': 'officer',
    'POLICE SERGEANT': 'sergeant',
}

# Raw Seattle `Subject Race` label -> normalised race label.
subject_race_map = {
    'White': 'white',
    'Black or African American': 'black',
    'Asian': 'asian',
    'Native American': 'native_american',
    'Not Specified': 'unknown',
    'Hispanic': 'hispanic',
    'Nat Hawaiian/Oth Pac Islander': 'islander',
}

# Raw Seattle `Officer Race` label -> normalised race label. The key
# 'Black or African American ' carries a trailing space.
officer_race_map = {
    'White': 'white',
    'AI/AN': 'native_american',
    'Black or African American ': 'black',
    'Hispanic/Lation': 'latino',
    'Asian/Pacific Islander': 'pacific-islander',
    'Black': 'black',
    'Hispanic or Latino': 'latino',
    'Multi-Racial': 'multi-racial',
    'Asian': 'asian',
    'American Indian/Alaska Native': 'native_american',
    'Two or More Races': 'multi-racial',
    'Nat Hawaiian/Oth Pac Islander': 'pacific-islander',
}

# Apply each collapse map to its column; labels that are not keys of the map
# come back from Series.map as NaN.
for seattle_column, seattle_label_map in (
    ('Type of Weapon', type_of_weapon_map),
    ('Rank', rank_map),
    ('Officer Race', officer_race_map),
    ('Subject Race', subject_race_map),
):
    seattle[seattle_column] = seattle[seattle_column].map(seattle_label_map)

### Encoding Officer and Subject Information

# NOTE(review): 'Incident Number' is passed through get_dummies together with
# the categorical columns; the groupby below needs it to survive as a column,
# which presumably means it is numeric -- confirm.
seattle_officer_columns = [
    'Incident Number',
    'Rank',
    'Officer Gender',
    'Officer Race',
    'Officer Injured',
    'Officer Disciplined?',
]
seattle_subject_columns = ['Subject Gender', 'Subject Race', 'Type of Weapon']

officers = pd.get_dummies(seattle[seattle_officer_columns])
subjects = pd.get_dummies(seattle[seattle_subject_columns])
officersSubjects = pd.concat([officers, subjects], axis=1)

# First row of every incident, side by side with that incident's summed
# indicator columns.
seattle_first_rows = seattle.groupby(['Incident Number']).first()
seattle_indicator_sums = officersSubjects.groupby(['Incident Number']).sum()
seattleEncoded = pd.concat([seattle_first_rows, seattle_indicator_sums], axis=1)
seattleEncoded

# seattle.drop(columns = ['FRB #', 'Incident Number','Blurred Address','Subject DOB','On-duty',
#        'Disposition', 'Officer Disciplined?', 'Summary'])

## Tacoma Washington

### Intro Exploration

# Private copy of the Tacoma sheet, followed by a quick look at its
# dtypes / non-null counts and first 20 rows.
tacoma = dictOfDfs['Tacoma Washington'].copy()
tacoma.info()
tacoma.head(20)

# Print the value frequencies of every column to eyeball the categories.
for columns in tacoma.columns:
    print(tacoma[columns].value_counts())

### Fixing Data Switching between columns

def the_switchero(df, column1):
    """Un-swap rows whose citizen gender and race landed in each other's column.

    A row is flagged when its ``column1`` value is exactly 'Male' or
    'Female'. For flagged rows the values of 'Citizen Race' and
    'Citizen Gender' are exchanged; other rows are passed through.

    Side effect: writes the 0/1 flag into ``df['bad_gender']``.

    Parameters:
        df: DataFrame with 'Citizen Race' and 'Citizen Gender' columns.
        column1: label of the column inspected for gender words.

    Returns:
        ``(corrected_gender, corrected_race)`` -- two lists in row order.
    """
    gender_words = ('Male', 'Female')
    df['bad_gender'] = [1 if value in gender_words else 0 for value in df[column1]]

    corrected_gender = []
    corrected_race = []
    for _, row in df.iterrows():
        gender_value = row['Citizen Gender']
        race_value = row['Citizen Race']
        if row['bad_gender'] == 1:
            # The two cells were entered in each other's column: swap back.
            gender_value, race_value = race_value, gender_value
        corrected_gender.append(gender_value)
        corrected_race.append(race_value)
    return corrected_gender, corrected_race



# Rows where the citizen's gender and race were entered in each other's
# column are swapped back; the fixed values go into two new columns.
tacoma['corrected_citizen_gender'], tacoma['corrected_citizen_race'] = the_switchero(tacoma, 'Citizen Race')

### Officer and Citizen Encoding

# One-hot encode the *corrected* citizen columns. Encoding the raw
# 'Citizen Race' / 'Citizen Gender' columns (as before) ignored the fix above
# and produced indicators such as a "race" of Male. The citizen indicator
# columns are therefore now prefixed 'corrected_citizen_race_' /
# 'corrected_citizen_gender_'.
# NOTE(review): 'Inc Num' must survive get_dummies as a column for the
# groupby below, which presumably means it is numeric -- confirm.
officerCitizenCat = pd.get_dummies(tacoma[[
    'Inc Num',
    'Officer Race',
    'Officer Gender',
    'corrected_citizen_race',
    'corrected_citizen_gender',
]])

# First row of every incident next to that incident's summed indicators.
tacomaEncoded = pd.concat(
    [tacoma.groupby('Inc Num').first(), officerCitizenCat.groupby('Inc Num').sum()],
    axis=1,
)


tacomaEncoded

# Preview only: the result is not assigned, so tacoma keeps these columns.
tacoma.drop(columns=['IA Num', 'Inc Num', 'Incident Address', 'Date Received', 'Inside/Outside', 'Disposition', 'Action taken'])

## Cincinnati Ohio

# Private copy of the Cincinnati sheet, followed by a quick look at its
# dtypes / non-null counts and first rows.
cincinnati = dictOfDfs['Cincinnati Ohio'].copy()
cincinnati.info()
cincinnati.head()

# Print the value frequencies of every column to eyeball the categories.
for x in cincinnati.columns:
    print(cincinnati[x].value_counts())

# Keep every use-of-force-investigation row. Previously the rows were reduced
# to the first row per CASE_NO *before* one-hot encoding, so the per-case sums
# below could only ever count a single officer/subject row per case.
cincinnatiShooting = cincinnati.loc[
    cincinnati['INCIDENT_DESCRIPTION'] == 'USE OF FORCE INVESTIGATION'
].copy()

# One-hot encode officer/subject attributes row by row, carrying CASE_NO
# alongside so the indicators can be summed per case.
officersSubjects = pd.concat(
    [
        cincinnatiShooting['CASE_NO'],
        pd.get_dummies(cincinnatiShooting[['OFFICER_RACE', 'OFFICER_GENDER', 'SUBJECT_RACE', 'SUBJECT_GENDER']]),
    ],
    axis=1,
)

officersSubjectsCat = officersSubjects.groupby(['CASE_NO']).sum()
cincinnatiShootingFirst = cincinnatiShooting.groupby(['CASE_NO']).first()

# First row of every case next to that case's summed indicators.
cincinnatiEncoded = pd.concat([cincinnatiShootingFirst, officersSubjectsCat], axis=1)
cincinnatiEncoded

# Preview only: the result is not assigned, so cincinnati keeps these columns.
cincinnati.drop(columns=[
    'INCIDENT_LOCATION_X',
    'INCIDENT_NO',
    'CASE_NO',
    'CFS_NO',
    'FIREARM_MAKE',
    'FIREARM_MODEL',
    'SNA_NEIGHBORHOOD',
    'CPD_NEIGHBORHOOD',
    'COMMUNITY_COUNCIL_NEIGHBORHOOD',
])

## Dallas Texas

# Private copy of the Dallas sheet, followed by a quick look at its
# dtypes / non-null counts and first rows.
dallas = dictOfDfs['Dallas Texas'].copy()
dallas.info()
dallas.head()

# Frequency of each case number (shows whether any case spans several rows).
dallas['Case #'].value_counts()

def get_awful_string(df, column):
    """Pull every third space-separated token out of each cell of a column.

    Each cell of ``df[column]`` is split on single spaces and the tokens at
    positions 2, 5, 8, ... (0-based) are kept.

    Parameters:
        df: DataFrame holding the column.
        column: label of a column whose cells are strings.

    Returns:
        A list with one list of tokens per row, in row order.
    """
    token_lists = df[column].map(lambda text: text.split(' '))
    return [tokens[2::3] for tokens in token_lists]
# Notebook display expression: every third token of each 'Subject(s)' cell.
get_awful_string(dallas,'Subject(s)')

# for x in get_awful_string(dallas,'Subject(s)'):

# Strip parentheses and commas from GeoLocation, split on whitespace and keep
# the last two tokens as a list.
# NOTE(review): presumably these are the two coordinates -- confirm; a
# non-string (e.g. missing) GeoLocation would raise on .replace.
dallas['x_y_coord'] = dallas.GeoLocation.map(lambda x: x.replace('(', '').replace(',','').replace(')','').strip().split()[-2:])

dallas.columns

# Preview only: the result is not assigned, so dallas keeps these columns.
dallas.drop(columns = ['Case #', 'Grand Jury Disposition',
       'Attorney General Forms URL', 'Summary URL', 'GeoLocation'])

## Hartford Connecticut

# Private copy of the Hartford sheet (the key keeps the sheet name's own
# spelling), followed by a quick look at the frame.
hartford = dictOfDfs['Hartford Conneticut'].copy()
hartford.info()
hartford

hartford['UCR_2_Description'].value_counts()

### Getting rid of animal shootings

# Keep rows whose UCR_2 description is neither 'ANIMAL COMPLNT' nor
# 'ANIMAL BITE' and whose UCR_1 description is not 'ANIMAL COMPLNT'.
hartfordShooting = hartford.loc[(hartford['UCR_2_Description'] != 'ANIMAL COMPLNT') & 
                                (hartford['UCR_2_Description'] != 'ANIMAL BITE') & 
                                (hartford['UCR_1_Description'] != 'ANIMAL COMPLNT')]
# The three selections below are display-only (results are not stored): rows
# where either UCR description is 'SHOOTING-INJ', 'SHOOTING-MISS' or
# 'FATAL SHOOTING' respectively.
hartfordShooting.loc[(hartfordShooting['UCR_1_Description'] == 'SHOOTING-INJ') | 
                     (hartfordShooting['UCR_2_Description'] == 'SHOOTING-INJ')]

hartfordShooting.loc[(hartfordShooting['UCR_1_Description'] == 'SHOOTING-MISS') | 
                     (hartfordShooting['UCR_2_Description'] == 'SHOOTING-MISS')]

hartfordShooting.loc[(hartfordShooting['UCR_1_Description'] == 'FATAL SHOOTING') | 
                     (hartfordShooting['UCR_2_Description'] == 'FATAL SHOOTING')]

### Creating injury column from ucr description

# Display-only boolean mask; no injury column is actually created yet.
hartfordShooting['UCR_2_Description'].isin(['SHOOTING-INJ',])

### Reducing features

# Placeholder for the UCR_1_Description collapse map. It was written as
# {''}, which is a *set* containing the empty string rather than a mapping;
# an empty dict keeps the name usable wherever a map is expected.
# TODO: fill in raw label -> category pairs.
ucr1_description_map = {}

# Display-only: label frequencies of both UCR description columns after the
# animal-related rows were filtered out.
hartfordShooting['UCR_1_Description'].value_counts()

hartfordShooting['UCR_2_Description'].value_counts()

# Display-only: last whitespace-separated token of UCR_1_Category once '-'
# and '*' characters are removed. Computed on the unfiltered `hartford`
# frame; the result is not stored.
hartford['UCR_1_Category'].map(lambda x: x.replace('-','').replace('*','').split()[-1])



## Indianapolis Indiana

### Intro data exploration

# Take a private copy, like the other city sections that call .copy(), so
# later edits to `indianapolis` cannot leak into the frame cached in dictOfDfs.
indianapolis = dictOfDfs['Indianapolis Indiana'].copy()
indianapolis.info()
indianapolis.head()

# Print the value frequencies of every column to eyeball the categories.
for x in indianapolis.columns:
    print(indianapolis[x].value_counts())

indianapolis[['id', 'officerRace']]

# One-hot encode officer race/sex per row, sum the indicators per incident id,
# and place them next to the first row of each incident.
officers = pd.concat([indianapolis['id'], pd.get_dummies(indianapolis[['officerRace', 'officerSex']])], axis=1)
officersGrouped = officers.groupby(['id']).sum()
indianapolisThing = indianapolis.groupby(['id']).first()
indianapolisEncoded = pd.concat([indianapolisThing, officersGrouped], axis=1)
indianapolisEncoded.reset_index().head()

## Orlando Florida

# Private copy of the Orlando sheet, followed by a quick look at the frame.
orlando = dictOfDfs['Orlando Florida'].copy()
orlando.info()
orlando.head()

# Print the value frequencies of every column to eyeball the categories.
for x in orlando.columns:
    print(orlando[x].value_counts())

# (new column, source column, token) triples: each new column holds, per row,
# how many ', '-separated entries of the source cell equal the token.
orlando_count_specs = (
    ('officers_white', 'Officer Race', 'W'),
    ('officers_black', 'Officer Race', 'B'),
    ('officers_other', 'Officer Race', 'O'),
    ('officers_race_exempt', 'Officer Race', 'Exempt'),
    ('officers_male', 'Officer Gender', 'M'),
    ('officers_female', 'Officer Gender', 'F'),
    ('officers_sex_unknown', 'Officer Gender', 'Unknown'),
    ('officers_injured', 'Officer Hit', 'Yes'),
    ('officers_killed', 'Fatal (Officer)', 'Yes'),
    ('suspects_white', 'Suspect Race', 'White'),
    ('suspects_black', 'Suspect Race', 'Black'),
    ('suspects_other', 'Suspect Race', 'Other'),
    ('suspects_race_unknown', 'Suspect Race', 'Unknown'),
    ('suspects_male', 'Suspect Gender', 'M'),
    ('suspects_female', 'Suspect Gender', 'F'),
    ('suspects_sex_unknown', 'Suspect Gender', 'Unknown'),
    ('suspects_injured', 'Suspect Hit', 'Yes'),
    ('suspects_killed', 'Fatal', 'Yes'),
)
for orlando_new_column, orlando_source_column, orlando_token in orlando_count_specs:
    orlando[orlando_new_column] = get_specific_type(
        orlando, orlando_source_column, orlando_token, seperating_value=', ')

orlando

## Sparks Nevada

# Private copy of the Sparks sheet; only inspected here (dtypes / non-null
# counts and first rows), no transformation is applied.
sparks = dictOfDfs['Sparks Nevada'].copy()
sparks.info()
sparks.head()

## Portland Oregon

# Private copy of the Portland sheet, followed by a quick look at the frame.
portland = dictOfDfs['Portland Oregon'].copy()
portland.info()
portland.head()

# Print the value frequencies of every column to eyeball the categories.
for x in portland.columns:
    print(portland[x].value_counts())

# One-hot encode the officer/subject attributes of every row.
officersSubjectsCat = pd.get_dummies(portland[['Officer Race', 
                                               'Officer Sex', 
                                               'Subject Race', 
                                               'Subject Sex', 
                                               'Was Subject Injured?', 
                                               'Was Subject Injury Fatal?']])
# Re-attach the case number, then sum the indicators per case. The groupby
# key is the frame's own 'Case #' Series rather than the column label.
officersSubjectsCat = pd.concat([portland['Case #'],officersSubjectsCat], axis = 1)
officersSubjectsCat = officersSubjectsCat.groupby(officersSubjectsCat['Case #']).sum().reset_index()
# First row of every case, left-joined with that case's summed indicators.
portlandEncoded = pd.merge(portland.groupby('Case #').first().reset_index(), officersSubjectsCat, how = 'left', on = 'Case #')

portlandEncoded.info()

## Louisville Kentucky

# Private copy of the Louisville sheet, followed by a quick look at the frame.
louisville = dictOfDfs['Louisville Kentucky'].copy()
louisville.info()
louisville.head()

# Print the value frequencies of every column to eyeball the categories.
for x in louisville.columns:
    print(louisville[x].value_counts())

# Number of comma-separated entries in officer_name. Because each cell goes
# through str() first, a non-string cell (e.g. a missing name) counts as 1.
louisville['num_of_officers'] = [len(str(x).split(',')) for x in louisville.officer_name]
louisville.head()

louisville['suspect_weapon'].value_counts()

# Raw Louisville `Investigation Type` label -> coarse category. Keys are
# matched verbatim against the data (spacing variants and the data's own
# misspelling 'Endangrerment' included). The output label for wanton
# endangerment was misspelled 'reckless-endanderment' and is corrected here.
investigation_map = {
    'OIS': 'Unknown',
    'SHOOTING INVESTIGATION - LMPD INVOLVED': 'shooting',
    'DEATH INVESTIGATION-LMPD INVOLVED': 'homicide',
    'SHOOTING INVESTIGATION-LMPD INVOLVED': 'shooting',
    'SHOOTING INVESTIGATION- LMPD INVOLVED': 'shooting',
    'SHOOTING INVESTIGATIONS - LMPD INVOLVED': 'shooting',
    'DEATH INVESTIGATION - LMPD INVOLVED': 'homicide',
    'Assault 1st-Police': 'assault',
    'Wanton Endangrerment 1st': 'reckless-endangerment',
    'Assault 1st': 'assault',
}

# Raw Louisville `suspect_weapon` label -> coarse weapon category
# ('firearm', 'melee', 'other', 'none' or 'unknown').
suspect_weapon_map = {
    'Handgun': 'firearm',
    'Firearm': 'firearm',
    'Vehicle': 'other',
    'None': 'none',
    'Rifle': 'firearm',
    'Knife': 'melee',
    'Shotgun': 'firearm',
    'U': 'unknown',
    'Metal Rod': 'melee',
    'Handgun/Rifle': 'firearm',
    'BB Gun': 'other',
    'Metal Pole': 'melee',
    'Screwdriver': 'melee',
    'Knife/Saw': 'melee',
    'Knives': 'melee',
}

# Overwrite both columns with their collapsed categories; labels that are not
# keys of the corresponding map come back from Series.map as NaN.
louisville['Investigation Type'] = louisville['Investigation Type'].map(investigation_map)
louisville['suspect_weapon'] = louisville['suspect_weapon'].map(suspect_weapon_map)

# Notebook display expression.
louisville

## Springfield Missouri

# Private copy of the Springfield sheet (the key keeps the sheet name's own
# spelling); only inspected here, no transformation is applied.
springfield = dictOfDfs['Springfiled Missouri'].copy()
springfield.info()
springfield

# Print the value frequencies of every column to eyeball the categories.
for x in springfield.columns:
    print(springfield[x].value_counts())

## Charlotte North Carolina

# Three sheets: incidents ("1"), officers ("offic") and individuals ("suspe").
# Private copies are taken, like the other city sections that call .copy(),
# so the frames cached in dictOfDfs stay untouched. The keys keep the sheet
# names' own spelling.
charolette1 = dictOfDfs['Charolette North Carolina 1'].copy()
charolette2 = dictOfDfs['Charolette North Carolina offic'].copy()
charolette3 = dictOfDfs['Charolette North Carolina suspe'].copy()

charolette1.head()

# One-hot encode the officer / individual attributes and sum the indicators
# per incident.
# NOTE(review): INCIDENT_ID must survive get_dummies as a column for the
# groupby, which presumably means it is numeric -- confirm.
charolette2OneHot = pd.get_dummies(
    charolette2[['INCIDENT_ID', 'OFFICER_RACE', 'OFFICER_GENDER']]
).groupby('INCIDENT_ID').sum().reset_index()
charolette3OneHot = pd.get_dummies(
    charolette3[['INCIDENT_ID', 'INDIVIDUAL_RACE', 'INDIVIDUAL_GENDER', 'INDIVIDUAL_INJURY_TYPE']]
).groupby('INCIDENT_ID').sum().reset_index()

# Default (inner) merges: only incidents present in all three sheets remain.
charoletteMerge1 = pd.merge(charolette1, charolette2OneHot, on='INCIDENT_ID')
charolette = pd.merge(charoletteMerge1, charolette3OneHot, on='INCIDENT_ID').copy()

# This line used to be the unfinished lookup charolette[''], which raises
# KeyError unless a column is literally named ''. Inspect the merged frame
# instead, as the other city sections do.
charolette.info()

# Print the value frequencies of every column to eyeball the categories.
for x in charolette.columns:
    print(charolette[x].value_counts())

