### Data Exploration and Cleaning

In [1]:
import pandas as pd
import os
import warnings
# NOTE(review): this silences every warning category for the rest of the session
# (pandas deprecation and chained-assignment warnings included). It only affects
# warnings raised after this line has run.
warnings.filterwarnings("ignore")

  from pandas.core import (


In [2]:
# Directory holding the NIBRS CSV extracts (relative to the working directory)
nibrs_files = "Resources/"
# List the directory once; sorted so the load order does not depend on the filesystem
file_list = sorted(os.listdir(nibrs_files))
csv_files = [file for file in file_list if file.endswith('.csv')]
# Maps table name (file name without its extension) -> DataFrame
dataframes = {}
for file in csv_files:
    file_path = os.path.join(nibrs_files, file)
    # splitext removes only the trailing extension; str.replace would also
    # strip a '.csv' that happens to occur in the middle of a file name
    df_name = os.path.splitext(file)[0]
    # 'data_year' is treated as redundant across tables (same year everywhere
    # per the original note — TODO confirm), so it is dropped wherever present.
    # errors='ignore' makes the drop a no-op for tables without that column.
    dataframes[df_name] = pd.read_csv(file_path).drop(columns='data_year', errors='ignore')

In [3]:
# Build one wide table by outer-joining the NIBRS tables together.
# NOTE(review): every join uses how='outer', so rows of a lookup table that
# match nothing are kept as mostly-null rows — confirm this is intended.

# Offense-level tables, all keyed on offense_id
df_combined = dataframes['NIBRS_VICTIM_OFFENSE']
for table in ('NIBRS_OFFENSE', 'NIBRS_SUSPECT_USING', 'NIBRS_BIAS_MOTIVATION'):
    df_combined = pd.merge(df_combined, dataframes[table], how='outer', on='offense_id')

# Offender and victim records receive the same three demographic lookups
demographic_lookups = (
    ('NIBRS_AGE', 'age_id'),
    ('REF_RACE', 'race_id'),
    ('NIBRS_ETHNICITY', 'ethnicity_id'),
)
df_offender = dataframes['NIBRS_OFFENDER']
df_victim = dataframes['NIBRS_VICTIM']
for table, key in demographic_lookups:
    df_offender = pd.merge(df_offender, dataframes[table], how='outer', on=key)
    df_victim = pd.merge(df_victim, dataframes[table], how='outer', on=key)

# Pair victims and offenders through their shared incident; column names present
# on both sides are disambiguated with the '_victim' / '_offender' suffixes
df_victim_offender = pd.merge(
    df_victim,
    df_offender,
    how='outer',
    on='incident_id',
    suffixes=('_victim', '_offender'),
)

# Attach the victim/offender pairs to the offense-level frame
df_combined = pd.merge(
    df_combined,
    df_victim_offender,
    how='outer',
    on=['incident_id', 'victim_id'],
)

# Remaining tables as (table name, join key) pairs; the order of this list is
# the order in which the joins are applied
remaining_joins = [
    ('NIBRS_incident', 'incident_id'),
    ('NIBRS_PROPERTY', 'incident_id'),
    ('NIBRS_PROPERTY_DESC', 'property_id'),
    ('NIBRS_VICTIM_OFFENDER_REL', ['victim_id', 'offender_id']),
    ('NIBRS_CRIMINAL_ACT', 'offense_id'),
    ('NIBRS_WEAPON', 'offense_id'),
    ('NIBRS_VICTIM_INJURY', 'victim_id'),
    ('agencies', 'agency_id'),
    ('NIBRS_OFFENSE_TYPE', 'offense_code'),
    ('NIBRS_PROP_DESC_TYPE', 'prop_desc_id'),
    ('NIBRS_LOCATION_TYPE', 'location_id'),
    ('NIBRS_BIAS_LIST', 'bias_id'),
    ('NIBRS_WEAPON_TYPE', 'weapon_id'),
    ('NIBRS_RELATIONSHIP', 'relationship_id'),
    ('NIBRS_VICTIM_CIRCUMSTANCES', 'victim_id'),
    ('NIBRS_CIRCUMSTANCES', 'circumstances_id'),
    ('NIBRS_CRIMINAL_ACT_TYPE', 'criminal_act_id'),
    ('NIBRS_ACTIVITY_TYPE', 'activity_type_id'),
    ('NIBRS_VICTIM_TYPE', 'victim_type_id'),
    ('NIBRS_PROP_LOSS_TYPE', 'prop_loss_id'),
    ('NIBRS_INJURY', 'injury_id'),
]
for table, key in remaining_joins:
    df_combined = pd.merge(df_combined, dataframes[table], how='outer', on=key)

# The final join yields the complete frame; df_combined keeps the state just before it
df_complete = pd.merge(
    df_combined,
    dataframes['NIBRS_USING_LIST'],
    how='outer',
    on='suspect_using_id',
)


In [4]:
# Take an independent (deep) copy of the merged frame to use for cleaning
df_cleaned = df_complete.copy(deep=True)

In [5]:
# Lift the column-display limit so wide frames render every column
pd.options.display.max_columns = None
df_cleaned

Unnamed: 0,victim_id,offense_id,incident_id,offense_code,attempt_complete_flag,location_id,num_premises_entered,method_entry_code,suspect_using_id,bias_id,victim_seq_num,victim_type_id,assignment_type_id,activity_type_id,outside_agency_id,age_id_victim,age_num_victim,sex_code_victim,race_id_victim,ethnicity_id_victim,resident_status_code,age_range_low_num_victim,age_code_range_high,age_code_victim,age_name_victim,race_code_victim,race_desc_victim,sort_order_victim,start_year_victim,end_year_victim,notes_victim,ethnicity_code_victim,ethnicity_name_victim,offender_id,offender_seq_num,age_id_offender,age_num_offender,sex_code_offender,race_id_offender,ethnicity_id_offender,age_range_low_num_offender,age_range_high_num,age_code_offender,age_name_offender,race_code_offender,race_desc_offender,sort_order_offender,start_year_offender,end_year_offender,notes_offender,ethnicity_code_offender,ethnicity_name_offender,agency_id,nibrs_month_id,cargo_theft_flag,submission_date,incident_date,report_date_flag,incident_hour,cleared_except_id,cleared_except_date,incident_status,data_home,orig_format,did,property_id,prop_loss_id,stolen_count,recovered_count,prop_desc_id,property_value,date_recovered,nibrs_prop_desc_id,relationship_id,nibrs_victim_offender_id,criminal_act_id,weapon_id,nibrs_weapon_id,injury_id,yearly_agency_id,ori,legacy_ori,covered_by_legacy_ori,direct_contributor_flag,dormant_flag,dormant_year,reporting_type,ucr_agency_name,ncic_agency_name,pub_agency_name,pub_agency_unit,agency_status,state_id,state_name,state_abbr,state_postal_abbr,division_code,division_name,region_code,region_name,region_desc,agency_type_name,population,submitting_agency_id,sai,submitting_agency_name,suburban_area_flag,population_group_id,population_group_code,population_group_desc,parent_pop_group_code,parent_pop_group_desc,mip_flag,pop_sort_order,summary_rape_def,pe_reported_flag,male_officer,male_civilian,male_officer+male_civilian,female_officer,female_civilian,female_officer+female_civilian,
officer_rate,employee_rate,nibrs_cert_date,nibrs_start_date,nibrs_leoka_start_date,nibrs_ct_start_date,nibrs_multi_bias_start_date,nibrs_off_eth_start_date,covered_flag,county_name,msa_name,publishable_flag,participated,nibrs_participated,offense_name,crime_against,ct_flag,hc_flag,hc_code,offense_category_name,offense_group,prop_desc_name,prop_desc_code,location_code,location_name,bias_code,bias_category,bias_desc,weapon_code,weapon_name,shr_flag,relationship_code,relationship_name,circumstances_id,justifiable_force_id,circumstance_type,circumstance_code,circumstance_name,criminal_act_code,criminal_act_name,criminal_act_desc,activity_type_code,activity_type_name,victim_type_code,victim_type_name,prop_loss_name,prop_loss_desc,injury_code,injury_name,suspect_using_code,suspect_using_name
0,168523966.0,182848832.0,152637181.0,13A,C,35.0,,,1.0,88.0,1.0,4.0,,,,31.0,28,F,10.0,10.0,R,28.0,,28,28 Years Old,W,White,10.0,,,,H,Hispanic or Latino,173339132.0,1.0,36.0,33,M,10.0,10.0,33.0,,33,33 Years Old,W,White,10.0,,,,H,Hispanic or Latino,2513.0,44090589.0,f,2022-05-02 17:59:32.634,2022-04-10,f,0.0,6.0,,ACCEPTED,,X,139056781.0,,,,,,,,,3.0,,9.0,38.0,,1.0,25132022.0,FL0110000,FL0110000,,N,N,,I,COLLIER,COLLIER CO SO NAPLES,Collier,,A,12.0,Florida,FL,FL,5.0,South Atlantic,3.0,South,Region III,County,363680.0,23362.0,FLUCR0001,Florida Department of Law Enforcement Uniform ...,Y,19.0,9A,"MSA counties 100,000 or over",9.0,MSA Counties,Y,19.0,,Y,499.0,123.0,622.0,79.0,270.0,349.0,1.6,2.7,2021-01-01,2021-01-01,2022-03-01,2022-03-01,2022-03-01,2022-03-01,N,COLLIER,"Naples-Marco Island, FL",Y,Y,Y,Aggravated Assault,Person,f,t,04,Assault Offenses,A,,,20.0,Residence/Home,88.0,None/Unknown,None (no bias),90,Other,t,BG,Victim Was Boyfriend/Girlfriend,,,,,,N,None/Unknown,None/Unknown,,,I,Individual,,,B,Apparent Broken Bones,A,Alcohol
1,168524320.0,182849216.0,152637506.0,13A,C,21.0,,,1.0,88.0,1.0,4.0,,,,26.0,23,M,10.0,20.0,U,23.0,,23,23 Years Old,W,White,10.0,,,,N,Not Hispanic or Latino,173339554.0,1.0,23.0,20,M,10.0,50.0,20.0,,20,20 Years Old,W,White,10.0,,,,X,Not Specified,2513.0,44090589.0,f,2022-05-02 18:01:41.751,2022-04-03,f,0.0,6.0,,ACCEPTED,,X,139057854.0,,,,,,,,,1.0,,9.0,41.0,,1.0,25132022.0,FL0110000,FL0110000,,N,N,,I,COLLIER,COLLIER CO SO NAPLES,Collier,,A,12.0,Florida,FL,FL,5.0,South Atlantic,3.0,South,Region III,County,363680.0,23362.0,FLUCR0001,Florida Department of Law Enforcement Uniform ...,Y,19.0,9A,"MSA counties 100,000 or over",9.0,MSA Counties,Y,19.0,,Y,499.0,123.0,622.0,79.0,270.0,349.0,1.6,2.7,2021-01-01,2021-01-01,2022-03-01,2022-03-01,2022-03-01,2022-03-01,N,COLLIER,"Naples-Marco Island, FL",Y,Y,Y,Aggravated Assault,Person,f,t,04,Assault Offenses,A,,,10.0,Field/Woods,88.0,None/Unknown,None (no bias),40,Personal Weapons,t,AQ,Victim Was Acquaintance,,,,,,N,None/Unknown,None/Unknown,,,I,Individual,,,B,Apparent Broken Bones,A,Alcohol
2,173518666.0,187861059.0,157003848.0,13A,C,35.0,,,1.0,88.0,1.0,4.0,,,,33.0,30,F,10.0,20.0,R,30.0,,30,30 Years Old,W,White,10.0,,,,N,Not Hispanic or Latino,178284124.0,1.0,37.0,34,M,50.0,20.0,34.0,,34,34 Years Old,P,Native Hawaiian or Other Pacific Islander,50.0,2013.0,,,N,Not Hispanic or Latino,2615.0,44165954.0,f,2022-08-08 13:49:24.809,2022-02-03,f,0.0,6.0,,ACCEPTED,,X,150494821.0,,,,,,,,,16.0,,9.0,38.0,,1.0,26152022.0,FL0160000,FL0160000,,N,N,,I,JACKSONVILLE SHERIFF'S OFFICE,DUVAL CO SO JACKSONVILLE,Jacksonville Sheriff's Office,,A,12.0,Florida,FL,FL,5.0,South Atlantic,3.0,South,Region III,County,971282.0,23362.0,FLUCR0001,Florida Department of Law Enforcement Uniform ...,Y,19.0,9A,"MSA counties 100,000 or over",9.0,MSA Counties,Y,19.0,,N,,,,,,,,,2021-01-01,2021-10-01,2022-06-01,2022-06-01,2022-06-01,2022-06-01,N,DUVAL,"Jacksonville, FL",Y,Y,Y,Aggravated Assault,Person,f,t,04,Assault Offenses,A,,,20.0,Residence/Home,88.0,None/Unknown,None (no bias),90,Other,t,OK,Victim was Otherwise Known,,,,,,N,None/Unknown,None/Unknown,,,I,Individual,,,B,Apparent Broken Bones,A,Alcohol
3,174467194.0,188805224.0,157828225.0,13A,C,37.0,,,1.0,88.0,1.0,4.0,,,,31.0,28,F,20.0,20.0,R,28.0,,28,28 Years Old,B,Black or African American,20.0,,,,N,Not Hispanic or Latino,179227534.0,1.0,27.0,24,F,20.0,20.0,24.0,,24,24 Years Old,B,Black or African American,20.0,,,,N,Not Hispanic or Latino,2615.0,44168495.0,f,2022-08-22 18:22:04.183,2022-07-31,f,0.0,6.0,,ACCEPTED,,X,152404412.0,,,,,,,,,1.0,,9.0,41.0,,1.0,26152022.0,FL0160000,FL0160000,,N,N,,I,JACKSONVILLE SHERIFF'S OFFICE,DUVAL CO SO JACKSONVILLE,Jacksonville Sheriff's Office,,A,12.0,Florida,FL,FL,5.0,South Atlantic,3.0,South,Region III,County,971282.0,23362.0,FLUCR0001,Florida Department of Law Enforcement Uniform ...,Y,19.0,9A,"MSA counties 100,000 or over",9.0,MSA Counties,Y,19.0,,N,,,,,,,,,2021-01-01,2021-10-01,2022-06-01,2022-06-01,2022-06-01,2022-06-01,N,DUVAL,"Jacksonville, FL",Y,Y,Y,Aggravated Assault,Person,f,t,04,Assault Offenses,A,,,21.0,Restaurant,88.0,None/Unknown,None (no bias),40,Personal Weapons,t,AQ,Victim Was Acquaintance,,,,,,N,None/Unknown,None/Unknown,,,I,Individual,,,B,Apparent Broken Bones,A,Alcohol
4,174467194.0,188805224.0,157828225.0,13A,C,37.0,,,1.0,88.0,1.0,4.0,,,,31.0,28,F,20.0,20.0,R,28.0,,28,28 Years Old,B,Black or African American,20.0,,,,N,Not Hispanic or Latino,179227535.0,2.0,25.0,22,F,20.0,20.0,21.0,23.0,22,22 Years Old,B,Black or African American,20.0,,,,N,Not Hispanic or Latino,2615.0,44168495.0,f,2022-08-22 18:22:04.183,2022-07-31,f,0.0,6.0,,ACCEPTED,,X,152404412.0,,,,,,,,,1.0,,9.0,41.0,,1.0,26152022.0,FL0160000,FL0160000,,N,N,,I,JACKSONVILLE SHERIFF'S OFFICE,DUVAL CO SO JACKSONVILLE,Jacksonville Sheriff's Office,,A,12.0,Florida,FL,FL,5.0,South Atlantic,3.0,South,Region III,County,971282.0,23362.0,FLUCR0001,Florida Department of Law Enforcement Uniform ...,Y,19.0,9A,"MSA counties 100,000 or over",9.0,MSA Counties,Y,19.0,,N,,,,,,,,,2021-01-01,2021-10-01,2022-06-01,2022-06-01,2022-06-01,2022-06-01,N,DUVAL,"Jacksonville, FL",Y,Y,Y,Aggravated Assault,Person,f,t,04,Assault Offenses,A,,,21.0,Restaurant,88.0,None/Unknown,None (no bias),40,Personal Weapons,t,AQ,Victim Was Acquaintance,,,,,,N,None/Unknown,None/Unknown,,,I,Individual,,,B,Apparent Broken Bones,A,Alcohol
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
459269,,,,,,,,,,,,,,,,,,,70.0,,,,,,,M,Multiple,70.0,,,Used for groups with multiple races,,,,,,,,70.0,,,,,,M,Multiple,70.0,,,Used for groups with multiple races,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
459270,,,,,,,,,,,,,,,,,,,70.0,,,,,,,M,Multiple,70.0,,,Used for groups with multiple races,,,,,1.0,,,,,,,NN,Under 24 Hours,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
459271,,,,,,,,,,,,,,,,,,,70.0,,,,,,,M,Multiple,70.0,,,Used for groups with multiple races,,,,,2.0,,,,,,,NB,1-6 Days Old,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
459272,,,,,,,,,,,,,,,,,,,70.0,,,,,,,M,Multiple,70.0,,,Used for groups with multiple races,,,,,3.0,,,,,,,BB,7-364 Days Old,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [6]:
# Discard every column that holds no data at all (all of its values are null)
df_cleaned = df_cleaned.dropna(axis='columns', how='all')
df_cleaned

Unnamed: 0,victim_id,offense_id,incident_id,offense_code,attempt_complete_flag,location_id,num_premises_entered,method_entry_code,suspect_using_id,bias_id,victim_seq_num,victim_type_id,assignment_type_id,activity_type_id,outside_agency_id,age_id_victim,age_num_victim,sex_code_victim,race_id_victim,ethnicity_id_victim,resident_status_code,age_range_low_num_victim,age_code_range_high,age_code_victim,age_name_victim,race_code_victim,race_desc_victim,sort_order_victim,start_year_victim,end_year_victim,notes_victim,ethnicity_code_victim,ethnicity_name_victim,offender_id,offender_seq_num,age_id_offender,age_num_offender,sex_code_offender,race_id_offender,ethnicity_id_offender,age_range_low_num_offender,age_range_high_num,age_code_offender,age_name_offender,race_code_offender,race_desc_offender,sort_order_offender,start_year_offender,end_year_offender,notes_offender,ethnicity_code_offender,ethnicity_name_offender,agency_id,nibrs_month_id,cargo_theft_flag,submission_date,incident_date,report_date_flag,incident_hour,cleared_except_id,cleared_except_date,incident_status,orig_format,did,property_id,prop_loss_id,stolen_count,recovered_count,prop_desc_id,property_value,date_recovered,relationship_id,criminal_act_id,weapon_id,injury_id,yearly_agency_id,ori,legacy_ori,direct_contributor_flag,dormant_flag,reporting_type,ucr_agency_name,ncic_agency_name,pub_agency_name,pub_agency_unit,agency_status,state_id,state_name,state_abbr,state_postal_abbr,division_code,division_name,region_code,region_name,region_desc,agency_type_name,population,submitting_agency_id,sai,submitting_agency_name,suburban_area_flag,population_group_id,population_group_code,population_group_desc,parent_pop_group_code,parent_pop_group_desc,mip_flag,pop_sort_order,pe_reported_flag,male_officer,male_civilian,male_officer+male_civilian,female_officer,female_civilian,female_officer+female_civilian,officer_rate,employee_rate,nibrs_cert_date,nibrs_start_date,nibrs_leoka_start_date,nibrs_ct_start_date,nibrs_multi_bias_st
art_date,nibrs_off_eth_start_date,covered_flag,county_name,msa_name,publishable_flag,participated,nibrs_participated,offense_name,crime_against,ct_flag,hc_flag,hc_code,offense_category_name,offense_group,prop_desc_name,prop_desc_code,location_code,location_name,bias_code,bias_category,bias_desc,weapon_code,weapon_name,shr_flag,relationship_code,relationship_name,circumstances_id,justifiable_force_id,circumstance_type,circumstance_code,circumstance_name,criminal_act_code,criminal_act_name,criminal_act_desc,activity_type_code,activity_type_name,victim_type_code,victim_type_name,prop_loss_name,prop_loss_desc,injury_code,injury_name,suspect_using_code,suspect_using_name
0,168523966.0,182848832.0,152637181.0,13A,C,35.0,,,1.0,88.0,1.0,4.0,,,,31.0,28,F,10.0,10.0,R,28.0,,28,28 Years Old,W,White,10.0,,,,H,Hispanic or Latino,173339132.0,1.0,36.0,33,M,10.0,10.0,33.0,,33,33 Years Old,W,White,10.0,,,,H,Hispanic or Latino,2513.0,44090589.0,f,2022-05-02 17:59:32.634,2022-04-10,f,0.0,6.0,,ACCEPTED,X,139056781.0,,,,,,,,3.0,9.0,38.0,1.0,25132022.0,FL0110000,FL0110000,N,N,I,COLLIER,COLLIER CO SO NAPLES,Collier,,A,12.0,Florida,FL,FL,5.0,South Atlantic,3.0,South,Region III,County,363680.0,23362.0,FLUCR0001,Florida Department of Law Enforcement Uniform ...,Y,19.0,9A,"MSA counties 100,000 or over",9.0,MSA Counties,Y,19.0,Y,499.0,123.0,622.0,79.0,270.0,349.0,1.6,2.7,2021-01-01,2021-01-01,2022-03-01,2022-03-01,2022-03-01,2022-03-01,N,COLLIER,"Naples-Marco Island, FL",Y,Y,Y,Aggravated Assault,Person,f,t,04,Assault Offenses,A,,,20.0,Residence/Home,88.0,None/Unknown,None (no bias),90,Other,t,BG,Victim Was Boyfriend/Girlfriend,,,,,,N,None/Unknown,None/Unknown,,,I,Individual,,,B,Apparent Broken Bones,A,Alcohol
1,168524320.0,182849216.0,152637506.0,13A,C,21.0,,,1.0,88.0,1.0,4.0,,,,26.0,23,M,10.0,20.0,U,23.0,,23,23 Years Old,W,White,10.0,,,,N,Not Hispanic or Latino,173339554.0,1.0,23.0,20,M,10.0,50.0,20.0,,20,20 Years Old,W,White,10.0,,,,X,Not Specified,2513.0,44090589.0,f,2022-05-02 18:01:41.751,2022-04-03,f,0.0,6.0,,ACCEPTED,X,139057854.0,,,,,,,,1.0,9.0,41.0,1.0,25132022.0,FL0110000,FL0110000,N,N,I,COLLIER,COLLIER CO SO NAPLES,Collier,,A,12.0,Florida,FL,FL,5.0,South Atlantic,3.0,South,Region III,County,363680.0,23362.0,FLUCR0001,Florida Department of Law Enforcement Uniform ...,Y,19.0,9A,"MSA counties 100,000 or over",9.0,MSA Counties,Y,19.0,Y,499.0,123.0,622.0,79.0,270.0,349.0,1.6,2.7,2021-01-01,2021-01-01,2022-03-01,2022-03-01,2022-03-01,2022-03-01,N,COLLIER,"Naples-Marco Island, FL",Y,Y,Y,Aggravated Assault,Person,f,t,04,Assault Offenses,A,,,10.0,Field/Woods,88.0,None/Unknown,None (no bias),40,Personal Weapons,t,AQ,Victim Was Acquaintance,,,,,,N,None/Unknown,None/Unknown,,,I,Individual,,,B,Apparent Broken Bones,A,Alcohol
2,173518666.0,187861059.0,157003848.0,13A,C,35.0,,,1.0,88.0,1.0,4.0,,,,33.0,30,F,10.0,20.0,R,30.0,,30,30 Years Old,W,White,10.0,,,,N,Not Hispanic or Latino,178284124.0,1.0,37.0,34,M,50.0,20.0,34.0,,34,34 Years Old,P,Native Hawaiian or Other Pacific Islander,50.0,2013.0,,,N,Not Hispanic or Latino,2615.0,44165954.0,f,2022-08-08 13:49:24.809,2022-02-03,f,0.0,6.0,,ACCEPTED,X,150494821.0,,,,,,,,16.0,9.0,38.0,1.0,26152022.0,FL0160000,FL0160000,N,N,I,JACKSONVILLE SHERIFF'S OFFICE,DUVAL CO SO JACKSONVILLE,Jacksonville Sheriff's Office,,A,12.0,Florida,FL,FL,5.0,South Atlantic,3.0,South,Region III,County,971282.0,23362.0,FLUCR0001,Florida Department of Law Enforcement Uniform ...,Y,19.0,9A,"MSA counties 100,000 or over",9.0,MSA Counties,Y,19.0,N,,,,,,,,,2021-01-01,2021-10-01,2022-06-01,2022-06-01,2022-06-01,2022-06-01,N,DUVAL,"Jacksonville, FL",Y,Y,Y,Aggravated Assault,Person,f,t,04,Assault Offenses,A,,,20.0,Residence/Home,88.0,None/Unknown,None (no bias),90,Other,t,OK,Victim was Otherwise Known,,,,,,N,None/Unknown,None/Unknown,,,I,Individual,,,B,Apparent Broken Bones,A,Alcohol
3,174467194.0,188805224.0,157828225.0,13A,C,37.0,,,1.0,88.0,1.0,4.0,,,,31.0,28,F,20.0,20.0,R,28.0,,28,28 Years Old,B,Black or African American,20.0,,,,N,Not Hispanic or Latino,179227534.0,1.0,27.0,24,F,20.0,20.0,24.0,,24,24 Years Old,B,Black or African American,20.0,,,,N,Not Hispanic or Latino,2615.0,44168495.0,f,2022-08-22 18:22:04.183,2022-07-31,f,0.0,6.0,,ACCEPTED,X,152404412.0,,,,,,,,1.0,9.0,41.0,1.0,26152022.0,FL0160000,FL0160000,N,N,I,JACKSONVILLE SHERIFF'S OFFICE,DUVAL CO SO JACKSONVILLE,Jacksonville Sheriff's Office,,A,12.0,Florida,FL,FL,5.0,South Atlantic,3.0,South,Region III,County,971282.0,23362.0,FLUCR0001,Florida Department of Law Enforcement Uniform ...,Y,19.0,9A,"MSA counties 100,000 or over",9.0,MSA Counties,Y,19.0,N,,,,,,,,,2021-01-01,2021-10-01,2022-06-01,2022-06-01,2022-06-01,2022-06-01,N,DUVAL,"Jacksonville, FL",Y,Y,Y,Aggravated Assault,Person,f,t,04,Assault Offenses,A,,,21.0,Restaurant,88.0,None/Unknown,None (no bias),40,Personal Weapons,t,AQ,Victim Was Acquaintance,,,,,,N,None/Unknown,None/Unknown,,,I,Individual,,,B,Apparent Broken Bones,A,Alcohol
4,174467194.0,188805224.0,157828225.0,13A,C,37.0,,,1.0,88.0,1.0,4.0,,,,31.0,28,F,20.0,20.0,R,28.0,,28,28 Years Old,B,Black or African American,20.0,,,,N,Not Hispanic or Latino,179227535.0,2.0,25.0,22,F,20.0,20.0,21.0,23.0,22,22 Years Old,B,Black or African American,20.0,,,,N,Not Hispanic or Latino,2615.0,44168495.0,f,2022-08-22 18:22:04.183,2022-07-31,f,0.0,6.0,,ACCEPTED,X,152404412.0,,,,,,,,1.0,9.0,41.0,1.0,26152022.0,FL0160000,FL0160000,N,N,I,JACKSONVILLE SHERIFF'S OFFICE,DUVAL CO SO JACKSONVILLE,Jacksonville Sheriff's Office,,A,12.0,Florida,FL,FL,5.0,South Atlantic,3.0,South,Region III,County,971282.0,23362.0,FLUCR0001,Florida Department of Law Enforcement Uniform ...,Y,19.0,9A,"MSA counties 100,000 or over",9.0,MSA Counties,Y,19.0,N,,,,,,,,,2021-01-01,2021-10-01,2022-06-01,2022-06-01,2022-06-01,2022-06-01,N,DUVAL,"Jacksonville, FL",Y,Y,Y,Aggravated Assault,Person,f,t,04,Assault Offenses,A,,,21.0,Restaurant,88.0,None/Unknown,None (no bias),40,Personal Weapons,t,AQ,Victim Was Acquaintance,,,,,,N,None/Unknown,None/Unknown,,,I,Individual,,,B,Apparent Broken Bones,A,Alcohol
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
459269,,,,,,,,,,,,,,,,,,,70.0,,,,,,,M,Multiple,70.0,,,Used for groups with multiple races,,,,,,,,70.0,,,,,,M,Multiple,70.0,,,Used for groups with multiple races,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
459270,,,,,,,,,,,,,,,,,,,70.0,,,,,,,M,Multiple,70.0,,,Used for groups with multiple races,,,,,1.0,,,,,,,NN,Under 24 Hours,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
459271,,,,,,,,,,,,,,,,,,,70.0,,,,,,,M,Multiple,70.0,,,Used for groups with multiple races,,,,,2.0,,,,,,,NB,1-6 Days Old,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
459272,,,,,,,,,,,,,,,,,,,70.0,,,,,,,M,Multiple,70.0,,,Used for groups with multiple races,,,,,3.0,,,,,,,BB,7-364 Days Old,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,


In [7]:
# Restrict the cleaned frame to the columns used for the offender analysis.
# The order of this list is the column order of the resulting frame.
offender_columns = [
    # offender identity and demographics
    'offender_id',
    'age_num_offender',
    'sex_code_offender',
    'race_code_offender',
    'race_desc_offender',
    'ethnicity_code_offender',
    'ethnicity_name_offender',
    # offense
    'offense_id',
    'offense_code',
    'offense_category_name',
    'offense_group',
    # incident
    'incident_id',
    'incident_date',
    'crime_against',
    # agency and place
    'agency_id',
    'county_name',
    'msa_name',
    'location_id',
    'location_name',
    # relationship, weapon and suspected substance/equipment use
    'relationship_name',
    'weapon_id',
    'weapon_name',
    'suspect_using_code',
    'suspect_using_name',
]
df_offenders = df_cleaned[offender_columns]
df_offenders.head(5)

Unnamed: 0,offender_id,age_num_offender,sex_code_offender,race_code_offender,race_desc_offender,ethnicity_code_offender,ethnicity_name_offender,offense_id,offense_code,offense_category_name,offense_group,incident_id,incident_date,crime_against,agency_id,county_name,msa_name,location_id,location_name,relationship_name,weapon_id,weapon_name,suspect_using_code,suspect_using_name
0,173339132.0,33,M,W,White,H,Hispanic or Latino,182848832.0,13A,Assault Offenses,A,152637181.0,2022-04-10,Person,2513.0,COLLIER,"Naples-Marco Island, FL",35.0,Residence/Home,Victim Was Boyfriend/Girlfriend,38.0,Other,A,Alcohol
1,173339554.0,20,M,W,White,X,Not Specified,182849216.0,13A,Assault Offenses,A,152637506.0,2022-04-03,Person,2513.0,COLLIER,"Naples-Marco Island, FL",21.0,Field/Woods,Victim Was Acquaintance,41.0,Personal Weapons,A,Alcohol
2,178284124.0,34,M,P,Native Hawaiian or Other Pacific Islander,N,Not Hispanic or Latino,187861059.0,13A,Assault Offenses,A,157003848.0,2022-02-03,Person,2615.0,DUVAL,"Jacksonville, FL",35.0,Residence/Home,Victim was Otherwise Known,38.0,Other,A,Alcohol
3,179227534.0,24,F,B,Black or African American,N,Not Hispanic or Latino,188805224.0,13A,Assault Offenses,A,157828225.0,2022-07-31,Person,2615.0,DUVAL,"Jacksonville, FL",37.0,Restaurant,Victim Was Acquaintance,41.0,Personal Weapons,A,Alcohol
4,179227535.0,22,F,B,Black or African American,N,Not Hispanic or Latino,188805224.0,13A,Assault Offenses,A,157828225.0,2022-07-31,Person,2615.0,DUVAL,"Jacksonville, FL",37.0,Restaurant,Victim Was Acquaintance,41.0,Personal Weapons,A,Alcohol


In [8]:
# Old column name -> new column name for the offender frame
offender_column_names = {
    'age_num_offender': 'offender_age',
    'sex_code_offender': 'offender_sex',
    'race_code_offender': 'offender_race',
    'race_desc_offender': 'race_description',
    'ethnicity_code_offender': 'offender_ethnicity',
    'ethnicity_name_offender': 'ethnicity_description',
    'msa_name': 'city_name',
    'location_name': 'offense_location',
    'relationship_name': 'offender_victim_relationship',
    'weapon_name': 'weapon_used',
    'suspect_using_code': 'offender_using_code',
    'suspect_using_name': 'offender_using_name',
}
# Columns that are not listed in the mapping keep their current names
df_offenders = df_offenders.rename(columns=offender_column_names)

df_offenders

Unnamed: 0,offender_id,offender_age,offender_sex,offender_race,race_description,offender_ethnicity,ethnicity_description,offense_id,offense_code,offense_category_name,offense_group,incident_id,incident_date,crime_against,agency_id,county_name,city_name,location_id,offense_location,offender_victim_relationship,weapon_id,weapon_used,offender_using_code,offender_using_name
0,173339132.0,33,M,W,White,H,Hispanic or Latino,182848832.0,13A,Assault Offenses,A,152637181.0,2022-04-10,Person,2513.0,COLLIER,"Naples-Marco Island, FL",35.0,Residence/Home,Victim Was Boyfriend/Girlfriend,38.0,Other,A,Alcohol
1,173339554.0,20,M,W,White,X,Not Specified,182849216.0,13A,Assault Offenses,A,152637506.0,2022-04-03,Person,2513.0,COLLIER,"Naples-Marco Island, FL",21.0,Field/Woods,Victim Was Acquaintance,41.0,Personal Weapons,A,Alcohol
2,178284124.0,34,M,P,Native Hawaiian or Other Pacific Islander,N,Not Hispanic or Latino,187861059.0,13A,Assault Offenses,A,157003848.0,2022-02-03,Person,2615.0,DUVAL,"Jacksonville, FL",35.0,Residence/Home,Victim was Otherwise Known,38.0,Other,A,Alcohol
3,179227534.0,24,F,B,Black or African American,N,Not Hispanic or Latino,188805224.0,13A,Assault Offenses,A,157828225.0,2022-07-31,Person,2615.0,DUVAL,"Jacksonville, FL",37.0,Restaurant,Victim Was Acquaintance,41.0,Personal Weapons,A,Alcohol
4,179227535.0,22,F,B,Black or African American,N,Not Hispanic or Latino,188805224.0,13A,Assault Offenses,A,157828225.0,2022-07-31,Person,2615.0,DUVAL,"Jacksonville, FL",37.0,Restaurant,Victim Was Acquaintance,41.0,Personal Weapons,A,Alcohol
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
459269,,,,M,Multiple,,,,,,,,,,,,,,,,,,,
459270,,,,,,,,,,,,,,,,,,,,,,,,
459271,,,,,,,,,,,,,,,,,,,,,,,,
459272,,,,,,,,,,,,,,,,,,,,,,,,


In [9]:
# Keep only complete records: drop every row that has a null in ANY column
df_offenders = df_offenders.dropna(axis='index', how='any')
df_offenders

Unnamed: 0,offender_id,offender_age,offender_sex,offender_race,race_description,offender_ethnicity,ethnicity_description,offense_id,offense_code,offense_category_name,offense_group,incident_id,incident_date,crime_against,agency_id,county_name,city_name,location_id,offense_location,offender_victim_relationship,weapon_id,weapon_used,offender_using_code,offender_using_name
0,173339132.0,33,M,W,White,H,Hispanic or Latino,182848832.0,13A,Assault Offenses,A,152637181.0,2022-04-10,Person,2513.0,COLLIER,"Naples-Marco Island, FL",35.0,Residence/Home,Victim Was Boyfriend/Girlfriend,38.0,Other,A,Alcohol
1,173339554.0,20,M,W,White,X,Not Specified,182849216.0,13A,Assault Offenses,A,152637506.0,2022-04-03,Person,2513.0,COLLIER,"Naples-Marco Island, FL",21.0,Field/Woods,Victim Was Acquaintance,41.0,Personal Weapons,A,Alcohol
2,178284124.0,34,M,P,Native Hawaiian or Other Pacific Islander,N,Not Hispanic or Latino,187861059.0,13A,Assault Offenses,A,157003848.0,2022-02-03,Person,2615.0,DUVAL,"Jacksonville, FL",35.0,Residence/Home,Victim was Otherwise Known,38.0,Other,A,Alcohol
3,179227534.0,24,F,B,Black or African American,N,Not Hispanic or Latino,188805224.0,13A,Assault Offenses,A,157828225.0,2022-07-31,Person,2615.0,DUVAL,"Jacksonville, FL",37.0,Restaurant,Victim Was Acquaintance,41.0,Personal Weapons,A,Alcohol
4,179227535.0,22,F,B,Black or African American,N,Not Hispanic or Latino,188805224.0,13A,Assault Offenses,A,157828225.0,2022-07-31,Person,2615.0,DUVAL,"Jacksonville, FL",37.0,Restaurant,Victim Was Acquaintance,41.0,Personal Weapons,A,Alcohol
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
454551,192644681.0,21,M,B,Black or African American,N,Not Hispanic or Latino,202437651.0,09A,Homicide Offenses,A,169699691.0,2022-10-23,Person,2793.0,HILLSBOROUGH,"Tampa-St. Petersburg-Clearwater, FL",25.0,Highway/Road/Alley/Street/Sidewalk,Relationship Unknown,1.0,Firearm,N,Not Applicable
454556,192648117.0,41,M,W,White,H,Hispanic or Latino,202440964.0,09B,Homicide Offenses,A,169702678.0,2022-12-06,Person,2793.0,HILLSBOROUGH,"Tampa-St. Petersburg-Clearwater, FL",37.0,Restaurant,Victim Was Acquaintance,41.0,Personal Weapons,N,Not Applicable
454563,192650517.0,31,M,W,White,H,Hispanic or Latino,202443342.0,09A,Homicide Offenses,A,169704817.0,2022-12-28,Person,2545.0,MIAMI-DADE,"Miami-Fort Lauderdale-Pompano Beach, FL",25.0,Highway/Road/Alley/Street/Sidewalk,Victim Was Acquaintance,41.0,Personal Weapons,N,Not Applicable
454581,193165851.0,52,F,B,Black or African American,N,Not Hispanic or Latino,202961664.0,09A,Homicide Offenses,A,170156332.0,2022-11-08,Person,2793.0,HILLSBOROUGH,"Tampa-St. Petersburg-Clearwater, FL",35.0,Residence/Home,Victim Was Boyfriend/Girlfriend,21.0,Knife/Cutting Instrument,N,Not Applicable


In [10]:
# Drop rows that repeat an earlier row across all columns (the first occurrence
# is kept), then show the non-null count of each column
df_offenders = df_offenders.drop_duplicates(subset=None, keep='first')
df_offenders.count()

offender_id                     73534
offender_age                    73534
offender_sex                    73534
offender_race                   73534
race_description                73534
offender_ethnicity              73534
ethnicity_description           73534
offense_id                      73534
offense_code                    73534
offense_category_name           73534
offense_group                   73534
incident_id                     73534
incident_date                   73534
crime_against                   73534
agency_id                       73534
county_name                     73534
city_name                       73534
location_id                     73534
offense_location                73534
offender_victim_relationship    73534
weapon_id                       73534
weapon_used                     73534
offender_using_code             73534
offender_using_name             73534
dtype: int64

In [11]:
# Exploring data types
# Lists the dtype of each column of df_offenders
df_offenders.dtypes

offender_id                     float64
offender_age                     object
offender_sex                     object
offender_race                    object
race_description                 object
offender_ethnicity               object
ethnicity_description            object
offense_id                      float64
offense_code                     object
offense_category_name            object
offense_group                    object
incident_id                     float64
incident_date                    object
crime_against                    object
agency_id                       float64
county_name                      object
city_name                        object
location_id                     float64
offense_location                 object
offender_victim_relationship     object
weapon_id                       float64
weapon_used                      object
offender_using_code              object
offender_using_name              object
dtype: object

In [12]:
# Casting columns to their proper dtypes
# incident_date -> datetime
df_offenders['incident_date'] = pd.to_datetime(df_offenders['incident_date'])
# Identifier columns and the offender age -> int64
int_columns = [
    'offender_id',
    'offender_age',
    'offense_id',
    'incident_id',
    'agency_id',
    'location_id',
    'weapon_id',
]
int_dtype_map = dict.fromkeys(int_columns, 'int64')
df_offenders[int_columns] = df_offenders[int_columns].astype(int_dtype_map)
# Show the dtypes after conversion
df_offenders.dtypes

offender_id                              int64
offender_age                             int64
offender_sex                            object
offender_race                           object
race_description                        object
offender_ethnicity                      object
ethnicity_description                   object
offense_id                               int64
offense_code                            object
offense_category_name                   object
offense_group                           object
incident_id                              int64
incident_date                   datetime64[ns]
crime_against                           object
agency_id                                int64
county_name                             object
city_name                               object
location_id                              int64
offense_location                        object
offender_victim_relationship            object
weapon_id                                int64
weapon_used  

In [13]:
# Resetting the index
# Swap the existing row labels for a fresh 0..n-1 range; the old labels are discarded
df_offenders = df_offenders.set_axis(pd.RangeIndex(len(df_offenders)), axis=0)
df_offenders

Unnamed: 0,offender_id,offender_age,offender_sex,offender_race,race_description,offender_ethnicity,ethnicity_description,offense_id,offense_code,offense_category_name,offense_group,incident_id,incident_date,crime_against,agency_id,county_name,city_name,location_id,offense_location,offender_victim_relationship,weapon_id,weapon_used,offender_using_code,offender_using_name
0,173339132,33,M,W,White,H,Hispanic or Latino,182848832,13A,Assault Offenses,A,152637181,2022-04-10,Person,2513,COLLIER,"Naples-Marco Island, FL",35,Residence/Home,Victim Was Boyfriend/Girlfriend,38,Other,A,Alcohol
1,173339554,20,M,W,White,X,Not Specified,182849216,13A,Assault Offenses,A,152637506,2022-04-03,Person,2513,COLLIER,"Naples-Marco Island, FL",21,Field/Woods,Victim Was Acquaintance,41,Personal Weapons,A,Alcohol
2,178284124,34,M,P,Native Hawaiian or Other Pacific Islander,N,Not Hispanic or Latino,187861059,13A,Assault Offenses,A,157003848,2022-02-03,Person,2615,DUVAL,"Jacksonville, FL",35,Residence/Home,Victim was Otherwise Known,38,Other,A,Alcohol
3,179227534,24,F,B,Black or African American,N,Not Hispanic or Latino,188805224,13A,Assault Offenses,A,157828225,2022-07-31,Person,2615,DUVAL,"Jacksonville, FL",37,Restaurant,Victim Was Acquaintance,41,Personal Weapons,A,Alcohol
4,179227535,22,F,B,Black or African American,N,Not Hispanic or Latino,188805224,13A,Assault Offenses,A,157828225,2022-07-31,Person,2615,DUVAL,"Jacksonville, FL",37,Restaurant,Victim Was Acquaintance,41,Personal Weapons,A,Alcohol
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
73529,192644681,21,M,B,Black or African American,N,Not Hispanic or Latino,202437651,09A,Homicide Offenses,A,169699691,2022-10-23,Person,2793,HILLSBOROUGH,"Tampa-St. Petersburg-Clearwater, FL",25,Highway/Road/Alley/Street/Sidewalk,Relationship Unknown,1,Firearm,N,Not Applicable
73530,192648117,41,M,W,White,H,Hispanic or Latino,202440964,09B,Homicide Offenses,A,169702678,2022-12-06,Person,2793,HILLSBOROUGH,"Tampa-St. Petersburg-Clearwater, FL",37,Restaurant,Victim Was Acquaintance,41,Personal Weapons,N,Not Applicable
73531,192650517,31,M,W,White,H,Hispanic or Latino,202443342,09A,Homicide Offenses,A,169704817,2022-12-28,Person,2545,MIAMI-DADE,"Miami-Fort Lauderdale-Pompano Beach, FL",25,Highway/Road/Alley/Street/Sidewalk,Victim Was Acquaintance,41,Personal Weapons,N,Not Applicable
73532,193165851,52,F,B,Black or African American,N,Not Hispanic or Latino,202961664,09A,Homicide Offenses,A,170156332,2022-11-08,Person,2793,HILLSBOROUGH,"Tampa-St. Petersburg-Clearwater, FL",35,Residence/Home,Victim Was Boyfriend/Girlfriend,21,Knife/Cutting Instrument,N,Not Applicable


In [14]:
# Exporting the cleaned offenders DataFrame as a CSV file, without writing the index
df_offenders.to_csv(path_or_buf='Resources/offenders_data_cleaned.csv', index=False)