In [1]:
# import libraries
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as st

In [2]:
# Load the raw crime CSV (path is relative to this notebook's directory)
# into a DataFrame, then preview its first five rows.
crime_data = pd.read_csv(filepath_or_buffer="../../Resources/covid_crime_data.csv")
crime_data.head(5)

Unnamed: 0,REF_DATE,GEO,DGUID,Violations and calls for service,Statistics,UOM,UOM_ID,SCALAR_FACTOR,SCALAR_ID,VECTOR,COORDINATE,VALUE,STATUS,SYMBOL,TERMINATED,DECIMALS
0,2019-03,"Total, Selected police services",,"Total assaults (levels 1, 2, 3) [141]",Actual incidents,Number,223,units,0,v1267112443,20.1.1,,..,,t,0
1,2019-03,"Total, Selected police services",,"Assaults by non-family (levels 1, 2, 3)",Actual incidents,Number,223,units,0,v1267112444,20.2.1,,..,,t,0
2,2019-03,"Total, Selected police services",,"Assaults by family (levels 1, 2, 3)",Actual incidents,Number,223,units,0,v1267112445,20.3.1,,..,,t,0
3,2019-03,"Total, Selected police services",,Assaults where relationship type is unknown,Actual incidents,Number,223,units,0,v1267112446,20.4.1,,..,,t,0
4,2019-03,"Total, Selected police services",,"Total sexual assaults (levels 1, 2, 3) [131]",Actual incidents,Number,223,units,0,v1267112447,20.5.1,,..,,t,0


In [3]:
# Columns that are not used in the rest of this notebook; removing them
# keeps only the date, geography, violation, identifiers and value columns.
columns_to_drop = [
    'Statistics',
    'UOM',
    'UOM_ID',
    'DGUID',
    'SCALAR_FACTOR',
    'SCALAR_ID',
    'STATUS',
    'SYMBOL',
    'TERMINATED',
    'DECIMALS',
]
crime_data = crime_data.drop(columns=columns_to_drop)

# Preview the first ten rows of the trimmed frame.
crime_data.head(10)

Unnamed: 0,REF_DATE,GEO,Violations and calls for service,VECTOR,COORDINATE,VALUE
0,2019-03,"Total, Selected police services","Total assaults (levels 1, 2, 3) [141]",v1267112443,20.1.1,
1,2019-03,"Total, Selected police services","Assaults by non-family (levels 1, 2, 3)",v1267112444,20.2.1,
2,2019-03,"Total, Selected police services","Assaults by family (levels 1, 2, 3)",v1267112445,20.3.1,
3,2019-03,"Total, Selected police services",Assaults where relationship type is unknown,v1267112446,20.4.1,
4,2019-03,"Total, Selected police services","Total sexual assaults (levels 1, 2, 3) [131]",v1267112447,20.5.1,
5,2019-03,"Total, Selected police services","Sexual assaults by non-family (levels 1, 2, 3)",v1267112448,20.6.1,
6,2019-03,"Total, Selected police services","Sexual assaults by family (levels 1, 2, 3)",v1267112449,20.7.1,
7,2019-03,"Total, Selected police services",Sexual assaults where relationship type is unk...,v1267112450,20.8.1,
8,2019-03,"Total, Selected police services",Assault against peace or public officer (level...,v1267112451,20.9.1,
9,2019-03,"Total, Selected police services",Uttering threats [1627],v1267112452,20.10.1,


In [4]:
# Show every distinct label found in the GEO column, in order of first
# appearance, so the labels can be mapped to provinces in the next step.
pd.unique(crime_data['GEO'])

array(['Total, Selected police services',
       'Royal Newfoundland Constabulary [10C01]',
       'Kennebecasis Region, New Brunswick, municipal [13024]',
       'Montréal, Quebec, municipal [24175]',
       'Quebec Provincial Police [24C01]',
       'Ottawa, Ontario, municipal [35010]',
       'Halton Region (Oakville/Burlington), Ontario, municipal [35048]',
       'London, Ontario, municipal [35162]',
       'Waterloo Region (Kitchener), Ontario, municipal [35291]',
       'Toronto, Ontario, municipal [35304]',
       'York Region, Ontario, municipal [35335]',
       'Ontario Provincial Police [35C01]',
       'Winnipeg, Manitoba, municipal [46064]',
       'Regina, Saskatchewan, municipal [47063]',
       'Saskatoon, Saskatchewan, municipal [47066]',
       'Calgary, Alberta, municipal [48014]',
       'Edmonton, Alberta, municipal [48033]',
       'Vancouver, British Columbia, municipal [59023]',
       'Victoria, British Columbia, municipal [59025]',
       'Royal Canadian Mounted Police [99C01]'], dtype=object)

In [5]:
# Collapse each police-service label in GEO to the province it belongs to.
# The labels are grouped by target province here and then inverted into the
# flat {label: province} dictionary that Series.replace expects.
# NOTE(review): both the "Total, Selected police services" row and the
# RCMP row end up labelled 'Canada'; any later aggregation over 'Canada'
# would combine the two - confirm this is intended.
services_by_province = {
    'Canada': [
        'Total, Selected police services',
        'Royal Canadian Mounted Police [99C01]',
    ],
    'Newfoundland and Labrador': [
        'Royal Newfoundland Constabulary [10C01]',
    ],
    'New Brunswick': [
        'Kennebecasis Region, New Brunswick, municipal [13024]',
    ],
    'Quebec': [
        'Montréal, Quebec, municipal [24175]',
        'Quebec Provincial Police [24C01]',
    ],
    'Ontario': [
        'Ottawa, Ontario, municipal [35010]',
        'Halton Region (Oakville/Burlington), Ontario, municipal [35048]',
        'London, Ontario, municipal [35162]',
        'Waterloo Region (Kitchener), Ontario, municipal [35291]',
        'Toronto, Ontario, municipal [35304]',
        'York Region, Ontario, municipal [35335]',
        'Ontario Provincial Police [35C01]',
    ],
    'Manitoba': [
        'Winnipeg, Manitoba, municipal [46064]',
    ],
    'Saskatchewan': [
        'Regina, Saskatchewan, municipal [47063]',
        'Saskatoon, Saskatchewan, municipal [47066]',
    ],
    'Alberta': [
        'Calgary, Alberta, municipal [48014]',
        'Edmonton, Alberta, municipal [48033]',
    ],
    'British Columbia': [
        'Vancouver, British Columbia, municipal [59023]',
        'Victoria, British Columbia, municipal [59025]',
    ],
}
geo_to_province = {
    service_label: province_name
    for province_name, service_labels in services_by_province.items()
    for service_label in service_labels
}
crime_data['GEO'] = crime_data['GEO'].replace(geo_to_province)

# Despite its name this holds a NumPy array of the distinct GEO values
# after replacement, not a DataFrame.
newdf = crime_data['GEO'].unique()


In [6]:
# Inspect the dtype of every column to see which ones still need converting.
# Nothing is assigned: the bare expression is displayed as the cell output.
crime_data.dtypes

REF_DATE                             object
GEO                                  object
Violations and calls for service     object
VECTOR                               object
COORDINATE                           object
VALUE                               float64
dtype: object

In [7]:
# Convert 'REF_DATE' from text to datetime64.
# The values are year-month strings such as '2019-03', so the format is given
# explicitly instead of relying on pandas' format inference; each value is
# parsed as the first day of that month.
crime_data['REF_DATE'] = pd.to_datetime(crime_data['REF_DATE'], format='%Y-%m')

# Display the dtypes again to confirm the conversion.
crime_data.dtypes

REF_DATE                            datetime64[ns]
GEO                                         object
Violations and calls for service            object
VECTOR                                      object
COORDINATE                                  object
VALUE                                      float64
dtype: object

In [8]:
# Derive integer 'Year' and 'Month' columns from the datetime 'REF_DATE'
# column, then select the columns in the desired order with the two new
# columns leading.
crime_data = crime_data.assign(
    Year=crime_data['REF_DATE'].dt.year,
    Month=crime_data['REF_DATE'].dt.month,
)[[
    'Year',
    'Month',
    'REF_DATE',
    'GEO',
    'Violations and calls for service',
    'VECTOR',
    'COORDINATE',
    'VALUE',
]]

# Preview the reordered frame.
crime_data.head()

Unnamed: 0,Year,Month,REF_DATE,GEO,Violations and calls for service,VECTOR,COORDINATE,VALUE
0,2019,3,2019-03-01,Canada,"Total assaults (levels 1, 2, 3) [141]",v1267112443,20.1.1,
1,2019,3,2019-03-01,Canada,"Assaults by non-family (levels 1, 2, 3)",v1267112444,20.2.1,
2,2019,3,2019-03-01,Canada,"Assaults by family (levels 1, 2, 3)",v1267112445,20.3.1,
3,2019,3,2019-03-01,Canada,Assaults where relationship type is unknown,v1267112446,20.4.1,
4,2019,3,2019-03-01,Canada,"Total sexual assaults (levels 1, 2, 3) [131]",v1267112447,20.5.1,


In [9]:
# Rename the remaining raw column names to readable ones:
#   GEO                              -> Province
#   Violations and calls for service -> Crime Type
#   VECTOR                           -> Vector ID
#   COORDINATE                       -> Coordinate
#   VALUE                            -> Value
# 'DGUID' is intentionally absent: that column was dropped earlier in the
# notebook, and DataFrame.rename silently ignores missing labels, so keeping
# an entry for it would only be misleading.
crime_data = crime_data.rename(columns={
    'GEO': 'Province',
    'Violations and calls for service': 'Crime Type',
    'VECTOR': 'Vector ID',
    'COORDINATE': 'Coordinate',
    'VALUE': 'Value',
})

# Preview the renamed frame.
crime_data.head()


Unnamed: 0,Year,Month,REF_DATE,Province,Crime Type,Vector ID,Coordinate,Value
0,2019,3,2019-03-01,Canada,"Total assaults (levels 1, 2, 3) [141]",v1267112443,20.1.1,
1,2019,3,2019-03-01,Canada,"Assaults by non-family (levels 1, 2, 3)",v1267112444,20.2.1,
2,2019,3,2019-03-01,Canada,"Assaults by family (levels 1, 2, 3)",v1267112445,20.3.1,
3,2019,3,2019-03-01,Canada,Assaults where relationship type is unknown,v1267112446,20.4.1,
4,2019,3,2019-03-01,Canada,"Total sexual assaults (levels 1, 2, 3) [131]",v1267112447,20.5.1,


In [10]:
# Collect the distinct crime-type labels (in order of first appearance) and
# wrap them in a single-column DataFrame for easy inspection.
crime_types = crime_data['Crime Type'].unique()
crime_types_df = pd.DataFrame(crime_types, columns=['Crime Type'])
crime_types_df


Unnamed: 0,Crime Type
0,"Total assaults (levels 1, 2, 3) [141]"
1,"Assaults by non-family (levels 1, 2, 3)"
2,"Assaults by family (levels 1, 2, 3)"
3,Assaults where relationship type is unknown
4,"Total sexual assaults (levels 1, 2, 3) [131]"
5,"Sexual assaults by non-family (levels 1, 2, 3)"
6,"Sexual assaults by family (levels 1, 2, 3)"
7,Sexual assaults where relationship type is unk...
8,Assault against peace or public officer (level...
9,Uttering threats [1627]


In [11]:
# Mapping of broad crime category -> the exact 'Crime Type' labels it covers.
# The bare 'Assault against peace or public officer (level 1)' label (without
# the '[1460]' code) used to sit under 'Threats'; it is an assault offence and
# now lives next to its '[1460]' twin under 'Assaults'.
category_mapping = {
    'Assaults': [
        'Total assaults (levels 1, 2, 3) [141]',
        'Assaults by non-family (levels 1, 2, 3)',
        'Assaults by family (levels 1, 2, 3)',
        'Assaults where relationship type is unknown',
        'Assault against peace or public officer (level 1) [1460]',
        'Assault against peace or public officer (level 1)',
    ],
    'Sexual Assaults': [
        'Total sexual assaults (levels 1, 2, 3) [131]',
        'Sexual assaults by non-family (levels 1, 2, 3)',
        'Sexual assaults by family (levels 1, 2, 3)',
        'Sexual assaults where relationship type is unknown',
    ],
    'Threats': [
        'Uttering threats [1627]',
        'Uttering threats by non-family',
        'Uttering threats by family',
        'Uttering threats where relationship type is unknown',
    ],
    'Robbery': ['Total robbery [160]'],
    'Driving Offenses': [
        'Dangerous operation, causing death or bodily harm [911]',
        'Impaired driving, causing death or bodily harm [921]',
        'Impaired driving, operating while impaired [923]',
    ],
    'Breaking and Entering': [
        'Total breaking and entering [210]',
        'Breaking and entering, non-residential',
        'Breaking and entering, residential',
        'Breaking and entering where location type is unknown',
    ],
    'Theft': ['Motor vehicle theft [2135]', 'Shoplifting [213]'],
    'Fraud and Identity Theft': ['Fraud/identity theft/identity fraud [216]'],
    'Failures to Comply': ['Fail to comply with order [3410]'],
    'COVID-19 Related Offenses': ['Provincial/Territorial acts related to COVID-19'],
    'Calls for Service': [
        'Calls for service, domestic disturbances / disputes',
        'Calls for service, Mental Health Act (MHA) apprehension',
        'Calls for service, mental health - other',
        'Calls for service, suicide/attempted suicide',
        'Calls for service, overdose',
        'Calls for service, child welfare check',
        'Calls for service, check welfare - general',
        'Calls for service, child custody matter - domestic',
    ],
}


def _invert_category_mapping(mapping):
    """Return a flat {crime type: category} lookup built from `mapping`.

    `mapping` is a dict of category -> list of crime-type labels. If a label
    appears under more than one category, the first category in the dict's
    iteration order wins.
    """
    lookup = {}
    for category_name, member_labels in mapping.items():
        for label in member_labels:
            lookup.setdefault(label, category_name)
    return lookup


# Build the lookup once instead of scanning every category list for each row.
crime_type_to_category = _invert_category_mapping(category_mapping)

# Create a new column 'Crime Category'; labels missing from the mapping
# get None, because dict.get returns None for unknown keys.
crime_data['Crime Category'] = crime_data['Crime Type'].map(crime_type_to_category.get)

# Display dataframe
crime_data.head()

Unnamed: 0,Year,Month,REF_DATE,Province,Crime Type,Vector ID,Coordinate,Value,Crime Category
0,2019,3,2019-03-01,Canada,"Total assaults (levels 1, 2, 3) [141]",v1267112443,20.1.1,,Assaults
1,2019,3,2019-03-01,Canada,"Assaults by non-family (levels 1, 2, 3)",v1267112444,20.2.1,,Assaults
2,2019,3,2019-03-01,Canada,"Assaults by family (levels 1, 2, 3)",v1267112445,20.3.1,,Assaults
3,2019,3,2019-03-01,Canada,Assaults where relationship type is unknown,v1267112446,20.4.1,,Assaults
4,2019,3,2019-03-01,Canada,"Total sexual assaults (levels 1, 2, 3) [131]",v1267112447,20.5.1,,Sexual Assaults


In [12]:
# Put the identifier first, then date parts, geography, category/type and
# finally the measured value. 'REF_DATE' is not listed, so it is dropped here.
final_column_order = [
    'Vector ID',
    'Year',
    'Month',
    'Province',
    'Crime Category',
    'Crime Type',
    'Coordinate',
    'Value',
]
crime_data = crime_data[final_column_order]

# Preview the rearranged frame.
crime_data.head()

Unnamed: 0,Vector ID,Year,Month,Province,Crime Category,Crime Type,Coordinate,Value
0,v1267112443,2019,3,Canada,Assaults,"Total assaults (levels 1, 2, 3) [141]",20.1.1,
1,v1267112444,2019,3,Canada,Assaults,"Assaults by non-family (levels 1, 2, 3)",20.2.1,
2,v1267112445,2019,3,Canada,Assaults,"Assaults by family (levels 1, 2, 3)",20.3.1,
3,v1267112446,2019,3,Canada,Assaults,Assaults where relationship type is unknown,20.4.1,
4,v1267112447,2019,3,Canada,Sexual Assaults,"Total sexual assaults (levels 1, 2, 3) [131]",20.5.1,


In [13]:
# Sort by province, crime category and crime type (ascending), and within each
# of those by 'Value' from largest to smallest; then renumber the index 0..n-1.
# The result is assigned to a new frame rather than mutated with inplace=True:
# crime_data was produced by column selection, and rebinding keeps this cell
# free of in-place mutation of that selection.
crime_data = (
    crime_data
    .sort_values(
        by=['Province', 'Crime Category', 'Crime Type', 'Value'],
        ascending=[True, True, True, False],
    )
    .reset_index(drop=True)
)

# Display data frame
crime_data.head()


Unnamed: 0,Vector ID,Year,Month,Province,Crime Category,Crime Type,Coordinate,Value
0,v1210497756,2022,3,Alberta,Assaults,Assault against peace or public officer (level...,14.9.1,44.0
1,v1210497756,2019,9,Alberta,Assaults,Assault against peace or public officer (level...,14.9.1,43.0
2,v1210497756,2020,3,Alberta,Assaults,Assault against peace or public officer (level...,14.9.1,39.0
3,v1210497756,2020,5,Alberta,Assaults,Assault against peace or public officer (level...,14.9.1,38.0
4,v1210497756,2019,8,Alberta,Assaults,Assault against peace or public officer (level...,14.9.1,36.0


In [14]:
# Clone: take an independent copy so later changes to clean_crime_data cannot
# affect crime_data. A plain assignment would only create a second name for
# the same DataFrame object.
clean_crime_data = crime_data.copy()

In [15]:
# Keep only the rows whose 'Value' is present (not NaN); the original index
# labels of the surviving rows are retained.
has_value = clean_crime_data['Value'].notna()
clean_crime_data = clean_crime_data[has_value]

In [16]:
# Write the cleaned data to CSV without the index column.
# The output directory is created first if it does not exist, because
# DataFrame.to_csv does not create missing parent directories and would
# otherwise fail on a fresh checkout.
clean_output_path = "../../Output/clean_covid_crime_data.csv"
os.makedirs(os.path.dirname(clean_output_path), exist_ok=True)
clean_crime_data.to_csv(clean_output_path, index=False)