In [None]:
#Importing Python Packages
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
#Reading in Chicago Crime File
#The path is relative to the notebook's working directory; the raw frame is kept untouched
#under this name and head() previews its first five rows.
chicagoCrimeDataSet = pd.read_csv('../data/Chicago_Crime.csv')
chicagoCrimeDataSet.head()

In [None]:
#Checking Info of Dataset (column names, dtypes, and non-null counts of the raw frame)
chicagoCrimeDataSet.info()

In [None]:
#Number of non-null CASE# entries in the raw dataset
chicagoCrimeDataSet['CASE#'].count()

In [None]:
#Number of distinct CASE# values in the raw dataset; a value lower than the non-null count
#means some case numbers appear more than once
chicagoCrimeDataSet['CASE#'].nunique()

In [None]:
#Cleaning: drop every row with a missing value in any column, then drop exact duplicate rows.
#The trailing .copy() makes the cleaned frame an independent object, so the cells that later
#add columns to it do not trigger pandas' SettingWithCopyWarning.
chicagoCrimeDataSetCleaned = (
    chicagoCrimeDataSet
    .dropna()
    .drop_duplicates()
    .copy()
)

In [None]:
#Re-check row count, dtypes, and non-null counts after cleaning
chicagoCrimeDataSetCleaned.info()

In [None]:
#Occurrence year column: characters 6-9 of the date text, stored as a string
#NOTE(review): relies on the date text starting with 'MM/DD/YYYY' - confirm against the raw file
chicagoCrimeDataSetCleaned['OCCURRENCE YEAR'] = (
    chicagoCrimeDataSetCleaned['DATE  OF OCCURRENCE'].str.slice(start=6, stop=10)
)
chicagoCrimeDataSetCleaned['OCCURRENCE YEAR']

In [None]:
#Occurrence month column: the first two characters of the date text, stored as a string
#NOTE(review): relies on the date text starting with a zero-padded 'MM' - confirm against the raw file
chicagoCrimeDataSetCleaned['OCCURRENCE MONTH'] = (
    chicagoCrimeDataSetCleaned['DATE  OF OCCURRENCE'].str.slice(start=0, stop=2)
)
chicagoCrimeDataSetCleaned['OCCURRENCE MONTH']

In [None]:
#Keep only the rows whose occurrence year is 2021, then report how many distinct months they cover
crimesPerMonth2021 = chicagoCrimeDataSetCleaned.loc[
    chicagoCrimeDataSetCleaned['OCCURRENCE YEAR'] == '2021'
]
crimesPerMonth2021['OCCURRENCE MONTH'].nunique()

In [None]:
#Keep only the rows whose occurrence year is 2022, then report how many distinct months they cover
crimesPerMonth2022 = chicagoCrimeDataSetCleaned.loc[
    chicagoCrimeDataSetCleaned['OCCURRENCE YEAR'] == '2022'
]
crimesPerMonth2022['OCCURRENCE MONTH'].nunique()

In [None]:
#Monthly crime totals for 2021: one row per (year, month) with the number of non-null CASE#
#entries in the OCCURRENCE column, ordered by month
crimesPerMonth2021_df = (
    crimesPerMonth2021
    .groupby(['OCCURRENCE YEAR', 'OCCURRENCE MONTH'])['CASE#']
    .count()
    .reset_index(name='OCCURRENCE')
    .sort_values(by='OCCURRENCE MONTH', ascending=True)
)
crimesPerMonth2021_df

In [None]:
#Monthly crime totals for 2022: one row per (year, month) with the number of non-null CASE#
#entries in the OCCURRENCE column, ordered by month
crimesPerMonth2022_df = (
    crimesPerMonth2022
    .groupby(['OCCURRENCE YEAR', 'OCCURRENCE MONTH'])['CASE#']
    .count()
    .reset_index(name='OCCURRENCE')
    .sort_values(by='OCCURRENCE MONTH', ascending=True)
)
crimesPerMonth2022_df

In [None]:
#Line chart of monthly crime counts for 2021.
#Author's reading of the chart: counts rise sharply from October to November and fall only
#slightly from November to December.
plt.plot(
    crimesPerMonth2021_df['OCCURRENCE MONTH'],
    crimesPerMonth2021_df['OCCURRENCE'],
)
plt.xlabel('Month', fontsize=12)
plt.ylabel('Number of Occurrences', fontsize=12)
plt.title("Crime Occurrences per Month for 2021", fontsize=18)
plt.show()

In [None]:
#Line chart of monthly crime counts for the 2022 months present in the data.
#Author's reading of the chart: an almost steady incline.
#Caveat: the data was acquired in October 2022, while that month was still in progress, so the
#September-to-October drop cannot be attributed solely to a decrease in crime (if there was one);
#not all October crime had been recorded yet.
plt.plot(
    crimesPerMonth2022_df['OCCURRENCE MONTH'],
    crimesPerMonth2022_df['OCCURRENCE'],
)
plt.xlabel('Month', fontsize=12)
plt.ylabel('Number of Occurrences', fontsize=12)
plt.title("Crime Occurrences per Month for 2022", fontsize=18)
plt.show()

In [None]:
#Number of unique crime types in 2021
#(the column name really does start with a space: ' PRIMARY DESCRIPTION')
descriptCount2021 = crimesPerMonth2021[' PRIMARY DESCRIPTION'].nunique()
descriptCount2021

In [None]:
#Number of each crime type in 2021, most frequent first, plus the ten most frequent types.
#The index/column names are set explicitly instead of renaming whatever value_counts() produces:
#pandas >= 2.0 names the counts 'count' and puts the original column name on the index, so
#renaming ' PRIMARY DESCRIPTION' / 'index' silently does nothing there and the sort on
#'OCCURRENCES' fails. rename_axis + reset_index(name=...) gives the same
#DESCRIPTION / OCCURRENCES columns on both old and new pandas.
descriptValCounts2021_df = (
    crimesPerMonth2021[' PRIMARY DESCRIPTION']
    .value_counts()
    .rename_axis('DESCRIPTION')
    .reset_index(name='OCCURRENCES')
    .sort_values(by='OCCURRENCES', ascending=False)
)
topTenDescriptValCounts2021_df = descriptValCounts2021_df.head(10)
topTenDescriptValCounts2021_df

In [None]:
#Bar chart of the ten most frequent crime types in 2021.
#One sns.set call replaces the original three: sns.set(font_scale=10) and sns.set_style('darkgrid')
#were immediately reset by the later sns.set(rc=...), whose defaults are style='darkgrid' and
#font_scale=1, so the resulting theme is unchanged.
sns.set(style='darkgrid', rc={"figure.figsize": (15, 12)})
#Request one colour per bar; without n_colors the palette has 6 colours and the gradient
#would start over part-way through the ten bars.
cmap = sns.color_palette('ch:s=.25,rot=-.25', n_colors=len(topTenDescriptValCounts2021_df))
ax = sns.barplot(x='DESCRIPTION', y='OCCURRENCES', data=topTenDescriptValCounts2021_df, palette=cmap)
#NOTE(review): this reassignment is not used by this cell; it is kept only because `cmap` is
#notebook-global and a later cell might read it - remove once confirmed unused.
cmap = sns.color_palette('viridis')
#Explicit labels/title replace the column names seaborn puts on the axes by default
plt.xlabel('CRIME TYPES', fontdict= {'fontsize':20})
plt.ylabel('NUMBER OF OCCURRENCES', fontdict= {'fontsize':20})
plt.title("NUMBER OF CRIMES BY TYPE (2021)",fontdict= { 'fontsize': 25, 'verticalalignment': 'center'}, y=1.02)
plt.xticks(fontsize=17, rotation=75)
#Trailing semicolon (on the statement itself, not on a line of its own) hides the tick-label repr
plt.yticks(fontsize=17);

In [None]:
#Number of unique crime types in 2022
#(the column name really does start with a space: ' PRIMARY DESCRIPTION')
descriptCount2022 = crimesPerMonth2022[' PRIMARY DESCRIPTION'].nunique()
descriptCount2022

In [None]:
#Number of each crime type in 2022, most frequent first, plus the ten most frequent types.
#The index/column names are set explicitly instead of renaming whatever value_counts() produces:
#pandas >= 2.0 names the counts 'count' and puts the original column name on the index, so
#renaming ' PRIMARY DESCRIPTION' / 'index' silently does nothing there and the sort on
#'OCCURRENCES' fails. rename_axis + reset_index(name=...) gives the same
#DESCRIPTION / OCCURRENCES columns on both old and new pandas.
descriptValCounts2022_df = (
    crimesPerMonth2022[' PRIMARY DESCRIPTION']
    .value_counts()
    .rename_axis('DESCRIPTION')
    .reset_index(name='OCCURRENCES')
    .sort_values(by='OCCURRENCES', ascending=False)
)
topTenDescriptValCounts2022_df = descriptValCounts2022_df.head(10)
topTenDescriptValCounts2022_df

In [None]:
#Bar chart of the ten most frequent crime types in 2022.
#One sns.set call replaces the original three: sns.set(font_scale=10) and sns.set_style('darkgrid')
#were immediately reset by the later sns.set(rc=...), whose defaults are style='darkgrid' and
#font_scale=1, so the resulting theme is unchanged.
sns.set(style='darkgrid', rc={"figure.figsize": (15, 12)})
#Request one colour per bar; without n_colors the palette has 6 colours and the gradient
#would start over part-way through the ten bars.
cmap = sns.color_palette('ch:s=.25,rot=-.25', n_colors=len(topTenDescriptValCounts2022_df))
ax = sns.barplot(x='DESCRIPTION', y='OCCURRENCES', data=topTenDescriptValCounts2022_df, palette=cmap)
#NOTE(review): this reassignment is not used by this cell; it is kept only because `cmap` is
#notebook-global and a later cell might read it - remove once confirmed unused.
cmap = sns.color_palette('viridis')
#Explicit labels/title replace the column names seaborn puts on the axes by default
plt.xlabel('CRIME TYPES', fontdict= {'fontsize':20})
plt.ylabel('NUMBER OF OCCURRENCES', fontdict= {'fontsize':20})
#Title corrected to 2022: this chart plots the 2022 top-ten frame, not the 2021 one
plt.title("NUMBER OF CRIMES BY TYPE (2022)",fontdict= { 'fontsize': 25, 'verticalalignment': 'center'}, y=1.02)
plt.xticks(fontsize=17, rotation=75)
#Trailing semicolon (on the statement itself, not on a line of its own) hides the tick-label repr
plt.yticks(fontsize=17);