In [None]:
# import relevant libraries
import pandas as pd

In [None]:
import warnings
# NOTE(review): with no category or module argument this filter silences every
# warning for the rest of the session, including any pandas warnings raised by
# later cells. Consider narrowing it to the specific warning being hidden.
warnings.filterwarnings('ignore')

In [None]:
# read the crime records from CSV into a dataframe;
# the file content is decoded as ISO-8859-1
crime_df = pd.read_csv(
    "dataset/crime.csv",
    encoding="ISO-8859-1",
)

In [None]:
# get the dimension of the dataframe as a (row count, column count) tuple
crime_df.shape

In [None]:
# list the column labels (fields) of the dataframe
crime_df.columns

In [None]:
# print a summary of the dataframe: column dtypes and non-null counts
crime_df.info()

In [None]:
# check whether any cell in the dataframe is missing:
# reduce to one flag per column first, then to a single flag for the frame
crime_df.isna().any().any()
# observed result: True, meaning there are NULL values

In [None]:
# get the value count of the 'SHOOTING' column; dropna=False keeps missing
# values as their own row, so the number of NULL entries is visible as well
crime_df['SHOOTING'].value_counts(dropna=False)

In [None]:
# remove the 'SHOOTING' column; drop() returns a new dataframe and leaves
# crime_df itself unchanged
updated_crimedf = crime_df.drop(columns="SHOOTING")

In [None]:
# check columns: 'SHOOTING' should no longer be listed
updated_crimedf.columns

In [None]:
# drop the rows where at least one element is missing.
# The explicit copy() makes the result an independent dataframe, so the cells
# below can add or overwrite columns on it without pandas flagging the
# assignment as a write to a possible copy (SettingWithCopyWarning).
cleaned_crimedf = updated_crimedf.dropna().copy()

In [None]:
# check the new dimension of the dataframe after dropping incomplete rows
cleaned_crimedf.shape

In [None]:
# import datetime
from datetime import datetime

# convert string to timestamp.
# pd.to_datetime parses the whole column in one vectorised call instead of
# invoking datetime.strptime once per row. It also accepts values that are
# already timestamps, so this cell can be re-run safely (strptime only
# accepts str and would raise on the second run).
cleaned_crimedf["OCCURRED_ON_DATE"] = pd.to_datetime(
    cleaned_crimedf["OCCURRED_ON_DATE"],
    format="%Y-%m-%d %H:%M:%S",
)

In [None]:
# split OCCURRED_ON_DATE into date and time.
# The .dt accessor extracts both parts for the whole column at once instead
# of looping over the values in Python; each entry is a datetime.date /
# datetime.time object.
cleaned_crimedf['DATE'] = cleaned_crimedf['OCCURRED_ON_DATE'].dt.date
cleaned_crimedf['TIME'] = cleaned_crimedf['OCCURRED_ON_DATE'].dt.time

In [None]:
# display the first five rows
cleaned_crimedf.head()

In [None]:
# bind a shorter name for the analysis below; this is the same dataframe
# object as cleaned_crimedf, not a copy
crimedf = cleaned_crimedf

# Number of Crimes for each Time Period

### Daily Crime Count

In [None]:
# count the crimes recorded on each day, busiest days first;
# the result has the columns DATE and COUNT
crime_count_by_date = (
    crimedf.groupby("DATE")
    .size()
    .sort_values(ascending=False)
    .reset_index(name="COUNT")
)

In [None]:
# display the first five rows of the daily counts
crime_count_by_date.head()

In [None]:
# get the shape of the dataframe: one row per distinct DATE
crime_count_by_date.shape

In [None]:
# get the dataframe summary (column dtypes and non-null counts)
crime_count_by_date.info()

In [None]:
# import relevant libraries
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="whitegrid")

In [None]:
# create the matplotlib figure
fig, ax = plt.subplots(figsize=(20, 5))

# plot the graph of number of crimes vs. date
# crime_count_by_date is sorted by COUNT in descending order, so its first
# 30 rows are the 30 dates with the most crimes
barplot1 = sns.barplot(
    x="DATE",
    y="COUNT",
    data=crime_count_by_date.head(30),
    color="g",
    ax=ax,  # draw on the axes created above rather than the implicit current axes
)
# set the title and axis labels so the figure can be read on its own
ax.set(
    title="30 Dates with the Most Crimes",
    ylabel="Number of Crimes",
    xlabel="Date",
)
# rotate the existing xticklabels in place
plt.setp(
    ax.get_xticklabels(),
    rotation=45,
    horizontalalignment='right',
    fontweight='light',
    fontsize='large',
)
# render the figure; ending the cell with show() keeps the list returned by
# the previous call out of the cell output
plt.show()

In [None]:
# count the crimes recorded on each day without re-sorting by count;
# rows stay in the order groupby produces for the DATE keys
new_crime_count_by_date = (
    crimedf.groupby("DATE")
    .size()
    .reset_index(name="COUNT")
)

In [None]:
# plot the line graph in order to observe the pattern
# only the first 60 rows of the daily counts are plotted
lineplot1 = new_crime_count_by_date.head(60).plot.line(
    x="DATE",
    y="COUNT",
    figsize=(20, 10),
)

### Yearly Crime Count

In [None]:
# count the crimes recorded in each year;
# the result has the columns YEAR and COUNT
crime_count_by_year = (
    crimedf.groupby("YEAR")
    .size()
    .reset_index(name="COUNT")
)

In [None]:
# display the first five rows of the yearly counts
crime_count_by_year.head()

In [None]:
# create the matplotlib figure
fig, ax = plt.subplots(figsize=(8, 5))

# plot the graph of number of crimes vs. year
barplot2 = sns.barplot(
    x="YEAR",
    y="COUNT",
    data=crime_count_by_year,
    color="g",
    ax=ax,  # draw on the axes created above rather than the implicit current axes
)
# set the title and axis labels so the figure can be read on its own
ax.set(
    title="Number of Crimes per Year",
    ylabel="Number of Crimes",
    xlabel="Year",
)
# rotate the existing xticklabels in place
plt.setp(
    ax.get_xticklabels(),
    rotation=45,
    horizontalalignment='right',
    fontweight='light',
    fontsize='x-large',
)
# render the figure; ending the cell with show() keeps the list returned by
# the previous call out of the cell output
plt.show()

In [None]:
# plot the line graph in order to observe the pattern
# Number of crimes for each year will be plotted
lineplot2 = crime_count_by_year.plot.line(x="YEAR", y="COUNT", figsize=(12, 6))
# put one tick on each year present in the data so the axis does not use
# automatically chosen positions that may fall between years
# (assumes YEAR holds numeric values - TODO confirm against the CSV)
lineplot2.set_xticks(crime_count_by_year["YEAR"])
# set the title and axis labels so the figure can be read on its own
lineplot2.set(
    title="Number of Crimes per Year",
    xlabel="Year",
    ylabel="Number of Crimes",
)
# render the figure; ending the cell with show() keeps the return value of
# the previous call out of the cell output
plt.show()