# COVID and Crime Analysis
## Understanding the impact of COVID and Lockdown on Street Crime in 2020
This notebook looks at street crime data for London in 2020 to understand what impact the COVID-19 pandemic and the subsequent lockdowns may have had on reported crime.

> Note: This is a work in progress.

![](https://www.notion.so/image/https%3A%2F%2Fs3-us-west-2.amazonaws.com%2Fsecure.notion-static.com%2F4f06a8f1-d0f4-43d3-bda6-452b5e7dc3cd%2FCharts-100721.png?table=block&id=b27605c8-8f25-47ee-bdea-a5839282b77b&spaceId=2211d575-debd-4659-b52c-32746d4222d1&width=4320&userId=96fcdb4b-dc90-48e3-9a12-7ecb7a9febac&cache=v2)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from glob import glob

# Raise pandas' display limits (rows, columns, line width) for notebook output.
pd.options.display.max_rows = 500
pd.options.display.max_columns = 500
pd.options.display.width = 1000

# Locate the '*metropolitan-street.csv' extracts; sorting the paths keeps the
# load order (and therefore the row order) deterministic.
data = sorted(glob('../input/covid-and-crime/*metropolitan-street.csv'))
if not data:
    # Without this check pd.concat fails with the opaque
    # "No objects to concatenate".
    raise ValueError(
        "No '*metropolitan-street.csv' files found under ../input/covid-and-crime/"
    )

# Read only the three columns the analysis uses, instead of loading every
# column (plus a per-row filename) and dropping them afterwards.
# NOTE(review): 'Crime type' is the column name in the data.police.uk street
# CSV schema; the previous code renamed that column by position - confirm
# against the input files.
crimes = pd.concat(
    (pd.read_csv(file, usecols=['Month', 'LSOA name', 'Crime type'])
     for file in data),
    ignore_index=True,
)

# By default, the Month column contains the year and month in the format
# yyyy-mm. Create a new Year column from the first four characters, then
# update the Month column so that it only contains the month value.
# Both columns stay as strings.
crimes['Year'] = crimes['Month'].str.slice(0, 4)
crimes['Month'] = crimes['Month'].str.slice(5)

# Split the 'LSOA name' column on its last whitespace using rpartition().
# rpartition results in a dataframe ('result') comprising 3 columns:
# [0] the region, [1] the separator, [2] the LSOA code.
# Use column [0] to create a new Region column.
result = crimes['LSOA name'].str.rpartition()
crimes['Region'] = result[0]

# Rename by label rather than assigning a positional list to crimes.columns,
# so a change in column order cannot silently mislabel the data.
crimes = crimes.rename(columns={'Crime type': 'Crime_Type'})

# Keep only the final columns, in display order. Label-based selection raises
# a KeyError if a column is missing, whereas reindex() would quietly add an
# all-NaN column.
neworder = ['Region', 'Year', 'Month', 'Crime_Type']
crimes = crimes[neworder]


In [None]:
# Understanding the Missing Data / NaN Values
# crimes.shape # Reports 1,181,922 rows

with_region = crimes.Region.notna().sum()
without_region = crimes.Region.isna().sum()

# Use the row count as the denominator. The two counts above cover every row,
# so dividing by the number of non-null Crime_Type values (as before) would
# make the percentages inconsistent if any Crime_Type were missing.
total_crimes = len(crimes)

percentage_missing = without_region / total_crimes
percentage_complete = with_region / total_crimes

fstring = (
    f'Total Crimes: {total_crimes:,}\n'
    f'Crimes with region: {with_region:,}\n'
    f'Crimes missing region: {without_region:,}\n'
    f'Percentage missing Region: {percentage_missing:.2%}\n'
    f'Percentage with Region: {percentage_complete:.2%}'
)
print(fstring)

In [None]:
# Rank the regions by number of recorded crimes, highest first.
# The 33 highest-ranked regions will (should) be the London Boroughs.
region_counts = crimes.groupby('Region')['Region'].count()
top_regions = region_counts.sort_values(ascending=False).iloc[:33]
boroughs = list(top_regions.index)

# Build the membership mask once, then split the data into Met crimes
# (region is one of the boroughs) and all other rows.
in_borough = crimes['Region'].isin(boroughs)
met_crimes = crimes.loc[in_borough]
non_met_crimes = crimes.loc[~in_borough]

In [None]:
year = '2020'
crime = 'Anti-social behaviour'

# Count the selected crime type per (Region, Month) for the selected year,
# returned as a flat frame with columns Region, Month, Count.
selected = (met_crimes['Year'] == year) & (met_crimes['Crime_Type'] == crime)
plot_data = (
    met_crimes[selected]
    .groupby(['Region', 'Month'])['Crime_Type']
    .count()
    .reset_index(name='Count')
)
plot_data.head()

In [None]:
# Wide table of counts: one row per Month, one column per Region.
df = plot_data.set_index(['Month', 'Region'])['Count'].unstack('Region')
df.head()

In [None]:
# Wide table of counts: one row per Region, one column per Month.
df2 = plot_data.set_index(['Region', 'Month'])['Count'].unstack('Month')
df2.head()

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Apply the seaborn theme BEFORE any axes exist: style settings only affect
# axes created afterwards. Previously plt.xticks()/plt.yticks() ran first and
# implicitly created the axes, so the theme was set too late for this chart.
# A single call replaces the former set(font_scale=10) / set_theme() /
# set_context() sequence, in which each call overrode the one before it.
sns.set_theme(context="paper", style="ticks")
plt.rcParams['figure.figsize'] = (25, 20)

# Wide-form input: one line per column of df, plotted against df's index.
# The palette is passed here because the earlier bare
# sns.color_palette("Spectral", as_cmap=True) call discarded its result
# and therefore had no effect on the chart.
mychart = sns.lineplot(data=df, palette="Spectral", linewidth=1.5, dashes=False)
mychart.set_title(f'{crime} by Month ({year})', fontsize=25)
mychart.set_xlabel("Month", fontsize=25)
mychart.set_ylabel("Count", fontsize=25)

# Size the tick labels on the axes that were actually drawn.
mychart.tick_params(axis='both', labelsize=20)

# loc=2 is 'upper left'; the anchor places the legend just inside that corner.
mychart.legend(loc=2, bbox_to_anchor=(0.001, 1), fontsize=13, shadow=True)

plt.savefig('lineplot.png')

In [None]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Apply the seaborn theme BEFORE any axes exist: style settings only affect
# axes created afterwards. Previously plt.xticks()/plt.yticks() ran first and
# implicitly created the axes, so the "darkgrid" style was set too late for
# this chart. A single call replaces the former set(font_scale=10) /
# set_theme() / set_context() sequence, in which each call overrode the one
# before it.
sns.set_theme(context="paper", style="darkgrid")
plt.rcParams['figure.figsize'] = (25, 20)

# Wide-form input: one box per column of df2.
mychart = sns.boxplot(data=df2, palette='Paired')
mychart.set_title(f'{crime} by Month ({year})', fontsize=25)
mychart.set_xlabel("Month", fontsize=25)
mychart.set_ylabel("Count", fontsize=25)

# Size the tick labels on the axes that were actually drawn.
mychart.tick_params(axis='both', labelsize=20)

plt.savefig('boxplot.png')