In [84]:
# import dependencies

import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
from dateutil.parser import parse


In [102]:
# read in BPD Victim-Based Crime Data CSV into a pandas dataframe

raw_data = pd.read_csv("BPD_Part_1_Victim_Based_Crime_Data.csv")


In [104]:
# do an initial exploration of the dataframe

raw_data.head()
# raw_data.count()
# raw_data.dtypes

Unnamed: 0,CrimeDate,CrimeTime,CrimeCode,Location,Description,Inside/Outside,Weapon,Post,District,Neighborhood,Longitude,Latitude,Location 1,Premise,vri_name1,Total Incidents
0,08/10/2019,02:00:00,6D,1100 N EUTAW ST,LARCENY FROM AUTO,,,132.0,CENTRAL,MADISON PARK,-76.624263,39.302487,,,,1
1,08/10/2019,02:20:00,5C,3300 E LOMBARD ST,BURGLARY,O,,222.0,SOUTHEAST,BALTIMORE-LINWOOD,-76.570028,39.291473,,STREET,,1
2,08/10/2019,22:00:00,5C,800 MC ALEER CT,BURGLARY,I,,312.0,EASTERN,OLDTOWN,-76.605627,39.300215,,ROW/TOWNHOUSE-OCC,,1
3,08/10/2019,19:47:00,5B,1100 SOMERSET ST,BURGLARY,I,,311.0,EASTERN,JOHNSTON SQUARE,-76.601222,39.303707,,ROW/TOWNHOUSE-OCC,,1
4,08/10/2019,05:00:00,4E,1300 PENTRIDGE RD,COMMON ASSAULT,O,,413.0,NORTHEAST,NEW NORTHWOOD,-76.593203,39.347872,,STREET,,1


In [95]:
# convert date column from string to datetime

raw_data['CrimeDate']= pd.to_datetime(raw_data['CrimeDate']) 


In [94]:
# convert time column from string to datetime

raw_data['CrimeTime']= pd.to_datetime(raw_data['CrimeTime']) 
raw_data.dtypes


CrimeDate          datetime64[ns]
CrimeTime          datetime64[ns]
CrimeCode                  object
Location                   object
Description                object
Inside/Outside             object
Weapon                     object
Post                      float64
District                   object
Neighborhood               object
Longitude                 float64
Latitude                  float64
Location 1                float64
Premise                    object
vri_name1                  object
Total Incidents             int64
dtype: object

In [96]:
# identify oldest crime data point

raw_data["CrimeDate"].min()


Timestamp('1963-10-30 00:00:00')

In [98]:
# filter data to just include 2018 crimes & preview results by neighborhood

recent_year_data = raw_data[(raw_data["CrimeDate"] >= '2018-01-01 00:00:00') &
                            (raw_data["CrimeDate"] <= '2018-12-31 00:00:00')]
recent_year_data['Neighborhood'].value_counts()


DOWNTOWN                         1635
FRANKFORD                        1084
BELAIR-EDISON                    1042
BROOKLYN                          947
CANTON                            843
INNER HARBOR                      759
CHERRY HILL                       734
SANDTOWN-WINCHESTER               664
COLDSTREAM HOMESTEAD              603
CHARLES VILLAGE                   560
WASHINGTON VILLAGE                538
MOUNT VERNON                      534
BALTIMORE-LINWOOD                 528
FELLS POINT                       520
UPTON                             504
MORRELL PARK                      501
MCELDERRY PARK                    483
MIDDLE EAST                       476
BROADWAY EAST                     450
HARFORD-ECHODALE-PER              439
CARROLLTON RIDGE                  435
HAMPDEN                           434
RESERVOIR HILL                    422
REISTERSTOWN STATION              411
LAKELAND                          399
MONDAWMIN                         398
ELLWOOD PARK

In [None]:
# create buckets for each decade


In [108]:
# for comparison, review the number of districts included in the dataset

recent_year_data["District"].value_counts()


SOUTHEAST    7135
NORTHEAST    7051
CENTRAL      5619
SOUTHERN     5584
SOUTHWEST    5363
NORTHERN     5001
NORTHWEST    4514
EASTERN      4296
WESTERN      3787
UNKNOWN        15
Name: District, dtype: int64

In [135]:
# drop any columns not relevant to our analysis

recent_data_narrowed = recent_year_data.drop(columns=['Location 1',
                                           'vri_name1', 
                                           'Total Incidents', 
                                           'Post'])
recent_data_narrowed.head()

Unnamed: 0,CrimeDate,CrimeTime,CrimeCode,Location,Description,Inside/Outside,Weapon,District,Neighborhood,Longitude,Latitude,Premise
27387,2018-12-31,2019-08-20 12:45:00,4E,5000 PARK HEIGHTS AVE,COMMON ASSAULT,O,,NORTHWEST,CENTRAL PARK HEIGHTS,-76.673717,39.347617,PARKING LOT-OUTSIDE
27388,2018-12-31,2019-08-20 12:42:00,6G,1700 EUTAW PL,LARCENY,I,,CENTRAL,BOLTON HILL,-76.629898,39.30728,SPECIALTY STORE
27389,2018-12-31,2019-08-20 22:07:00,3AK,100 S MONROE ST,ROBBERY - STREET,O,KNIFE,SOUTHERN,CARROLLTON RIDGE,-76.646849,39.286096,STREET
27390,2018-12-31,2019-08-20 13:30:00,5D,2300 ORLEANS ST,BURGLARY,I,,SOUTHEAST,PATTERSON PLACE,-76.584173,39.295517,OTHER - INSIDE
27391,2018-12-31,2019-08-20 18:00:00,4E,100 W 29TH ST,COMMON ASSAULT,I,,NORTHERN,CHARLES VILLAGE,-76.619453,39.32307,APT/CONDO - OCCUPIED


In [139]:
# group data by crime and then neighborhood
# use output to create new dataframe

recent_data = recent_data_narrowed.groupby(['Neighborhood', 'Description'], as_index=False).count()
crimes_2018 = pd.DataFrame(recent_data)
crimes_2018


Unnamed: 0,Neighborhood,Description,CrimeDate,CrimeTime,CrimeCode,Location,Inside/Outside,Weapon,District,Longitude,Latitude,Premise
0,4X4,AGG. ASSAULT,8,8,8,8,7,8,8,8,8,7
1,4X4,ARSON,1,1,1,1,1,1,1,1,1,1
2,4X4,AUTO THEFT,7,7,7,5,7,0,7,6,6,7
3,4X4,BURGLARY,14,14,14,14,13,0,14,14,14,13
4,4X4,COMMON ASSAULT,14,14,14,14,10,0,14,14,14,10
5,4X4,LARCENY,11,11,11,11,10,0,11,11,11,10
6,4X4,LARCENY FROM AUTO,7,7,7,7,6,0,7,7,7,6
7,4X4,ROBBERY - RESIDENCE,2,2,2,2,1,1,2,2,2,1
8,4X4,ROBBERY - STREET,2,2,2,2,2,1,2,2,2,2
9,ABELL,AGG. ASSAULT,11,11,11,11,9,11,11,11,11,9
