In [1]:
# import dependencies

import json
import os
from datetime import datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from dateutil.parser import parse


In [2]:
# load the BPD Part 1 victim-based crime CSV (expected next to this notebook)
# into a pandas dataframe

raw_data = pd.read_csv(filepath_or_buffer="BPD_Part_1_Victim_Based_Crime_Data.csv")


In [3]:
# first look at the dataframe: show the leading five rows
# (other quick checks: raw_data.count() for non-null counts per column,
#  raw_data.dtypes for the column types)

raw_data.head(n=5)

Unnamed: 0,CrimeDate,CrimeTime,CrimeCode,Location,Description,Inside/Outside,Weapon,Post,District,Neighborhood,Longitude,Latitude,Location 1,Premise,vri_name1,Total Incidents
0,08/10/2019,02:00:00,6D,1100 N EUTAW ST,LARCENY FROM AUTO,,,132.0,CENTRAL,MADISON PARK,-76.624263,39.302487,,,,1
1,08/10/2019,02:20:00,5C,3300 E LOMBARD ST,BURGLARY,O,,222.0,SOUTHEAST,BALTIMORE-LINWOOD,-76.570028,39.291473,,STREET,,1
2,08/10/2019,22:00:00,5C,800 MC ALEER CT,BURGLARY,I,,312.0,EASTERN,OLDTOWN,-76.605627,39.300215,,ROW/TOWNHOUSE-OCC,,1
3,08/10/2019,19:47:00,5B,1100 SOMERSET ST,BURGLARY,I,,311.0,EASTERN,JOHNSTON SQUARE,-76.601222,39.303707,,ROW/TOWNHOUSE-OCC,,1
4,08/10/2019,05:00:00,4E,1300 PENTRIDGE RD,COMMON ASSAULT,O,,413.0,NORTHEAST,NEW NORTHWOOD,-76.593203,39.347872,,STREET,,1


In [4]:
# convert date column from string to datetime
#
# The preview rows show dates as MM/DD/YYYY (e.g. 08/10/2019). The format is
# given explicitly so parsing never depends on per-value inference or on the
# day-first/month-first default; a value that does not match raises instead of
# being silently misread.

raw_data['CrimeDate'] = pd.to_datetime(raw_data['CrimeDate'], format='%m/%d/%Y')


In [5]:
# convert time column from string to datetime
#
# The preview rows show times as HH:MM:SS. Without an explicit format,
# pd.to_datetime attaches the date on which the notebook happens to run to each
# time-of-day, so the column differs from one run to the next. With the format
# pinned, every value gets the same fixed date (1900-01-01) and the dtype stays
# datetime64[ns].
# NOTE(review): any value that is not HH:MM:SS will now raise - confirm the
# full file has no other time encodings.

raw_data['CrimeTime'] = pd.to_datetime(raw_data['CrimeTime'], format='%H:%M:%S')
raw_data.dtypes


CrimeDate          datetime64[ns]
CrimeTime          datetime64[ns]
CrimeCode                  object
Location                   object
Description                object
Inside/Outside             object
Weapon                     object
Post                      float64
District                   object
Neighborhood               object
Longitude                 float64
Latitude                  float64
Location 1                float64
Premise                    object
vri_name1                  object
Total Incidents             int64
dtype: object

In [7]:
# keep only rows whose CrimeDate falls in 2018, then preview how many of those
# rows each neighborhood has
#
# between() is inclusive on both ends; the upper bound is midnight of Dec 31,
# so rows on that date are kept only when they carry no time-of-day component.

recent_year_data = raw_data[
    raw_data["CrimeDate"].between('2018-01-01 00:00:00', '2018-12-31 00:00:00')
]
recent_year_data['Neighborhood'].value_counts()


DOWNTOWN                         1635
FRANKFORD                        1084
BELAIR-EDISON                    1042
BROOKLYN                          947
CANTON                            843
INNER HARBOR                      759
CHERRY HILL                       734
SANDTOWN-WINCHESTER               664
COLDSTREAM HOMESTEAD              603
CHARLES VILLAGE                   560
WASHINGTON VILLAGE                538
MOUNT VERNON                      534
BALTIMORE-LINWOOD                 528
FELLS POINT                       520
UPTON                             504
MORRELL PARK                      501
MCELDERRY PARK                    483
MIDDLE EAST                       476
BROADWAY EAST                     450
HARFORD-ECHODALE-PER              439
CARROLLTON RIDGE                  435
HAMPDEN                           434
RESERVOIR HILL                    422
REISTERSTOWN STATION              411
LAKELAND                          399
MONDAWMIN                         398
ELLWOOD PARK

In [36]:
# filter data to just include the top N neighborhoods in terms of number of crimes
#
# The neighborhood names are taken from the per-neighborhood counts instead of
# being typed in by hand, so the selection follows the data. value_counts()
# returns counts in descending order and skips missing neighborhoods, so the
# first N index labels are the N busiest neighborhoods.
# Note: despite its name, recent_year_sorted_data is filtered, not sorted.

TOP_N_NEIGHBORHOODS = 5  # raise to 10 for the wider comparison

top_neighborhoods = (
    recent_year_data['Neighborhood']
    .value_counts()
    .head(TOP_N_NEIGHBORHOODS)
    .index
)

recent_year_sorted_data = recent_year_data[
    recent_year_data['Neighborhood'].isin(top_neighborhoods)
]

recent_year_sorted_data['Neighborhood'].value_counts()

DOWNTOWN         1635
FRANKFORD        1084
BELAIR-EDISON    1042
BROOKLYN          947
CANTON            843
Name: Neighborhood, dtype: int64

In [37]:
# filter data to just include top 10 neighborhoods in terms of number of crimes
#
# NOTE: this cell is intentionally disabled - every line below is commented
# out, so running it does nothing. Uncommenting it would reassign
# recent_year_sorted_data (the same name the top-5 filter assigns), switching
# all later cells from 5 neighborhoods to 10.

# recent_year_sorted_data = recent_year_data[(recent_year_data["Neighborhood"] == 'DOWNTOWN') |
#                             (recent_year_data["Neighborhood"] == 'FRANKFORD') |
#                             (recent_year_data["Neighborhood"] == 'BELAIR-EDISON') |
#                             (recent_year_data["Neighborhood"] == 'BROOKLYN') |
#                             (recent_year_data["Neighborhood"] == 'CANTON') |
#                             (recent_year_data["Neighborhood"] == 'INNER HARBOR') |
#                             (recent_year_data["Neighborhood"] == 'CHERRY HILL') |
#                             (recent_year_data["Neighborhood"] == 'SANDTOWN-WINCHESTER') |
#                             (recent_year_data["Neighborhood"] == 'COLDSTREAM HOMESTEAD') |
#                             (recent_year_data["Neighborhood"] == 'CHARLES VILLAGE')]

# recent_year_sorted_data['Neighborhood'].value_counts()


In [38]:
# remove every column this exploration does not use, then attach a constant
# NumCrimes column (value 1 on each row) for the later per-group count

recent_data_narrowed = (
    recent_year_sorted_data
    .drop(columns=[
        'CrimeDate', 'CrimeTime', 'CrimeCode',
        'Location', 'Location 1', 'District',
        'vri_name1', 'Longitude', 'Latitude',
        'Total Incidents', 'Inside/Outside', 'Weapon',
        'Post', 'Premise',
    ])
    .assign(NumCrimes=1)
)
recent_data_narrowed.head()

Unnamed: 0,Description,Neighborhood,NumCrimes
27394,LARCENY FROM AUTO,BELAIR-EDISON,1
27396,LARCENY,DOWNTOWN,1
27398,COMMON ASSAULT,DOWNTOWN,1
27413,LARCENY FROM AUTO,CANTON,1
27425,LARCENY,BROOKLYN,1


In [39]:
# relabel each crime description with one of the 5 categories used for the
# heatmaps; the mapping is written per category so each group's members are
# visible together

replace_values = {
    **dict.fromkeys(['COMMON ASSAULT',
                     'AGG. ASSAULT',
                     'RAPE',
                     'ROBBERY - COMMERCIAL',
                     'ROBBERY - RESIDENCE',
                     'ROBBERY - CARJACKING',
                     'ROBBERY - STREET'], 'cr_ag_person'),
    **dict.fromkeys(['BURGLARY', 'ARSON'], 'cr_ag_prop'),
    **dict.fromkeys(['AUTO THEFT', 'LARCENY FROM AUTO', 'LARCENY'], 'larceny'),
    **dict.fromkeys(['SHOOTING'], 'shooting'),
    **dict.fromkeys(['HOMICIDE'], 'homicide'),
}

# descriptions that are not keys of the mapping are left unchanged
recent_data_narrowed = recent_data_narrowed.replace(to_replace={'Description': replace_values})

In [40]:
# group data by neighborhood and crime type (Description), counting the rows
# in each (Neighborhood, Description) pair; the keys stay as regular columns

crimes = (
    recent_data_narrowed
    .groupby(by=['Neighborhood', 'Description'], as_index=False)
    .count()
)
crimes_by_neighborhood = pd.DataFrame(data=crimes)
crimes_by_neighborhood


Unnamed: 0,Neighborhood,Description,NumCrimes
0,BELAIR-EDISON,cr_ag_person,413
1,BELAIR-EDISON,cr_ag_prop,118
2,BELAIR-EDISON,homicide,8
3,BELAIR-EDISON,larceny,488
4,BELAIR-EDISON,shooting,15
5,BROOKLYN,cr_ag_person,352
6,BROOKLYN,cr_ag_prop,208
7,BROOKLYN,homicide,10
8,BROOKLYN,larceny,365
9,BROOKLYN,shooting,12


In [41]:
# export to csv
#
# The destination is resolved from the current user's home directory rather
# than a hard-coded /Users/<account> prefix, so the cell is not tied to one
# account name. The directory must already exist; to_csv does not create it.
# The file name is kept exactly as before (it has no ".csv" suffix).

OUTPUT_DIR = os.path.expanduser('~/Desktop/Project/Baltimore_website/assets/db')
output_path = os.path.join(OUTPUT_DIR, 'crimes_by_neighborhood_csv')

# to_csv returns None when given a path, so its result is not bound to a name;
# index=False leaves the row index out of the file, header=True writes column names
crimes_by_neighborhood.to_csv(output_path, index=False, header=True)

