In [2]:
import os

import pandas as pd # dataframe

import geopandas as gpd
from geopandas.tools import sjoin
from geopandas import GeoDataFrame
from shapely.geometry import Point

In [3]:
def getPolyCoords(row, geom, coord_type):
    """Return one axis of a polygon's exterior ring as a plain list.

    Parameters
    ----------
    row : mapping-like
        Object indexed with ``geom`` to obtain the polygon (e.g. a DataFrame row).
    geom : hashable
        Key under which the polygon is stored in ``row``.
    coord_type : str
        ``'x'`` selects the first coordinate sequence of the exterior,
        ``'y'`` the second.

    Returns
    -------
    list or None
        The selected coordinates; ``None`` when ``coord_type`` is neither
        ``'x'`` nor ``'y'``.
    """
    # The exterior is looked up first, regardless of the requested axis.
    ring = row[geom].exterior

    # Position in `coords.xy` follows the label order: 0 -> x, 1 -> y.
    for axis, label in enumerate(('x', 'y')):
        if coord_type == label:
            return list(ring.coords.xy[axis])

In [4]:
# Raise the column display limit so wide frames are shown without truncation.
pd.set_option('display.max_columns', 999)

# Work from the project folder so the relative file names below resolve.
os.chdir('/Users/pbd28/OneDrive/Programming/Python/PhillyCrime')

# Raw incident records, one row per incident.
df = pd.read_csv('incidents_part1_part2.csv', sep=',')

In [5]:
# Discard the columns this analysis does not need, to save memory.
columns = [
    'the_geom',
    'the_geom_webmercator',
    'psa',
    'dc_key',
    'ucr_general',
    'dispatch_date',
    'dispatch_time',
]
# In-place drop: running this cell a second time raises because the columns are gone.
df.drop(labels=columns, axis=1, inplace=True)

In [6]:
# Parse the dispatch timestamp once, then derive the calendar parts from it
# so the data can be analysed over time.
dispatch_ts = pd.to_datetime(df['dispatch_date_time'])

df['dispatch_date_time'] = dispatch_ts
df['Month'] = dispatch_ts.dt.month
df['Year'] = dispatch_ts.dt.year

## Categorize the Data
Many of these crime types are quite similar, so let's group the similar ones together. Our crime-type categories will be: Violent_Crime, Theft, and Other.

In [7]:
# Maps each incident description in 'text_general_code' to one of the three
# analysis categories: 'Theft', 'Violent_Crime' or 'Other'.
# Entries are grouped by category; the duplicated 'Homicide - Criminal' key
# from the earlier version is listed only once (the mapping is unchanged).
#
# NOTE(review): the robbery codes are filed under 'Theft' and 'Rape' under
# 'Other' -- confirm this grouping is intended before drawing conclusions
# about violent crime from it.
crimes_dict = {
              # --- Theft ---
              'Vandalism/Criminal Mischief': 'Theft',
              'Robbery Firearm': 'Theft',
              'Robbery No Firearm': 'Theft',
              'Thefts': 'Theft',
              'Theft from Vehicle': 'Theft',
              'Motor Vehicle Theft': 'Theft',
              'Recovered Stolen Motor Vehicle': 'Theft',
              'Fraud': 'Theft',
              'Forgery and Counterfeiting': 'Theft',
              'Burglary Residential': 'Theft',
              'Burglary Non-Residential': 'Theft',
              # --- Violent_Crime ---
              'Other Assaults': 'Violent_Crime',
              'Aggravated Assault No Firearm': 'Violent_Crime',
              'Aggravated Assault Firearm': 'Violent_Crime',
              'Weapon Violations': 'Violent_Crime',
              'Homicide - Criminal': 'Violent_Crime',
              'Homicide - Justifiable': 'Violent_Crime',
              'Homicide - Gross Negligence': 'Violent_Crime',
              # --- Other ---
              'Disorderly Conduct': 'Other',
              'All Other Offenses': 'Other',
              'Narcotic / Drug Law Violations': 'Other',
              'Vagrancy/Loitering': 'Other',
              'Other Sex Offenses (Not Commercialized)': 'Other',
              'Arson': 'Other',
              'Rape': 'Other',
              'Liquor Law Violations': 'Other',
              'Public Drunkenness': 'Other',
              'Embezzlement': 'Other',
              'Prostitution and Commercialized Vice': 'Other',
              'Receiving Stolen Property': 'Other',
              'Offenses Against Family and Children': 'Other',
              'Gambling Violations': 'Other',
              'DRIVING UNDER THE INFLUENCE': 'Other',
              }

# Series.map leaves NaN for any code that is missing from crimes_dict, so an
# unlisted or differently spelled code ends up without a category.
df['Crime_Category'] = df['text_general_code'].map(crimes_dict)

In [8]:
# Give every incident a weight of 1, so summing 'Value' inside any group
# yields the number of incidents in that group.
df['Value'] = 1

# Number of incidents per crime category.
crime_category_count = df.groupby('Crime_Category')['Value'].sum()

In [9]:
# Load in shape file.
# The file is read by its full path instead of changing the working directory:
# a chdir into the shapefile folder would make the relative 'crimedata.csv'
# export further down land in that folder, while the notebook later reads the
# file back from the project folder.
shapefile_path = os.path.join(
    '/Users/pbd28/OneDrive/Programming/Python/PhillyCrime',
    'Shapefiles',
    'Neighborhoods_WGS84',
    'Neighborhoods_WGS84.shp',
)

neighborhoods = gpd.GeoDataFrame.from_file(shapefile_path)

In [10]:
# Turn the incident table into a GeoDataFrame; the spatial join against the
# neighborhood shapes needs point geometries.

# Rows without both coordinates cannot be placed on the map, so leave them out.
clean_geo_df = df.dropna(subset=['lat', 'lng'])

# One Point per remaining incident, built as (lng, lat).
geometry = list(map(Point, zip(clean_geo_df['lng'], clean_geo_df['lat'])))

# The raw coordinate columns are replaced by the geometry column.
clean_geo_df = clean_geo_df.drop(labels=['lng', 'lat'], axis=1)

crs = {'init': 'epsg:4326'}
geo_df = GeoDataFrame(clean_geo_df, crs=crs, geometry=geometry)

# Give the neighborhoods the same CRS as the points. This assigns the CRS
# attribute only; it does not reproject the shapes.
neighborhoods.crs = geo_df.crs

In [11]:
# Left spatial join: every incident point is kept and picks up the attributes
# of the neighborhood polygon it lies within (if any).
pointInPolys = sjoin(left_df=geo_df, right_df=neighborhoods, how='left', op='within')

In [12]:
# Copy Month/Year into 'month_'/'year_' before the original columns are dropped.
for source_col, target_col in (('Month', 'month_'), ('Year', 'year_')):
    pointInPolys[target_col] = pointInPolys[source_col]

# Columns removed from the joined table before it is exported.
columns = [
    'Month', 'Year', 'dc_dist', 'dispatch_date_time', 'location_block',
    'text_general_code', 'point_x', 'point_y', 'geometry',
    'index_right', 'NAME', 'MAPNAME', 'Shape_Leng', 'Shape_Area',
]
pointInPolys.drop(labels=columns, axis=1, inplace=True)

In [13]:
# Write the joined table to the current working directory, without the index column.
pointInPolys.to_csv(path_or_buf='crimedata.csv', index=False, encoding='utf-8')

In [20]:
# Switch to the project folder and read the exported join result back in.
# `df` is rebound here: from this point on it holds the contents of
# 'crimedata.csv' rather than the raw incident table.
os.chdir('/Users/pbd28/OneDrive/Programming/Python/PhillyCrime')
df = pd.read_csv(filepath_or_buffer='crimedata.csv')

In [21]:
# Number of non-null 'objectid' values per neighborhood ('LISTNAME'), as a
# two-column frame: LISTNAME, count.
# `df` is overwritten with the aggregate, so this cell cannot be run twice in a row.
df = (
    df.groupby('LISTNAME')['objectid']
    .count()
    .reset_index(name='count')
)

In [27]:
def print_full(x):
    """Print ``x`` with pandas' row limit lifted to ``len(x)``.

    The 'display.max_rows' option is raised only for the duration of the
    print. ``pd.option_context`` puts back whatever value was active before
    the call, and does so even when printing raises, so a failure cannot
    leave the option changed.

    Parameters
    ----------
    x : object supporting ``len()``
        Typically a DataFrame or Series.

    Returns
    -------
    None
        The text is written to standard output.
    """
    with pd.option_context('display.max_rows', len(x)):
        print(x)
# Print the per-neighborhood counts in full; print_full raises the row
# display limit to len(df) for this print.
print_full(df)

                        LISTNAME  count
0                Academy Gardens   3182
1                        Airport   3977
2                 Allegheny West  22991
3                        Andorra   1454
4               Aston-Woodbridge   2440
5                Bartram Village   4343
6                    Bella Vista   4373
7                        Belmont  11242
8                    Brewerytown  16627
9                     Bridesburg   6766
10                      Burholme   3104
11                     Bustleton  15620
12                       Byberry   1021
13                    Callowhill   8484
14                  Carroll Park  24500
15                    Cedar Park   9042
16                    Cedarbrook   8235
17              Center City East  16569
18                 Chestnut Hill   5781
19                     Chinatown   2615
20                     Clearview   2331
21                   Cobbs Creek  60706
22                 Crescentville   7805
23               Crestmont Farms    277
