We're going to start by grabbing the geometry for the Austin community area.

In [1]:
import requests
from shapely.geometry import shape, Point

# Download the community-area boundaries as GeoJSON and pick out the feature
# whose "community" property is AUSTIN.
r = requests.get('https://data.cityofchicago.org/api/geospatial/cauq-8yn6?method=export&format=GeoJSON')
# Stop on an HTTP error here instead of failing later while parsing the body.
r.raise_for_status()

austin = None
for feature in r.json()['features']:
    if feature['properties']['community'] == 'AUSTIN':
        austin = feature

# Without this check a missing feature would only show up as a NameError on
# the line that builds the polygon.
if austin is None:
    raise ValueError('No AUSTIN community area found in the boundaries GeoJSON')

# shapely geometry built from the GeoJSON feature; the point-in-polygon
# checks further down call poly.contains().
poly = shape(austin['geometry'])

Now let's get the shootings and homicides data.

In [88]:
import os

def get_data(table):
    """Download one table as parsed JSON.

    The request URL is the value of the ``NEWSROOMDB_URL`` environment
    variable with ``table/json/<table>`` appended directly; no separator is
    inserted between the two parts.

    Args:
        table: Name of the table to fetch, e.g. ``'shootings'``.

    Returns:
        The decoded JSON body of the response.

    Raises:
        KeyError: If ``NEWSROOMDB_URL`` is not set in the environment.
        requests.exceptions.HTTPError: If the server returns an error status.
    """
    r = requests.get('%stable/json/%s' % (os.environ['NEWSROOMDB_URL'], table))
    # Surface 4xx/5xx responses instead of trying to JSON-decode an error page.
    r.raise_for_status()
    return r.json()

# Fetch both tables (shootings first, then homicides) as parsed JSON.
shootings, homicides = get_data('shootings'), get_data('homicides')

Now let's iterate through the shootings, generate shapely points and check to see if they're in the geometry we care about.

In [90]:
shootings_ca = []

for row in shootings:
    if not row['Geocode Override']:
        continue
    points = row['Geocode Override'][1:-1].split(',')
    if len(points) != 2:
        continue
    point = Point(float(points[1]), float(points[0]))
    row['point'] = point
    if poly.contains(point):
        shootings_ca.append(row)

print 'Found %d shootings in this community area' % len(shootings_ca)
for f in shootings_ca:
    print f['Date'], f['Time'],  f['Age'], f['Sex'], f['Shooting Location']

Found 907 shootings in this community area
2011-09-25 16:30 22 M 1300 N. Mayfield Avenue, Chicago, IL
2011-12-18 20:00 19 F 1159 N. Parkside Avenue, Chicago, IL
2011-12-21 04:20 46 M 101 N. Pine Avenue, Chicago, IL
2011-12-24 21:29 20 M 5501 W. Bloomingdale Avenue, Chicago, IL
2011-12-29 20:30 41 M 1201 N. Austin Boulevard, Chicago, IL
2012-01-31 21:48 17 M 5846 W. Augusta Boulevard, Chicago, IL
2012-03-03 23:28 20 M 5331 W. Congress Parkway, Chicago, IL
2012-03-11 01:45 15 M 958 N. Massasoit Avenue, Chicago, IL
2012-03-11 01:45 14 M 958 N. Massasoit Avenue, Chicago, IL
2012-03-15 18:18 21 M 4945 W. Ferdinand Street, Chicago, IL
2012-03-15 19:42 15 M 5522 W. Monroe Street, Chicago, IL
2012-03-25 16:45 22 M 1501 N. Lockwood Avenue, Chicago, IL
2012-06-01 22:58 30 M 5045 W. Congress Parkway, Chicago, IL
2012-06-02 21:31 28 M 5301 W. Monroe Street, Chicago, IL
2012-06-03 01:15 20 M 5733 W. Bloomingdale Avenue, Chicago, IL
2012-06-05 14:14 22 M 1055 N. Long Avenue, Chicago, IL
2012-06-15 2

Let's do something similar with homicides. It's exactly the same, in fact, but a few field names are different.

In [89]:
homicides_ca = []
years = {}

for row in homicides:
    if not row['Geocode Override']:
        continue
    points = row['Geocode Override'][1:-1].split(',')
    if len(points) != 2:
        continue
    point = Point(float(points[1]), float(points[0]))
    row['point'] = point
    if poly.contains(point):
        homicides_ca.append(row)

print 'Found %d homicides in this community area' % len(homicides_ca)
for f in homicides_ca:
    print f['Occ Date'], f['Occ Time'],  f['Age'], f['Sex'], f['Address of Occurrence']
    if not f['Occ Date']:
        continue
    dt = datetime.strptime(f['Occ Date'], '%Y-%m-%d')
    if dt.year not in years:
        years[dt.year] = 0
    years[dt.year] += 1
print years

Found 134 homicides in this community area
2013-02-24 07:30 31 M 124 S. Cicero Avenue, Chicago, IL
2013-01-18 21:10 21 M 5500 W. North Avenue, Chicago, IL
2013-02-11 23:20 22 M 5046 W. Ohio Street, Chicago, IL
2015-10-19 19:25 20 M W Jackson Blvd & S Cicero Ave, Chicago, IL 60644
2015-10-26 1:30 29 M 4803 W Madison St, Chicago, IL 60644
2013-04-19 19:45 37 M 1106 N. Menard Avenue, Chicago, IL
2013-05-17 20:05 40 M 225 N. Mayfield Avenue, Chicago, IL
2013-05-18 03:55 27 M 4821 W. Iowa Street, Chicago, IL
2014-05-16 4:46 38 M 5152 W Concord Pl, Chicago, IL 60639
2014-08-18 23:35 27 M 802 S Central Ave, Chicago, IL 60644
2014-08-24 2:00 33 M 5029 W Potomac Ave, Chicago, IL 60651
2015-09-17 20:40 28 M 224 S Laramie Ave, Chicago, IL 60644
2015-09-18 22:55 30 M 431 S Central Ave, Chicago, IL 60644
2015-10-09 12:43 15 M 4927 W Congress Pkwy, Chicago, IL 60644
2015-10-23 20:45 43 M 5319 W North Ave, Chicago, IL 60639
2016-03-10 4:50 37 M 4601 W Monroe St, Chicago, IL 60644
2013-09-27 01:20 45 

Now let's see how many homicides we can associate with shootings. We'll say that if the locations are within five meters of each other and the date and time of the shooting are within 10 minutes of the homicide, they're the same incident.

In [12]:
import pyproj
from datetime import datetime, timedelta

geod = pyproj.Geod(ellps='WGS84')
associated = []

for homicide in homicides_ca:
    if not homicide['Occ Time']:
        homicide['Occ Time'] = '00:01'
    if not homicide['Occ Date']:
        homicide['Occ Date'] = '2000-01-01'
    homicide_dt = datetime.strptime('%s %s' % (homicide['Occ Date'], homicide['Occ Time']), '%Y-%m-%d %H:%M')
    for shooting in shootings_ca:
        if not shooting['Time']:
            shooting['Time'] = '00:01'
        if not shooting['Time']:
            shooting['Time'] = '2000-01-01'
        shooting_dt = datetime.strptime('%s %s' % (shooting['Date'], shooting['Time']), '%Y-%m-%d %H:%M')
        diff = homicide_dt - shooting_dt
        seconds = divmod(diff.days * 86400 + diff.seconds, 60)[0]
        if abs(seconds) <= 600:
            angle1, angle2, distance = geod.inv(
                homicide['point'].x, homicide['point'].y, shooting['point'].x, shooting['point'].y)
            if distance < 5:
                associated.append((homicide, shooting))
                break
print len(associated)

88


In [20]:
years = {}

for homicide in homicides:
    if not homicide['Occ Date']:
        continue
    dt = datetime.strptime(homicide['Occ Date'], '%Y-%m-%d')
    if dt.year not in years:
        years[dt.year] = 0
    years[dt.year] += 1

print years

{2016: 128, 1988: 1, 1992: 1, 1993: 1, 2000: 1, 2003: 1, 2004: 1, 2009: 2, 2010: 1, 2011: 1, 2012: 3, 2013: 443, 2014: 443, 2015: 488}


In [91]:
from csv import DictWriter
from ftfy import fix_text, guess_bytes

for idx, row in enumerate(shootings_ca):
    if 'point' in row.keys():
        del row['point']
    for key in row:
        #print idx, key, row[key]
        if type(row[key]) is str:
            #print row[key]
            row[key] = fix_text(row[key].replace('\xa0', '').decode('utf8'))

for idx, row in enumerate(homicides_ca):
    if 'point' in row.keys():
        del row['point']
    for key in row:
        #print idx, key, row[key]
        if type(row[key]) is str:
            #print row[key]
            row[key] = row[key].decode('utf8')


with open('/Users/abrahamepton/Documents/austin_shootings.csv', 'w+') as fh:
    writer = DictWriter(fh, sorted(shootings_ca[0].keys()))
    writer.writeheader()
    for row in shootings_ca:
        try:
            writer.writerow(row)
        except:
            print row

with open('/Users/abrahamepton/Documents/austin_homicides.csv', 'w+') as fh:
    writer = DictWriter(fh, sorted(homicides_ca[0].keys()))
    writer.writeheader()
    for row in homicides_ca:
        try:
            writer.writerow(row)
        except:
            print row