Importing packages

In [6]:
import pandas as pd
import requests
import csv
from collections import Counter
from collections import defaultdict
from datetime import datetime

1. Importing data (last 12 months)

Getting response from City of Chicago open data API

In [7]:
API_endpoint = 'https://data.cityofchicago.org/resource/x2n5-8w5q.csv'
API_query = '?$select=date_of_occurrence,block,_primary_decsription,_secondary_description,_location_description,arrest,domestic'
URL = API_endpoint + API_query
response = requests.get(URL)

In [8]:
print(list(response))

[b'"date_of_occurrence","block","_primary_decsription","_secondary_description","_location_description","arrest","domestic"\n"2019-0', b'6-24T18:20:00.000","041XX S DREXEL BLVD","THEFT","$500 AND UNDER","RESIDENTIAL YARD (FRONT/BACK)","N","N"\n"2019-11-03T11:40:00.0', b'00","032XX N CLARK ST","THEFT","RETAIL THEFT","DEPARTMENT STORE","N","N"\n"2019-05-19T11:00:00.000","018XX S WOOD ST","DECEPTIVE ', b'PRACTICE","ILLEGAL USE CASH CARD","ATM (AUTOMATIC TELLER MACHINE)","N","N"\n"2019-10-04T06:10:00.000","004XX S LA SALLE ST","ASSA', b'ULT","SIMPLE","SIDEWALK","N","N"\n"2019-05-20T15:00:00.000","010XX N WINCHESTER AVE","THEFT","OVER $500","APARTMENT","N","N"\n"201', b'9-12-05T14:30:00.000","013XX W GLENLAKE AVE","THEFT","$500 AND UNDER","RESIDENCE PORCH/HALLWAY","N","N"\n"2019-07-01T20:10:00.000', b'","064XX N DAMEN AVE","THEFT","$500 AND UNDER","APARTMENT","N","N"\n"2019-06-24T18:24:00.000","077XX S PAULINA ST","BATTERY","SIM', b'PLE","SIDEWALK","N","N"\n"2019-12-05T18:43:00.000","038XX 

Reading data from response into a list of tupples

In [9]:
text = csv.reader(response.text.strip().split('\n'))

data = []

for row in text:
    data.append((row[0], row[1], row[2], row[3], row[4], row[5], row[6]))

data.pop(0)

print(data[:5])

[('2019-06-24T18:20:00.000', '041XX S DREXEL BLVD', 'THEFT', '$500 AND UNDER', 'RESIDENTIAL YARD (FRONT/BACK)', 'N', 'N'), ('2019-11-03T11:40:00.000', '032XX N CLARK ST', 'THEFT', 'RETAIL THEFT', 'DEPARTMENT STORE', 'N', 'N'), ('2019-05-19T11:00:00.000', '018XX S WOOD ST', 'DECEPTIVE PRACTICE', 'ILLEGAL USE CASH CARD', 'ATM (AUTOMATIC TELLER MACHINE)', 'N', 'N'), ('2019-10-04T06:10:00.000', '004XX S LA SALLE ST', 'ASSAULT', 'SIMPLE', 'SIDEWALK', 'N', 'N'), ('2019-05-20T15:00:00.000', '010XX N WINCHESTER AVE', 'THEFT', 'OVER $500', 'APARTMENT', 'N', 'N')]


2. What Months is the number of crimes the highest?

instantiating Counter object

In [10]:
monthly_crimes = Counter()

looping over data to get number of crimes per month

In [11]:
for row in data:
    date = datetime.strptime(row[0], "%Y-%m-%dT%H:%M:%S.%f")
    monthly_crimes[date.month] += 1

print(sorted(monthly_crimes.items()))

[(3, 2), (5, 13), (6, 8), (7, 21), (8, 71), (9, 675), (10, 28), (11, 180), (12, 2)]


Print top 3 crime prone months

In [36]:
top3_months = monthly_crimes.most_common(3)
print(top3_months[0][0])
print(top3_months[1][0])
print(top3_months[2][0])


9
11
8


CONCLUSION: most crimes happen in the above months

3. Where do the above crimes happen most often?

instantiating defaultdict object

In [13]:
location_monthly_crimes = defaultdict(list)

looping over data to get a list of crime locations per month

In [14]:
for row in data:
    date = datetime.strptime(row[0], "%Y-%m-%dT%H:%M:%S.%f")
    location_monthly_crimes[date.month].append(row[4])

In [18]:
print(location_monthly_crimes)

defaultdict(<class 'list'>, {6: ['RESIDENTIAL YARD (FRONT/BACK)', 'SIDEWALK', 'STREET', 'RESIDENCE', 'RESIDENCE PORCH/HALLWAY', 'RESIDENCE', 'RESIDENCE', 'SIDEWALK'], 11: ['DEPARTMENT STORE', 'CTA BUS', 'APARTMENT', 'RESIDENCE PORCH/HALLWAY', 'VEHICLE-COMMERCIAL', 'STREET', 'OTHER', 'STREET', 'SMALL RETAIL STORE', 'RESIDENCE', 'APARTMENT', 'STREET', 'STREET', 'SIDEWALK', 'GAS STATION', 'SIDEWALK', 'GAS STATION', 'RESIDENCE', 'DEPARTMENT STORE', 'RESIDENCE', 'STREET', 'STREET', 'SIDEWALK', 'STREET', 'RESIDENCE', 'SCHOOL, PUBLIC, GROUNDS', 'STREET', 'STREET', 'DRIVEWAY - RESIDENTIAL', 'GROCERY FOOD STORE', 'APARTMENT', 'OTHER', 'SMALL RETAIL STORE', 'STREET', 'RESTAURANT', 'RESIDENCE', 'STREET', 'APARTMENT', 'SIDEWALK', 'APARTMENT', 'RESIDENCE', 'STREET', 'STREET', 'SMALL RETAIL STORE', 'STREET', 'STREET', 'STREET', 'STREET', 'SIDEWALK', 'STREET', 'RESIDENCE', 'PARKING LOT/GARAGE(NON.RESID.)', 'SCHOOL, PUBLIC, BUILDING', 'OTHER', 'RESIDENCE PORCH/HALLWAY', 'HOTEL/MOTEL', 'SMALL RETAIL ST

count location per month using Counter

In [34]:
for month, place in location_monthly_crimes.items():
    count = Counter(place)
    if month in {top3_months[0][0],top3_months[1][0],top3_months[2][0]}:
        print(month)
        print(count.most_common(3))

11
[('STREET', 45), ('RESIDENCE', 25), ('APARTMENT', 22)]
8
[('RESIDENCE', 18), ('APARTMENT', 9), ('OTHER', 8)]
9
[('STREET', 121), ('RESIDENCE', 103), ('APARTMENT', 87)]


CONCLUSION: The most crime prone places grouped by most crime prone months are listed above...