In [3]:
# Import the csv module
import csv

# Build crime_data: one (date, crime type, location description, arrest)
# tuple per data row of the CSV file.
crime_data = []

# Open the file in a with-block so the handle is closed as soon as reading
# finishes; newline='' lets the csv module handle line endings itself.
with open('Datasets/chicago_crime.csv', 'r', newline='') as csvfile:
    reader = csv.reader(csvfile)

    # Skip the header row up front (a no-op if the file is empty) instead of
    # popping it off the front of the list after the whole file is loaded
    next(reader, None)

    # Keep only columns 0, 2, 4 and 5 of each remaining row
    for row in reader:
        crime_data.append((row[0], row[2], row[4], row[5]))

# Print the first 5 records
print(crime_data[:5])

[('05/23/2016 05:35:00 PM', 'ASSAULT', 'STREET', 'false'), ('03/26/2016 08:20:00 PM', 'BURGLARY', 'SMALL RETAIL STORE', 'false'), ('04/25/2016 03:05:00 PM', 'THEFT', 'DEPARTMENT STORE', 'true'), ('04/26/2016 05:30:00 PM', 'BATTERY', 'SIDEWALK', 'false'), ('06/19/2016 01:15:00 AM', 'BATTERY', 'SIDEWALK', 'false')]


## 1) Find the Months with the Highest Number of Crimes

In [7]:
# Import necessary modules
from collections import Counter
from datetime import datetime

# Tally the number of records per month: crimes_by_month
# The first element of every record is parsed as a timestamp in the
# '%m/%d/%Y %I:%M:%S %p' format, and only its month number is counted.
crimes_by_month = Counter(
    datetime.strptime(record[0], '%m/%d/%Y %I:%M:%S %p').month
    for record in crime_data
)

# Show the three months with the highest counts
print(crimes_by_month.most_common(3))

[(1, 1948), (2, 1862), (7, 1257)]


#### It looks like the months with the highest number of crimes are January, February, and July.

## 2) Transforming your Data Containers to Month and Location

In [35]:
from collections import defaultdict

# Map each month number to the list of location descriptions recorded in it
locations_by_month = defaultdict(list)

# NOTE: only the first 50 records of crime_data are examined here
for record in crime_data[:50]:
    when = datetime.strptime(record[0], '%m/%d/%Y %I:%M:%S %p')

    # Ignore anything that did not happen in 2016
    if when.year != 2016:
        continue

    # The third element of the record is the location description
    locations_by_month[when.month].append(record[2])

print(locations_by_month)

defaultdict(<class 'list'>, {5: ['STREET', 'GAS STATION', '', 'PARKING LOT/GARAGE(NON.RESID.)', 'RESIDENCE', 'STREET'], 3: ['SMALL RETAIL STORE'], 4: ['DEPARTMENT STORE', 'SIDEWALK', 'VEHICLE NON-COMMERCIAL'], 6: ['SIDEWALK', 'STREET', 'BAR OR TAVERN', 'SCHOOL, PUBLIC, GROUNDS', 'STREET', 'RESIDENCE', 'APARTMENT'], 7: ['OTHER', 'PARKING LOT/GARAGE(NON.RESID.)', 'APARTMENT', 'STREET', 'STREET'], 10: ['STREET', 'NURSING HOME/RETIREMENT HOME', 'CTA PLATFORM'], 12: ['APARTMENT', 'STREET'], 1: ['BAR OR TAVERN', 'TAVERN/LIQUOR STORE', 'CLEANING STORE'], 9: ['RESIDENCE', 'DRIVEWAY - RESIDENTIAL', 'STREET', 'GAS STATION', 'STREET', 'RESIDENCE', 'WAREHOUSE', 'STREET'], 11: ['RESIDENCE', 'RESIDENCE'], 8: ['RESIDENCE'], 2: ['SCHOOL, PUBLIC, BUILDING']})


## 3) Find the Most Common Crime Locations by Month in 2016

In [33]:
# Walk through every (month, locations) pair held in locations_by_month
for month, locations in locations_by_month.items():
    # Count how many times each location appears in this month
    location_count = Counter(locations)
    top_two = location_count.most_common(2)

    # Print the month, followed by its two most common locations and counts
    print(month)
    print(top_two)

5
[('STREET', 2), ('GAS STATION', 1)]
3
[('SMALL RETAIL STORE', 1)]
4
[('DEPARTMENT STORE', 1), ('SIDEWALK', 1)]
6
[('STREET', 2), ('SIDEWALK', 1)]
7
[('STREET', 2), ('OTHER', 1)]
10
[('STREET', 1), ('NURSING HOME/RETIREMENT HOME', 1)]
12
[('APARTMENT', 1), ('STREET', 1)]
1
[('BAR OR TAVERN', 1), ('TAVERN/LIQUOR STORE', 1)]
9
[('STREET', 3), ('RESIDENCE', 2)]
11
[('RESIDENCE', 2)]
8
[('RESIDENCE', 1)]
2
[('SCHOOL, PUBLIC, BUILDING', 1)]


## 4) Reading your Data with DictReader and Establishing your Data Containers again

In [57]:
# Create a dictionary that defaults to a list: crimes_by_district
# Each key is a value of the 'District' column; each value is the list of
# row dicts for that district, with the 'District' key removed.
crimes_by_district = defaultdict(list)

# Open the file in a with-block so the handle is closed as soon as reading
# finishes; newline='' lets the csv module handle line endings itself.
with open('Datasets/chicago_crime.csv', 'r', newline='') as csvfile:
    # Loop over a DictReader of the CSV file
    for row in csv.DictReader(csvfile):
        # Pop the district from each row: district
        district = row.pop('District')
        # Append the rest of the row to the list for its district
        crimes_by_district[district].append(row)

## 5) Determine the Arrests by District by Year

In [42]:
# For every district, print the district followed by its arrests per year
for district, crimes in crimes_by_district.items():
    print(district)

    # Count the year of each crime whose 'Arrest' field is the string 'true';
    # the year is taken from the 'Date' field parsed as a timestamp
    year_count = Counter(
        datetime.strptime(crime['Date'], '%m/%d/%Y %I:%M:%S %p').year
        for crime in crimes
        if crime['Arrest'] == 'true'
    )

    print(year_count)

14
Counter({2016: 59, 2017: 8})
24
Counter({2016: 51, 2017: 10})
6
Counter({2016: 157, 2017: 32})
15
Counter({2016: 154, 2017: 16})
12
Counter({2016: 72, 2017: 9})
7
Counter({2016: 181, 2017: 27})
1
Counter({2016: 124, 2017: 15})
11
Counter({2016: 275, 2017: 53})
18
Counter({2016: 92, 2017: 17})
22
Counter({2016: 78, 2017: 12})
5
Counter({2016: 149, 2017: 30})
16
Counter({2016: 66, 2017: 9})
9
Counter({2016: 116, 2017: 17})
8
Counter({2016: 124, 2017: 26})
3
Counter({2016: 98, 2017: 18})
2
Counter({2016: 84, 2017: 15})
19
Counter({2016: 88, 2017: 11})
10
Counter({2016: 144, 2017: 20})
4
Counter({2016: 134, 2017: 15})
17
Counter({2016: 38, 2017: 5})
20
Counter({2016: 27, 2017: 8})
25
Counter({2016: 150, 2017: 26})
31
Counter({2016: 1})
