In [56]:
import pandas as pd
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.image as img
# Shapely
import shapely.wkt as wkt
import shapely.geometry as geo
import shapely.ops as ops
import shapely.affinity as aff
from shapely.prepared import prep
import time
from datetime import date

In [2]:
#############
# CONSTANTS #
#############
X_MAX_PIXELS = 2048
Y_MAX_PIXELS = X_MAX_PIXELS
L_LINES = ['Green','Red','Brown','Purple','Yellow','Blue','Pink','Orange']
BUSINESS_CATEGORIES = ['Food Service', 'Tobacco Sale', 'Alcohol Consumption', 'Package Store', 'Gas Station']

In [3]:
################################
# IMPORT PREPROCESSED DATASETS #
################################
processed_dataset_paths_xlsx = '/Volumes/GoogleDrive/My Drive/Crime Data/Composite Data/Sean Workspace/Processed/%s.xlsx' 
processed_dataset_paths_csv = '/Volumes/GoogleDrive/My Drive/Crime Data/Composite Data/Sean Workspace/Processed/%s.csv' 

In [4]:
MMRP_permits = pd.read_excel(processed_dataset_paths_xlsx % 'MMRP Permits')

In [5]:
libraries = pd.read_excel(processed_dataset_paths_xlsx % 'Libraries')

In [6]:
schools = pd.read_csv(processed_dataset_paths_csv % 'Schools')

In [7]:
forests = pd.read_csv(processed_dataset_paths_csv % 'Forests')

In [8]:
parks = pd.read_csv(processed_dataset_paths_csv % 'Parks')

In [9]:
streets = pd.read_excel(processed_dataset_paths_xlsx % 'Streets')

In [10]:
waterways = pd.read_csv(processed_dataset_paths_csv % 'Waterways')

In [11]:
weather = pd.read_excel(processed_dataset_paths_xlsx % 'Weather')

In [12]:
life_expectancy = pd.read_excel(processed_dataset_paths_xlsx % 'Life Expectancy')

In [13]:
L_entries = pd.read_excel(processed_dataset_paths_xlsx % 'L Entries')

In [14]:
SBIF_grants = pd.read_excel(processed_dataset_paths_xlsx % 'SBIF Grants')

In [15]:
businesses = pd.read_excel(processed_dataset_paths_xlsx % 'Businesses')

In [16]:
buildings = pd.read_excel(processed_dataset_paths_xlsx % 'Buildings')

In [17]:
communities = pd.read_csv(processed_dataset_paths_csv % 'Communities')

In [18]:
# Building CNN Data
# Data that varies with Time and Location:
# - Crime (OUTPUT - YOLO with crime and location)
# - L entries (8 layers - one per line)
# - SBIF grants (1 layer)
# - MMRP Permits (21 layers - types of permits)
# - Life Expectancy (1 layer)
#
# Data that varies with Time Only:
# - Temperature (3 layers - MIN, MAX, and PRECIPITATION)
# - Date (43 layers - 12 for month, 31 for day)
# - Time (5 layers - one for each time slot)
#
# Data that varies with Location Only:
# - Businesses (5+ layers - types of businesses)
# - Buildings (10 layers - stories|units|sqfeet for sound|minor repair|major repair.  Also uninhabitable or not.)
# - Waterways (1 layer)
# - Major Streets (1 layer)
# - Libraries (1 layer)
# - Public Parks (1 layer)
# - Forests (1 layer)
# - Schools (1 layer)

# Total layers: 60 + (date layers)

In [23]:
#####################
# UTILITY FUNCTIONS #
#####################

# Preprocess necessary parameters for the transformations
polygons = []
for encoding in communities.Outline:
    polygons.extend(wkt.loads(encoding).geoms)
chicago = geo.MultiPolygon(polygons)
x_scale = float(X_MAX_PIXELS) / (chicago.bounds[2] - chicago.bounds[0])
y_scale = float(Y_MAX_PIXELS) / (chicago.bounds[3] - chicago.bounds[1])
x_off = -chicago.bounds[0] * x_scale
y_off = -chicago.bounds[1] * y_scale

# Transform geometry to the current frame
def transform_latlong_to_frame(geometry):
    return aff.affine_transform(geometry, [x_scale, 0, 0, y_scale, x_off, y_off])

# Return the points within the input polygon
def points_within(polygon):
    bounds = polygon.bounds
    offset_x = int(np.floor(bounds[0]))
    offset_y = int(np.floor(bounds[1]))
    width = int(np.ceil(bounds[2])-offset_x)
    height = int(np.ceil(bounds[3])-offset_y)
    pixel_x = []
    pixel_y = []
    for x in range(offset_x, offset_x+width):
        for y in range(offset_y, offset_y+height):
            if polygon.contains(geo.Point(x,y)):
                pixel_x.append(x)
                pixel_y.append(y)
    return (pixel_x, pixel_y)

# Apply the value in column_name of the given record at given coordinates of given frame 
def column_value_at_coordinate(record, column_name, frame):
    frame[record.Coordinates[0], record.Coordinates[1]] = record[column_name]

# Function for converting and placing weird format latlong into frame pixels
def mark_at_latlong(latlong, frame):
    lat, long = latlong.replace('(','').replace(')','').split(',')
    location = transform_latlong_to_frame(geo.Point(float(long), float(lat)))
    frame[int(location.x)][int(location.y)] = True

In [24]:
################
# EMPTY PIXELS #
################
pixel_points = []
for x in range(X_MAX_PIXELS):
    for y in range(Y_MAX_PIXELS):
        pixel_points.append(geo.Point(x,y))

In [306]:
#######################
# CREATE STREET FRAME #
#######################
# Load streets
street_lines = []
for encoding in streets.Centerline:
    street_lines.append(wkt.loads(encoding))
chicago_streets = transform_latlong_to_frame(geo.MultiLineString(street_lines))
# Buffer streets
buffered_streets = chicago_streets.buffer(0.5)
# Rasterize to numpy array
street_frame = np.array(list(map(prep(buffered_streets).contains, pixel_points))).reshape((X_MAX_PIXELS,Y_MAX_PIXELS))
print('Street frame created')

Streets loaded and buffered
Hits calculated


In [320]:
##########################
# CREATE WATERWAYS FRAME #
##########################
# Load waterways
waterway_outlines = []
for encoding in waterways.Outline:
    waterway_outlines.extend(wkt.loads(encoding).geoms)
chicago_waterways = transform_latlong_to_frame(geo.MultiPolygon(waterway_outlines))
# Rasterize to numpy array
waterway_frame = np.array(list(map(prep(chicago_waterways).contains, pixel_points))).reshape((X_MAX_PIXELS,Y_MAX_PIXELS))
print('Waterway frame created')

Waterways loaded
Waterway frame created


In [343]:
###############
# PARKS FRAME #
###############
# Load parks
park_outlines = []
for encoding in parks.Outline:
    park_outlines.append(wkt.loads(encoding))
chicago_parks = transform_latlong_to_frame(geo.MultiPolygon(park_outlines))
# Rasterize to numpy array
park_frame = np.array(list(map(prep(chicago_parks).contains, pixel_points))).reshape((X_MAX_PIXELS,Y_MAX_PIXELS))
print('Park frame created')

Parks loaded
Park frame created


In [344]:
#################
# FORESTS FRAME #
#################
# Load forests
forest_outlines = []
for encoding in forests.Outline:
    forest = wkt.loads(encoding)
    if type(forest) is geo.Polygon:
        forest_outlines.append(forest)
    elif type(forest) is geo.MultiPolygon:
        forest_outlines.extend(forest.geoms)
chicago_forests = transform_latlong_to_frame(geo.MultiPolygon(forest_outlines))
# Rasterize to numpy array
forest_frame = np.array(list(map(prep(chicago_forests).contains, pixel_points))).reshape((X_MAX_PIXELS,Y_MAX_PIXELS))
print('Forest frame created')

Forests loaded


In [534]:
#################
# SCHOOLS FRAME #
#################
# Load schools
school_outlines = []
for encoding in schools.Outline:
    school_outlines.append(wkt.loads(encoding))
chicago_schools = transform_latlong_to_frame(geo.MultiPolygon(school_outlines))
# Rasterize to numpy array
school_frame = np.array(list(map(prep(chicago_schools).contains, pixel_points))).reshape((X_MAX_PIXELS,Y_MAX_PIXELS))
print('Schools frame created')

Schools frame created


In [533]:
###################
# BUILDINGS FRAME #
###################
# Load uninhabitable buildings
uninhabitable_building_outlines = []
for encoding in buildings.loc[buildings.Condition == 'UNINHABITABLE'].Footprint:
    building = wkt.loads(encoding)
    if type(building) is geo.Polygon:
        uninhabitable_building_outlines.append(building)
    elif type(building) is geo.MultiPolygon:
        uninhabitable_building_outlines.extend(building.geoms)
chicago_uninhabitable_buildings = transform_latlong_to_frame(geo.MultiPolygon(uninhabitable_building_outlines))
# Rasterize to numpy array
uninhabitable_building_frame = np.array(list(map(prep(chicago_uninhabitable_buildings).contains, pixel_points))).reshape((X_MAX_PIXELS,Y_MAX_PIXELS))
print('Uninhabitable buildings frame created.')

# Load habitable buildings
# Find out which pixels are inside each buildings' footprint
# Then, change the pixels in the frames accordingly

# SOUND BUILDINGS
sound_buildings = buildings[buildings.Condition == 'SOUND'].copy()
sound_buildings['Coordinates'] = sound_buildings.Footprint.apply(lambda x: points_within(transform_latlong_to_frame(wkt.loads(x))))

stories_of_sound_buildings_frame = np.zeros((X_MAX_PIXELS, Y_MAX_PIXELS))
sound_buildings.apply(lambda building: column_value_at_coordinate(building, 'Stories', stories_of_sound_buildings_frame), axis=1)

units_of_sound_buildings_frame = np.zeros((X_MAX_PIXELS, Y_MAX_PIXELS))
sound_buildings.apply(lambda building: column_value_at_coordinate(building, 'Units', units_of_sound_buildings_frame), axis=1)

area_of_sound_buildings_frame = np.zeros((X_MAX_PIXELS, Y_MAX_PIXELS))
sound_buildings.apply(lambda building: column_value_at_coordinate(building, 'Square Footage', area_of_sound_buildings_frame), axis=1)
print('Sound building frames created.')

# MINOR REPAIR BUILDINGS
minor_repair_buildings = buildings[buildings.Condition == 'NEEDS MINOR REPAIR'].copy()
minor_repair_buildings['Coordinates'] = minor_repair_buildings.Footprint.apply(lambda x: points_within(transform_latlong_to_frame(wkt.loads(x))))

stories_of_minor_repair_buildings_frame = np.zeros((X_MAX_PIXELS, Y_MAX_PIXELS))
minor_repair_buildings.apply(lambda building: column_value_at_coordinate(building, 'Stories', stories_of_minor_repair_buildings_frame), axis=1)

units_of_minor_repair_buildings_frame = np.zeros((X_MAX_PIXELS, Y_MAX_PIXELS))
minor_repair_buildings.apply(lambda building: column_value_at_coordinate(building, 'Units', units_of_minor_repair_buildings_frame), axis=1)

area_of_minor_repair_buildings_frame = np.zeros((X_MAX_PIXELS, Y_MAX_PIXELS))
minor_repair_buildings.apply(lambda building: column_value_at_coordinate(building, 'Square Footage', area_of_minor_repair_buildings_frame), axis=1)
print('Minor repair building frames created.')

# MAJOR REPAIR BUILDINGS
major_repair_buildings = buildings[buildings.Condition == 'NEEDS MAJOR REPAIR'].copy()
major_repair_buildings['Coordinates'] = major_repair_buildings.Footprint.apply(lambda x: points_within(transform_latlong_to_frame(wkt.loads(x))))

stories_of_major_repair_buildings_frame = np.zeros((X_MAX_PIXELS, Y_MAX_PIXELS))
major_repair_buildings.apply(lambda building: column_value_at_coordinate(building, 'Stories', stories_of_major_repair_buildings_frame), axis=1)

units_of_major_repair_buildings_frame = np.zeros((X_MAX_PIXELS, Y_MAX_PIXELS))
major_repair_buildings.apply(lambda building: column_value_at_coordinate(building, 'Units', units_of_major_repair_buildings_frame), axis=1)

area_of_major_repair_buildings_frame = np.zeros((X_MAX_PIXELS, Y_MAX_PIXELS))
major_repair_buildings.apply(lambda building: column_value_at_coordinate(building, 'Square Footage', area_of_major_repair_buildings_frame), axis=1)
print('Major repair building frames created.')

Uninhabitable buildings frame created.
Sound building frames created.
Minor repair building frames created.
Major repair building frames created.


In [609]:
###################
# LIBRARIES FRAME #
###################

# Create empty frame
library_frame = np.zeros((X_MAX_PIXELS, Y_MAX_PIXELS))
# Load libraries
libraries.Location.apply(lambda library: mark_at_latlong(library, library_frame))
print('Libraries frame created.')

Libraries frame created.


In [605]:
#########################
# LIFE EXPECTANCY FRAME #
#########################
# Rasterize the communities
life_communities = communities.copy()
life_communities['Coordinates'] = communities.Outline.apply(lambda x: points_within(transform_latlong_to_frame(wkt.loads(x))))
# Assign rasterized community to each year
chicago_life_expectancy = life_expectancy.merge(life_communities, left_on='Community Area', right_on='Number')
# Create an empty numpy frame for each year
life_expectancy_frame = np.zeros((len(chicago_life_expectancy.Year.unique()), X_MAX_PIXELS, Y_MAX_PIXELS))
# Apply the data to the frames
for year in range(2001, 2019):
    chicago_life_expectancy[chicago_life_expectancy.Year == year].apply(lambda record: column_value_at_coordinate(record, 'Life Expectancy', life_expectancy_frame[2001-year]), axis=1)
print('Life expectancy frame created.')

Life expectancy frame created.


In [610]:
##################
# BUSINESS FRAME #
##################
# Create empty frame
business_frames = {'Food Service':np.zeros((X_MAX_PIXELS, Y_MAX_PIXELS)),
                   'Tobacco Sale':np.zeros((X_MAX_PIXELS, Y_MAX_PIXELS)),
                   'Alcohol Consumption':np.zeros((X_MAX_PIXELS, Y_MAX_PIXELS)),
                   'Package Store':np.zeros((X_MAX_PIXELS, Y_MAX_PIXELS)),
                   'Gas Station':np.zeros((X_MAX_PIXELS, Y_MAX_PIXELS))}
# Load businesses into frames
for column, frame in business_frames.items():
    businesses[businesses[column]==True].Location.apply(lambda business: mark_at_latlong(business, frame))
print('Business frames created.')

Business frames created.


In [59]:
###################
# L ENTRIES FRAME #
###################
# Transform points from latlong to frame 
translated_L_entries = L_entries.copy()
translated_L_entries['Location'] = L_entries.Location.apply(lambda point: transform_latlong_to_frame(wkt.loads(point)))
translated_L_entries['Date'] = pd.to_datetime(L_entries.Date)
L_entry_frames = {}
def update_L_entry(record, frame):
    # Get station pixel coordinates
    station_x = int(record.Location.x)
    station_y = int(record.Location.y)
    # Get record date
    num_days = (date(2018, 1, 1) - date(2001, 1, 1)).days
    record_date = date(record.Date.year, record.Date.month, record.Date.day)
    frame_index = (record.Date - date(2001, 1, 1)).days
    # Update the frame
    if station_x < X_MAX_PIXELS and station_x >= 0 and station_y < Y_MAX_PIXELS and station_y >= 0 and frame_index < num_days and frame_index >= 0:
        frame[frame_index][station_x][station_y] = frame[station_x][station_y] + record.Entries

# For each line, store the number of passengers at that station
num_days = (date(2018, 1, 1) - date(2001, 1, 1)).days
for line in L_LINES:
    this_line_frames = np.zeros((num_days, X_MAX_PIXELS, Y_MAX_PIXELS))
    this_line_entries = translated_L_entries[translated_L_entries[line + ' Line'] == True]
    this_line_entries.apply(lambda record: update_L_entry(record, this_line_frames), axis=1)
    L_entry_frames[line] = this_line_frames
print('L entry frames created.')

TypeError: ("unsupported operand type(s) for -: 'Timestamp' and 'datetime.date'", 'occurred at index 6393')

In [66]:
# timestamps = pd.to_datetime(L_entries.Date)
# date(timestamps[0].year, timestamps[0].month, timestamps[0].day)


TypeError: an integer is required (got type builtin_function_or_method)

In [None]:
['Food Service', 'Tobacco Sale', 'Alcohol Consumption', 'Package Store', 'Gas Station']
['Green','Red','Brown','Purple','Yellow','Blue','Pink','Orange']
np.savez_compressed('Input Frames',
                    Blue_Line = L_entry_frame['Green'],
                    Red_Line = L_entry_frame['Red'],
                    Brown_Line = L_entry_frame['Brown'],
                    Purple_Line = L_entry_frame['Purple'],
                    Yellow_Line = L_entry_frame['Yellow'],
                    Blue_Line = L_entry_frame['Blue'],
                    Pink_Line = L_entry_frame['Pink'],
                    Orange_Line = L_entry_frame['Orange'],
                    foodbusiness = business_frames,
                    life_expectancy = life_expectancy_frame,
                    library = library_frame,
                    )
L_entry_frames (8)
business_frames (5)
life_expectancy_frame
library_frame
uninhabitable_building_frame
stories_of_sound_buildings_frame
units_of_sound_buildings_frame
area_of_sound_buildings_frame
stories_of_minor_repair_buildings_frame
units_of_minor_repair_buildings_frame
area_of_minor_repair_buildings_frame
stories_of_major_repair_buildings_frame
units_of_major_repair_buildings_frame
area_of_major_repair_buildings_frame
school_frame
forest_frame
park_frame
waterway_frame
street_frame

In [44]:
# np.savez_compressed('inputs', street_frame=street_frame)
# np.load('inputs.npz')['street_frame']
# plt.imshow(L_entry_frames['Orange'][0])
img.imsave('L Orange.png', L_entry_frames['Orange'][0])

In [46]:
np.sum(L_entry_frames['Blue'][0])#.shape

0.0

In [41]:
translated_L_entries

Unnamed: 0,Date,Entries,Location,Green Line,Red Line,Brown Line,Purple Line,Yellow Line,Blue Line,Pink Line,Orange Line
0,01/01/2001,290,POINT (804.1248116100323 1224.720148145658),False,False,False,False,False,True,False,False
1,01/02/2001,1240,POINT (804.1248116100323 1224.720148145658),False,False,False,False,False,True,False,False
2,01/03/2001,1412,POINT (804.1248116100323 1224.720148145658),False,False,False,False,False,True,False,False
3,01/04/2001,1388,POINT (804.1248116100323 1224.720148145658),False,False,False,False,False,True,False,False
4,01/05/2001,1465,POINT (804.1248116100323 1224.720148145658),False,False,False,False,False,True,False,False
5,01/06/2001,613,POINT (804.1248116100323 1224.720148145658),False,False,False,False,False,True,False,False
6,01/07/2001,403,POINT (804.1248116100323 1224.720148145658),False,False,False,False,False,True,False,False
7,01/08/2001,1463,POINT (804.1248116100323 1224.720148145658),False,False,False,False,False,True,False,False
8,01/09/2001,1505,POINT (804.1248116100323 1224.720148145658),False,False,False,False,False,True,False,False
9,01/10/2001,1519,POINT (804.1248116100323 1224.720148145658),False,False,False,False,False,True,False,False
