In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import re

from datetime import datetime

In [7]:
!pip install shapely

Collecting shapely
[?25l  Downloading https://files.pythonhosted.org/packages/38/b6/b53f19062afd49bb5abd049aeed36f13bf8d57ef8f3fa07a5203531a0252/Shapely-1.6.4.post2-cp36-cp36m-manylinux1_x86_64.whl (1.5MB)
[K     |████████████████████████████████| 1.5MB 1.6MB/s eta 0:00:01
[?25hInstalling collected packages: shapely
Successfully installed shapely-1.6.4.post2


In [4]:
pd.set_option('display.max_columns', None)

In [5]:
SEVERITY_HIGH = 100
SEVERITY_MEDIUM = 50
SEVERITY_LOW  = 20

def str_contains_any_substr(string, substrings):
    for subs in substrings:
        if subs in string:
            return True
    return False

def crime_severity(x):
    """
    Converts the primary type and description into a crime severity value.
    This value is based on how much a particular crime would affect a user
    that is either:
    1. Renting short-term and wants to roam around the city
    2. Renting long-term and wants to find a good neighborhood to roam in
    """
    primary_type = x[0]
    description  = x[1]
    if (primary_type == 'THEFT'):
        if (description == 'PURSE-SNATCHING') or \
            (description == 'POCKET-PICKING') or \
            (description == 'FROM_BUILDING'):
            return SEVERITY_MEDIUM
        else:
            return SEVERITY_LOW
        
    if (primary_type == 'BATTERY'):
        if 'AGG' in description and \
            'DOMESTIC' not in description:
            return SEVERITY_HIGH
        else:
            return SEVERITY_MEDIUM
        
    if (primary_type == 'NARCOTICS'):
        if 'DEL' in description or \
            'CONSPIRACY' in description:
            return SEVERITY_HIGH
        else:
            return SEVERITY_MEDIUM
        
    if (primary_type == 'OTHER OFFENSE'):
        if str_contains_any_substr(
            description, ['GUN', 'SEX', 'VOILENT', 'PAROLE', 'ARSON']):
            return SEVERITY_HIGH
        else:
            return SEVERITY_LOW
        
    if (primary_type == 'ASSAULT'):
        if str_contains_any_substr(
            description, ['AGG']):
            return SEVERITY_HIGH
        else:
            return SEVERITY_MEDIUM
        
    if (primary_type == 'ROBBERY'):
        if str_contains_any_substr(
            description, ['AGG', 'ARMED']):
            return SEVERITY_HIGH
        else:
            return SEVERITY_MEDIUM
        
    if (primary_type == 'BURGLARY'):
        if str_contains_any_substr(
            description, ['INVASION']):
            return SEVERITY_HIGH
        else:
            return SEVERITY_MEDIUM
        
    if (primary_type == 'CRIMINAL TRESPASS'):
        if str_contains_any_substr(
            description, ['RESIDENCE']):
            return SEVERITY_HIGH
        else:
            return SEVERITY_MEDIUM
        
    if (primary_type == 'MOTOR VEHICLE THEFT'):
        if str_contains_any_substr(
            description, ['AUTO']):
            return SEVERITY_HIGH
        else:
            return SEVERITY_LOW
        
    if (primary_type == 'WEAPONS VIOLATION'):
        if str_contains_any_substr(
            description, ['USE', 'SALE']):
            return SEVERITY_HIGH
        else:
            return SEVERITY_MEDIUM
        
    if (primary_type == 'CONCEALED CARRY LICENSE VIOLATION'):
        if str_contains_any_substr(
            description, ['INFLUENCE']):
            return SEVERITY_HIGH
        else:
            return SEVERITY_MEDIUM
        
    if (primary_type == 'PUBLIC PEACE VIOLATION'):
        if str_contains_any_substr(
            description, ['RECKLESS', 'MOB', 'ARSON', 'BOMB']):
            return SEVERITY_HIGH
        else:
            return SEVERITY_MEDIUM
        
    if (primary_type == 'INTERFERENCE WITH PUBLIC OFFICER'):
        if str_contains_any_substr(
            description, ['OBSTRUCT']):
            return SEVERITY_HIGH
        else:
            return SEVERITY_LOW
        
    if (primary_type == 'STALKING'):
        if str_contains_any_substr(
            description, ['AGG']):
            return SEVERITY_HIGH
        else:
            return SEVERITY_LOW
        
    if (primary_type == 'SEX OFFENSE'):
        if str_contains_any_substr(
            description, ['CRIM', 'CHILD', 'INDECEN']):
            return SEVERITY_HIGH
        else:
            return SEVERITY_LOW
        
    if (primary_type == 'LIQUOR LAW VIOLATION'):
        if str_contains_any_substr(
            description, ['MINOR']):
            return SEVERITY_HIGH
        else:
            return SEVERITY_LOW
    
    if (primary_type == 'HOMICIDE') or \
        (primary_type == 'CRIM SEXUAL ASSAULT') or \
        (primary_type == 'ARSON') or \
        (primary_type == 'OFFENSE INVOLVING CHILDREN') or \
        (primary_type == 'PROSTITUTION') or \
        (primary_type == 'KIDNAPPING') or \
        (primary_type == 'HUMAN TRAFFICKING') or \
        (primary_type == 'NON-CRIMINAL (SUBJECT SPECIFIED)'):
        return SEVERITY_HIGH

    if (primary_type == 'INTIMIDATION') or \
        (primary_type == 'OTHER NARCOTIC VIOLATION') or \
        (primary_type == 'OBSCENITY') or \
        (primary_type == 'PUBLIC INDECENCY'):
        return SEVERITY_MEDIUM

    if (primary_type == 'DECEPTIVE PRACTICE') or \
        (primary_type == 'CRIMINAL DAMAGE') or \
        (primary_type == 'NON-CRIMINAL') or \
        (primary_type == 'GAMBLING'):
        return SEVERITY_LOW

    raise ValueError(f'Could not find severity for "{primary_type}" and "{description}"')

In [6]:
def wrangle(crime_filename, census_pop_filename, census_geom_filename):
    df = pd.read_csv(crime_filename, header=0)
    df['Severity'] = df[['Primary Type', 'Description']].apply(crime_severity, axis=1)
    
    # Find the number of null values per column
    df.isnull().sum()
    print('Crime dataset shape before dropping:', df.shape)
    
    # Drop any row with any null values
    df = df.dropna(axis=0, how='any')
    df = df.reset_index(drop=True)
    print('Crime dataset shape after dropping:', df.shape)

    # Remove columns we're not interested in
    df = df.drop(columns=['ID', 'Case Number', 
           'Location Description', 'Arrest', 'Domestic', 'Beat',
           'District', 'Ward', 'Community Area', 'FBI Code', 'X Coordinate',
           'Y Coordinate', 'Year', 'Updated On',
           'Location', 'Historical Wards 2003-2015', 'Zip Codes',
           'Community Areas', 'Wards', 'Boundaries - ZIP Codes',
           'Police Districts', 'Police Beats'])

    # Keep the IUCR, and remove the crime type/description since it is redundant
    df = df.rename({'IUCR': 'Crime Type ID'}, axis=1)
    df['Crime Type'] = df['Primary Type'] + ' | ' + df['Description']
    df = df.drop(columns=['Primary Type', 'Description'])

    # Extract block number prefix
    pattern = re.compile('(\d*?)(XX|X)\s+(.*)?')
    def get_block_num(x):
        match = pattern.match(x)
        return match.group(1)

    df['Block ID'] = df['Block'].apply(lambda x: get_block_num(x))
    df = df.drop(columns=['Block'])
    df['Census Tracts'] = df['Census Tracts'].astype(int)

    # Get the census population table
    census_pop = pd.read_csv(census_pop_filename, header=0)
    print('census_pop.shape:', census_pop.shape)
    census_pop = census_pop.drop(columns=['CENSUS BLOCK'])
    census_pop.head()

    # Get the census geometry table
    census_geom = pd.read_csv(census_geom_filename, header=0)
    print('census_geom.shape:', census_geom.shape)
    census_geom = census_geom.drop(columns=['STATEFP10', 'COUNTYFP10', 
                                            'NAME10', 'TRACT_BLOC'])
    census_geom = census_geom.rename({'GEOID10': 'CENSUS BLOCK FULL'}, axis=1)
    census_geom.head()

    # Merge the census population and census geometry tables
    blocks = census_pop.merge(census_geom, on='CENSUS BLOCK FULL')
    print('blocks.shape:', blocks.shape)
    blocks.head()

    blocks.BLOCKCE10 = blocks.BLOCKCE10.astype(str) \
                            .apply(lambda x: '0' + x if len(x) == 4 else '00' + x)
    blocks.dtypes

    print('crimes dataset shape:', df.shape)
    df.head()
    return df, blocks

In [7]:
crimes, blocks = wrangle('data/Chicago/Crimes_1year.csv',
                         'data/Chicago/Population_by_2010_Census_Block.csv',
                         'data/Chicago/CensusBlockTIGER2010.csv')

Crime dataset shape before dropping: (264154, 31)
Crime dataset shape after dropping: (259204, 31)
census_pop.shape: (46291, 3)
census_geom.shape: (46357, 8)
blocks.shape: (46337, 5)
crimes dataset shape: (259204, 8)


In [8]:
print('crimes.shape:', crimes.shape)
crimes.head(15)

crimes.shape: (259204, 8)


Unnamed: 0,Date,Crime Type ID,Latitude,Longitude,Census Tracts,Severity,Crime Type,Block ID
0,04/15/2019 11:59:00 PM,0820,42.017499,-87.668188,357,20,THEFT | $500 AND UNDER,74
1,04/15/2019 11:57:00 PM,1090,41.870014,-87.726102,703,100,ARSON | ATTEMPT ARSON,40
2,04/15/2019 11:55:00 PM,0460,41.691414,-87.668826,378,50,BATTERY | SIMPLE,19
3,04/15/2019 11:51:00 PM,0486,41.755613,-87.634421,495,50,BATTERY | DOMESTIC BATTERY SIMPLE,76
4,04/15/2019 11:47:00 PM,1822,41.753902,-87.636814,495,100,NARCOTICS | MANU/DEL:CANNABIS OVER 10 GMS,77
5,04/15/2019 11:45:00 PM,502P,41.760156,-87.556753,421,20,OTHER OFFENSE | FALSE/STOLEN/ALTERED TRP,75
6,04/15/2019 11:45:00 PM,0486,41.748625,-87.607588,11,50,BATTERY | DOMESTIC BATTERY SIMPLE,80
7,04/15/2019 11:43:00 PM,0560,41.691323,-87.652787,663,50,ASSAULT | SIMPLE,12
8,04/15/2019 11:35:00 PM,0320,41.80945,-87.612187,447,50,ROBBERY | STRONGARM - NO WEAPON,5
9,04/15/2019 11:32:00 PM,2022,41.793431,-87.709047,801,50,NARCOTICS | POSS: COCAINE,34


In [9]:
print('blocks.shape:', blocks.shape)
blocks.head()

blocks.shape: (46337, 5)


Unnamed: 0,CENSUS BLOCK FULL,TOTAL POPULATION,the_geom,TRACTCE10,BLOCKCE10
0,170310101001000,128,MULTIPOLYGON (((-87.66635499979151 42.02252199...,10100,1000
1,170310101001001,71,MULTIPOLYGON (((-87.66753999955125 42.02223700...,10100,1001
2,170310101001002,45,MULTIPOLYGON (((-87.67008600039445 42.02226200...,10100,1002
3,170310101001003,335,MULTIPOLYGON (((-87.67009499920478 42.02114900...,10100,1003
4,170310101002000,152,MULTIPOLYGON (((-87.67188399967968 42.02298600...,10100,2000


### Get data from database

In [1]:
!pip install --upgrade pip
!pip install python-decouple
!pip install geoalchemy2

Requirement already up-to-date: pip in /home/ec2-user/anaconda3/envs/tensorflow_p36/lib/python3.6/site-packages (19.1)


In [31]:
"""Contains models for DB."""

from sqlalchemy.ext.declarative import declarative_base
from sqlalchemy import Column, BigInteger, Integer, String, DateTime, ForeignKey, Float
from sqlalchemy.orm import relationship
from geoalchemy2 import Geometry


BASE = declarative_base()


class City(BASE):
    """City model for DB. Has information of cities."""
    __tablename__ = 'city'
    id            = Column(BigInteger, primary_key=True)
    city          = Column(String, unique=False, nullable=False)
    state         = Column(String, unique=False, nullable=True)
    country       = Column(String, unique=False, nullable=False)
    location      = Column(Geometry(geometry_type='POINT'), nullable=False)
    blocks        = relationship("Blocks", back_populates="city")
    zipcodes      = relationship("ZipcodeGeom", back_populates="city")
    incidents     = relationship("Incident", back_populates="city")


class Blocks(BASE):
    """Block model for DB. Has information of city blocks for a related city
        id."""
    __tablename__ = 'block'
    id            = Column(BigInteger, primary_key=True)
    cityid        = Column(BigInteger, ForeignKey('city.id'), nullable=False)
    shape         = Column(Geometry(geometry_type='MULTIPOLYGON'), nullable=False)
    population    = Column(Integer, nullable=False)
    city          = relationship("City", back_populates="blocks")
    incidents     = relationship("Incident", back_populates="block")

class ZipcodeGeom(BASE):
    """Zipcode geometry model for DB. Has information of zipcodes and related
        city id."""
    __tablename__ = 'zipcodegeom'
    id            = Column(BigInteger, primary_key=True)
    cityid        = Column(BigInteger, ForeignKey('city.id'), nullable=False)
    zipcode       = Column(String, nullable=False, unique=True)
    shape         = Column(Geometry(geometry_type='MULTIPOLYGON'), nullable=False)
    city          = relationship("City", back_populates="zipcodes")

class Incident(BASE):
    """Incident model for DB. Has information of a specific crime, including
        where it took place, when it took place, and the type of crime that
        occurred."""
    __tablename__ = 'incident'
    id            = Column(BigInteger, primary_key=True)
    crimetypeid   = Column(BigInteger, ForeignKey('crimetype.id'), nullable=False)
    locdescid     = Column(BigInteger, ForeignKey('locdesctype.id'), nullable=False)
    cityid        = Column(BigInteger, ForeignKey('city.id'), nullable=False)
    blockid       = Column(BigInteger, ForeignKey('block.id'), nullable=False)
    location      = Column(Geometry(geometry_type='POINT'), nullable=False)
    datetime      = Column(DateTime, nullable=False)
    hour          = Column(Integer, nullable=False)
    dow           = Column(Integer, nullable=False)
    month         = Column(Integer, nullable=False)
    year          = Column(Integer, nullable=False)
    city          = relationship("City", back_populates="incidents")
    block         = relationship("Blocks", back_populates="incidents")
    crimetype     = relationship("CrimeType", back_populates="incidents")
    locationdesc  = relationship("LocationDescriptionType", back_populates="incidents")

class CrimeType(BASE):
    """CrimeType model for DB. Has information of the types of crime, including
        a general description and the numerical severity of the crime."""
    __tablename__ = 'crimetype'
    id            = Column(BigInteger, primary_key=True)
    category      = Column(String, unique=True, nullable=False)
    severity      = Column(Integer, nullable=False)
    incidents     = relationship("Incident", back_populates="crimetype")


class LocationDescriptionType(BASE):
    """Location description model for DB. Has information on the type of
        location that the crime took place."""
    __tablename__ = 'locdesctype'
    id            = Column(BigInteger, primary_key=True)
    key1          = Column(String, nullable=False)
    key2          = Column(String, nullable=False)
    key3          = Column(String, nullable=False)
    incidents     = relationship("Incident", back_populates="locationdesc")

In [44]:
from sqlalchemy import create_engine, func, text
from sqlalchemy.orm import sessionmaker
from decouple import config
from shapely import wkb, wkt
from shapely.geometry import Point
from geoalchemy2.shape import to_shape 

import pandas as pd
import json
import datetime
import re

In [4]:
# Connect to DB and create session with DB
DB_URI  = config('DB_URI')
ENGINE  = create_engine(DB_URI)
Session = sessionmaker(bind=ENGINE)
SESSION = Session()

  """)


In [89]:
# crime_type = CrimeType(id=2909292029029, category='a', severity=100)
# crime_type.__dict__

In [87]:
# q = SESSION.query(Incident) \
#         .join(Blocks, Incident.blockid >= Blocks.id)
# q.column_descriptions

In [88]:
# q = SESSION.query(Incident) \
#         .join(CrimeType, Incident.crimetypeid >= CrimeType.id)
# q.column_descriptions

In [86]:
# Have to filter by city, after I figure how that is done.
#         .filter(Incident.block.City == 'CHICAGO') \
pattern = re.compile('POINT \(([\d\-\.]+) ([\d\-\.]+)\)')
for row in SESSION.query(Incident) \
        .filter(Incident.year >= 2016, \
                Incident.year <= 2016) \
        .filter(Incident.month == 11) \
        .filter(Incident.dow == 1) \
        .outerjoin(Blocks, Incident.blockid == Blocks.id) \
        .outerjoin(CrimeType, Incident.crimetypeid == CrimeType.id) \
        .add_columns(Incident.location, Incident.hour, 
                     Incident.dow, Incident.month, 
                     Incident.year, CrimeType.severity,
                     Blocks.population):
    shply_geom = to_shape(row.location)
    matches = pattern.match(shply_geom.to_wkt())
    longitude, latitude = matches[1], matches[2]

    print(latitude, longitude, row.hour, row.dow, row.month, 
          row.year, row.population, row.severity)

41.8202657880000004 -87.6148357060000080 11 1 11 2016 1680 1
41.8873745819999996 -87.6792565020000012 6 1 11 2016 2796 2
41.8857002439999988 -87.6417268539999981 9 1 11 2016 4872 2
41.8153829859999959 -87.6388081690000007 20 1 11 2016 1393 3
42.0119203309999989 -87.6630205139999958 17 1 11 2016 4091 3
41.9374129990000029 -87.6588800159999977 23 1 11 2016 3976 2
41.9242701930000052 -87.6357911770000015 10 1 11 2016 4570 2
41.8883200329999994 -87.6280330060000097 23 1 11 2016 3545 2
41.9395051159999994 -87.6838072470000043 23 1 11 2016 2117 3
41.7546356410000001 -87.5508896200000066 0 1 11 2016 6418 1
41.7515238840000009 -87.5826017870000015 0 1 11 2016 4944 3
41.7786424130000000 -87.6945951459999975 0 1 11 2016 6333 3
41.7765210459999992 -87.6872288489999931 0 1 11 2016 6333 3
41.7364879069999972 -87.6156823289999949 0 1 11 2016 4757 2
41.8787984370000004 -87.7313764740000011 0 1 11 2016 2478 3
41.8260684509999976 -87.6323636700000037 0 1 11 2016 907 3
41.7358571660000024 -87.6241528609

41.9495459130000015 -87.8022405129999868 14 1 11 2016 5129 2
41.8016348749999977 -87.6161478289999991 15 1 11 2016 1761 3
41.9618933809999959 -87.7168327710000000 14 1 11 2016 6187 2
41.8414460069999947 -87.6554628820000090 9 1 11 2016 2738 1
41.8638489699999994 -87.6920553789999957 16 1 11 2016 1228 3
41.8677459530000036 -87.7044881310000051 9 1 11 2016 1598 2
41.8561007349999983 -87.7199715320000024 17 1 11 2016 2268 1
41.8694656500000022 -87.6537387289999970 16 1 11 2016 2278 2
41.7068272309999983 -87.6526526950000004 12 1 11 2016 3614 3
41.7170946620000009 -87.6673274029999874 15 1 11 2016 4016 2
41.6554817099999966 -87.6023256849999967 16 1 11 2016 3373 3
41.8751966560000000 -87.7311238720000119 16 1 11 2016 2478 1
41.7683063540000035 -87.6851008600000057 17 1 11 2016 4232 3
41.9027081810000013 -87.7218656170000060 16 1 11 2016 6364 3
41.8763156000000052 -87.6692716239999896 14 1 11 2016 1713 2
41.7945095080000044 -87.6333017379999859 15 1 11 2016 1199 1
41.7847970519999947 -87.64

41.7583470340000034 -87.5563092470000015 23 1 11 2016 6418 3
41.7881330199999965 -87.6992055000000050 21 1 11 2016 7107 3
41.7664178719999981 -87.6541528810000017 22 1 11 2016 2272 1
41.7598592610000026 -87.5920452060000088 22 1 11 2016 2892 3
41.6537395180000019 -87.6062936590000021 19 1 11 2016 3373 3
41.7053003050000015 -87.6265615220000029 20 1 11 2016 5247 3
41.7123599040000030 -87.6200930899999975 21 1 11 2016 4007 2
41.7670585380000006 -87.6072406820000111 19 1 11 2016 1380 3
41.9104062850000005 -87.7630309279999921 23 1 11 2016 4725 1
41.7040950280000047 -87.5657334070000104 23 1 11 2016 3233 3
41.7311922469999956 -87.6036184529999957 12 1 11 2016 1800 2
41.7338557839999993 -87.6209214729999957 18 1 11 2016 2080 2
41.7554946069999957 -87.6429432799999972 21 1 11 2016 1275 1
41.7264150580000006 -87.6287882379999985 22 1 11 2016 3632 3
41.6902687840000041 -87.6083648229999881 0 1 11 2016 1666 2
41.8500608969999988 -87.6806864899999994 23 1 11 2016 4463 3
41.7355639919999959 -87.6

41.6922816060000017 -87.6133619760000073 21 1 11 2016 2931 3
41.7231265050000033 -87.6250767779999933 9 1 11 2016 3632 3
41.6809121510000011 -87.6311554499999943 15 1 11 2016 5152 2
41.9709420780000002 -87.7048788520000073 16 1 11 2016 3729 2
41.6714516159999988 -87.6368876679999858 8 1 11 2016 5402 3
41.8549172469999959 -87.6563374189999962 17 1 11 2016 1700 2
41.8708243780000018 -87.7185641049999987 19 1 11 2016 1689 2
41.8966448520000014 -87.6305235579999930 14 1 11 2016 3102 2
41.9661480520000012 -87.7517640179999887 22 1 11 2016 7540 2
41.9437688969999982 -87.8326832589999924 23 1 11 2016 2498 2
41.6981456150000014 -87.6167752039999925 0 1 11 2016 3891 2
41.9289474010000021 -87.7094144449999931 18 1 11 2016 5064 2
41.9363081240000000 -87.7306117689999923 0 1 11 2016 3035 2
41.9007694299999969 -87.6269446429999874 13 1 11 2016 4059 2
41.9467023699999970 -87.6561170760000010 23 1 11 2016 1798 2
41.9291450150000031 -87.7094198690000013 18 1 11 2016 5064 2
41.8107072980000041 -87.6039

41.8662546099999986 -87.7189676739999982 12 1 11 2016 4484 3
41.9243934380000027 -87.7344242789999953 12 1 11 2016 5427 3
41.8745974969999963 -87.7533401020000099 11 1 11 2016 2953 3
41.7370943049999994 -87.5729981779999918 12 1 11 2016 2033 3
41.7652996340000016 -87.6116261620000074 12 1 11 2016 4520 1
41.7661023869999966 -87.5735391690000000 12 1 11 2016 1665 3
41.7709691649999968 -87.5862682279999945 12 1 11 2016 4680 3
41.7662548469999990 -87.5746003889999969 11 1 11 2016 3689 3
41.8876557180000049 -87.6780270109999975 9 1 11 2016 2796 2
41.8925487300000015 -87.7223298060000047 10 1 11 2016 6039 2
41.9909177629999988 -87.7176599900000014 9 1 11 2016 5133 2
41.8662155379999987 -87.7219307879999945 13 1 11 2016 4484 1
41.8750432759999995 -87.7433414120000066 12 1 11 2016 4609 1
41.8598369130000023 -87.7029687699999982 11 1 11 2016 1892 3
41.7515398809999994 -87.5812712979999901 12 1 11 2016 4944 3
41.7263783249999989 -87.5546394629999867 8 1 11 2016 3206 2
41.9071067490000004 -87.726

41.7505470259999996 -87.5978949250000056 22 1 11 2016 3746 3
41.9246579530000005 -87.7124106560000030 22 1 11 2016 4870 3
41.8217676860000012 -87.7003339150000016 22 1 11 2016 5067 3
41.7666633629999993 -87.7248966749999965 10 1 11 2016 4421 2
41.7608479760000009 -87.6636790429999877 21 1 11 2016 3735 3
41.7682499149999984 -87.6887268110000093 22 1 11 2016 4232 3
41.7657555709999997 -87.5762924270000127 23 1 11 2016 3604 3
41.8845691249999987 -87.6883264370000006 21 1 11 2016 2796 3
41.7494708080000052 -87.6694774829999943 20 1 11 2016 4123 1
41.9297438179999986 -87.6842737770000014 23 1 11 2016 2877 2
41.9414408439999988 -87.8330947620000018 19 1 11 2016 2498 2
42.0069190409999962 -87.6901164349999931 21 1 11 2016 4650 1
41.8900870350000005 -87.7050182810000081 22 1 11 2016 2397 1
41.8569186909999971 -87.7335125229999875 4 1 11 2016 4140 3
41.7790128210000020 -87.6642086920000025 21 1 11 2016 1252 3
41.7250655349999988 -87.6809602950000055 5 1 11 2016 3300 2
41.7262485040000044 -87.65

41.8686249260000025 -87.6258557700000011 13 1 11 2016 4256 2
41.7972708349999991 -87.6012556929999988 11 1 11 2016 2557 1
41.7617792269999981 -87.6892827320000094 9 1 11 2016 5706 1
41.8931616209999973 -87.6777136339999998 11 1 11 2016 2204 2
41.9402578939999984 -87.6947233189999906 9 1 11 2016 4028 2
41.6990776080000032 -87.6389515179999989 18 1 11 2016 4514 2
41.8162292669999971 -87.6018395420000076 19 1 11 2016 1840 2
41.7909088890000007 -87.6037336320000009 19 1 11 2016 1529 2
41.7036685249999977 -87.7122526649999941 10 1 11 2016 6547 1
41.6940932640000028 -87.6055830860000100 17 1 11 2016 2015 2
41.8058569819999946 -87.6332809220000115 12 1 11 2016 1199 1
41.9687810180000014 -87.7354501370000008 10 1 11 2016 6805 2
41.9117618690000029 -87.7867155999999937 14 1 11 2016 7538 2
41.7454333000000020 -87.7034881350000006 9 1 11 2016 5923 1
41.8855382319999947 -87.6157684150000051 8 1 11 2016 10848 2
41.7539063660000025 -87.6113580029999923 9 1 11 2016 3611 2
41.8529015209999997 -87.6282

41.7967851140000022 -87.6670897429999911 6 1 11 2016 2139 3
41.8814659390000017 -87.6277521870000129 13 1 11 2016 2518 3
41.8857020789999979 -87.6420316439999993 12 1 11 2016 4872 1
41.9413331760000005 -87.7651298670000131 8 1 11 2016 5458 2
41.7500737659999999 -87.6428034749999938 13 1 11 2016 2198 1
41.7552783769999962 -87.6344135620000060 12 1 11 2016 2847 1
41.9122128119999999 -87.7325174959999998 13 1 11 2016 3570 2
41.9249011790000026 -87.7696705929999865 13 1 11 2016 5516 3
41.8828338940000009 -87.7024967389999972 13 1 11 2016 1819 2
41.9462137940000019 -87.6598312120000145 14 1 11 2016 1707 2
41.8622182650000028 -87.6390871640000029 11 1 11 2016 4865 2
41.8670713710000015 -87.7259978310000008 13 1 11 2016 2378 3
41.8551905509999997 -87.6238711950000067 13 1 11 2016 16735 3
41.6645929140000035 -87.6390586600000034 11 1 11 2016 5345 3
41.7066785359999983 -87.6633076129999864 14 1 11 2016 4016 3
41.8622594260000014 -87.6390867189999909 8 1 11 2016 4865 2
41.9314900660000021 -87.74

41.9598955909999987 -87.7082044950000039 23 1 11 2016 3490 1
41.8783513910000025 -87.7451983899999988 20 1 11 2016 4609 1
41.8020089959999979 -87.6220301929999863 22 1 11 2016 1761 1
41.8655467110000004 -87.7195031139999912 21 1 11 2016 4484 1
41.7830186939999990 -87.6084429289999918 22 1 11 2016 2624 1
41.7847713889999994 -87.5881430050000063 20 1 11 2016 1441 1
41.9761485259999958 -87.6891610649999933 22 1 11 2016 3985 1
41.7680576330000051 -87.5687727029999934 22 1 11 2016 2963 1
41.8929152400000007 -87.7252021379999860 13 1 11 2016 6039 2
41.7749489220000001 -87.6628651529999985 20 1 11 2016 1259 3
41.9109141750000020 -87.6485783429999969 22 1 11 2016 2603 3
41.9064592860000005 -87.7706594060000072 21 1 11 2016 4422 3
41.7977200899999985 -87.6176767670000061 15 1 11 2016 1567 2
41.7737346480000014 -87.6875500950000060 21 1 11 2016 5865 3
41.9042438090000005 -87.6678618870000008 21 1 11 2016 2872 2
41.8644461800000016 -87.7169254450000011 3 1 11 2016 4484 1
41.9051975109999972 -87.6

41.7364886160000026 -87.5458192779999962 18 1 11 2016 3314 1
41.8967861480000039 -87.6356936539999936 20 1 11 2016 6866 2
41.7947800760000021 -87.6938298510000038 9 1 11 2016 7083 2
41.9148667449999976 -87.7003847610000093 16 1 11 2016 1410 3
41.7428742119999967 -87.6179306939999947 14 1 11 2016 4757 2
41.9045976140000036 -87.7129173149999986 12 1 11 2016 5399 2
41.9141988940000019 -87.6541020599999996 16 1 11 2016 3727 1
41.7598702029999984 -87.6363209679999926 12 1 11 2016 2936 3
41.9543496160000018 -87.7292359700000048 13 1 11 2016 3785 2
41.8912702280000033 -87.6280556429999962 8 1 11 2016 3545 2
41.8892172409999972 -87.6306931239999898 15 1 11 2016 3102 2
41.7929016599999983 -87.7513965289999902 9 1 11 2016 0 2
41.8842861039999974 -87.6479924079999932 10 1 11 2016 2768 2
41.9421975400000022 -87.6882495159999991 10 1 11 2016 1503 1
41.9367994880000055 -87.6442917039999969 17 1 11 2016 2173 1
41.9768938280000015 -87.6826367620000013 19 1 11 2016 3985 1
41.8345107450000029 -87.668049

42.0110722330000002 -87.6869174820000126 11 1 11 2016 4168 1
41.9497345030000020 -87.6556144439999940 9 1 11 2016 1379 2
41.7931952019999997 -87.6876884979999858 9 1 11 2016 5369 1
41.8798601270000006 -87.7713848729999881 12 1 11 2016 1370 1
41.8783613609999961 -87.6838943470000061 11 1 11 2016 942 3
41.9046231680000005 -87.7337421379999967 11 1 11 2016 6818 1
41.7513586199999978 -87.5601645450000063 11 1 11 2016 3803 3
41.9405406050000025 -87.6444168279999900 13 1 11 2016 2569 1
41.9630707940000036 -87.6559842129999964 11 1 11 2016 4247 2
41.8938003680000008 -87.6311828049999946 11 1 11 2016 3102 2
41.6601132819999975 -87.6389164810000096 0 1 11 2016 5345 1
41.8948966410000025 -87.7598853160000090 10 1 11 2016 4196 1
41.8823940619999959 -87.6278447979999981 11 1 11 2016 4350 2
41.6778100060000014 -87.6423026450000009 9 1 11 2016 4273 3
41.8994581669999988 -87.7594551460000076 2 1 11 2016 4196 2
41.7260251450000013 -87.6542780799999974 12 1 11 2016 5941 3
41.6861724269999954 -87.616827

41.7355457909999998 -87.6460733829999867 19 1 11 2016 2654 1
41.8802814160000025 -87.7622155999999904 17 1 11 2016 5731 3
41.8919220310000000 -87.7710147220000039 17 1 11 2016 3983 3
41.7484657640000023 -87.6688941760000091 15 1 11 2016 4123 3
41.8774456240000035 -87.7451663039999943 16 1 11 2016 4609 3
41.7329022619999961 -87.6110510400000067 13 1 11 2016 1545 2
41.7484657640000023 -87.6688941760000091 15 1 11 2016 4123 2
41.7521460520000005 -87.6076799639999990 19 1 11 2016 3611 1
41.8967293549999980 -87.6830618279999925 6 1 11 2016 2764 2
41.8865211170000009 -87.6659011299999946 16 1 11 2016 2768 1
41.8865211170000009 -87.6659011299999946 17 1 11 2016 2768 2
41.7799602010000015 -87.6320997779999971 17 1 11 2016 3778 2
41.8668107390000017 -87.6258170309999969 18 1 11 2016 16735 3
41.8784013799999997 -87.7605175150000036 18 1 11 2016 6438 3
41.7634542790000012 -87.5653048850000033 18 1 11 2016 2210 1
41.9605448289999998 -87.6543711550000069 13 1 11 2016 7052 2
41.8668436680000013 -87.

41.7790128210000020 -87.6642086920000025 10 1 11 2016 1252 3
41.7733205680000026 -87.5874653820000049 10 1 11 2016 2336 2
41.7896069119999964 -87.6559726360000013 15 1 11 2016 1516 2
41.9800717440000000 -87.6682939709999971 20 1 11 2016 4326 1
41.7472925380000035 -87.7230604620000065 18 1 11 2016 5728 3
41.7828896540000017 -87.5886964410000104 18 1 11 2016 1441 2
42.0114735209999992 -87.6711424860000079 21 1 11 2016 5143 2
41.7898171319999960 -87.6741840770000067 23 1 11 2016 1321 1
41.9584991950000017 -87.6486243049999985 19 1 11 2016 5150 2
41.9372276679999985 -87.6443054710000098 18 1 11 2016 6345 2
41.9040646839999980 -87.7092710479999909 8 1 11 2016 5399 2
42.0184340169999970 -87.6901844060000002 14 1 11 2016 3965 2
42.0001413029999995 -87.6814323899999977 11 1 11 2016 6214 2
41.8418251840000011 -87.6232922179999889 20 1 11 2016 2545 1
41.9732634809999965 -87.6584357220000072 12 1 11 2016 5648 3
41.7640363189999988 -87.6612376810000029 16 1 11 2016 1378 1
41.8195360650000012 -87.6

41.7452852309999969 -87.6050675620000021 12 1 11 2016 3127 3
41.7989475430000041 -87.6647165079999979 11 1 11 2016 2045 1
41.8957333050000003 -87.6874574790000025 10 1 11 2016 1436 2
41.7696709740000003 -87.6566662870000073 10 1 11 2016 2272 2
41.9283534869999954 -87.6421262420000033 13 1 11 2016 1709 2
41.7332535310000026 -87.6678566079999939 10 1 11 2016 1626 1
41.8832086239999981 -87.6864892970000085 13 1 11 2016 2796 1
41.8821514169999958 -87.7061573020000083 13 1 11 2016 1442 3
41.8792489650000022 -87.6968550179999937 9 1 11 2016 2085 2
41.9366653630000030 -87.6685540220000092 13 1 11 2016 2226 3
41.9884171990000041 -87.7857881079999913 15 1 11 2016 5313 2
41.8931529549999979 -87.7466309250000052 12 1 11 2016 1495 3
41.8866650739999997 -87.7650893810000099 10 1 11 2016 5731 3
41.7962823650000033 -87.7019783350000068 13 1 11 2016 7083 3
41.8812031820000001 -87.6473423329999974 12 1 11 2016 4844 2
41.8424168909999992 -87.6853148970000120 9 1 11 2016 4463 2
41.8700012729999997 -87.73

41.7683829190000040 -87.6444950020000135 18 1 11 2016 3517 3
41.8811836639999981 -87.6850812520000034 16 1 11 2016 2796 3
41.8363852310000013 -87.6657104100000026 18 1 11 2016 3326 3
41.9131505949999976 -87.6398691440000022 18 1 11 2016 1652 3
41.7509407569999951 -87.6251852220000131 19 1 11 2016 2847 1
41.6939606709999993 -87.6216584679999926 19 1 11 2016 3891 1
41.8833266930000008 -87.7288111160000028 16 1 11 2016 1136 2
41.9056976419999998 -87.6933412839999988 11 1 11 2016 2047 2
41.8745785069999954 -87.6740372109999981 13 1 11 2016 1713 1
41.9126244409999984 -87.6884996220000090 4 1 11 2016 1457 1
41.8895614280000004 -87.6201966510000005 1 1 11 2016 6229 2
41.7044766050000035 -87.6464080110000054 17 1 11 2016 2331 3
41.7987559669999982 -87.6878380610000079 12 1 11 2016 6364 1
41.8834844820000001 -87.6576664239999985 8 1 11 2016 2768 2
41.8803583750000001 -87.6552684250000027 14 1 11 2016 6895 2
41.9110963150000018 -87.7034051939999983 20 1 11 2016 1623 1
41.8656026000000026 -87.716

41.7275289780000023 -87.5633077739999948 13 1 11 2016 3206 2
41.9157506649999974 -87.6885967470000054 10 1 11 2016 3181 2
41.9113578059999980 -87.7541132779999913 10 1 11 2016 2669 2
41.9214800959999963 -87.7103764170000062 22 1 11 2016 4870 2
41.7712200419999959 -87.6700634360000066 16 1 11 2016 2688 2
41.9418180330000041 -87.7469458639999971 22 1 11 2016 4567 2
41.6940825040000007 -87.6507915739999959 10 1 11 2016 4147 1
41.9354287549999967 -87.7834005979999858 15 1 11 2016 4257 3
41.7533119090000042 -87.6683677670000066 21 1 11 2016 5651 1
41.9454651599999977 -87.6501679600000045 9 1 11 2016 1798 1
41.7470583659999974 -87.6051171479999908 12 1 11 2016 3746 2
41.7506251189999986 -87.6015294110000013 13 1 11 2016 3746 3
41.7432279499999979 -87.6526677840000019 3 1 11 2016 3475 2
41.8122282260000020 -87.7072800699999959 17 1 11 2016 4795 1
41.9350580489999984 -87.6806512349999991 11 1 11 2016 2077 3
41.8028476890000036 -87.6113039810000060 11 1 11 2016 1295 2
41.9704333910000003 -87.76

41.7062916559999977 -87.6213320610000039 8 1 11 2016 5086 2
41.7711179369999996 -87.6931851220000027 17 1 11 2016 4232 3
41.9288305319999992 -87.7244495510000064 0 1 11 2016 4060 2
41.9288223249999987 -87.6877816089999982 0 1 11 2016 2877 2
41.8820128480000022 -87.6283300500000024 0 1 11 2016 4350 2
41.9355596079999984 -87.6457494309999987 6 1 11 2016 2180 2
41.8984329580000008 -87.6209868529999909 12 1 11 2016 4884 2
41.8925844030000007 -87.7223294660000050 16 1 11 2016 6039 2
41.9266780630000042 -87.6391567489999943 12 1 11 2016 3796 2
41.9266780630000042 -87.6391567489999943 12 1 11 2016 3796 2
41.8440269409999956 -87.6221008079999990 12 1 11 2016 2545 2
41.9380265449999996 -87.8048096629999861 12 1 11 2016 7287 2
41.7874493569999998 -87.6824622959999971 17 1 11 2016 5369 2
41.9104914380000011 -87.7499366079999987 0 1 11 2016 2669 1
41.6892903809999993 -87.5340752389999892 6 1 11 2016 4301 2
41.8018525280000048 -87.6300456629999900 8 1 11 2016 1199 2
41.7061317229999986 -87.64645631

41.9601829899999998 -87.7045507339999943 12 1 11 2016 3490 2
41.9417097199999986 -87.8349745860000013 12 1 11 2016 2276 2
41.8373789930000015 -87.6353007379999980 8 1 11 2016 1544 1
41.8437279320000002 -87.7074538610000047 12 1 11 2016 3716 2
41.8528215259999996 -87.7210342820000051 7 1 11 2016 2268 2
41.9545355549999996 -87.6512218020000091 14 1 11 2016 3297 2
41.7994048890000016 -87.6010993750000040 8 1 11 2016 2327 2
41.7434499980000027 -87.6025855920000112 12 1 11 2016 3127 1
41.7926888619999986 -87.7047565910000060 9 1 11 2016 5363 1
41.6522142250000016 -87.6190774529999885 0 1 11 2016 3373 2
41.8911691120000000 -87.7586185100000051 11 1 11 2016 4784 1
41.8271742299999971 -87.6404785779999855 10 1 11 2016 4080 2
41.9894312340000013 -87.6695501970000066 1 1 11 2016 6395 2
41.6573213200000012 -87.6059276889999978 0 1 11 2016 3373 2
41.8956751360000013 -87.6285691460000038 10 1 11 2016 3545 2
41.7770003119999984 -87.6678261560000038 0 1 11 2016 1252 2
41.6719153049999989 -87.62608499

## Predictions

### Find the tract and population size for that tract for each crime location
Trying to match the "Census Tracts" from the crimes dataframe against the "TRACTCE10" in the blocks dataframe is not working out. For example, "Census Tracts" has value 703, but there are many such values in the TRACTCE10 (30703, 70300, 170300, 570300, 670300).

This is not necessary now that the database is up and running.

In [16]:
# import re
# from shapely.geometry import MultiPolygon, Point, Polygon

# def create_polygons(df, geom_col, tract_col, block_col, full_block):
#     pattern = re.compile(r'MULTIPOLYGON \(\(\((.*)\)\)\)')
#     coords = df[geom_col].apply(lambda x: pattern.match(x)[1])
#     points = coords.str.split()
    
#     points = points.apply(lambda x: [y.replace(',', '')
#                                       .replace('(', '')
#                                       .replace(')', '') for y in x])
#     polygons = []
#     for i0 in range(0, len(points)):
#         built_points = []
#         for i1 in range(0, len(points[i0]) - 1, 2):
#             built_points.append( \
#                 (np.float(points[i0][i1+1]),  # The first value given is the longitude,
#                  np.float(points[i0][i1])))   # the second is the latitude, so we index accordingly.
        
#         polygons.append(Polygon(built_points))

#     trk = df[tract_col]
#     blk = df[block_col]
#     full_blk = df[full_block]
#     return polygons, trk, blk, full_blk

# polygons, tract_col, block_col, full_block_col = \
#     create_polygons(blocks, 'the_geom', 'TRACTCE10','BLOCKCE10',
#                     'CENSUS BLOCK FULL')

In [17]:
# locations = crimes[['Latitude', 'Longitude']].apply(lambda x: \
#                                                     Point(x[0], x[1]), axis=1)

In [1]:
# mapping_indices = []
# for i in range(len(locations)):
#     found = False
#     for j in range(len(polygons)):
#         if polygons[j].contains(locations[i]):
#             mapping_indices.append(j)
#             found = True
#             break
#     if found == False:
#         raise ValueError(f'Could not find polygon containing location at index: {i}')
# print('Num points could not be placed in polygons:', could_not_find_polygon)

In [None]:
# def get_tract_id(idx):
#     return tract_col[idx]
# def get_block_id(idx):
#     return block_col[idx]
# def get_full_block_id(idx):
#     return full_block_col[idx]
# def get_population_by_tracts(filename):
#     pop = pd.read_csv(filename, header=0)

# crimes['tract_id'] = pd.Series(crimes.index.map(get_tract_id))
# crimes['block_id'] = pd.Series(crimes.index.map(get_block_id))
# crimes['full_block_id'] = pd.Series(crimes.index.map(get_block_id))
# get_population_by_tracts('Population_by_2010_Census_Block.csv')