In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from geopandas import GeoDataFrame
import geopandas
from shapely.geometry import Point
import re
from fiona.crs import from_epsg
from shapely import geometry
from shapely.geometry import Polygon

#### Here we first merge the population density to the original dataset

In [4]:
population = pd.read_csv("DEC_10_SF1_P8_with_ann.csv")
cs_shape = GeoDataFrame.from_file("Census_Tract/geo_export_cd871589-cb60-4202-b265-8cc7ce7eda51.shp")
cs_shape = cs_shape.rename(columns = {"name10":"tractnum"})
cs_shape["tractnum"] = pd.to_numeric(cs_shape["tractnum"])
population = population.rename(columns={"GEO.display-label":"tractnum"})

### define a function to find the census tract number
def findnum(data):
    reg = re.findall(r"\d+\.*\d*", data)
    return float(reg[0])

In [5]:
population = population[["tractnum", "D001"]].drop([0])
population["tractnum"] = population["tractnum"].apply(findnum)
population["tractnum"] = pd.to_numeric(population["tractnum"])
cs_info = population.merge(cs_shape, on="tractnum")
cs_info = GeoDataFrame(cs_info, crs={'init': 'epsg:4326'}, geometry = "geometry")
beats = GeoDataFrame.from_file("beats/geo_export_197eb2e2-6cc6-4e1d-b7f1-d298aca5f97c.shp")

## define function to obtain the population density
def mappopulation(data):
    popu = 0
    for i in cs_info.index:
        ratio = data.geometry.intersection(cs_info.geometry[i]).area/cs_info.geometry[i].area
        p_r = cs_info["D001"][i]*ratio
        popu += p_r
    return popu

In [6]:
cs_info["D001"] = pd.to_numeric(cs_info["D001"])
beats["pop_num"] = beats.apply(mappopulation, axis=1)
pop_den = beats.copy()
pop_den = pop_den.to_crs(epsg=2263)
pop_den["density"] = pop_den["pop_num"]/(pop_den.geometry.area/(5280)**2)
pop_den = pop_den[["geometry", "density", "beat_num"]]

In [7]:
data = pd.read_csv("ChicagoCrime.csv", parse_dates=["Date"]) # read the Chicago crime data

In [8]:
data.columns

Index(['ID', 'Case Number', 'Date', 'Block', 'IUCR', 'Primary Type',
       'Description', 'Location Description', 'Arrest', 'Domestic', 'Beat',
       'District', 'Ward', 'Community Area', 'FBI Code', 'X Coordinate',
       'Y Coordinate', 'Year', 'Updated On', 'Latitude', 'Longitude',
       'Location'],
      dtype='object')

#### Choose the necessary feature from the dataset

In [9]:
d_f = data[["Date", 'Block', 'Beat', 'District', 'Primary Type', 'Ward', 'Location Description', 'Community Area']]
d_f["Year"] = d_f["Date"].apply(lambda x: x.year)
d_f["Month"] = d_f["Date"].apply(lambda x: x.month)
d_f["Day"] = d_f["Date"].apply(lambda x: x.day)
d_f["Dow"] = d_f["Date"].apply(lambda x: x.dayofweek)
d_f["Month"] = d_f["Date"].apply(lambda x: x.month)
d_f["Hour"] = d_f["Date"].apply(lambda x: x.hour)
d_f["Minute"] = d_f["Date"].apply(lambda x: x.minute)
d_f["Second"] = d_f["Date"].apply(lambda x: x.second)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  from ipykernel import kernelapp as app
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  app.launch_new_instance()
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas

In [10]:
d_f = pd.concat([d_f, data[["Latitude", "Longitude"]]],axis=1)
d_f = d_f[d_f["Primary Type"].isin(["BURGLARY", "BATTERY", "CRIM SEXUAL ASSAULT", "HOMICIDE", "ROBBERY"])]

In [12]:
d_f.to_csv("out_data/chicago_features.csv")