In [1]:
import numpy as np
import pandas as pd
import seaborn
import matplotlib.pyplot as plt
import datetime
import csv
import os
from sklearn.metrics import r2_score, mean_squared_error

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
from modules.prediction import load_all_data
from modules.prediction import precrime_train_test_split
from modules.prediction import load_splits
from modules.prediction import create_all_splits
from modules.prediction import sample_model
from modules.poly_ridge import poly_ridge_model
from modules.fancy_time_series import fancy_time_series_model
from modules.eval_model import eval_predictions
from modules.prediction import create_test_period

In [3]:
# Load the complete crime dataset through the project's `load_all_data`
# helper; the train/test splits below are built from this frame.
crime_data = load_all_data()

  mask |= (ar1 == a)


In [4]:
# Forecast window passed to `create_test_period`:
# from 1 Oct 2017 to 30 Sep 2018.
split_future = create_test_period(
    datetime.date(year=2017, month=10, day=1),
    datetime.date(year=2018, month=9, day=30),
)

In [5]:
# Build the split named 'future' from the full dataset, passing `split_future`
# as the definition of that split. The result is looked up by split name.
train_test_data = create_all_splits(crime_data, {'future': split_future})

In [6]:
# Unpack the 'future' split, keeping only the first and third elements
# (training features and training targets). The second and fourth elements
# are discarded -- presumably the test-side features/targets, going by the
# X_train, X_test, y_train, y_test unpacking order used for 'ts_future'
# further down; confirm against `create_all_splits`.
X_train_future, _, y_train_future, _ = train_test_data['future']

In [7]:
# Stand-in weather for the future period: mean temperature and precipitation
# intensity per calendar (month, day) in the training features.
average_weather = (
    X_train_future
    .groupby(['COMPLAINT_MONTH', 'COMPLAINT_DAY'])[['temperature', 'precipIntensity']]
    .mean()
    .reset_index()
)

# Every distinct precinct code in the training features, plus a constant
# `key` column that exists only to drive the cross join below.
precinct_codes = pd.DataFrame(
    X_train_future['ADDR_PCT_CD'].unique(),
    columns=['ADDR_PCT_CD'],
).assign(key=1)

# Re-label the test-period frame with COMPLAINT_* column names.
# The first five characters of each original column name are dropped and the
# rest lower-cased; the result must contain 'year', 'month' and 'day' so that
# pd.to_datetime can assemble a date and yield the day of week.
gimme_a_datetime = split_future.copy()
gimme_a_datetime.columns = [name[5:].lower() for name in gimme_a_datetime.columns]
gimme_a_datetime = gimme_a_datetime.assign(
    dayofweek=pd.to_datetime(gimme_a_datetime[['year', 'month', 'day']]).dt.dayofweek
)
gimme_a_datetime.columns = ['COMPLAINT_{0}'.format(name.upper()) for name in gimme_a_datetime.columns]
# `key` is added after the renaming so that it keeps its plain name.
gimme_a_datetime = gimme_a_datetime.assign(key=1)

# Cross join: both frames share only the constant `key` column, so merging on
# it pairs every test-period row with every precinct. The helper column is
# dropped afterwards.
dates_and_precincts = (
    gimme_a_datetime
    .merge(precinct_codes, how='outer')
    .drop('key', axis=1)
)


In [8]:
# Future feature matrix: each (date, precinct) row joined to the average
# weather. No join keys are given, so pandas uses the columns the two frames
# have in common, with its default inner join.
X_test_future = dates_and_precincts.merge(average_weather)

# Polynomial-ridge predictions for the future period.
y_poly_future = poly_ridge_model(
    X_train_future,
    y_train_future,
    X_test_future,
)

In [9]:
# Append the polynomial-ridge predictions (joined back onto their feature
# rows) to the historical data, so the future period has rows to split on.
crime_data_with_poly_future = pd.concat(
    [crime_data, X_test_future.merge(y_poly_future)],
    ignore_index=True,
)

# Re-split the extended dataset with the same test period, under the name
# 'ts_future'.
train_test_data_with_poly_future = create_all_splits(
    crime_data_with_poly_future,
    {'ts_future': split_future},
)
(
    X_train_ts_future,
    X_test_ts_future,
    y_train_ts_future,
    y_test_ts_future,
) = train_test_data_with_poly_future['ts_future']

# Time-series predictions; the model's index is moved into an 'index' column
# and that column is then dropped, leaving a fresh 0..n-1 index.
y_ts_future = fancy_time_series_model(
    X_train_ts_future,
    y_train_ts_future,
    X_test_ts_future,
    y_test_ts_future,
).reset_index().drop('index', axis=1)

In [10]:
# Hybrid forecast: element-wise mean of the polynomial-ridge and time-series
# predictions.
# NOTE(review): assuming both operands are DataFrames, the addition pairs rows
# by index label and is applied to every shared column. It therefore
# presumably relies on both frames listing the same (date, precinct) rows in
# the same order, and any key columns present are averaged as well (which
# would explain the float COMPLAINT_MONTH / ADDR_PCT_CD values in the grouped
# output further down) -- confirm the row alignment.
y_hybrid_future = (y_poly_future + y_ts_future)/2

In [12]:
# Columns that identify a single row: date parts, hour group and precinct.
key_fields = ['COMPLAINT_YEAR', 'COMPLAINT_MONTH', 'COMPLAINT_DAY', 'COMPLAINT_HOURGROUP', 'ADDR_PCT_CD']

# Offense columns: every column of the training targets whose dtype is not
# `object`.
offense_types = list(y_train_future.select_dtypes(exclude=['object']).columns)

In [13]:
# Predicted totals per offense type for each (month, precinct) pair.
y_hybrid_future.groupby(['COMPLAINT_MONTH', 'ADDR_PCT_CD'])[offense_types].sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,Homicide,Rape,Robbery,FelonyAssault,Burglary,GrandLarceny,GrandLarcenyAuto,Fraud,Forgery,Arson,Drugs,Weapons,CriminalMischief,Other
COMPLAINT_MONTH,ADDR_PCT_CD,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1.0,1.0,0.072230,2.100566,4.871484,7.862098,6.548280,91.463115,1.513635,6.840131,7.805047,0.139873,1.437520,1.874032,7.226758,6.809752
1.0,5.0,0.027475,1.419239,5.919904,15.654493,4.955524,46.916769,0.935852,1.987410,8.278143,0.220707,1.591453,2.105977,7.542259,15.985502
1.0,6.0,0.023864,1.452340,9.285432,10.476005,8.974319,91.135828,1.767847,4.205747,5.453045,0.134145,4.404717,2.366887,10.124778,6.627241
1.0,7.0,0.029462,1.535719,8.617750,12.163267,3.467917,33.258567,1.471686,2.515506,4.214109,0.694870,4.578393,3.314667,7.849783,9.713498
1.0,9.0,0.044028,1.840533,9.772930,13.518679,9.333950,70.526438,2.502819,6.471534,4.437825,0.336264,5.031691,3.931925,6.067967,9.769181
1.0,10.0,0.028449,2.045123,6.379834,10.398740,4.729530,58.713374,2.235611,4.835604,4.595222,0.192616,2.977414,2.936002,7.233003,6.668617
1.0,13.0,0.161874,3.322637,9.328179,13.837298,14.005352,117.940601,2.005111,10.370567,7.894587,0.277176,2.598944,3.495919,8.540233,7.911963
1.0,14.0,0.209277,4.327782,12.514388,17.612233,21.417918,184.978638,1.445657,4.356146,24.331208,0.201190,5.406334,5.317742,13.790159,13.359393
1.0,17.0,0.069861,1.339258,4.461581,6.618433,4.922146,63.327303,1.473536,5.885425,4.198504,0.046631,1.263623,1.097345,7.502597,4.899748
1.0,18.0,0.030496,3.751515,9.031067,13.647970,12.779591,153.260054,2.234228,8.644670,9.065960,0.117195,2.654517,1.824640,10.335447,8.455610


In [21]:
# Recorded 2016 totals minus the hybrid forecast totals, per offense type and
# (month, precinct). Positive values mean 2016 had more complaints than the
# forecast; pairs present on only one side come out as NaN.
(
    crime_data.loc[crime_data['COMPLAINT_YEAR'] == 2016]
    .groupby(['COMPLAINT_MONTH', 'ADDR_PCT_CD'])[offense_types]
    .sum()
    .sub(
        y_hybrid_future
        .groupby(['COMPLAINT_MONTH', 'ADDR_PCT_CD'])[offense_types]
        .sum()
    )
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Homicide,Rape,Robbery,FelonyAssault,Burglary,GrandLarceny,GrandLarcenyAuto,Fraud,Forgery,Arson,Drugs,Weapons,CriminalMischief,Other
COMPLAINT_MONTH,ADDR_PCT_CD,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
1,1,-0.072230,-0.100566,-0.871484,-4.862098,-0.548280,1.536885,-0.513635,-0.840131,0.194953,-0.139873,-0.437520,-0.874032,-4.226758,2.190248
1,5,-0.027475,-0.419239,1.080096,11.345507,-0.955524,3.083231,-0.935852,1.012590,4.721857,0.779293,-0.591453,1.894023,-3.542259,3.014498
1,6,-0.023864,-0.452340,7.714568,3.523995,2.025681,-7.135828,0.232153,-1.205747,-1.453045,-0.134145,-0.404717,0.633113,-1.124778,-1.627241
1,7,-0.029462,0.464281,0.382250,1.836733,3.532083,7.741433,-1.471686,1.484494,-2.214109,1.305130,-2.578393,-1.314667,0.150217,4.286502
1,9,-0.044028,-0.840533,1.227070,-1.518679,-0.333950,3.473562,2.497181,1.528466,1.562175,1.663736,-2.031691,1.068075,-4.067967,2.230819
1,10,-0.028449,-1.045123,-1.379834,-2.398740,2.270470,-10.713374,-2.235611,-1.835604,-2.595222,1.807384,0.022586,-0.936002,-5.233003,-4.668617
1,13,-0.161874,-1.322637,3.671821,-1.837298,13.994648,-14.940601,-1.005111,-4.370567,-0.894587,1.722824,-0.598944,1.504081,-4.540233,2.088037
1,14,-0.209277,0.672218,1.485612,3.387767,1.582082,-10.978638,0.554343,0.643854,-9.331208,-0.201190,9.593666,-3.317742,-5.790159,-3.359393
1,17,-0.069861,-1.339258,2.538419,1.381567,6.077854,-5.327303,2.526464,-1.885425,-1.198504,-0.046631,-1.263623,-0.097345,-5.502597,-1.899748
1,18,-0.030496,-2.751515,10.968933,2.352030,-1.779591,-0.260054,-2.234228,-0.644670,-4.065960,-0.117195,1.345483,-1.824640,0.664553,-4.455610
