In [1]:
import numpy as np
import pandas as pd
import seaborn
import matplotlib.pyplot as plt
import datetime
import csv
import os
from sklearn.metrics import r2_score, mean_squared_error

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
from modules.prediction import load_all_data
from modules.prediction import precrime_train_test_split
from modules.prediction import load_splits
from modules.prediction import create_all_splits
from modules.prediction import sample_model
from modules.poly_ridge import poly_ridge_model
from modules.fancy_time_series import fancy_time_series_model
from modules.eval_model import eval_predictions


In [3]:
# Load the data set through the project's load_all_data helper; `crime_data`
# is the input to create_all_splits in a later cell.
# NOTE(review): the call prints the tail of a warning (`mask |= (ar1 == a)`);
# its origin is not visible in this notebook — check the loader.
crime_data = load_all_data()

  mask |= (ar1 == a)


In [4]:
from modules.prediction import create_test_period

In [5]:
# Build two test-period definitions. Both share the first date argument
# (2017-01-01); only the second date differs (2017-10-01 vs. 2017-07-01).
# NOTE(review): presumably the arguments are (start, end); whether the end
# date is inclusive is decided inside create_test_period — confirm there.
split_all_2017 = create_test_period(
    datetime.date(year=2017, month=1, day=1),
    datetime.date(year=2017, month=10, day=1),
)
split_thru_june_2017 = create_test_period(
    datetime.date(year=2017, month=1, day=1),
    datetime.date(year=2017, month=7, day=1),
)

In [6]:
# Produce the splits for every named test period in one call. The dict keys
# ('all_2017', 'thru_june') are the keys used to look the splits up again in
# the next cell.
train_test_data = create_all_splits(crime_data, {'all_2017': split_all_2017, 'thru_june': split_thru_june_2017})

In [7]:
# Each entry of train_test_data unpacks into four objects, in the order
# (X_train, X_test, y_train, y_test). The suffix names the test period.
X_train_2017, X_test_2017, y_train_2017, y_test_2017 = train_test_data['all_2017']
X_train_june, X_test_june, y_train_june, y_test_june = train_test_data['thru_june']

In [8]:
# Predictions from the polynomial-ridge model, one per test period.
y_poly_2017 = poly_ridge_model(X_train_2017, y_train_2017, X_test_2017)
y_poly_june = poly_ridge_model(X_train_june, y_train_june, X_test_june)

# Predictions from the time-series model, one per test period.
# NOTE(review): unlike poly_ridge_model, this model is also given y_test.
# Confirm it is not used to form the predictions; if it is, the scores
# reported by eval_predictions below are optimistic.
y_ts_2017 = fancy_time_series_model(X_train_2017, y_train_2017, X_test_2017, y_test_2017)
y_ts_june = fancy_time_series_model(X_train_june, y_train_june, X_test_june, y_test_june)

# Hybrid prediction: unweighted average of the two models' outputs.
# NOTE(review): the y_ts_june.mean() output further down shows key columns
# (COMPLAINT_YEAR, ADDR_PCT_CD, ...) inside the prediction frame, so they are
# averaged too. That is only harmless if both frames carry identical keys in
# the same row order — confirm.
y_hybrid_2017 = (y_poly_2017 + y_ts_2017) / 2
y_hybrid_june = (y_poly_june + y_ts_june) / 2

In [9]:
# Score the hybrid predictions against the 'all_2017' test split.
# NOTE(review): the printed R2 values exceed 1 (e.g. 10.7), so eval_predictions
# presumably reports R2 as a percentage — confirm in modules.eval_model.
eval_predictions(X_test_2017, y_test_2017, y_hybrid_2017)

------------------------------------------------------------------
Four-hour buckets:
------------------------------------------------------------------
Homicide:         R2 =     -0.4, RMSE =     0.041, RMSE (%) =  2730.391
Rape:             R2 =      0.0, RMSE =     0.112, RMSE (%) =   918.150
Robbery:          R2 =      3.7, RMSE =     0.291, RMSE (%) =   358.580
FelonyAssault:    R2 =      5.4, RMSE =     0.382, RMSE (%) =   318.653
Burglary:         R2 =      1.0, RMSE =     0.269, RMSE (%) =   391.755
GrandLarceny:     R2 =     10.7, RMSE =     0.501, RMSE (%) =   213.885
GrandLarcenyAuto: R2 =      1.4, RMSE =     0.183, RMSE (%) =   562.755
Fraud:            R2 =      1.7, RMSE =     0.152, RMSE (%) =   675.712
Forgery:          R2 =      2.6, RMSE =     0.181, RMSE (%) =   586.003
Arson:            R2 =     -0.4, RMSE =     0.065, RMSE (%) =  1623.359
Drugs:            R2 =      2.8, RMSE =     0.168, RMSE (%) =   623.468
Weapons:          R2 =      3.2, RMSE =     0.174, RMSE

In [10]:
# Score the hybrid predictions against the 'thru_june' test split.
eval_predictions(X_test_june, y_test_june, y_hybrid_june)

------------------------------------------------------------------
Four-hour buckets:
------------------------------------------------------------------
Homicide:         R2 =     -0.5, RMSE =     0.040, RMSE (%) =  2794.489
Rape:             R2 =      0.1, RMSE =     0.111, RMSE (%) =   915.342
Robbery:          R2 =      3.8, RMSE =     0.288, RMSE (%) =   363.713
FelonyAssault:    R2 =      5.3, RMSE =     0.377, RMSE (%) =   322.523
Burglary:         R2 =      1.2, RMSE =     0.269, RMSE (%) =   394.671
GrandLarceny:     R2 =     11.6, RMSE =     0.499, RMSE (%) =   211.911
GrandLarcenyAuto: R2 =      1.2, RMSE =     0.174, RMSE (%) =   586.598
Fraud:            R2 =      1.9, RMSE =     0.160, RMSE (%) =   643.028
Forgery:          R2 =      3.1, RMSE =     0.185, RMSE (%) =   564.507
Arson:            R2 =     -0.4, RMSE =     0.066, RMSE (%) =  1578.134
Drugs:            R2 =      2.7, RMSE =     0.167, RMSE (%) =   620.130
Weapons:          R2 =      2.9, RMSE =     0.173, RMSE

In [11]:
# Score the polynomial-ridge predictions alone on the 'thru_june' test split,
# for comparison with the hybrid result.
eval_predictions(X_test_june, y_test_june, y_poly_june)

------------------------------------------------------------------
Four-hour buckets:
------------------------------------------------------------------
Homicide:         R2 =      0.0, RMSE =     0.040, RMSE (%) =  2787.736
Rape:             R2 =      0.4, RMSE =     0.111, RMSE (%) =   913.859
Robbery:          R2 =      3.3, RMSE =     0.288, RMSE (%) =   364.645
FelonyAssault:    R2 =      4.9, RMSE =     0.378, RMSE (%) =   323.177
Burglary:         R2 =      0.4, RMSE =     0.270, RMSE (%) =   396.294
GrandLarceny:     R2 =      9.2, RMSE =     0.506, RMSE (%) =   214.827
GrandLarcenyAuto: R2 =      0.7, RMSE =     0.175, RMSE (%) =   588.015
Fraud:            R2 =      1.7, RMSE =     0.160, RMSE (%) =   643.468
Forgery:          R2 =      1.9, RMSE =     0.187, RMSE (%) =   568.051
Arson:            R2 =      0.0, RMSE =     0.066, RMSE (%) =  1574.851
Drugs:            R2 =      1.2, RMSE =     0.168, RMSE (%) =   624.768
Weapons:          R2 =      2.3, RMSE =     0.173, RMSE

In [12]:
# Score the time-series predictions alone on the 'thru_june' test split,
# for comparison with the hybrid result.
eval_predictions(X_test_june, y_test_june, y_ts_june)

------------------------------------------------------------------
Four-hour buckets:
------------------------------------------------------------------
Homicide:         R2 =     -2.0, RMSE =     0.040, RMSE (%) =  2815.388
Rape:             R2 =     -1.2, RMSE =     0.112, RMSE (%) =   921.286
Robbery:          R2 =      2.8, RMSE =     0.289, RMSE (%) =   365.595
FelonyAssault:    R2 =      4.3, RMSE =     0.379, RMSE (%) =   324.184
Burglary:         R2 =      0.3, RMSE =     0.270, RMSE (%) =   396.455
GrandLarceny:     R2 =     11.5, RMSE =     0.500, RMSE (%) =   212.093
GrandLarcenyAuto: R2 =      0.1, RMSE =     0.175, RMSE (%) =   589.811
Fraud:            R2 =      0.8, RMSE =     0.161, RMSE (%) =   646.612
Forgery:          R2 =      1.9, RMSE =     0.187, RMSE (%) =   567.885
Arson:            R2 =     -1.8, RMSE =     0.067, RMSE (%) =  1589.589
Drugs:            R2 =      2.0, RMSE =     0.168, RMSE (%) =   622.085
Weapons:          R2 =      1.6, RMSE =     0.174, RMSE

In [13]:
# Per-column mean of the time-series predictions for the 'thru_june' split.
# The result covers the key columns (COMPLAINT_*, ADDR_PCT_CD) as well as the
# per-crime prediction columns.
y_ts_june.mean()

COMPLAINT_YEAR         2017.000000
COMPLAINT_MONTH           3.508287
COMPLAINT_DAY            15.602210
COMPLAINT_HOURGROUP      10.000000
ADDR_PCT_CD              63.324675
COMPLAINT_DAYOFWEEK       2.988950
Homicide                  0.001738
Rape                      0.012426
Robbery                   0.082572
FelonyAssault             0.113034
Burglary                  0.069732
GrandLarceny              0.237703
GrandLarcenyAuto          0.033797
Fraud                     0.023682
Forgery                   0.032948
Arson                     0.004046
Drugs                     0.025594
Weapons                   0.029019
CriminalMischief          0.053754
Other                     0.079013
dtype: float64

In [14]:
# Per-column standard deviation of the time-series predictions for the
# 'thru_june' split. Key columns are included alongside the per-crime columns.
y_ts_june.std()

COMPLAINT_YEAR          0.000000
COMPLAINT_MONTH         1.709971
COMPLAINT_DAY           8.735374
COMPLAINT_HOURGROUP     6.831341
ADDR_PCT_CD            35.883301
COMPLAINT_DAYOFWEEK     1.999981
Homicide                0.005914
Rape                    0.016860
Robbery                 0.075219
FelonyAssault           0.104608
Burglary                0.054212
GrandLarceny            0.202411
GrandLarcenyAuto        0.037232
Fraud                   0.031444
Forgery                 0.048583
Arson                   0.009803
Drugs                   0.039469
Weapons                 0.046308
CriminalMischief        0.049730
Other                   0.074806
dtype: float64