In [1]:
import numpy as np
import pandas as pd
import seaborn
import matplotlib.pyplot as plt
import datetime
import csv
import os
from sklearn.metrics import r2_score, mean_squared_error

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
from modules.prediction import load_all_data
from modules.prediction import precrime_train_test_split
from modules.prediction import load_splits
from modules.prediction import create_all_splits
from modules.prediction import sample_model
from modules.poly_ridge import poly_ridge_model
from modules.eval_model import eval_predictions

In [3]:
# Load the data and the split definitions via the project helpers in
# modules.prediction (what each loader actually reads is not visible from
# this notebook -- see that module).
crime_data = load_all_data()
splits = load_splits()
# train_test_data is indexed by split name in the next cell ('fine',
# 'coarse', '2016'); each entry unpacks into four objects:
# (X_train, X_test, y_train, y_test).
train_test_data = create_all_splits(crime_data, splits)

  mask |= (ar1 == a)


In [4]:
# Destructure the three named splits in one statement. Every entry of
# train_test_data unpacks as (X_train, X_test, y_train, y_test).
(
    (X_train_fine, X_test_fine, y_train_fine, y_test_fine),
    (X_train_coarse, X_test_coarse, y_train_coarse, y_test_coarse),
    (X_train_2016, X_test_2016, y_train_2016, y_test_2016),
) = (train_test_data[split_name] for split_name in ('fine', 'coarse', '2016'))

In [5]:
# Run each model function on every split, passing
# (train features, train targets, test features).
# Call order: all sample_model runs first, then all poly_ridge_model runs,
# each going fine -> coarse -> 2016.
y_simple_fine, y_simple_coarse, y_simple_2016 = (
    sample_model(features_train, targets_train, features_test)
    for features_train, targets_train, features_test in (
        (X_train_fine, y_train_fine, X_test_fine),
        (X_train_coarse, y_train_coarse, X_test_coarse),
        (X_train_2016, y_train_2016, X_test_2016),
    )
)

y_poly_fine, y_poly_coarse, y_poly_2016 = (
    poly_ridge_model(features_train, targets_train, features_test)
    for features_train, targets_train, features_test in (
        (X_train_fine, y_train_fine, X_test_fine),
        (X_train_coarse, y_train_coarse, X_test_coarse),
        (X_train_2016, y_train_2016, X_test_2016),
    )
)

In [6]:
# Score the sample_model predictions against the 'fine' split's test targets.
# eval_predictions prints per-crime-type R2 and MSE for two groupings
# ("Four-hour buckets" and "Days").
# NOTE(review): the printed R2 figures (e.g. 12.6) exceed 1, so they look
# like percentages rather than fractions -- confirm in modules.eval_model.
eval_predictions(X_test_fine, y_test_fine, y_simple_fine)

----------------------------------------
Four-hour buckets:
----------------------------------------
Homicide: R2 = 0.3, MSE = 0.0030
Rape: R2 = 0.5, MSE = 0.0148
Robbery: R2 = 4.6, MSE = 0.1222
FelonyAssault: R2 = 5.1, MSE = 0.1332
Burglary: R2 = 3.6, MSE = 0.1198
GrandLarceny: R2 = 12.6, MSE = 0.2746
GrandLarcenyAuto: R2 = 3.5, MSE = 0.0613
Fraud: R2 = 2.2, MSE = 0.0336
Forgery: R2 = 2.2, MSE = 0.0336
Arson: R2 = 0.4, MSE = 0.0088
Drugs: R2 = 3.5, MSE = 0.0402
Weapons: R2 = 2.8, MSE = 0.0312
CriminalMischief: R2 = 1.7, MSE = 0.0528
Other: R2 = 3.9, MSE = 0.0806

----------------------------------------
Days:
----------------------------------------
Homicide: R2 = 0.4, MSE = 0.0038
Rape: R2 = 1.0, MSE = 0.0189
Robbery: R2 = 8.3, MSE = 0.1556
FelonyAssault: R2 = 8.7, MSE = 0.1704
Burglary: R2 = 6.0, MSE = 0.1536
GrandLarceny: R2 = 18.8, MSE = 0.3529
GrandLarcenyAuto: R2 = 5.3, MSE = 0.0784
Fraud: R2 = 3.1, MSE = 0.0431
Forgery: R2 = 3.3, MSE = 0.0430
Arson: R2 = 0.7, MSE = 0.0112
Drugs

In [7]:
# Score the poly_ridge_model predictions against the 'fine' split's test
# targets (same test set as the sample_model evaluation of this split).
eval_predictions(X_test_fine, y_test_fine, y_poly_fine)

----------------------------------------
Four-hour buckets:
----------------------------------------
Homicide: R2 = 0.3, MSE = 0.0030
Rape: R2 = 0.5, MSE = 0.0148
Robbery: R2 = 4.8, MSE = 0.1220
FelonyAssault: R2 = 5.1, MSE = 0.1332
Burglary: R2 = 3.8, MSE = 0.1195
GrandLarceny: R2 = 12.7, MSE = 0.2745
GrandLarcenyAuto: R2 = 3.9, MSE = 0.0611
Fraud: R2 = 2.3, MSE = 0.0335
Forgery: R2 = 2.3, MSE = 0.0336
Arson: R2 = 0.4, MSE = 0.0088
Drugs: R2 = 3.7, MSE = 0.0401
Weapons: R2 = 2.9, MSE = 0.0312
CriminalMischief: R2 = 1.8, MSE = 0.0528
Other: R2 = 4.0, MSE = 0.0805

----------------------------------------
Days:
----------------------------------------
Homicide: R2 = 0.4, MSE = 0.0038
Rape: R2 = 1.1, MSE = 0.0189
Robbery: R2 = 8.5, MSE = 0.1552
FelonyAssault: R2 = 8.7, MSE = 0.1703
Burglary: R2 = 6.3, MSE = 0.1531
GrandLarceny: R2 = 18.9, MSE = 0.3526
GrandLarcenyAuto: R2 = 5.9, MSE = 0.0779
Fraud: R2 = 3.3, MSE = 0.0430
Forgery: R2 = 3.5, MSE = 0.0430
Arson: R2 = 0.7, MSE = 0.0112
Drugs

In [8]:
# Score the sample_model predictions against the 'coarse' split's test targets.
eval_predictions(X_test_coarse, y_test_coarse, y_simple_coarse)

----------------------------------------
Four-hour buckets:
----------------------------------------
Homicide: R2 = 0.2, MSE = 0.0025
Rape: R2 = 0.6, MSE = 0.0149
Robbery: R2 = 4.4, MSE = 0.1201
FelonyAssault: R2 = 5.7, MSE = 0.1411
Burglary: R2 = 2.8, MSE = 0.1103
GrandLarceny: R2 = 12.2, MSE = 0.2825
GrandLarcenyAuto: R2 = 2.8, MSE = 0.0551
Fraud: R2 = 2.0, MSE = 0.0308
Forgery: R2 = 1.8, MSE = 0.0313
Arson: R2 = 0.4, MSE = 0.0081
Drugs: R2 = 2.8, MSE = 0.0356
Weapons: R2 = 3.4, MSE = 0.0336
CriminalMischief: R2 = 1.7, MSE = 0.0518
Other: R2 = 4.0, MSE = 0.0855

----------------------------------------
Days:
----------------------------------------
Homicide: R2 = 0.9, MSE = 0.0156
Rape: R2 = 2.2, MSE = 0.0925
Robbery: R2 = 16.3, MSE = 0.7515
FelonyAssault: R2 = 21.6, MSE = 0.8879
Burglary: R2 = 10.5, MSE = 0.7127
GrandLarceny: R2 = 34.8, MSE = 1.7709
GrandLarcenyAuto: R2 = 8.8, MSE = 0.3541
Fraud: R2 = 4.0, MSE = 0.1892
Forgery: R2 = 6.1, MSE = 0.2011
Arson: R2 = 2.0, MSE = 0.0503
Dr

In [9]:
# Score the poly_ridge_model predictions against the 'coarse' split's test
# targets (same test set as the sample_model evaluation of this split).
eval_predictions(X_test_coarse, y_test_coarse, y_poly_coarse)

----------------------------------------
Four-hour buckets:
----------------------------------------
Homicide: R2 = 0.2, MSE = 0.0025
Rape: R2 = 0.6, MSE = 0.0149
Robbery: R2 = 4.6, MSE = 0.1198
FelonyAssault: R2 = 5.8, MSE = 0.1410
Burglary: R2 = 3.2, MSE = 0.1099
GrandLarceny: R2 = 12.2, MSE = 0.2825
GrandLarcenyAuto: R2 = 3.3, MSE = 0.0548
Fraud: R2 = 2.1, MSE = 0.0308
Forgery: R2 = 1.8, MSE = 0.0313
Arson: R2 = 0.5, MSE = 0.0081
Drugs: R2 = 3.0, MSE = 0.0355
Weapons: R2 = 3.4, MSE = 0.0336
CriminalMischief: R2 = 1.9, MSE = 0.0517
Other: R2 = 4.1, MSE = 0.0854

----------------------------------------
Days:
----------------------------------------
Homicide: R2 = 1.0, MSE = 0.0156
Rape: R2 = 2.5, MSE = 0.0922
Robbery: R2 = 17.0, MSE = 0.7454
FelonyAssault: R2 = 22.0, MSE = 0.8833
Burglary: R2 = 12.3, MSE = 0.6987
GrandLarceny: R2 = 34.9, MSE = 1.7679
GrandLarcenyAuto: R2 = 11.4, MSE = 0.3440
Fraud: R2 = 4.6, MSE = 0.1882
Forgery: R2 = 6.5, MSE = 0.2002
Arson: R2 = 2.3, MSE = 0.0502
D

In [10]:
# Score the sample_model predictions against the '2016' split's test targets.
eval_predictions(X_test_2016, y_test_2016, y_simple_2016)

----------------------------------------
Four-hour buckets:
----------------------------------------
Homicide: R2 = 0.1, MSE = 0.0020
Rape: R2 = 0.3, MSE = 0.0129
Robbery: R2 = 2.9, MSE = 0.0969
FelonyAssault: R2 = 5.3, MSE = 0.1444
Burglary: R2 = -1.5, MSE = 0.0830
GrandLarceny: R2 = 10.1, MSE = 0.2713
GrandLarcenyAuto: R2 = -0.6, MSE = 0.0386
Fraud: R2 = 0.7, MSE = 0.0241
Forgery: R2 = 1.7, MSE = 0.0397
Arson: R2 = -0.2, MSE = 0.0050
Drugs: R2 = 0.9, MSE = 0.0308
Weapons: R2 = 3.3, MSE = 0.0342
CriminalMischief: R2 = 1.2, MSE = 0.0645
Other: R2 = 3.7, MSE = 0.0915

----------------------------------------
Days:
----------------------------------------
Homicide: R2 = 0.3, MSE = 0.0122
Rape: R2 = 1.2, MSE = 0.0780
Robbery: R2 = 10.7, MSE = 0.6259
FelonyAssault: R2 = 21.0, MSE = 0.9004
Burglary: R2 = -5.6, MSE = 0.5724
GrandLarceny: R2 = 29.0, MSE = 1.7380
GrandLarcenyAuto: R2 = -5.2, MSE = 0.2623
Fraud: R2 = 0.7, MSE = 0.1505
Forgery: R2 = 5.5, MSE = 0.2634
Arson: R2 = -1.3, MSE = 0.03

In [11]:
# Score the poly_ridge_model predictions against the '2016' split's test
# targets (same test set as the sample_model evaluation of this split).
eval_predictions(X_test_2016, y_test_2016, y_poly_2016)

----------------------------------------
Four-hour buckets:
----------------------------------------
Homicide: R2 = 0.1, MSE = 0.0020
Rape: R2 = 0.3, MSE = 0.0129
Robbery: R2 = 3.6, MSE = 0.0962
FelonyAssault: R2 = 5.3, MSE = 0.1443
Burglary: R2 = 0.0, MSE = 0.0818
GrandLarceny: R2 = 9.5, MSE = 0.2730
GrandLarcenyAuto: R2 = 0.7, MSE = 0.0381
Fraud: R2 = 1.0, MSE = 0.0240
Forgery: R2 = 1.8, MSE = 0.0397
Arson: R2 = 0.1, MSE = 0.0050
Drugs: R2 = 1.1, MSE = 0.0307
Weapons: R2 = 3.2, MSE = 0.0342
CriminalMischief: R2 = 1.5, MSE = 0.0644
Other: R2 = 3.8, MSE = 0.0914

----------------------------------------
Days:
----------------------------------------
Homicide: R2 = 0.5, MSE = 0.0122
Rape: R2 = 1.1, MSE = 0.0781
Robbery: R2 = 14.0, MSE = 0.6026
FelonyAssault: R2 = 21.1, MSE = 0.8986
Burglary: R2 = 2.6, MSE = 0.5284
GrandLarceny: R2 = 26.5, MSE = 1.8003
GrandLarcenyAuto: R2 = 2.1, MSE = 0.2440
Fraud: R2 = 2.3, MSE = 0.1480
Forgery: R2 = 6.0, MSE = 0.2619
Arson: R2 = 0.2, MSE = 0.0307
Drug

In [12]:
# Preview only the first rows of the poly_ridge_model predictions for the
# '2016' split; the full frame has one row per test observation and is too
# large to be useful as cell output.
y_poly_2016.head(10)

Unnamed: 0,COMPLAINT_YEAR,COMPLAINT_MONTH,COMPLAINT_DAY,COMPLAINT_HOURGROUP,ADDR_PCT_CD,Homicide,Rape,Robbery,FelonyAssault,Burglary,GrandLarceny,GrandLarcenyAuto,Fraud,Forgery,Arson,Drugs,Weapons,CriminalMischief,Other
1686762,2016,1,1,0,1,-0.000575,0.027556,0.046513,0.058569,0.021240,0.481758,-0.013571,0.043197,0.045037,0.007300,0.000100,0.019995,0.049014,0.040543
1686763,2016,1,1,0,5,-0.000255,0.026780,0.056966,0.084679,-0.006742,0.223576,-0.023978,0.023386,0.048871,0.008943,0.003570,0.021455,0.051707,0.060035
1686764,2016,1,1,0,6,-0.000438,0.027921,0.074905,0.070391,0.025714,0.486369,-0.014118,0.040504,0.018561,0.005885,0.025572,0.022277,0.066862,0.033650
1686765,2016,1,1,0,7,-0.000164,0.027693,0.072623,0.074363,-0.021714,0.119728,-0.014027,0.024436,0.018881,0.010404,0.036938,0.033278,0.042167,0.054192
1686766,2016,1,1,0,9,-0.000210,0.031710,0.086180,0.089426,0.031511,0.339292,-0.008412,0.053194,0.019657,0.009582,0.038444,0.035834,0.047234,0.056657
1686767,2016,1,1,0,10,-0.000210,0.027510,0.062215,0.074363,-0.014045,0.305376,-0.013114,0.044521,0.021985,0.008395,0.024157,0.034465,0.047097,0.040726
1686768,2016,1,1,0,13,-0.000301,0.036320,0.093758,0.092850,0.068987,0.667407,-0.006769,0.074329,0.040016,0.008076,0.014936,0.032593,0.054446,0.047847
1686769,2016,1,1,0,14,-0.000073,0.034814,0.099236,0.099788,0.118013,1.067781,-0.023339,0.043425,0.132634,0.006387,0.023746,0.039897,0.079507,0.075509
1686770,2016,1,1,0,17,-0.000757,0.027464,0.034781,0.047613,-0.003638,0.320075,-0.020418,0.046118,0.018196,0.005656,-0.000858,0.014015,0.045088,0.023927
1686771,2016,1,1,0,18,-0.000118,0.031984,0.071801,0.083903,0.036989,0.833655,-0.014392,0.067847,0.040609,0.007756,0.019181,0.019949,0.059239,0.043282


In [13]:
# Attach the year / month / day / ADDR_PCT_CD columns from the 2016 test
# features to the 2016 test targets. Rows are matched on the frames' index,
# keeping only index values present in both (inner join).
y_test_with_dates_2016 = pd.merge(
    y_test_2016,
    X_test_2016[['COMPLAINT_YEAR', 'COMPLAINT_MONTH', 'COMPLAINT_DAY', 'ADDR_PCT_CD']],
    how='inner',
    left_index=True,
    right_index=True,
)

In [14]:
# Preview only the first rows of the merged targets + date columns instead of
# rendering the whole frame as cell output.
y_test_with_dates_2016.head(10)

Unnamed: 0,Homicide,Rape,Robbery,FelonyAssault,Burglary,GrandLarceny,GrandLarcenyAuto,Fraud,Forgery,Arson,Drugs,Weapons,CriminalMischief,Other,COMPLAINT_IDS,COMPLAINT_YEAR,COMPLAINT_MONTH,COMPLAINT_DAY,ADDR_PCT_CD
1686762,0,0,0,0,0,1,0,0,0,0,0,0,1,0,829406806 496203963,2016,1,1,1
1686763,0,0,0,0,0,0,0,0,0,0,0,0,0,0,,2016,1,1,5
1686764,0,0,0,2,0,3,0,0,0,0,0,0,0,0,277684027 920296197 244825040 212075349 502079527,2016,1,1,6
1686765,0,0,0,3,0,2,0,0,0,0,0,0,0,0,363755831 943694977 477840378 324448342 970238645,2016,1,1,7
1686766,0,1,0,0,0,2,0,0,0,0,0,0,0,1,792470770 288319671 249651675 653615052,2016,1,1,9
1686767,0,0,0,0,0,2,0,1,0,0,0,0,0,0,387032950 460930996 402560815,2016,1,1,10
1686768,0,0,0,1,0,1,0,0,0,0,0,0,0,0,430816562 909325568,2016,1,1,13
1686769,0,1,0,0,0,11,0,0,0,0,0,0,0,1,734649377 566386598 726004241 859637331 273270...,2016,1,1,14
1686770,0,0,0,0,0,1,0,0,0,0,0,0,0,0,726514722,2016,1,1,17
1686771,0,0,2,1,0,7,0,0,0,0,0,0,1,0,267825963 556147663 628667691 155589124 589524...,2016,1,1,18


In [15]:
# Names of the per-crime count columns in the 2016 test targets.
# Select numeric columns explicitly rather than "everything that is not
# object": the monthly aggregation below sums these columns, so any
# non-numeric column that is not object-typed (string, category or datetime
# dtypes) must not slip in. For the frame shown in this notebook, where the
# only non-count column is the object-typed COMPLAINT_IDS, the result is the
# same as before.
crime_types = y_test_2016.select_dtypes(include='number').columns

In [16]:
# Sum every crime-type column per (year, month), first for the observed 2016
# test targets and then for the poly_ridge_model predictions, so the two
# monthly tables share the same layout.
y_test_monthly, y_pred_monthly = (
    frame.groupby(['COMPLAINT_YEAR', 'COMPLAINT_MONTH'])[crime_types].sum()
    for frame in (y_test_with_dates_2016, y_poly_2016)
)


In [17]:
# Observed monthly totals per crime type (one row per year/month group).
y_test_monthly

Unnamed: 0_level_0,Unnamed: 1_level_0,Homicide,Rape,Robbery,FelonyAssault,Burglary,GrandLarceny,GrandLarcenyAuto,Fraud,Forgery,Arson,Drugs,Weapons,CriminalMischief,Other
COMPLAINT_YEAR,COMPLAINT_MONTH,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2016,1,22,141,1335,1673,1070,3332,467,353,504,69,379,395,773,1191
2016,2,17,162,1109,1368,982,3102,414,360,495,63,438,404,710,1072
2016,3,25,175,1151,1614,1027,3469,418,392,586,69,436,506,892,1265
2016,4,28,189,1204,1601,1000,3461,453,373,535,61,395,519,822,1215
2016,5,31,206,1279,1823,1070,3658,560,349,514,83,366,479,874,1262
2016,6,32,199,1268,1864,1095,3685,577,352,480,62,391,460,830,1285
2016,7,30,229,1393,2126,1148,3781,594,348,438,84,346,386,898,1266
2016,8,36,227,1402,2021,1166,3826,675,351,519,64,437,484,866,1275
2016,9,34,189,1334,1778,1038,3526,593,299,518,48,369,465,760,1202
2016,10,20,172,1378,1738,1142,3791,589,299,505,50,461,507,838,1283


In [18]:
# Predicted monthly totals per crime type from poly_ridge_model (one row per
# year/month group); compare against y_test_monthly.
y_pred_monthly

Unnamed: 0_level_0,Unnamed: 1_level_0,Homicide,Rape,Robbery,FelonyAssault,Burglary,GrandLarceny,GrandLarcenyAuto,Fraud,Forgery,Arson,Drugs,Weapons,CriminalMischief,Other
COMPLAINT_YEAR,COMPLAINT_MONTH,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2016,1,18.659358,226.519953,1466.600768,1677.614687,1247.414421,3709.076945,544.434258,436.234497,479.503644,105.926342,369.923343,409.843194,827.524109,1233.620249
2016,2,14.490595,172.33288,1074.916066,1509.982101,1041.36946,3299.436299,427.904285,437.858646,475.869681,82.622434,367.565669,323.037179,742.611325,1140.911458
2016,3,17.847602,188.825596,1234.506405,1801.535904,1104.538071,3754.945798,489.206492,457.070825,562.213422,88.609778,411.999624,384.737554,866.339104,1325.600684
2016,4,18.364595,174.850421,1137.64184,1768.410041,1058.083318,3706.36701,526.639054,391.152278,505.801023,73.256189,352.985927,363.908013,853.233492,1300.552374
2016,5,22.457989,186.203412,1359.91704,2006.421863,1224.671129,4013.602144,604.964549,358.295503,491.405752,76.969674,320.802124,355.593337,891.734804,1336.933338
2016,6,24.568068,216.367964,1365.753215,2028.749557,1252.194133,4148.661647,604.036765,336.074665,474.689402,59.952592,313.645922,349.279451,896.878469,1343.719616
2016,7,31.402801,229.236681,1467.440027,2141.296133,1363.702625,4311.573818,694.854874,307.151167,468.308881,71.032821,284.98331,359.325458,958.623536,1406.404873
2016,8,26.920697,221.198626,1474.706497,2080.659281,1446.633132,4443.433896,729.696347,322.249511,487.218904,67.217648,335.900507,370.233138,948.29452,1396.314666
2016,9,24.547607,204.189541,1505.583588,1932.150729,1373.47708,4343.825847,725.331777,330.328616,468.849418,76.598018,332.632615,352.427712,905.698389,1364.252466
2016,10,25.019231,191.351779,1645.280885,1899.769992,1416.107154,4454.653186,700.343883,325.242311,521.518255,86.693827,355.94925,335.40912,942.483338,1321.523912
