In [1]:
import numpy as np
import pandas as pd
import seaborn
import matplotlib.pyplot as plt
import datetime
import csv
import os
from sklearn.metrics import r2_score, mean_squared_error

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
from modules.prediction import load_all_data
from modules.prediction import precrime_train_test_split
from modules.prediction import load_splits
from modules.prediction import create_all_splits
from modules.prediction import sample_model
from modules.poly_ridge import poly_ridge_model
from modules.eval_model import eval_predictions

In [3]:
# Load the inputs first (data, then split definitions — same call order as
# before), then build every train/test partition in one call. The result is
# indexed by split name ('fine', 'coarse', '2016') in the following cell.
# NOTE(review): the saved output of this cell keeps only the tail of a warning
# (`mask |= (ar1 == a)`); its header is not visible here — presumably a NumPy
# membership-test warning raised while loading. Confirm and fix at the source.
crime_data, splits = load_all_data(), load_splits()
train_test_data = create_all_splits(crime_data, splits)

  mask |= (ar1 == a)


In [4]:
# Destructure the three splits in one statement. Each entry of train_test_data
# unpacks into exactly four items, in the order
# (X_train, X_test, y_train, y_test).
(
    (X_train_fine, X_test_fine, y_train_fine, y_test_fine),
    (X_train_coarse, X_test_coarse, y_train_coarse, y_test_coarse),
    (X_train_2016, X_test_2016, y_train_2016, y_test_2016),
) = (train_test_data[split_name] for split_name in ('fine', 'coarse', '2016'))

In [5]:
# Run poly_ridge_model once per split, in fine -> coarse -> 2016 order.
# Each call receives (train features, train targets, test features); the
# results are passed to eval_predictions as the predictions further down,
# so the model presumably fits on the training pair and predicts on the
# test features — confirm in modules.poly_ridge.
y_poly_fine, y_poly_coarse, y_poly_2016 = (
    poly_ridge_model(features_train, targets_train, features_test)
    for features_train, targets_train, features_test in (
        (X_train_fine, y_train_fine, X_test_fine),
        (X_train_coarse, y_train_coarse, X_test_coarse),
        (X_train_2016, y_train_2016, X_test_2016),
    )
)

In [6]:
# Report per-crime-type R2, RMSE and RMSE (%) for the poly-ridge predictions
# on the 'fine' split (the call prints a table; see the saved output below).
# NOTE(review): the saved R2 values exceed 1 (e.g. 12.8 for GrandLarceny), so
# they are presumably percentages — confirm in eval_predictions.
# NOTE(review): the saved output is cut off part-way through the "Weapons"
# row; re-run to see the remaining rows.
eval_predictions(X_test_fine, y_test_fine, y_poly_fine)

------------------------------------------------------------------
Four-hour buckets:
------------------------------------------------------------------
Homicide:         R2 =      0.3, RMSE =     0.054, RMSE (%) =  1954.125
Rape:             R2 =      0.5, RMSE =     0.122, RMSE (%) =   875.009
Robbery:          R2 =      4.8, RMSE =     0.349, RMSE (%) =   301.658
FelonyAssault:    R2 =      5.1, RMSE =     0.365, RMSE (%) =   332.094
Burglary:         R2 =      3.8, RMSE =     0.346, RMSE (%) =   313.868
GrandLarceny:     R2 =     12.8, RMSE =     0.524, RMSE (%) =   208.714
GrandLarcenyAuto: R2 =      4.0, RMSE =     0.247, RMSE (%) =   419.259
Fraud:            R2 =      2.3, RMSE =     0.183, RMSE (%) =   573.995
Forgery:          R2 =      2.3, RMSE =     0.183, RMSE (%) =   617.558
Arson:            R2 =      0.4, RMSE =     0.094, RMSE (%) =  1182.312
Drugs:            R2 =      3.8, RMSE =     0.200, RMSE (%) =   558.053
Weapons:          R2 =      3.0, RMSE =     0.177, RMSE

In [7]:
# Report per-crime-type R2, RMSE and RMSE (%) for the poly-ridge predictions
# on the 'coarse' split (the call prints a table; see the saved output below).
# NOTE(review): the printed header reads "Four-hour buckets:" here exactly as
# it does for the 'fine' and '2016' evaluations — confirm that eval_predictions
# labels the table as intended for this split.
# NOTE(review): R2 values above 1 in the output suggest percentages — confirm.
eval_predictions(X_test_coarse, y_test_coarse, y_poly_coarse)

------------------------------------------------------------------
Four-hour buckets:
------------------------------------------------------------------
Homicide:         R2 =      0.2, RMSE =     0.050, RMSE (%) =  2093.946
Rape:             R2 =      0.6, RMSE =     0.122, RMSE (%) =   860.606
Robbery:          R2 =      4.7, RMSE =     0.346, RMSE (%) =   303.417
FelonyAssault:    R2 =      5.9, RMSE =     0.375, RMSE (%) =   320.693
Burglary:         R2 =      3.2, RMSE =     0.331, RMSE (%) =   323.295
GrandLarceny:     R2 =     12.3, RMSE =     0.531, RMSE (%) =   204.567
GrandLarcenyAuto: R2 =      3.5, RMSE =     0.234, RMSE (%) =   439.711
Fraud:            R2 =      2.1, RMSE =     0.175, RMSE (%) =   595.846
Forgery:          R2 =      1.9, RMSE =     0.177, RMSE (%) =   629.669
Arson:            R2 =      0.5, RMSE =     0.090, RMSE (%) =  1228.189
Drugs:            R2 =      3.2, RMSE =     0.188, RMSE (%) =   581.364
Weapons:          R2 =      3.5, RMSE =     0.183, RMSE

In [8]:
# Report per-crime-type R2, RMSE and RMSE (%) for the poly-ridge predictions
# on the '2016' split (the call prints a table; see the saved output below).
# NOTE(review): the printed header reads "Four-hour buckets:" here exactly as
# it does for the 'fine' and 'coarse' evaluations — confirm that
# eval_predictions labels the table as intended for this split.
# NOTE(review): R2 values above 1 in the output suggest percentages — confirm.
eval_predictions(X_test_2016, y_test_2016, y_poly_2016)

------------------------------------------------------------------
Four-hour buckets:
------------------------------------------------------------------
Homicide:         R2 =      0.2, RMSE =     0.045, RMSE (%) =  2380.479
Rape:             R2 =      0.3, RMSE =     0.113, RMSE (%) =   904.613
Robbery:          R2 =      3.7, RMSE =     0.310, RMSE (%) =   338.893
FelonyAssault:    R2 =      5.3, RMSE =     0.380, RMSE (%) =   310.747
Burglary:         R2 =      0.2, RMSE =     0.286, RMSE (%) =   376.370
GrandLarceny:     R2 =      9.7, RMSE =     0.522, RMSE (%) =   209.635
GrandLarcenyAuto: R2 =      1.2, RMSE =     0.195, RMSE (%) =   526.950
Fraud:            R2 =      1.2, RMSE =     0.155, RMSE (%) =   665.719
Forgery:          R2 =      1.8, RMSE =     0.199, RMSE (%) =   560.366
Arson:            R2 =      0.1, RMSE =     0.071, RMSE (%) =  1604.211
Drugs:            R2 =      1.7, RMSE =     0.175, RMSE (%) =   624.149
Weapons:          R2 =      3.4, RMSE =     0.185, RMSE

In [9]:
# Peek at a window of the fine-split test features: keep the last 50,000 rows,
# then show the first 100 of those (positional slices, equivalent to
# .tail(50000).head(100)). The y_test_fine cell below uses the same window, so
# rows can be matched by index.
# NOTE(review): in the saved output, windGust and the precinct/demographic
# columns (PrecinctShapefileID, Population, Median_Household_Income,
# Percent_Bachelors_Degree) are empty for the rows shown — confirm how missing
# values are handled before modelling.
X_test_fine.iloc[-50000:].iloc[:100]

Unnamed: 0,COMPLAINT_YEAR,COMPLAINT_MONTH,COMPLAINT_DAY,COMPLAINT_HOURGROUP,ADDR_PCT_CD,COMPLAINT_DAYOFWEEK,apparentTemperature,cloudCover,dewPoint,humidity,...,time,uvIndex,visibility,windBearing,windGust,windSpeed,PrecinctShapefileID,Population,Median_Household_Income,Percent_Bachelors_Degree
1343777,2013,12,19,12,79,3,37.28,0.0,24.30,0.48,...,1387479600,1.0,9.78,233.0,,9.02,,,,
1343778,2013,12,19,12,81,3,37.28,0.0,24.30,0.48,...,1387479600,1.0,9.78,233.0,,9.02,,,,
1343779,2013,12,19,12,83,3,37.28,0.0,24.30,0.48,...,1387479600,1.0,9.78,233.0,,9.02,,,,
1343780,2013,12,19,12,84,3,37.28,0.0,24.30,0.48,...,1387479600,1.0,9.78,233.0,,9.02,,,,
1343781,2013,12,19,12,88,3,37.28,0.0,24.30,0.48,...,1387479600,1.0,9.78,233.0,,9.02,,,,
1343782,2013,12,19,12,90,3,37.28,0.0,24.30,0.48,...,1387479600,1.0,9.78,233.0,,9.02,,,,
1343783,2013,12,19,12,94,3,37.28,0.0,24.30,0.48,...,1387479600,1.0,9.78,233.0,,9.02,,,,
1343784,2013,12,19,12,100,3,37.28,0.0,24.30,0.48,...,1387479600,1.0,9.78,233.0,,9.02,,,,
1343785,2013,12,19,12,101,3,37.28,0.0,24.30,0.48,...,1387479600,1.0,9.78,233.0,,9.02,,,,
1343786,2013,12,19,12,102,3,37.28,0.0,24.30,0.48,...,1387479600,1.0,9.78,233.0,,9.02,,,,


In [10]:
# Peek at the fine-split test targets over the same window as the X_test_fine
# cell above: the last 50,000 rows, then the first 100 of those (positional
# slices, equivalent to .tail(50000).head(100)).
# NOTE(review): the displayed frame carries a COMPLAINT_IDS column next to the
# per-crime counts — confirm it is dropped before fitting and evaluation.
y_test_fine.iloc[-50000:].iloc[:100]

Unnamed: 0,Homicide,Rape,Robbery,FelonyAssault,Burglary,GrandLarceny,GrandLarcenyAuto,Fraud,Forgery,Arson,Drugs,Weapons,CriminalMischief,Other,COMPLAINT_IDS
1343777,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1343778,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1343779,0,0,1,0,0,0,0,0,0,0,0,0,0,0,661959009
1343780,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1343781,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1343782,0,0,0,0,0,1,0,0,0,0,0,0,0,0,381613314
1343783,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1343784,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1343785,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1343786,0,0,0,0,0,2,0,0,0,0,0,0,0,0,892296619 960283951
