In [1]:
import numpy as np
import pandas as pd
import seaborn
import matplotlib.pyplot as plt
import datetime
import csv
import os
from sklearn.metrics import r2_score, mean_squared_error

%matplotlib inline
%load_ext autoreload
%autoreload 2

In [2]:
from modules.nypd_data import read_orig_file
from modules.nypd_data import filter_raw_data
from modules.nypd_data import save_dated_felonies
from modules.nypd_data import load_dated_felonies
from modules.nypd_data import save_clean_felonies
from modules.nypd_data import load_clean_felonies
from modules.nypd_data import add_offense_category
from modules.nypd_data import add_datetime_columns
from modules.nypd_data import pivot_felonies
from modules.nypd_data import save_pivoted_felonies
from modules.nypd_data import load_pivoted_felonies

In [3]:
from modules.prediction import load_all_data
from modules.prediction import precrime_train_test_split, create_test_period, precrime_train_test_split
from modules.prediction import split_into_X_y
from modules.prediction import load_splits
from modules.prediction import create_all_splits
from modules.prediction import sample_model
from modules.poly_ridge import poly_ridge_model
from modules.fancy_time_series import fancy_time_series_model
from modules.eval_model import eval_predictions


In [4]:
from modules.nypd_data import load_clean_felonies
from modules.nyc_shapefiles import read_nyc_shapefiles
from modules.weather import get_precinct_centroids
from modules.weather import get_mean_latlon
from modules.weather import write_weather_data
from modules.weather import read_weather_data
from modules.weather import read_api_key

In [5]:
# Build the 2017 felony files (dated -> clean -> pivoted).
# Prerequisite: download the year-to-date (YTD) crime file from the open
# data website and store it in the folder precrime_data/raw.
precrime_dir = '../precrime_data/'
dated_name_2017 = 'dated_felonies_2017.csv'
clean_name_2017 = 'clean_felonies_2017.csv'
clean_path_2017 = '../precrime_data/clean_felonies_2017.csv'

# Step 1: raw YTD complaint file -> dated felonies file.
save_dated_felonies(
    '../precrime_data/raw/',
    'NYPD_Complaint_Data_Current_YTD.csv',
    '../precrime_data/dated_felonies_2017.csv'
)

# Step 2: reload the dated file and keep only complaints dated
# 2017-01-01 00:00:00 or later, then write that subset out.
dated_felonies = load_dated_felonies(precrime_dir, dated_name_2017)
is_2017_or_later = dated_felonies['COMPLAINT_DATETIME'] >= '2017-01-01 00:00:00'
dated_felonies = dated_felonies[is_2017_or_later]
dated_felonies.to_csv(clean_path_2017)

# Step 3: produce the clean felonies file from the dated file.
# NOTE(review): the third argument is the same path the filtered frame was
# just written to. If save_clean_felonies treats it as its output path, the
# to_csv result above is replaced and the date filter is lost -- confirm
# against modules/nypd_data.py.
save_clean_felonies(precrime_dir, dated_name_2017, clean_path_2017)

# Step 4: load the clean file, add the derived columns, pivot, and save.
nypd_data_2017 = load_clean_felonies(precrime_dir, clean_name_2017)
# Return values of the two add_* calls are not captured; presumably they
# modify nypd_data_2017 in place -- TODO confirm.
add_offense_category(nypd_data_2017)
add_datetime_columns(nypd_data_2017)
pivoted_2017 = pivot_felonies(nypd_data_2017)
# NOTE(review): pivoted_2017 is not used again in this cell; the frame handed
# to save_pivoted_felonies is nypd_data_2017. Confirm which one the function
# expects.
save_pivoted_felonies(
    nypd_data_2017,
    data_path=precrime_dir,
    pivot_file='pivoted_felonies_2017.csv'
)

    

Starting (2017-12-05 21:00:27)...
Saving filtered output (2017-12-05 21:00:30)...
Done (2017-12-05 21:00:31)
Starting (2017-12-05 21:00:55)...
Done (2017-12-05 21:01:17)


In [6]:
# Preview only the first rows of the filtered 2017 felonies instead of asking
# the notebook to render the whole frame.
dated_felonies.head(10)

Unnamed: 0_level_0,COMPLAINT_DATETIME,REPORT_DATE,KY_CD,OFNS_DESC,BORO_NM,ADDR_PCT_CD,Latitude,Longitude
CMPLNT_NUM,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
417734073,2017-06-30 23:40:00,2017-06-30,121,CRIMINAL MISCHIEF & RELATED OF,BROOKLYN,90,40.701165,-73.942633
220585856,2017-06-30 23:15:00,2017-06-30,105,ROBBERY,MANHATTAN,9,40.729082,-73.988788
361182121,2017-06-30 22:58:00,2017-06-30,106,FELONY ASSAULT,BROOKLYN,94,40.719473,-73.963295
294397333,2017-06-30 22:55:00,2017-06-30,106,FELONY ASSAULT,MANHATTAN,14,40.754849,-73.984120
701367321,2017-06-30 22:50:00,2017-06-30,113,FORGERY,MANHATTAN,14,40.757241,-73.989794
729905802,2017-06-30 22:40:00,2017-06-30,105,ROBBERY,MANHATTAN,33,40.835319,-73.942040
219940245,2017-06-30 22:40:00,2017-06-30,109,GRAND LARCENY,BROOKLYN,81,40.700085,-73.940787
275147038,2017-06-30 22:20:00,2017-06-30,106,FELONY ASSAULT,BRONX,40,40.818396,-73.920522
588989752,2017-06-30 22:15:00,2017-06-30,113,FORGERY,BROOKLYN,77,40.668806,-73.931122
509041172,2017-06-30 22:00:00,2017-06-30,109,GRAND LARCENY,MANHATTAN,26,40.815493,-73.958614


In [7]:
# Creating weather data until 2017-11-30 

# Before running: execute "tar -xvf weather_hist.tar.gz"
# from the root directory of the git repo
#
# To make the files from scratch (not necessary):
#
#nypd_data = load_clean_felonies()
#precinct_dict, tract_dict, merged_census_info = read_nyc_shapefiles()
#centroids = get_precinct_centroids(precinct_dict)
#nyc_mean_latlon = get_mean_latlon(centroids)
#nypd_dates = nypd_data['COMPLAINT_DATETIME'].dt.date.unique()
#hours = range(2,26,4)
#darksky_api_key = read_api_key()

In [8]:
#new_nypd_dates = []
#one_day = datetime.timedelta(days=1)
#cur_day = nypd_dates[-1] + one_day
#while cur_day.month < 12:
#    new_nypd_dates.append(cur_day)
#    cur_day += one_day

In [9]:
#write_weather_data(new_nypd_dates, hours, darksky_api_key, nyc_mean_latlon, append_output=True)

In [10]:
# Training portion: load_all_data() called with its default arguments.
crime_data_train = load_all_data()

# Test portion: the same loader pointed at the 2017 pivot file.
crime_data_test = load_all_data(
    data_path='../precrime_data/',
    pivot_file='pivoted_felonies_2017.csv'
)

# Concatenate both portions (training first) into the single object that is
# later passed to precrime_train_test_split.
crime_data_parts = [crime_data_train, crime_data_test]
crime_data = pd.concat(crime_data_parts)


  mask |= (ar1 == a)
  exec(code_obj, self.user_global_ns, self.user_ns)


In [11]:
# Bounds of the test period, both at midnight: 2017-01-01 and 2017-07-01.
# Whether the end bound is inclusive is decided inside create_test_period.
str_date = datetime.datetime(2017, 1, 1)
end_date = datetime.datetime(2017, 7, 1)
test_times = create_test_period(str_date, end_date)

# Split the combined crime data into train/test features and targets using
# the generated test periods.
(X_train,
 X_test,
 y_train,
 y_test) = precrime_train_test_split(crime_data, test_times)

In [12]:
# Preview only the first generated test periods instead of asking the
# notebook to render the whole frame.
test_times.head(10)

Unnamed: 0_level_0,TEST_YEAR,TEST_MONTH,TEST_DAY,TEST_HOURGROUP
Index,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,2017,1,1,0
1,2017,1,1,4
2,2017,1,1,8
3,2017,1,1,12
4,2017,1,1,16
5,2017,1,1,20
6,2017,1,2,0
7,2017,1,2,4
8,2017,1,2,8
9,2017,1,2,12


In [13]:
# First model's output, given the training split and the test features.
# Presumably these are predictions for X_test -- confirm in
# modules/poly_ridge.py. y_poly is later averaged with y_ts.
y_poly = poly_ridge_model(X_train, y_train, X_test)




In [14]:
# Second model's output. Unlike the poly_ridge_model call, this call is also
# given y_test.
# NOTE(review): the saved output of this cell is
# "ValueError: cannot set a Timestamp with a non-timestamp". The traceback is
# not preserved, so which statement raised it is unclear (presumably this
# call) -- confirm and fix before relying on y_ts / y_hybrid.
y_ts = fancy_time_series_model(X_train, y_train, X_test, y_test)

# Hybrid output: unweighted mean of the two models' outputs.
y_hybrid = (y_poly + y_ts) / 2

ValueError: cannot set a Timestamp with a non-timestamp

In [None]:
# Evaluate the hybrid output against the test split (presumably y_test holds
# the true targets -- confirm in modules/eval_model.py).
# NOTE(review): y_hybrid is assigned in the preceding cell, whose saved output
# is a ValueError, and this cell has no execution count ("In [None]"), so it
# appears not to have been run in the saved session.
eval_predictions(X_test, y_test, y_hybrid)