# Experiment description
## Hypothesis: 
Over a one-week horizon, we can predict the occurrence of aggressive behavior with an average hit rate higher than 0.1 at 10% coverage.

## Method: 

Estimate hitrates over predictions between 2017-01-22 and 2017-01-29. Experiment parameters:
- Use the city-wide model trained on data between 2017-01-01 and 2017-01-21
- Predict for each day in the following hours: [3, 9, 15, 21]
- Estimate hitrate and PAI using 5% and 10% coverage
- Estimate average PAI

## Results:
- Elapsed time for the experiment: approx. 56 hours
- Hitrates were lower than 0.1 in all predictions.
- average PAI 5%:  0.5338693700851119
- average PAI 10%:  0.47119505453105576

In [1]:
%matplotlib inline
import pandas as pd
import pickle
import dateutil.parser
import pyproj
import open_cp
from PIL import Image
import datetime
import matplotlib.pyplot as plt
import numpy as np

In [2]:
import import_ipynb
import training_sepp_builtin

importing Jupyter notebook from training_sepp_builtin.ipynb
Collecting https://github.com/QuantCrimAtLeeds/PredictCode/zipball/master
  Using cached https://github.com/QuantCrimAtLeeds/PredictCode/zipball/master
Building wheels for collected packages: opencp
  Building wheel for opencp (setup.py) ... [?25ldone
[?25h  Created wheel for opencp: filename=opencp-0.2.0-cp37-none-any.whl size=280058 sha256=01e613559bf444fd44f99edbb4c35fd9b1bc5c8150e639452cf80c9562f3c3c5
  Stored in directory: /tmp/pip-ephem-wheel-cache-0wvnwlui/wheels/9a/f6/1f/9ea8bd2ac044e7df5c432f658ba156f0cfc3540891a5bad62a
Successfully built opencp
Collecting https://github.com/MatthewDaws/SEPP/zipball/master
  Using cached https://github.com/MatthewDaws/SEPP/zipball/master
Building wheels for collected packages: sepp
  Building wheel for sepp (setup.py) ... [?25ldone
[?25h  Created wheel for sepp: filename=sepp-0.9.0-cp37-none-any.whl size=37591 sha256=9f97f36f45af76609d2c357e3d541ff10e7f3bf4ce686909996ab6d91e0d61c8

Failed to load 'descartes' package.


In [64]:
def _build_timestamp_string(date_str, hour_str):
    """Join a ``FECHA`` string and a stringified ``HORA`` value into one timestamp string.

    Values of at most two characters are placed in the minutes field with the
    hour set to ``00``; longer values are split so that the last two
    characters are the minutes and everything before them is the hour.
    """
    if len(hour_str) <= 2:
        return date_str + " " + "00:" + hour_str + ":00"
    return date_str + " " + hour_str[:-2] + ":" + hour_str[-2:] + ":00"


def selectDataPrediction(nameLoc,dateIni,dateEnd):
    """Select the events of one locality inside a time window.

    Reads the module-level DataFrame ``df`` (columns ``FECHA``, ``HORA``,
    ``LONGITUD``, ``LATITUD`` and, unless ``nameLoc == 'city'``,
    ``LOCALIDAD``). Unlike earlier revisions, this function does not add
    ``FECHA_HORA`` / ``FECHA_HORA_TS`` columns to ``df``: timestamps are kept
    in a local Series so the shared frame is never mutated.

    Parameters
    ----------
    nameLoc : str
        ``'city'`` to use every row of ``df``; any other value keeps only the
        rows whose ``LOCALIDAD`` equals it.
    dateIni, dateEnd :
        Window bounds, compared against the parsed timestamps.
        NOTE(review): both bounds are exclusive, so an event stamped exactly
        at ``dateIni`` or ``dateEnd`` is dropped - confirm this is intended.

    Returns
    -------
    tuple
        ``(points_crime, region)``: an ``open_cp.TimedPoints`` built from the
        selected events and the ``open_cp.RectangularRegion`` bounding them.

    Raises
    ------
    ValueError
        If no event falls inside the window.
    """
    if nameLoc == 'city':
        dfloc = df
    else:
        dfloc = df[df["LOCALIDAD"]==nameLoc]

    # Build "<FECHA> H:MM:00" strings row by row, then parse them.
    hour_strings = dfloc["HORA"].astype(str).values
    date_strings = dfloc["FECHA"].values
    stamp_strings = [
        _build_timestamp_string(date_str, hour_str)
        for date_str, hour_str in zip(date_strings, hour_strings)
    ]
    stamps = pd.Series(stamp_strings, index=dfloc.index).map(dateutil.parser.parse)

    in_window = ((stamps > dateIni) & (stamps < dateEnd)).values
    dfFilter = dfloc[in_window]
    timestamps = stamps[in_window]

    if len(dfFilter) == 0:
        raise ValueError(
            "No events for '" + str(nameLoc) + "' between "
            + str(dateIni) + " and " + str(dateEnd)
        )

    # Project the LONGITUD/LATITUD pairs with the EPSG:3116 definition.
    xcoords, ycoords = (dfFilter.LONGITUD.values,dfFilter.LATITUD.values)
    proj = pyproj.Proj(init="EPSG:3116")
    xcoords, ycoords = proj(xcoords,ycoords)

    points_crime = open_cp.TimedPoints.from_coords(timestamps, xcoords, ycoords)

    # Build the bounding region used to grid the predictions.
    region = open_cp.RectangularRegion(
        xmin=min(xcoords), xmax=max(xcoords), ymin=min(ycoords), ymax=max(ycoords)
    )

    return (points_crime,region)


In [65]:
def load_model(localidad, experiment_name):
    """Load the pickled trained model saved under ``experiment_name``.

    The file is read from
    ``<path>/aggressive_behavior_model/SEPP/pkl/TrainRina_<experiment_name>.pkl``,
    where ``path`` is the module-level base directory.

    Parameters
    ----------
    localidad :
        Not used to locate the file; kept so existing callers keep working.
    experiment_name : str
        Suffix identifying the pickled model file.

    Returns
    -------
    object
        Whatever object was pickled into the file.
    """
    custom_path = path+'/aggressive_behavior_model/SEPP/'+'pkl/TrainRina_'+experiment_name+'.pkl'
    # NOTE(review): unpickling can execute arbitrary code; only load files
    # produced by a trusted training run.
    # The context manager closes the file even if unpickling fails.
    with open(custom_path,'rb') as infile:
        return pickle.load(infile)

In [68]:
def run_prediction(localidad,modelCrime,time_window_prediction,predict_time):
    """Predict at ``predict_time`` and score the prediction at 5% and 10% coverage.

    The events selected for ``localidad`` inside
    ``time_window_prediction['start']`` .. ``time_window_prediction['end']``
    are assigned to ``modelCrime.data`` (the model object is modified) and
    are also the points the hit rates are computed against.

    Returns
    -------
    tuple
        ``(gridpred, hitrates)``: the gridded prediction and the result of
        ``open_cp.evaluation.hit_rates`` for coverages ``[0.05, 0.1]``.
    """
    window_start = time_window_prediction['start']
    window_end = time_window_prediction['end']
    points_crime, region = selectDataPrediction(localidad, window_start, window_end)

    modelCrime.data = points_crime
    continuous_prediction = modelCrime.predict(predict_time)

    # 150 x 150 grid cell size - presumably in projected coordinate units; confirm.
    grid_builder = open_cp.predictors.GridPredictionArray.from_continuous_prediction_region
    gridpred = grid_builder(continuous_prediction, region, 150, 150)

    coverage_levels = [0.05,0.1]
    hitrates = open_cp.evaluation.hit_rates(gridpred, points_crime, coverage_levels)
    return gridpred, hitrates

In [69]:
def plot_gridpred(localidad,predict_time,gridpred):
    """Draw the intensity matrix of ``gridpred`` as a colour mesh with a colour bar.

    The figure title names ``localidad`` and ``predict_time``. Nothing is
    returned; the figure is left for the notebook backend to render.
    """
    title = "Predicción localidad: "+localidad+'; fecha: '+str(predict_time)

    fig, ax = plt.subplots(figsize=(20,10))
    mesh = ax.pcolormesh(
        *gridpred.mesh_data(),
        gridpred.intensity_matrix,
        cmap="CMRmap_r",
    )
    ax.set_title(title)
    fig.colorbar(mesh, ax=ax)

## Select data

In [6]:
import os

# Base directory holding the datasets and the pickled models/results.
# Set the RINAS_BASE_PATH environment variable to run the notebook on another
# machine; without it the original location is used.
path = os.environ.get('RINAS_BASE_PATH', '/u01/user8/Documents/Riñas')

In [18]:
# Load the event dataset once. selectDataPrediction() reads this module-level
# `df` and expects the FECHA, HORA, LONGITUD, LATITUD and LOCALIDAD columns.
df = pd.read_csv(path+'/datasets/verify_enrich_nuse_29112019.csv')

## Test prediction

In [7]:
# 'city' selects every row of the dataset instead of a single LOCALIDAD.
localidad = 'city'

# Only "time_cutoff" is read in this notebook (it is part of the model file
# name); the other entries are presumably the settings used at training time.
parameters = {
    "time_bw": 144,
    "space_bw": 50,
    "time_cutoff": 90,
    "space_cutoff": 500,
}

train_initial_date = '2017-01-01'
train_final_date = '2017-01-22'

# Name suffix of the pickled model file that load_model() opens.
trained_model_name = '_'.join([
    str(localidad),
    train_initial_date,
    train_final_date,
    'time_cutoff',
    str(parameters['time_cutoff']),
])

In [8]:
# Prediction window as 'YYYY-MM-DD' strings. initial_date_prediction is the
# first day predicted; the generated days run up to 2017-01-28, so
# final_date_prediction itself is not predicted.
initial_date_prediction = '2017-01-22'
final_date_prediction = '2017-01-29'

In [70]:
# Derive the list of days from the configured window instead of a fixed day
# count, so changing initial/final_date_prediction is enough to re-run the
# experiment on another period. The final date is exclusive.
prediction_start = datetime.datetime.strptime(initial_date_prediction,'%Y-%m-%d')
prediction_end = datetime.datetime.strptime(final_date_prediction,'%Y-%m-%d')
n_prediction_days = (prediction_end - prediction_start).days

dates_to_predict = [
    (prediction_start + datetime.timedelta(days=day_offset)).strftime('%Y-%m-%d')
    for day_offset in range(n_prediction_days)
]

dates_to_predict

['2017-01-22',
 '2017-01-23',
 '2017-01-24',
 '2017-01-25',
 '2017-01-26',
 '2017-01-27',
 '2017-01-28']

In [None]:
modelCrime = load_model(localidad,trained_model_name)

# Hours after midnight at which a prediction is issued for each day.
hours_timedelta = [3, 9, 15, 21]

# One [localidad, prediction_time, hitrate@5%, hitrate@10%] row per prediction.
# Rows are collected in a plain list (accepted as-is by pd.DataFrame) instead
# of being stacked into a NumPy array on every iteration, which re-copied the
# whole array each time and turned the hit rates into strings.
hitrates_values = []

for initial_date in dates_to_predict:
    day_start = datetime.datetime.strptime(initial_date,'%Y-%m-%d')
    end_date = (day_start + datetime.timedelta(days=1)).strftime('%Y-%m-%d')
    # Events of this single day are used both as model data and for scoring.
    time_window_prediction = {'start':initial_date,'end':end_date}

    for hour_value in hours_timedelta:
        predict_time = day_start + datetime.timedelta(hours=hour_value)
        gridpred, hitrates = run_prediction(localidad,modelCrime,time_window_prediction,predict_time)
        plot_gridpred(localidad,predict_time,gridpred)  # comment this line out to skip plotting
        predict_time_label = predict_time.strftime('%Y-%m-%d %H:%M:%S')
        hitrates_values.append([localidad, predict_time_label, hitrates[0.05], hitrates[0.1]])


In [None]:
# Tabulate the collected rows: one row per prediction time, holding the
# locality and the hit rates at 5% and 10% coverage.
df_hitrates = pd.DataFrame(hitrates_values, columns=['localidad','prediction_time','hitrate_0.05','hitrate_0.1'])

### Estimate PAI

In [79]:
# PAI = hit rate divided by the fraction of area covered, per coverage level.
for _coverage in ('0.05', '0.1'):
    _hitrate = pd.to_numeric(df_hitrates['hitrate_' + _coverage])
    df_hitrates['PAI_' + _coverage] = _hitrate / float(_coverage)

In [80]:
# Show the per-prediction results table.
df_hitrates

Unnamed: 0,localidad,prediction_time,hitrate_0.05,hitrate_0.1,PAI_0.05,PAI_0.1
0,city,2017-01-22 03:00:00,0.0207253886010362,0.0382124352331606,0.414508,0.382124
1,city,2017-01-22 09:00:00,0.0161917098445595,0.0284974093264248,0.323834,0.284974
2,city,2017-01-22 15:00:00,0.0148963730569948,0.038860103626943,0.297927,0.388601
3,city,2017-01-22 21:00:00,0.0148963730569948,0.0382124352331606,0.297927,0.382124
4,city,2017-01-23 03:00:00,0.0288858321870701,0.0426409903713892,0.577717,0.42641
5,city,2017-01-23 09:00:00,0.0261348005502063,0.0275103163686382,0.522696,0.275103
6,city,2017-01-23 15:00:00,0.030261348005502,0.046767537826685,0.605227,0.467675
7,city,2017-01-23 21:00:00,0.0343878954607978,0.0701513067400275,0.687758,0.701513
8,city,2017-01-24 03:00:00,0.0212464589235127,0.0297450424929178,0.424929,0.29745
9,city,2017-01-24 09:00:00,0.0382436260623229,0.0453257790368272,0.764873,0.453258


In [83]:
# Persist the results table next to the trained models.
experiment_name = 'city_hitrates_2017-01-22_2017-01-28'
hitrates_outfile_path = path+'/aggressive_behavior_model/SEPP/'+'pkl/PredictionRina_'+experiment_name+'.pkl'
# The context manager closes (and flushes) the file even if pickling fails.
with open(hitrates_outfile_path,'wb') as hitrates_outfile:
    pickle.dump(df_hitrates, hitrates_outfile)

In [84]:
# Report the mean PAI over all predictions for each coverage level.
for _label, _column in (('average PAI 5%: ', 'PAI_0.05'), ('average PAI 10%: ', 'PAI_0.1')):
    print(_label, df_hitrates[_column].mean())

average PAI 5%:  0.5338693700851119
average PAI 10%:  0.47119505453105576
