## Method
- Compute an average prediction risk map from the risk matrices obtained from the different databases (experiment_10_2_SEPPexp...)
- Measure hit rate and PAI considering different ground truth scenarios:
    1. SIEDCO
    2. RNMC
    3. NUSE

## Hypothesis:
Performance metrics of the averaged prediction are better than those of the isolated per-database predictions (experiment_10_2_SEPPexp)

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import sys
sys.path.append("..")
import geojson
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
from pyproj import Proj, transform
import pickle
import open_cp

In [2]:
from services import prediction_metrics
from services.prediction_experiment import PredictionExperiment
from services.process_data import ProcessData

Failed to import `rtree`.
Failed to import `rtree`.


## Built-in

In [3]:
def get_hit_rate_from_dict(row,column,coverage):
    """Return the entry stored under ``coverage`` in the mapping held at ``row[column]``.

    ``row`` is indexed by ``column`` first, and the resulting object is then
    indexed by ``coverage``; any lookup error propagates to the caller.
    """
    rates_by_coverage = row[column]
    return rates_by_coverage[coverage]

In [4]:
def average_grid_prediction(row):
    """Combine the three per-source grid predictions of ``row`` into one averaged grid.

    Reads ``row['prediction_siedco']``, ``row['prediction_rnmc']`` and
    ``row['prediction_nuse']`` and takes the arithmetic mean of their
    ``_xoffset``, ``_yoffset``, ``_xsize``, ``_ysize`` and ``_matrix``
    attributes. The averaged values are wrapped in a new
    ``open_cp.predictors.GridPredictionArray``, which is returned.

    NOTE(review): averaging offsets and cell sizes is only meaningful when the
    three grids share the same geometry -- confirm for every row.
    """
    siedco = row['prediction_siedco']
    rnmc = row['prediction_rnmc']
    nuse = row['prediction_nuse']

    def mean_of(attribute):
        # Same left-to-right sum as the explicit (siedco + rnmc + nuse) / 3.
        return (getattr(siedco, attribute) + getattr(rnmc, attribute) + getattr(nuse, attribute))/3

    # Evaluated in the order xoffset, yoffset, xsize, ysize, matrix.
    averaged = {
        name: mean_of('_' + name)
        for name in ('xoffset', 'yoffset', 'xsize', 'ysize', 'matrix')
    }
    return open_cp.predictors.GridPredictionArray(**averaged)

## Load prediction results

In [5]:
# Load the pickled SIEDCO prediction results.
# The context manager closes the file even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code -- only load files produced by this project.
# TODO: replace the absolute, machine-specific path with a configurable data directory.
with open('/Users/anamaria/Desktop/dev/security_project/aggressive_behavior_model/pkl/experiment_seppexp_10_2_siedco_prediction.pkl','rb') as infile:
    loaded_siedco = pickle.load(infile)

In [6]:
# Load the pickled RNMC prediction results.
# The context manager closes the file even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code -- only load files produced by this project.
# TODO: replace the absolute, machine-specific path with a configurable data directory.
with open('/Users/anamaria/Desktop/dev/security_project/aggressive_behavior_model/pkl/experiment_seppexp_10_2_rnmc_prediction.pkl','rb') as infile:
    loaded_rnmc = pickle.load(infile)

In [7]:
# Load the pickled NUSE prediction results.
# The context manager closes the file even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code -- only load files produced by this project.
# TODO: replace the absolute, machine-specific path with a configurable data directory.
with open('/Users/anamaria/Desktop/dev/security_project/aggressive_behavior_model/pkl/experiment_seppexp_10_2_nuse_prediction.pkl','rb') as infile:
    loaded_nuse = pickle.load(infile)

## Compute average prediction

In [8]:
# Suffix the generic 'prediction' / 'eval_pts' columns with their source name so the
# three frames can sit side by side without clashing.
loaded_siedco = loaded_siedco.rename(
    columns={name: name + '_siedco' for name in ('prediction', 'eval_pts')}
)
loaded_rnmc = loaded_rnmc.rename(
    columns={name: name + '_rnmc' for name in ('prediction', 'eval_pts')}
)
loaded_nuse = loaded_nuse.rename(
    columns={name: name + '_nuse' for name in ('prediction', 'eval_pts')}
)

In [9]:
# Place the three per-source frames side by side (column-wise concatenation).
source_frames = [loaded_siedco, loaded_rnmc, loaded_nuse]
result = pd.concat(source_frames, axis=1)

In [10]:
# One averaged grid per row, built from that row's three per-source predictions.
result['average_prediction'] = result.apply(average_grid_prediction, axis=1)

## Hit rate and PAI

#### Actual proportion of the rectangular region covered by the city (based on script experiment_11_1_get_rectangular_city_region):

In [11]:
# Share of the rectangular region covered by the city, stored as a proportion (0.26 = 26 %).
# Used below to rescale nominal coverages and to convert them back.
city_percentage_on_region = 0.26

In [12]:
## Nominal coverage levels, expressed in % (NOT IN PROPORTION)
coverages = list(range(2, 21, 2))
### Rescale each nominal coverage by the city's share of the rectangular region
### (still in %, rounded to one decimal); this is only needed so that the open_cp
### hit rate implementation makes sense.
real_coverages_city = [
    round((c/100)*city_percentage_on_region*100, 1)
    for c in coverages
]

In [13]:
## Display the city-rescaled coverages; values are represented in % (NOT IN PROPORTION)
real_coverages_city

[0.5, 1.0, 1.6, 2.1, 2.6, 3.1, 3.6, 4.2, 4.7, 5.2]

### SIEDCO ground truth

In [14]:
# Start the SIEDCO evaluation from a fresh copy so metric columns are not added to `result`.
df_result = result.copy()

In [15]:
# Score the averaged prediction against the SIEDCO evaluation points, once per
# measure_hit_rates mode; each row receives the value returned by that call.
for target_column, mode in (('hitrate_default', 'default'), ('hitrate_TP', 'ground_truth_coverage')):
    df_result[target_column] = df_result.apply(
        lambda row, mode=mode: prediction_metrics.measure_hit_rates(
            row['average_prediction'], row['eval_pts_siedco'], real_coverages_city, mode
        ),
        axis=1,
    )


In [16]:
## For hitrate_TP ("true positives"): unpack the first entry of each dict
tp_rates = df_result['hitrate_TP']
# First key is a coverage relative to the rectangle; convert it to city scale.
df_result['coverage_TP'] = tp_rates.map(lambda rates: (list(rates.keys())[0])/city_percentage_on_region)
# First value is the matching hit rate.
df_result['hit_rate_TP'] = tp_rates.map(lambda rates: list(rates.values())[0])
# PAI = hit rate / coverage, with coverage converted from % to proportion.
df_result['PAI_TP'] = df_result['hit_rate_TP'].div(df_result['coverage_TP']/100)

In [17]:
##For hitrate_default
column_dict = 'hitrate_default'
# Pair every nominal coverage (%) with its city-rescaled counterpart instead of
# recovering the nominal value through round(c/city_percentage_on_region).
for nominal_coverage, c in zip(coverages, real_coverages_city):
    # The rescaled coverage `c` is the key used inside the hit-rate dict.
    new_hit_rate_column = 'hit_rate_default_coverage_'+str(nominal_coverage)
    df_result[new_hit_rate_column] = df_result.apply(lambda row: get_hit_rate_from_dict(row,column_dict,c), axis=1)

    ##PAI = hit rate / nominal coverage expressed as a proportion
    new_column = 'PAI_default_coverage_'+str(nominal_coverage)
    df_result[new_column] = df_result[new_hit_rate_column]/(nominal_coverage/100)


In [18]:
#delete datetimes where no crimes were reported (0 crimes on ground truth -> hit-rate = -1)
df_result = df_result[df_result['hit_rate_default_coverage_2']!= -1]
# numeric_only=True: the frame also holds object columns (prediction grids, hit-rate dicts)
# that cannot be averaged; recent pandas raises on them instead of silently dropping them.
print(df_result.mean(numeric_only=True))

coverage_TP                      0.020564
hit_rate_TP                      0.000000
PAI_TP                           0.000000
hit_rate_default_coverage_2      0.235444
PAI_default_coverage_2          11.772219
hit_rate_default_coverage_4      0.404008
PAI_default_coverage_4          10.100200
hit_rate_default_coverage_6      0.527552
PAI_default_coverage_6           8.792539
hit_rate_default_coverage_8      0.645078
PAI_default_coverage_8           8.063473
hit_rate_default_coverage_10     0.721058
PAI_default_coverage_10          7.210582
hit_rate_default_coverage_12     0.805248
PAI_default_coverage_12          6.710403
hit_rate_default_coverage_14     0.851634
PAI_default_coverage_14          6.083101
hit_rate_default_coverage_16     0.874887
PAI_default_coverage_16          5.468042
hit_rate_default_coverage_18     0.874887
PAI_default_coverage_18          4.860482
hit_rate_default_coverage_20     0.874887
PAI_default_coverage_20          4.374434
dtype: float64


### RNMC ground truth

In [21]:
# Start the RNMC evaluation from a fresh copy so metric columns are not added to `result`.
df_result = result.copy()

In [22]:
# Score the averaged prediction against the RNMC evaluation points, once per
# measure_hit_rates mode; each row receives the value returned by that call.
for target_column, mode in (('hitrate_default', 'default'), ('hitrate_TP', 'ground_truth_coverage')):
    df_result[target_column] = df_result.apply(
        lambda row, mode=mode: prediction_metrics.measure_hit_rates(
            row['average_prediction'], row['eval_pts_rnmc'], real_coverages_city, mode
        ),
        axis=1,
    )


In [23]:
## For hitrate_TP ("true positives"): unpack the first entry of each dict
tp_rates = df_result['hitrate_TP']
# First key is a coverage relative to the rectangle; convert it to city scale.
df_result['coverage_TP'] = tp_rates.map(lambda rates: (list(rates.keys())[0])/city_percentage_on_region)
# First value is the matching hit rate.
df_result['hit_rate_TP'] = tp_rates.map(lambda rates: list(rates.values())[0])
# PAI = hit rate / coverage, with coverage converted from % to proportion.
df_result['PAI_TP'] = df_result['hit_rate_TP'].div(df_result['coverage_TP']/100)

In [24]:
##For hitrate_default
column_dict = 'hitrate_default'
# Pair every nominal coverage (%) with its city-rescaled counterpart instead of
# recovering the nominal value through round(c/city_percentage_on_region).
for nominal_coverage, c in zip(coverages, real_coverages_city):
    # The rescaled coverage `c` is the key used inside the hit-rate dict.
    new_hit_rate_column = 'hit_rate_default_coverage_'+str(nominal_coverage)
    df_result[new_hit_rate_column] = df_result.apply(lambda row: get_hit_rate_from_dict(row,column_dict,c), axis=1)

    ##PAI = hit rate / nominal coverage expressed as a proportion
    new_column = 'PAI_default_coverage_'+str(nominal_coverage)
    df_result[new_column] = df_result[new_hit_rate_column]/(nominal_coverage/100)


In [25]:
#delete datetimes where no crimes were reported (0 crimes on ground truth -> hit-rate = -1)
df_result = df_result[df_result['hit_rate_default_coverage_2']!= -1]
# numeric_only=True: the frame also holds object columns (prediction grids, hit-rate dicts)
# that cannot be averaged; recent pandas raises on them instead of silently dropping them.
print(df_result.mean(numeric_only=True))

coverage_TP                      0.124019
hit_rate_TP                      0.083829
PAI_TP                          68.749653
hit_rate_default_coverage_2      0.411859
PAI_default_coverage_2          20.592951
hit_rate_default_coverage_4      0.591152
PAI_default_coverage_4          14.778805
hit_rate_default_coverage_6      0.723201
PAI_default_coverage_6          12.053354
hit_rate_default_coverage_8      0.803966
PAI_default_coverage_8          10.049581
hit_rate_default_coverage_10     0.868945
PAI_default_coverage_10          8.689451
hit_rate_default_coverage_12     0.910406
PAI_default_coverage_12          7.586716
hit_rate_default_coverage_14     0.938684
PAI_default_coverage_14          6.704884
hit_rate_default_coverage_16     0.949142
PAI_default_coverage_16          5.932135
hit_rate_default_coverage_18     0.949142
PAI_default_coverage_18          5.273009
hit_rate_default_coverage_20     0.949142
PAI_default_coverage_20          4.745708
dtype: float64


### NUSE ground truth

In [26]:
# Start the NUSE evaluation from a fresh copy so metric columns are not added to `result`.
df_result = result.copy()

In [27]:
# Score the averaged prediction against the NUSE evaluation points, once per
# measure_hit_rates mode; each row receives the value returned by that call.
for target_column, mode in (('hitrate_default', 'default'), ('hitrate_TP', 'ground_truth_coverage')):
    df_result[target_column] = df_result.apply(
        lambda row, mode=mode: prediction_metrics.measure_hit_rates(
            row['average_prediction'], row['eval_pts_nuse'], real_coverages_city, mode
        ),
        axis=1,
    )


In [28]:
## For hitrate_TP ("true positives"): unpack the first entry of each dict
tp_rates = df_result['hitrate_TP']
# First key is a coverage relative to the rectangle; convert it to city scale.
df_result['coverage_TP'] = tp_rates.map(lambda rates: (list(rates.keys())[0])/city_percentage_on_region)
# First value is the matching hit rate.
df_result['hit_rate_TP'] = tp_rates.map(lambda rates: list(rates.values())[0])
# PAI = hit rate / coverage, with coverage converted from % to proportion.
df_result['PAI_TP'] = df_result['hit_rate_TP'].div(df_result['coverage_TP']/100)

In [29]:
##For hitrate_default
column_dict = 'hitrate_default'
# Pair every nominal coverage (%) with its city-rescaled counterpart instead of
# recovering the nominal value through round(c/city_percentage_on_region).
for nominal_coverage, c in zip(coverages, real_coverages_city):
    # The rescaled coverage `c` is the key used inside the hit-rate dict.
    new_hit_rate_column = 'hit_rate_default_coverage_'+str(nominal_coverage)
    df_result[new_hit_rate_column] = df_result.apply(lambda row: get_hit_rate_from_dict(row,column_dict,c), axis=1)

    ##PAI = hit rate / nominal coverage expressed as a proportion
    new_column = 'PAI_default_coverage_'+str(nominal_coverage)
    df_result[new_column] = df_result[new_hit_rate_column]/(nominal_coverage/100)


In [30]:
#delete datetimes where no crimes were reported (0 crimes on ground truth -> hit-rate = -1)
df_result = df_result[df_result['hit_rate_default_coverage_2']!= -1]
# numeric_only=True: the frame also holds object columns (prediction grids, hit-rate dicts)
# that cannot be averaged; recent pandas raises on them instead of silently dropping them.
print(df_result.mean(numeric_only=True))

coverage_TP                      0.365678
hit_rate_TP                      0.138648
PAI_TP                          43.013506
hit_rate_default_coverage_2      0.348386
PAI_default_coverage_2          17.419321
hit_rate_default_coverage_4      0.524532
PAI_default_coverage_4          13.113293
hit_rate_default_coverage_6      0.679101
PAI_default_coverage_6          11.318356
hit_rate_default_coverage_8      0.770196
PAI_default_coverage_8           9.627448
hit_rate_default_coverage_10     0.838144
PAI_default_coverage_10          8.381442
hit_rate_default_coverage_12     0.889274
PAI_default_coverage_12          7.410618
hit_rate_default_coverage_14     0.934152
PAI_default_coverage_14          6.672511
hit_rate_default_coverage_16     0.948344
PAI_default_coverage_16          5.927149
hit_rate_default_coverage_18     0.948344
PAI_default_coverage_18          5.268577
hit_rate_default_coverage_20     0.948344
PAI_default_coverage_20          4.741719
dtype: float64


In [32]:
# Sanity check: show the SIEDCO prediction grid stored under index label 0
# (its repr lists offset, cell size and risk-matrix shape).
loaded_siedco['prediction_siedco'][0]

GridPredictionArray(offset=(958645.8182116301,904338.0678953262), size=150.0x150.0, risk intensity size=343x816)

In [33]:
# Sanity check: show the RNMC prediction grid stored under index label 0
# (its repr lists offset, cell size and risk-matrix shape).
loaded_rnmc['prediction_rnmc'][0]

GridPredictionArray(offset=(958645.8182116301,904338.0678953262), size=150.0x150.0, risk intensity size=343x816)

In [34]:
# Sanity check: show the NUSE prediction grid stored under index label 0
# (its repr lists offset, cell size and risk-matrix shape).
loaded_nuse['prediction_nuse'][0]

GridPredictionArray(offset=(958645.8182116301,904338.0678953262), size=150.0x150.0, risk intensity size=343x816)