## Method
- Get prediction risk map based only on NUSE DB (experiment_10_2_SEPPexp...)
- Measure hit rate and PAI considering different ground truth scenarios:
    1. SIEDCO
    2. RNMC
    3. NUSE
- Check if results are equivalent to prediction using sum and average operators

## Hypothesis:
Multimodal prediction is driven by NUSE prediction.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sys
sys.path.append("..")
import geojson
from shapely.geometry import Point
from shapely.geometry.polygon import Polygon
from pyproj import Proj, transform
import pickle
import open_cp

In [2]:
from services import prediction_metrics
from services.prediction_experiment import PredictionExperiment
from services.process_data import ProcessData

Failed to import `rtree`.
Failed to import `rtree`.


## Built-in

In [3]:
def get_hit_rate_from_dict(row,column,coverage):
    """Look up the hit rate stored for one coverage level in a row.

    Parameters
    ----------
    row : mapping-like (e.g. a DataFrame row)
        Object indexable by ``column``.
    column : hashable
        Key of the entry in ``row`` that holds a ``{coverage: hit rate}`` mapping.
    coverage : hashable
        Coverage key to look up inside that mapping.

    Returns
    -------
    The value stored at ``row[column][coverage]``.

    Raises
    ------
    KeyError
        If ``column`` or ``coverage`` is missing (for dict-like inputs).
    """
    hit_rates_by_coverage = row[column]
    return hit_rates_by_coverage[coverage]

## Load prediction results

In [4]:
# Load the pickled SIEDCO prediction results.
# NOTE(review): absolute, machine-specific path -- the notebook only runs where this file exists.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
# `with` guarantees the file handle is closed even if unpickling fails.
with open('/Users/anamaria/Desktop/dev/security_project/aggressive_behavior_model/pkl/experiment_seppexp_10_2_siedco_prediction.pkl','rb') as infile:
    loaded_siedco = pickle.load(infile)

In [5]:
# Load the pickled RNMC prediction results.
# NOTE(review): absolute, machine-specific path -- the notebook only runs where this file exists.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
# `with` guarantees the file handle is closed even if unpickling fails.
with open('/Users/anamaria/Desktop/dev/security_project/aggressive_behavior_model/pkl/experiment_seppexp_10_2_rnmc_prediction.pkl','rb') as infile:
    loaded_rnmc = pickle.load(infile)

In [6]:
# Load the pickled NUSE prediction results.
# NOTE(review): absolute, machine-specific path -- the notebook only runs where this file exists.
# NOTE: pickle.load can execute arbitrary code; only load files from a trusted source.
# `with` guarantees the file handle is closed even if unpickling fails.
with open('/Users/anamaria/Desktop/dev/security_project/aggressive_behavior_model/pkl/experiment_seppexp_10_2_nuse_prediction.pkl','rb') as infile:
    loaded_nuse = pickle.load(infile)

In [7]:
# Tag the 'prediction' and 'eval_pts' columns of each frame with its data source
# so the three frames can sit side by side without clashing column names.
loaded_siedco = loaded_siedco.rename(
    {'prediction': 'prediction_siedco', 'eval_pts': 'eval_pts_siedco'},
    axis='columns',
)
loaded_rnmc = loaded_rnmc.rename(
    {'prediction': 'prediction_rnmc', 'eval_pts': 'eval_pts_rnmc'},
    axis='columns',
)
loaded_nuse = loaded_nuse.rename(
    {'prediction': 'prediction_nuse', 'eval_pts': 'eval_pts_nuse'},
    axis='columns',
)

## Combine prediction results (SIEDCO, RNMC, NUSE) into one table

In [8]:
# Place the SIEDCO, RNMC and NUSE frames side by side (aligned on their index).
result = pd.concat(
    objs=[loaded_siedco, loaded_rnmc, loaded_nuse],
    axis='columns',
)

## Hit rate and PAI

#### Fraction of the rectangular region actually covered by the city (based on script experiment_11_1_get_rectangular_city_region):

In [9]:
# Share of the rectangular region that is covered by the city.
# Despite the name, the value is a proportion (0-1), not a percentage: it is used
# as a plain multiplier/divisor when converting coverages in the cells below.
city_percentage_on_region = 0.26

In [10]:
## Nominal city coverages, expressed in % (NOT as proportions): 2, 4, ..., 20
coverages = list(range(2, 21, 2))
### Rescale every coverage by the city's share of the rectangular region (rounded to
### one decimal); this rescaling exists so that the open_cp hit rate implementation makes sense
real_coverages_city = [
    round((pct/100)*city_percentage_on_region*100, 1)
    for pct in coverages
]

In [11]:
## Display the rescaled coverages; like `coverages`, they are expressed in % (NOT as proportions)
real_coverages_city

[0.5, 1.0, 1.6, 2.1, 2.6, 3.1, 3.6, 4.2, 4.7, 5.2]

### SIEDCO ground truth

In [12]:
# Work on a fresh copy so the metric columns added below do not end up in `result`.
df_result = result.copy()

In [13]:
# Evaluate the NUSE-only prediction against the SIEDCO evaluation points.
# measure_hit_rates is called once per output column; the two calls differ only in
# their last argument ('default' vs 'ground_truth_coverage').
for hit_rate_column, hit_rate_mode in (('hitrate_default', 'default'),
                                       ('hitrate_TP', 'ground_truth_coverage')):
    df_result[hit_rate_column] = df_result.apply(
        lambda row: prediction_metrics.measure_hit_rates(
            row['prediction_nuse'],
            row['eval_pts_siedco'],
            real_coverages_city,
            hit_rate_mode,
        ),
        axis=1,
    )


In [14]:
## hitrate_TP ("true positives"): use the first {coverage: hit rate} entry of each row
## (presumably the only entry -- TODO confirm against measure_hit_rates)
# First key, divided by the city's share of the region
df_result['coverage_TP'] = df_result.apply(
    lambda row: [*row['hitrate_TP'].keys()][0] / city_percentage_on_region,
    axis=1,
)
# First value = hit rate reported for that coverage
df_result['hit_rate_TP'] = df_result.apply(
    lambda row: [*row['hitrate_TP'].values()][0],
    axis=1,
)
# PAI = hit rate / coverage, with coverage converted from % to a proportion
df_result['PAI_TP'] = df_result['hit_rate_TP'].div(df_result['coverage_TP'] / 100)

In [15]:
## For hitrate_default: expand the {coverage: hit rate} dicts into one hit-rate column
## and one PAI column per nominal city coverage (the values listed in `coverages`).
column_dict = 'hitrate_default'
# Pair each nominal coverage with its rescaled value (the dict key) directly, instead of
# recovering the nominal value through round(c/city_percentage_on_region): that float
# round-trip can collide or hit zero for other region fractions.
for nominal_coverage, c in zip(coverages, real_coverages_city):
    # 'g' formatting keeps the historical column names ('..._2', '..._4', ...)
    coverage_label = format(nominal_coverage, 'g')
    new_hit_rate_column = 'hit_rate_default_coverage_'+coverage_label
    df_result[new_hit_rate_column] = df_result.apply(lambda row: get_hit_rate_from_dict(row,column_dict,c), axis=1)

    ## PAI = hit rate / coverage, with coverage converted from % to a proportion
    new_column = 'PAI_default_coverage_'+coverage_label
    df_result[new_column] = df_result[new_hit_rate_column]/(nominal_coverage/100)


In [16]:
# Drop datetimes where no crimes were reported (0 crimes on ground truth -> hit-rate = -1)
df_result = df_result[df_result['hit_rate_default_coverage_2']!= -1]
# df_result also holds non-numeric columns (prediction objects, hit-rate dicts), so restrict
# the mean to numeric columns explicitly: pandas >= 2.0 no longer drops them silently
# and raises a TypeError instead.
print(df_result.mean(numeric_only=True))

coverage_TP                     0.020564
hit_rate_TP                     0.000000
PAI_TP                          0.000000
hit_rate_default_coverage_2     0.176642
PAI_default_coverage_2          8.832112
hit_rate_default_coverage_4     0.306670
PAI_default_coverage_4          7.666738
hit_rate_default_coverage_6     0.449193
PAI_default_coverage_6          7.486547
hit_rate_default_coverage_8     0.552751
PAI_default_coverage_8          6.909393
hit_rate_default_coverage_10    0.618565
PAI_default_coverage_10         6.185654
hit_rate_default_coverage_12    0.697717
PAI_default_coverage_12         5.814308
hit_rate_default_coverage_14    0.719086
PAI_default_coverage_14         5.136331
hit_rate_default_coverage_16    0.719086
PAI_default_coverage_16         4.494290
hit_rate_default_coverage_18    0.719086
PAI_default_coverage_18         3.994924
hit_rate_default_coverage_20    0.719086
PAI_default_coverage_20         3.595432
dtype: float64


### RNMC ground truth

In [17]:
# Work on a fresh copy so the metric columns added below do not end up in `result`.
df_result = result.copy()

In [18]:
# Evaluate the NUSE-only prediction against the RNMC evaluation points.
# measure_hit_rates is called once per output column; the two calls differ only in
# their last argument ('default' vs 'ground_truth_coverage').
for hit_rate_column, hit_rate_mode in (('hitrate_default', 'default'),
                                       ('hitrate_TP', 'ground_truth_coverage')):
    df_result[hit_rate_column] = df_result.apply(
        lambda row: prediction_metrics.measure_hit_rates(
            row['prediction_nuse'],
            row['eval_pts_rnmc'],
            real_coverages_city,
            hit_rate_mode,
        ),
        axis=1,
    )


In [19]:
## hitrate_TP ("true positives"): use the first {coverage: hit rate} entry of each row
## (presumably the only entry -- TODO confirm against measure_hit_rates)
# First key, divided by the city's share of the region
df_result['coverage_TP'] = df_result.apply(
    lambda row: [*row['hitrate_TP'].keys()][0] / city_percentage_on_region,
    axis=1,
)
# First value = hit rate reported for that coverage
df_result['hit_rate_TP'] = df_result.apply(
    lambda row: [*row['hitrate_TP'].values()][0],
    axis=1,
)
# PAI = hit rate / coverage, with coverage converted from % to a proportion
df_result['PAI_TP'] = df_result['hit_rate_TP'].div(df_result['coverage_TP'] / 100)

In [20]:
## For hitrate_default: expand the {coverage: hit rate} dicts into one hit-rate column
## and one PAI column per nominal city coverage (the values listed in `coverages`).
column_dict = 'hitrate_default'
# Pair each nominal coverage with its rescaled value (the dict key) directly, instead of
# recovering the nominal value through round(c/city_percentage_on_region): that float
# round-trip can collide or hit zero for other region fractions.
for nominal_coverage, c in zip(coverages, real_coverages_city):
    # 'g' formatting keeps the historical column names ('..._2', '..._4', ...)
    coverage_label = format(nominal_coverage, 'g')
    new_hit_rate_column = 'hit_rate_default_coverage_'+coverage_label
    df_result[new_hit_rate_column] = df_result.apply(lambda row: get_hit_rate_from_dict(row,column_dict,c), axis=1)

    ## PAI = hit rate / coverage, with coverage converted from % to a proportion
    new_column = 'PAI_default_coverage_'+coverage_label
    df_result[new_column] = df_result[new_hit_rate_column]/(nominal_coverage/100)


In [21]:
# Drop datetimes where no crimes were reported (0 crimes on ground truth -> hit-rate = -1)
df_result = df_result[df_result['hit_rate_default_coverage_2']!= -1]
# df_result also holds non-numeric columns (prediction objects, hit-rate dicts), so restrict
# the mean to numeric columns explicitly: pandas >= 2.0 no longer drops them silently
# and raises a TypeError instead.
print(df_result.mean(numeric_only=True))

coverage_TP                      0.124019
hit_rate_TP                      0.009060
PAI_TP                           6.569966
hit_rate_default_coverage_2      0.200722
PAI_default_coverage_2          10.036092
hit_rate_default_coverage_4      0.358927
PAI_default_coverage_4           8.973183
hit_rate_default_coverage_6      0.496709
PAI_default_coverage_6           8.278491
hit_rate_default_coverage_8      0.603470
PAI_default_coverage_8           7.543377
hit_rate_default_coverage_10     0.677516
PAI_default_coverage_10          6.775156
hit_rate_default_coverage_12     0.759014
PAI_default_coverage_12          6.325116
hit_rate_default_coverage_14     0.791374
PAI_default_coverage_14          5.652669
hit_rate_default_coverage_16     0.791374
PAI_default_coverage_16          4.946086
hit_rate_default_coverage_18     0.791374
PAI_default_coverage_18          4.396521
hit_rate_default_coverage_20     0.791374
PAI_default_coverage_20          3.956869
dtype: float64


### NUSE ground truth

In [22]:
# Work on a fresh copy so the metric columns added below do not end up in `result`.
df_result = result.copy()

In [23]:
# Evaluate the NUSE-only prediction against the NUSE evaluation points.
# measure_hit_rates is called once per output column; the two calls differ only in
# their last argument ('default' vs 'ground_truth_coverage').
for hit_rate_column, hit_rate_mode in (('hitrate_default', 'default'),
                                       ('hitrate_TP', 'ground_truth_coverage')):
    df_result[hit_rate_column] = df_result.apply(
        lambda row: prediction_metrics.measure_hit_rates(
            row['prediction_nuse'],
            row['eval_pts_nuse'],
            real_coverages_city,
            hit_rate_mode,
        ),
        axis=1,
    )


In [24]:
## hitrate_TP ("true positives"): use the first {coverage: hit rate} entry of each row
## (presumably the only entry -- TODO confirm against measure_hit_rates)
# First key, divided by the city's share of the region
df_result['coverage_TP'] = df_result.apply(
    lambda row: [*row['hitrate_TP'].keys()][0] / city_percentage_on_region,
    axis=1,
)
# First value = hit rate reported for that coverage
df_result['hit_rate_TP'] = df_result.apply(
    lambda row: [*row['hitrate_TP'].values()][0],
    axis=1,
)
# PAI = hit rate / coverage, with coverage converted from % to a proportion
df_result['PAI_TP'] = df_result['hit_rate_TP'].div(df_result['coverage_TP'] / 100)

In [25]:
## For hitrate_default: expand the {coverage: hit rate} dicts into one hit-rate column
## and one PAI column per nominal city coverage (the values listed in `coverages`).
column_dict = 'hitrate_default'
# Pair each nominal coverage with its rescaled value (the dict key) directly, instead of
# recovering the nominal value through round(c/city_percentage_on_region): that float
# round-trip can collide or hit zero for other region fractions.
for nominal_coverage, c in zip(coverages, real_coverages_city):
    # 'g' formatting keeps the historical column names ('..._2', '..._4', ...)
    coverage_label = format(nominal_coverage, 'g')
    new_hit_rate_column = 'hit_rate_default_coverage_'+coverage_label
    df_result[new_hit_rate_column] = df_result.apply(lambda row: get_hit_rate_from_dict(row,column_dict,c), axis=1)

    ## PAI = hit rate / coverage, with coverage converted from % to a proportion
    new_column = 'PAI_default_coverage_'+coverage_label
    df_result[new_column] = df_result[new_hit_rate_column]/(nominal_coverage/100)


In [26]:
# Drop datetimes where no crimes were reported (0 crimes on ground truth -> hit-rate = -1)
df_result = df_result[df_result['hit_rate_default_coverage_2']!= -1]
# df_result also holds non-numeric columns (prediction objects, hit-rate dicts), so restrict
# the mean to numeric columns explicitly: pandas >= 2.0 no longer drops them silently
# and raises a TypeError instead.
print(df_result.mean(numeric_only=True))

coverage_TP                      0.365678
hit_rate_TP                      0.155397
PAI_TP                          48.633940
hit_rate_default_coverage_2      0.371721
PAI_default_coverage_2          18.586058
hit_rate_default_coverage_4      0.543793
PAI_default_coverage_4          13.594828
hit_rate_default_coverage_6      0.694170
PAI_default_coverage_6          11.569497
hit_rate_default_coverage_8      0.785099
PAI_default_coverage_8           9.813744
hit_rate_default_coverage_10     0.853635
PAI_default_coverage_10          8.536345
hit_rate_default_coverage_12     0.909905
PAI_default_coverage_12          7.582541
hit_rate_default_coverage_14     0.920220
PAI_default_coverage_14          6.572999
hit_rate_default_coverage_16     0.920220
PAI_default_coverage_16          5.751374
hit_rate_default_coverage_18     0.920220
PAI_default_coverage_18          5.112332
hit_rate_default_coverage_20     0.920220
PAI_default_coverage_20          4.601099
dtype: float64
