# Evaluation
### The saved predictions are used to calculate the evaluation metrics and assess the model. The evaluation metrics used are:
* Mean Absolute Error (MAE)
* Mean Square Error (MSE)
* Root Mean Square Error (RMSE)
* R-Squared

#### Finally, plots of the actual vs. predicted values are shown for all crime types.

In [None]:
# Import all required packages
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import math
from sklearn.metrics import r2_score 
from sklearn.model_selection import train_test_split
from natsort import natsorted

In [None]:
# Work from the project root so that the relative paths used later
# (e.g. "FILES/...") resolve against it.
project_root = "/data/private/THESIS/"
os.chdir(project_root)
base_path = os.getcwd()
base_path

In [None]:
def calculate_accuracy(dataframe, col_list, pred_suffix="_p"):
    """Compute MSE, RMSE, MAE and R-squared for actual/predicted column pairs.

    Parameters:
        dataframe: Pandas dataframe containing, for every name in
            ``col_list``, a column of actual values and a column of
            predicted values named ``<name><pred_suffix>``.
        col_list: Names of the actual-value columns to evaluate.
        pred_suffix: Suffix appended to an actual-value column name to get
            the matching prediction column. Defaults to "_p".

    Returns:
        Pandas dataframe with one column per entry of ``col_list`` and the
        rows 'mse', 'rmse', 'mae' and 'r2'.
    """
    metrics = {}
    for col in col_list:
        actual = dataframe[col]
        predicted = dataframe[col + pred_suffix]
        error = actual - predicted
        mse = (error * error).mean()
        metrics[col] = {
            'mse': mse,
            'rmse': math.sqrt(mse),
            'mae': error.abs().mean(),
            'r2': r2_score(actual, predicted),
        }
    # Outer keys become the columns, inner metric names become the row labels.
    return pd.DataFrame(metrics)

In [None]:
# Read the CSV produced by the prediction step
predictions_csv = "FILES/resnet18_44_tr_v1.0_scaled_ds.csv"
final_df = pd.read_csv(predictions_csv)

# Actual-value columns to evaluate; predictions live in the matching "<name>_p" columns
col_list = [
    "burglary",
    "robbery",
    "o_crimes",
    "v_crimes",
]

In [None]:
# Evaluation metrics over the individual records (per point)
f1 = calculate_accuracy(dataframe=final_df, col_list=col_list)
f1

In [None]:
# Actual vs. predicted scatter plots per point, one panel per crime type.
# Mapping order defines panel order: top-left, top-right, bottom-left, bottom-right.
panel_titles = {
    'burglary': 'Burglary',
    'robbery': 'Robbery',
    'o_crimes': 'Other Thefts',
    'v_crimes': 'Vehicle Crimes',
}
fig, ax = plt.subplots(2, 2)
for panel, (column, title) in zip(ax.flat, panel_titles.items()):
    panel.scatter(final_df[column], final_df[column + '_p'])
    panel.set_title(title)
    panel.set(xlabel='Actual', ylabel='Predicted')
plt.show()

In [None]:
# Average the actual and predicted values over all records that share a
# 'pointid', giving one row per cell.
mean_columns = [
    'burglary', 'burglary_p',
    'robbery', 'robbery_p',
    'o_crimes', 'o_crimes_p',
    'v_crimes', 'v_crimes_p',
]
d = dict.fromkeys(mean_columns, 'mean')
agg_df = final_df.groupby('pointid').agg(d)

# Evaluation metrics on the per-cell averages
f2 = calculate_accuracy(dataframe=agg_df, col_list=col_list)
f2

In [None]:
# Actual vs. predicted scatter plots of the per-cell averages, one panel per crime type.
# Mapping order defines panel order: top-left, top-right, bottom-left, bottom-right.
panel_titles = {
    'burglary': 'Burglary',
    'robbery': 'Robbery',
    'o_crimes': 'Other Thefts',
    'v_crimes': 'Vehicle Crimes',
}
fig, ax = plt.subplots(2, 2)
for panel, (column, title) in zip(ax.flat, panel_titles.items()):
    panel.scatter(agg_df[column], agg_df[column + '_p'])
    panel.set_title(title)
    panel.set(xlabel='Actual', ylabel='Predicted')
plt.show()