In [30]:
# Supporting Libraries
import os
import pickle

import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from torchvision import ops

# Models
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor

import sys

# Make the sibling "RCNN Notebooks" folder importable so rcnn_utils resolves.
# The path is assembled with os.path.join instead of a non-raw literal: the
# original string contained the invalid escape "\R" and a hard-coded Windows
# separator. On Windows the resulting string is unchanged.
sys.path.append(os.path.join("..", "RCNN Notebooks"))
from rcnn_utils import decode_prediction, write_to_latex

In [31]:
import warnings
warnings.filterwarnings("ignore")

In [32]:
# Experiment configuration. These values select which prediction pickles and
# score-predictor CSV files are loaded below, and tag the LaTeX tables that
# the final cell writes.
model_name = "unfrozen"  # prefix of the prediction pickle / CSV / LaTeX file names
epsilon_value = 300  # embedded in file names as "ep_<value>"; presumably a clustering epsilon - TODO confirm
nms_threshold = 0.2  # passed to decode_prediction and embedded in file names as "nms_<value>"
data_reduction_type = "mean"  # last component of the CSV file names; presumably the feature aggregation used - TODO confirm

In [33]:
def _prediction_path(split):
    """Build the path of the pickled predictions for ``split`` of the configured model."""
    # os.path.join uses the separator of the running OS, so the notebook is no
    # longer tied to Windows-style backslash paths. On Windows the result is
    # identical to the previous hard-coded string.
    return os.path.join("..", "MetaData", "{}_{}_predictions.pkl".format(model_name, split))


def _load_pickle(path):
    """Return the object stored in the pickle file at ``path``."""
    # SECURITY: unpickling can execute arbitrary code; only load files that
    # this project produced itself.
    with open(path, "rb") as fp:
        return pickle.load(fp)


training_prediction_path = _prediction_path("training")
validation_prediction_path = _prediction_path("validation")
testing_prediction_path = _prediction_path("testing")

training_predictions = _load_pickle(training_prediction_path)
validation_predictions = _load_pickle(validation_prediction_path)
testing_predictions = _load_pickle(testing_prediction_path)

In [34]:
def _score_frame_path(split):
    """Build the path of the score-predictor CSV for ``split`` under the current configuration."""
    file_name = "{}_{}_ep_{}_nms_{}_{}.csv".format(
        model_name, split, epsilon_value, nms_threshold, data_reduction_type
    )
    # Joined with os.path.join so the separator matches the running OS; on
    # Windows this yields the same string as the previous backslash literal.
    return os.path.join("score_predictor_data_frames", file_name)


training_df = pd.read_csv(_score_frame_path("training"))
validation_df = pd.read_csv(_score_frame_path("validation"))
testing_df = pd.read_csv(_score_frame_path("testing"))

In [35]:
# Feature columns read from the score-predictor frames; they form the
# regressors' input both when fitting and when predicting.
input_columns = [
    "Sub-Images with Seals",
    "Number of Clusters",
    "Largest Cluster Size",
    "Smallest Cluster Size",
]

In [36]:
# Split the training frame into the regression target ("Score") and the
# feature matrix. The stray bare `training_df` expression that used to open
# this cell was removed: it was not the cell's last expression, so it
# displayed nothing and had no effect.
y_train = training_df["Score"]
x_train = training_df[input_columns]

In [37]:
# Candidate score predictors, each paired with the label used in the result
# tables. The random forest is given a fixed random_state.
labelled_regressors = [
    ("KNN Regressor", KNeighborsRegressor()),
    ("Linear Regression", LinearRegression()),
    ("Random Forest Regression", RandomForestRegressor(random_state=0)),
]

# Labels and estimators are exposed as parallel, index-aligned lists.
model_names = [label for label, _ in labelled_regressors]
models = [regressor for _, regressor in labelled_regressors]
knn, linear_regression, random_forest = models

# Fit every candidate on the same training features and target.
for model in models:
    model.fit(x_train, y_train)

In [38]:
def evaluate_score_predictor_counts(df, predictions, model):
    """Measure how well one regressor's predicted scores reproduce the actual counts.

    For every image in ``df`` the regressor predicts a score from the
    notebook-level ``input_columns`` features. That score and the
    notebook-level ``nms_threshold`` are handed to ``decode_prediction`` for
    each sub-image prediction of the image, and the number of boxes returned
    is summed into the image's predicted count.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain "File Name", "Actual Count" and every column named in
        ``input_columns``. The frame is not modified.
    predictions : mapping
        Maps each "File Name" value to an iterable of sub-image predictions,
        in whatever format ``decode_prediction`` accepts.
    model : object
        Fitted estimator exposing ``predict``.

    Returns
    -------
    tuple
        ``(mean_absolute_percent_error, mean_absolute_error,
        error_per_ten_seals, total_miscounted_seals)``.

    Raises
    ------
    KeyError
        If ``predictions`` is a dict and lacks an entry for a file name in ``df``.
    """
    # Work on an explicit copy: adding columns to the bare column selection is
    # what makes pandas emit SettingWithCopyWarning.
    evaluation_df = df[["File Name", "Actual Count"]].copy()
    evaluation_df["Predicted Score"] = model.predict(df[input_columns])

    # Count the boxes decode_prediction returns across all sub-images of each image.
    predicted_counts = []
    for image_name, predicted_score in zip(evaluation_df["File Name"], evaluation_df["Predicted Score"]):
        image_count = 0
        for sub_image_prediction in predictions[image_name]:
            boxes, _scores, _labels = decode_prediction(
                sub_image_prediction, predicted_score, nms_threshold, use_numpy=True
            )
            image_count += len(boxes)
        predicted_counts.append(image_count)

    evaluation_df["Predicted Count"] = predicted_counts
    actual_counts = evaluation_df["Actual Count"]
    absolute_difference = (actual_counts - evaluation_df["Predicted Count"]).abs()

    # Metric calculation.
    # NOTE(review): an image whose actual count is 0 yields an inf or NaN
    # ratio in the percent error; behavior is left unchanged here.
    mean_absolute_percent_error = (absolute_difference / actual_counts).mean()
    mean_absolute_error = absolute_difference.mean()
    error_per_ten_seals = (mean_absolute_error * 10) / actual_counts.mean()
    total_miscounted_seals = absolute_difference.sum()

    return mean_absolute_percent_error, mean_absolute_error, error_per_ten_seals, total_miscounted_seals

In [39]:
def compare_score_predictors(models, model_names, df, predictions):
    """Tabulate the count-error metrics of several score predictors.

    ``models`` and ``model_names`` are index-aligned: the label at position
    ``i`` names the estimator at position ``i``. Each estimator is scored with
    ``evaluate_score_predictor_counts`` on ``df`` and ``predictions``.

    Returns a DataFrame with one row per estimator and the columns
    "Model Name", "Mean Absolute Percent Error", "Mean Absolute Error",
    "Error per 10 Seals" and "Total Miscounted Seals".
    """
    column_order = (
        "Model Name",
        "Mean Absolute Percent Error",
        "Mean Absolute Error",
        "Error per 10 Seals",
        "Total Miscounted Seals",
    )
    collected = {column: [] for column in column_order}

    for position in range(len(models)):
        regressor = models[position]
        label = model_names[position]

        mape, mae, per_ten, miscounted = evaluate_score_predictor_counts(df, predictions, regressor)

        # Append this estimator's label and metrics to their respective columns.
        for column, value in zip(column_order, (label, mape, mae, per_ten, miscounted)):
            collected[column].append(value)

    return pd.DataFrame(collected)


### Training

In [40]:
# Score every regressor on the training split; the trailing bare name makes
# the notebook render the resulting table.
training_results = compare_score_predictors(
    models=models,
    model_names=model_names,
    df=training_df,
    predictions=training_predictions,
)
training_results

Unnamed: 0,Model Name,Mean Absolute Percent Error,Mean Absolute Error,Error per 10 Seals,Total Miscounted Seals
0,KNN Regressor,0.689395,78.58,8.262881,3929.0
1,Linear Regression,0.72005,79.54,8.363828,3977.0
2,Random Forest Regression,0.633953,78.26,8.229232,3913.0


### Validation

In [41]:
# Score every regressor on the validation split; the trailing bare name makes
# the notebook render the resulting table.
validation_results = compare_score_predictors(
    models=models,
    model_names=model_names,
    df=validation_df,
    predictions=validation_predictions,
)
validation_results

Unnamed: 0,Model Name,Mean Absolute Percent Error,Mean Absolute Error,Error per 10 Seals,Total Miscounted Seals
0,KNN Regressor,0.669981,82.625,7.15368,1322.0
1,Linear Regression,0.685542,83.3125,7.213203,1333.0
2,Random Forest Regression,0.669981,82.625,7.15368,1322.0


### Testing

In [42]:
# Score every regressor on the testing split; the trailing bare name makes
# the notebook render the resulting table.
testing_results = compare_score_predictors(
    models=models,
    model_names=model_names,
    df=testing_df,
    predictions=testing_predictions,
)
testing_results

Unnamed: 0,Model Name,Mean Absolute Percent Error,Mean Absolute Error,Error per 10 Seals,Total Miscounted Seals
0,KNN Regressor,0.561938,18.846154,6.25,245.0
1,Linear Regression,0.573502,19.230769,6.377551,250.0
2,Random Forest Regression,0.557542,18.846154,6.25,245.0


In [43]:
# Export each split's comparison table through write_to_latex. One shared name
# template replaces three pasted calls, so the splits cannot drift apart; the
# generated names are the same as before.
for _split, _results in (
    ("training", training_results),
    ("validation", validation_results),
    ("testing", testing_results),
):
    write_to_latex(
        _results,
        f"{model_name}_score_predictor_{_split}_ep_{epsilon_value}_nms_{nms_threshold}",
    )