In [None]:
# Supporting Libraries
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import pickle
from torchvision import ops

# Models
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import RandomForestRegressor
from typing import Dict, List, Tuple

import sys
# Make the sibling "RCNN Notebooks" folder importable (rcnn_utils lives there).
# A forward slash is used because "\R" is an invalid escape sequence in a normal
# string literal and a backslash separator only works on Windows; "/" works on
# every platform and matches the other relative paths in this notebook.
sys.path.append("../RCNN Notebooks")
from rcnn_utils import decode_prediction, write_to_latex

In [None]:
import warnings
warnings.filterwarnings("ignore")

In [None]:
# Parameters for Score predictor
# These values select which pre-generated artifacts the cells below load: they
# are interpolated into the prediction pickle and Score Predictor CSV file names.
model_name = "unfrozen"  # prefix of the prediction pickle and CSV file names
epsilon_value = 300  # appears as `ep_{epsilon_value}` in the CSV file names
nms_threshold = 0.2  # appears as `nms_{nms_threshold}` in the CSV file names; also passed to decode_prediction
data_reduction_type = "mean"  # trailing component of the CSV file names

In [None]:
# Load RCNN predictions
# NOTE: pickle.load can execute arbitrary code while deserialising, so these
# files must only ever come from a trusted source.

def _read_pickle(path):
    """Return the object unpickled from the file at `path`."""
    with open(path, "rb") as handle:
        return pickle.load(handle)


training_prediction_path = f"../../Generated Data/{model_name}_training_predictions.pkl"
validation_prediction_path = f"../../Generated Data/{model_name}_validation_predictions.pkl"
testing_prediction_path = f"../../Generated Data/{model_name}_testing_predictions.pkl"

training_predictions = _read_pickle(training_prediction_path)
validation_predictions = _read_pickle(validation_prediction_path)
testing_predictions = _read_pickle(testing_prediction_path)

In [None]:
# Load Score Predictor DataFrames

def _score_predictor_csv(split):
    """Build the CSV path for one dataset split from the notebook parameters."""
    return f"Score Predictor DataFrames/{model_name}_{split}_ep_{epsilon_value}_nms_{nms_threshold}_{data_reduction_type}.csv"


training_df = pd.read_csv(_score_predictor_csv("training"))
validation_df = pd.read_csv(_score_predictor_csv("validation"))
testing_df = pd.read_csv(_score_predictor_csv("testing"))

In [None]:
# Train Score predictors

# Feature columns fed to every score predictor, and the target they regress
input_columns = ['Sub-Images with Seals', 'Number of Clusters','Largest Cluster Size', 'Smallest Cluster Size']
x_train = training_df[input_columns]
y_train = training_df["Score"]

# Candidate regressors (the random forest is seeded so re-runs are repeatable)
knn = KNeighborsRegressor()
linear_regression = LinearRegression()
random_forest = RandomForestRegressor(random_state=0)

# Display name -> estimator; the two parallel lists used later are derived from it
score_predictors = {
    "KNN Regressor": knn,
    "Linear Regression": linear_regression,
    "Random Forest Regression": random_forest,
}
model_names = list(score_predictors.keys())
models = list(score_predictors.values())

# Fit each candidate on the same training features / target
for model in models:
    model.fit(x_train, y_train)

In [None]:
def evaluate_score_predictor_counts(df:pd.DataFrame, predictions:Dict, model) -> Tuple[float, float, float, float]:
    """Evaluates a score predictor by the seal counts its predicted scores produce

       For every image the model predicts a score from the notebook-level
       ``input_columns``. That score is passed, together with the notebook-level
       ``nms_threshold``, to ``decode_prediction`` for each of the image's
       sub-image predictions; the number of boxes returned is summed into the
       image's predicted count, which is then compared with "Actual Count".

       Calculated Metrics:
       - Mean Absolute Percent Error (returned as a fraction, not multiplied by 100)
       - Mean Absolute Error
       - Error per 10 seals (10 * mean absolute error / mean actual count)
       - Total miscounted Seals (sum of the absolute count differences)

    Args:
        df (pd.DataFrame): Score predictor dataframe for a given dataset. Must contain
            "File Name", "Actual Count" and every column in ``input_columns``.
            It is not modified.
        predictions (Dict): RCNN predictions for a specified dataset, keyed by file name;
            each value is iterated as that image's sub-image predictions
        model (SciKit Learn Model): Fitted score predictor model being evaluated
            (only its ``predict`` method is used)

    Returns:
        Tuple[float, float, float, float]: (mean absolute percent error, mean absolute error,
            error per 10 seals, total miscounted seals)

    Raises:
        KeyError: If a file name in ``df`` has no entry in ``predictions``.
    """
    # Predict Scores.
    # Take an explicit copy: adding columns to a bare column-slice of `df`
    # triggers pandas' SettingWithCopyWarning and leaves it ambiguous whether
    # the caller's frame is affected.
    evaluation_df = df[["File Name", "Actual Count"]].copy()
    evaluation_df["Predicted Score"] = model.predict(df[input_columns])

    predicted_counts = []

    # Get Predicted Count for Each Image (iterate the two columns directly
    # instead of materialising a mixed-dtype row per image with iloc)
    for image_name, predicted_score in zip(evaluation_df["File Name"], evaluation_df["Predicted Score"]):
        image_count = 0

        # An image's count is the number of boxes kept across all of its sub-images
        for sub_image_prediction in predictions[image_name]:
            boxes, _scores, _labels = decode_prediction(sub_image_prediction, predicted_score, nms_threshold, use_numpy=True)
            image_count += len(boxes)

        predicted_counts.append(image_count)

    evaluation_df["Predicted Count"] = predicted_counts
    absolute_difference = (evaluation_df["Actual Count"] - evaluation_df["Predicted Count"]).abs()

    # Metric Calculation
    # NOTE(review): an image with "Actual Count" == 0 makes the percent-error term a
    # division by zero (inf, or NaN when the prediction is also 0) - confirm whether
    # such images can occur in the datasets.
    mean_absolute_percent_error = (absolute_difference / evaluation_df["Actual Count"]).mean()
    mean_absolute_error = absolute_difference.mean()
    error_per_ten_seals = (mean_absolute_error * 10) / evaluation_df["Actual Count"].mean()
    total_miscounted_seals = absolute_difference.sum()

    return mean_absolute_percent_error, mean_absolute_error, error_per_ten_seals, total_miscounted_seals


def compare_score_predictors(models:List, model_names:List[str], df:pd.DataFrame, predictions:Dict) -> pd.DataFrame:
    """Generates a dataframe comparing score predictor performance

    Each model is evaluated with ``evaluate_score_predictor_counts`` on the same
    dataframe / predictions, and its metrics become one row of the result.

    Args:
        models (List): List of Scikit Learn models
        model_names (List[str]): Names of the scikit learn models, in the same order as ``models``
        df (pd.DataFrame): Score predictor dataframe for specific dataset
        predictions (Dict): RCNN predictions for specific dataset

    Returns:
        pd.DataFrame: One row per model with the columns "Model Name",
            "Mean Absolute Percent Error", "Mean Absolute Error",
            "Error per 10 Seals" and "Total Miscounted Seals"

    Raises:
        ValueError: If ``models`` and ``model_names`` have different lengths.
    """
    # Fail fast on a mismatch instead of silently dropping names or hitting an
    # IndexError part-way through the (potentially slow) evaluation loop.
    if len(models) != len(model_names):
        raise ValueError(
            f"models and model_names must have the same length "
            f"(got {len(models)} and {len(model_names)})"
        )

    data_frame_dict = {
        "Model Name": [],
        "Mean Absolute Percent Error": [],
        "Mean Absolute Error": [],
        "Error per 10 Seals": [],
        "Total Miscounted Seals": [],
    }

    # Iterate through each score predictor model; `predictor_name` is used so the
    # loop does not shadow the notebook-level `model_name` configuration value.
    for model, predictor_name in zip(models, model_names):
        # Calculate metrics
        mean_absolute_percent_error, mean_absolute_error, error_per_ten_seals, total_miscounted_seals = evaluate_score_predictor_counts(df, predictions, model)

        # Save Metrics
        data_frame_dict["Model Name"].append(predictor_name)
        data_frame_dict["Mean Absolute Percent Error"].append(mean_absolute_percent_error)
        data_frame_dict["Mean Absolute Error"].append(mean_absolute_error)
        data_frame_dict["Error per 10 Seals"].append(error_per_ten_seals)
        data_frame_dict["Total Miscounted Seals"].append(total_miscounted_seals)

    return pd.DataFrame(data_frame_dict)


### Training

In [None]:
# Score every predictor on the training split (the same data the predictors were fitted on)
training_results = compare_score_predictors(models, model_names, training_df, training_predictions)
training_results

### Validation

In [None]:
# Score every predictor on the validation split
validation_results = compare_score_predictors(models, model_names, validation_df, validation_predictions)
validation_results

### Testing

In [None]:
# Score every predictor on the testing split
testing_results = compare_score_predictors(models, model_names, testing_df, testing_predictions)
testing_results

In [None]:
# Save tables to LaTeX (in case you want to put the table results in a paper)

# write_to_latex(training_results, f"{model_name}_score_predictor_training_ep_{epsilon_value}_nms_{nms_threshold}")
# write_to_latex(validation_results, f"{model_name}_score_predictor_validation_ep_{epsilon_value}_nms_{nms_threshold}")
# write_to_latex(testing_results, f"{model_name}_score_predictor_testing_ep_{epsilon_value}_nms_{nms_threshold}")