In [None]:
import numpy as np
import os
import pandas as pd

In [None]:
# Directory (relative to the current working directory) that receives the
# CSV files written by the generation loop.
score_predictor_dataframe_directory = "Score Predictor DataFrames"

# exist_ok=True keeps this cell safe to re-run and avoids the
# check-then-create race of listing the directory first. If a regular file
# with this name already exists, makedirs raises FileExistsError here instead
# of letting the later to_csv calls fail.
os.makedirs(score_predictor_dataframe_directory, exist_ok=True)

In [None]:
# Operating point selected from the grid-search results. Both values are used
# as row filters and are embedded in the output file names.
nms_threshold = 0.2   # compared against the "IOU Threshold" column
epsilon_value = 300   # compared against the "Epsilon Value" column

In [None]:
# Folder that holds the input CSV files read by the generation loop.
csv_path = "Grid Search DataFrames"

# Model variants to process; each name is part of the input and output file names.
models = [
    "unfrozen",
    "frozen_v1",
    "frozen_v2",
]

# Dataset splits to process for every model.
dataset_types = [
    "training",
    "validation",
    "testing",
]

In [None]:
# Build the score-predictor CSV files for every (model, dataset split) pair.
#
# For each pair this cell:
#   1. reads the cluster CSV and the grid-search score CSV,
#   2. keeps only the rows for the configured epsilon_value / nms_threshold,
#   3. keeps, per "File Name", the score rows with the smallest "Count Difference",
#   4. joins those rows with the cluster rows of the same image, and
#   5. writes the joined frame plus three per-file aggregations (min / max / mean).
for model_name in models:
    print("Generating Dataframe for model:", model_name)

    for dataset_type in dataset_types:
        print("Using dataset:", dataset_type)

        # Read in data. os.path.join replaces the hard-coded "\" separator so
        # the paths resolve on every OS and the f-strings no longer contain
        # the invalid escape sequences "\c" and "\g".
        # "centriod" (sic) is kept: it has to match the input file name.
        cluster_df = pd.read_csv(
            os.path.join(csv_path, f"centriod_info_{model_name}_{dataset_type}.csv")
        )
        scores_df = pd.read_csv(
            os.path.join(csv_path, f"grid_search_{model_name}_{dataset_type}.csv")
        )

        # Filter to the specified DBSCAN search radius and NMS threshold.
        # epsilon_value is an integer in this notebook, so exact comparison is kept.
        cluster_df = cluster_df[cluster_df["Epsilon Value"] == epsilon_value]
        # The threshold is a float: compare with a tolerance so grid values
        # that carry floating-point round-off (e.g. 0.30000000000000004) still match.
        scores_df = scores_df[np.isclose(scores_df["IOU Threshold"], nms_threshold)]

        merge_columns = ["File Name", "Count Difference"]

        # Calculate the minimum "Count Difference" per file. Only that column
        # is aggregated; the other columns are not needed for the lookup.
        # NOTE(review): this is the arithmetic minimum. If "Count Difference"
        # can be negative, the most negative value wins rather than the one
        # closest to zero -- confirm the column stores an absolute difference.
        minimum_differences = (
            scores_df
            .groupby("File Name")["Count Difference"]
            .min()
            .reset_index()
        )

        # Filter the score frame to the best rows only. Ties are all kept,
        # because every row matching the per-file minimum joins.
        # "Unnamed: 0" is presumably the index column written by an earlier
        # to_csv call; errors="ignore" tolerates inputs that do not have it.
        scores_df = (
            scores_df
            .merge(minimum_differences, how="inner", on=merge_columns)
            .drop(columns=["Unnamed: 0"], errors="ignore")
        )

        # Combine cluster and score data, then drop the duplicate join key and
        # the two filter columns, which are constant after the filtering above.
        score_predictor_df = (
            scores_df
            .merge(cluster_df, left_on="File Name", right_on="Image Name")
            .drop(columns=["Image Name", "IOU Threshold", "Epsilon Value"])
        )

        # Save to csv. All four outputs share the same path prefix.
        output_prefix = os.path.join(
            score_predictor_dataframe_directory,
            f"{model_name}_{dataset_type}_ep_{epsilon_value}_nms_{nms_threshold}",
        )
        scores_by_file = score_predictor_df.groupby("File Name")["Score"]

        # Raw data
        score_predictor_df.to_csv(f"{output_prefix}_raw.csv", index=False)

        # One row per file: the row with the lowest "Score"
        score_predictor_df.loc[scores_by_file.idxmin()].to_csv(
            f"{output_prefix}_min.csv", index=False
        )

        # One row per file: the row with the highest "Score"
        score_predictor_df.loc[scores_by_file.idxmax()].to_csv(
            f"{output_prefix}_max.csv", index=False
        )

        # One row per file: the mean of every numeric column.
        # numeric_only=True is required on pandas >= 2.0, where mean() raises
        # TypeError on non-numeric columns instead of silently dropping them.
        (
            score_predictor_df
            .groupby("File Name")
            .mean(numeric_only=True)
            .reset_index()
            .to_csv(f"{output_prefix}_mean.csv", index=False)
        )
