In [None]:
import pandas as pd
import numpy as np

from pathlib import Path

from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import KFold

In [None]:
# Load the per-dataset accuracy table and rank the datasets by mean accuracy.
accuracies_file: Path = Path("../results/cnn_hog_accuracies.csv").resolve()

# "Mean Accuracy" is the row-wise mean of every column from the third onward
# (presumably one accuracy column per run/model -- TODO confirm against the CSV).
accuracies_df: pd.DataFrame = pd.read_csv(accuracies_file).assign(
    **{"Mean Accuracy": lambda frame: frame.iloc[:, 2:].mean(axis=1)}
)

# Average the row means per dataset, best dataset first.
dataset_mean_series: pd.Series = (
    accuracies_df
    .groupby("Datasets")["Mean Accuracy"]
    .mean()
    .sort_values(ascending=False)
)

# Display the six best-scoring datasets.
dataset_mean_series.head(6)

In [None]:
def _knn_cv_accuracies(
    features: pd.DataFrame,
    labels: pd.Series,
    k_values: list[int],
    cv: KFold,
) -> list[float]:
    """Return the mean cross-validated accuracy of a k-NN classifier for each k.

    Parameters
    ----------
    features : feature matrix passed to ``cross_val_score`` as ``X``.
    labels : target values passed to ``cross_val_score`` as ``y``.
    k_values : values of ``n_neighbors`` to evaluate, in order.
    cv : cross-validation splitter shared by every evaluation.

    Returns
    -------
    One mean accuracy (plain Python ``float``) per entry of ``k_values``,
    in the same order.
    """
    mean_accuracies: list[float] = []

    for k in k_values:
        knn: KNeighborsClassifier = KNeighborsClassifier(n_neighbors=k, metric="euclidean")
        scores = cross_val_score(knn, features, labels, scoring="accuracy", cv=cv)
        # np.mean returns a NumPy scalar; cast so the list really holds floats.
        mean_accuracies.append(float(np.mean(scores)))

    return mean_accuracies


# Evaluation settings: these do not depend on the dataset, so build them once.
N_TOP_DATASETS: int = 6   # how many of the best-ranked datasets to evaluate
N_SPLITS: int = 10        # number of cross-validation folds
CV_SEED: int = 1          # fixed seed so the shuffled folds are reproducible

k_list: list[int] = list(range(1, 11))
# An integer random_state yields the same shuffled splits on every use, so a
# single splitter can be shared across all datasets and all values of k.
kfold: KFold = KFold(n_splits=N_SPLITS, random_state=CV_SEED, shuffle=True)

# Maps each PCA output file name to its list of mean accuracies (one per k).
accuracies_file_mapping: dict[str, list[float]] = dict()

for filename in dataset_mean_series.head(N_TOP_DATASETS).index:
    file: Path = Path(f"../outputs/pca_{filename}").resolve()

    df: pd.DataFrame = pd.read_csv(file.as_posix())

    # Features are selected by position (fourth column onward) while the label
    # is selected by name.
    # NOTE(review): this assumes `animal` is one of the first three columns;
    # otherwise the label would leak into the features -- confirm the schema.
    features_df: pd.DataFrame = df.iloc[:, 3:]
    animal_series: pd.Series = df.animal

    accuracies_file_mapping[file.name] = _knn_cv_accuracies(
        features_df, animal_series, k_list, kfold
    )


In [None]:
# Re-key the results in ascending file-name order so the report is stable.
sorted_mapping: dict[str, list[float]] = {
    name: accuracies_file_mapping[name]
    for name in sorted(accuracies_file_mapping)
}

# Print one report per dataset: its name, then the accuracies with two
# decimals, each followed by ";", then a blank line.
for filename, accuracies in sorted_mapping.items():
    formatted_accuracies: str = "".join(f"{value:.2f};" for value in accuracies)

    print(f"Dataset: {filename}")
    print(f"Accuracies: {formatted_accuracies}\n")