In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn

In [None]:
# Root directory of the project; every log file read and every figure written
# below is located relative to this path.
# NOTE(review): this is an absolute, machine-specific location (looks like a
# OneDrive folder under a /mnt/c mount) — consider making it configurable.
path = "/mnt/c/Users/olivi/OneDrive - Amherst College/6 Summer 2025/surface-detection"

##### Confusion heatmap

In [None]:
# confusion matrix heatmap, normalized over true (rows)
def confusion_heatmap(model="ver6", fsetname="train", setname="Training", dataset=1):
    """Plot and save a row-normalized confusion-matrix heatmap for one split.

    Reads ``{path}/logs/set-results/set{dataset}_{fsetname}_concat.txt`` as a
    CSV with "True" and "Predicted" columns, draws the confusion matrix
    normalized over the true labels, and saves the figure to
    ``{path}/figures/set-results/set{dataset}_confusion_{fsetname}.png``.

    Parameters
    ----------
    model : str
        Model name; only used (capitalized) in the figure title.
    fsetname : str
        Split name as it appears in the input/output file names.
    setname : str
        Human-readable split name; only used (capitalized) in the title.
    dataset : int
        Dataset number used in the file names and in the title.
    """
    # Named `results` rather than `set` so the builtin `set` is not shadowed.
    results = pd.read_csv(f"{path}/logs/set-results/set{dataset}_{fsetname}_concat.txt")
    disp = sklearn.metrics.ConfusionMatrixDisplay.from_predictions(
        results["True"],
        results["Predicted"],
        cmap="BuPu",
        display_labels=["A", "B", "C", "D", "E"],
        normalize="true",
    )
    # Use the axes/figure returned by scikit-learn instead of relying on
    # whichever axes pyplot currently considers "active".
    disp.ax_.set_xlabel("Predicted location")
    disp.ax_.set_ylabel("True location")
    disp.ax_.set_title(f"{model.capitalize()} Predictions on {setname.capitalize()} Set (Set {dataset})")
    disp.figure_.savefig(f"{path}/figures/set-results/set{dataset}_confusion_{fsetname}.png")

In [None]:
# overall accuracy (correct predictions / all predictions) for one split
def accuracy(model="ver6", fsetname="train", setname="Training", dataset=1):
    """Print the overall accuracy for one dataset split.

    Reads ``{path}/logs/set-results/set{dataset}_{fsetname}_concat.txt`` as a
    CSV with "True" and "Predicted" columns and prints the fraction of rows
    whose prediction matches the true label. Nothing is returned.

    `model` and `setname` mirror the signature of confusion_heatmap() but are
    not used here.
    """
    predictions = pd.read_csv(f"{path}/logs/set-results/set{dataset}_{fsetname}_concat.txt")
    matrix = sklearn.metrics.confusion_matrix(predictions["True"], predictions["Predicted"])
    n_correct = matrix.trace()  # diagonal entries are the correctly classified samples
    n_total = matrix.sum()
    print(n_correct / n_total)

In [None]:
# Print overall accuracy on the train and test splits of one dataset.
# The dataset number is deliberately not called `set`: assigning to that name
# at notebook scope would replace the builtin for the rest of the kernel session.
dataset_id = 9
model = ""
accuracy(model, "train", "training", dataset_id)
# confusion_heatmap(model, "val", "validation", dataset_id)
accuracy(model, "test", "test", dataset_id)

In [None]:
# Draw and save confusion heatmaps for the train and test splits of one dataset.
# The dataset number is deliberately not called `set`: assigning to that name
# at notebook scope would replace the builtin for the rest of the kernel session.
dataset_id = 9
model = "basic"
confusion_heatmap(model, "train", "training", dataset_id)
# confusion_heatmap(model, "val", "validation", dataset_id)
confusion_heatmap(model, "test", "test", dataset_id)

##### Accuracy, Precision and Recall Comparisons

In [None]:
# Model versions compared by comparison(); one line per version is drawn.
versions = ["ver2", "ver3", "ver4", "ver5a", "ver5b", "ver6", "ver7"]
# Data splits read by comparison(), in left-to-right subplot order.
# NOTE: `sets` is rebound in later cells to dataset names ("set1", ...), so
# re-run this cell before calling comparison() again.
sets = ["train", "val", "test"]
# Metric names understood by comparison(); anything other than "precision" or
# "recall" is treated as accuracy there.
metrics = ["accuracy", "precision", "recall"]
# Trial ids; comparison() titles trial 1 "Even" and any other trial "U5 Separate".
trials = [1, 2]

In [None]:
def comparison(metric="accuracy", trial=1):
    """Plot one metric per location for every model version and save the figure.

    For each model in the notebook-level `versions` list and each split in the
    notebook-level `sets` list, reads
    ``{path}/logs/nn-results/{model}_accuracy_{split}_{trial}.txt`` (a CSV with
    "True" and "Predicted" columns), builds the confusion matrix and derives
    the requested metric. The first three splits are drawn on three
    side-by-side axes that share the y axis, and the figure is saved to
    ``{path}/figures/nn-results/{metric}_{trial}.png``.

    Parameters
    ----------
    metric : str
        "precision" (TP / (TP + FP), per location), "recall"
        (TP / (TP + FN), per location); any other value plots overall
        accuracy, drawn as a flat line across the locations.
    trial : int
        Trial id used in the file names. Trial 1 is titled "Even", every
        other value "U5 Separate".
    """
    x = ["A", "B", "C", "D", "E"]
    n_classes = len(x)

    # Both values are loop-invariant. Resolving the title label up front also
    # keeps it defined when no model ends up being plotted.
    trialname = "Even" if trial == 1 else "U5 Separate"
    colors = plt.get_cmap('Set2').colors

    fig, axes = plt.subplots(1, 3, figsize=(12, 4), sharey=True)
    ax1, ax2, ax3 = axes
    for k, model in enumerate(versions):
        # ver2 is excluded from trial 1 (presumably no results exist for it — TODO confirm).
        if model == "ver2" and trial == 1:
            continue

        y = []
        for split in sets:
            guesses = pd.read_csv(f"{path}/logs/nn-results/{model}_accuracy_{split}_{trial}.txt")
            confusion = sklearn.metrics.confusion_matrix(guesses["True"], guesses["Predicted"])
            hits = np.diag(confusion)[:n_classes]
            if metric == "precision":
                # precision = TP / (TP + FP): diagonal over column totals
                y.append(hits / confusion.sum(axis=0)[:n_classes])
            elif metric == "recall":
                # recall = TP / (TP + FN): diagonal over row totals
                y.append(hits / confusion.sum(axis=1)[:n_classes])
            else:
                # accuracy = true predictions / all predictions (same value for every location)
                y.append(np.full(n_classes, np.trace(confusion) / np.sum(confusion)))

        # The colour follows the model's position in `versions`, so a skipped
        # model does not shift the others; wrap around if the palette is shorter.
        color = colors[k % len(colors)]
        for ax, values in zip(axes, y):
            ax.plot(x, values, label=model, color=color)

    ax1.set_ylabel(metric.capitalize())
    ax2.set_xlabel("Location")
    ax2.set_title(f"{metric.capitalize()} on Training, Validation, and Test Sets ({trialname})")
    ax3.legend(loc='center right', bbox_to_anchor=(1.34, 0.5))
    fig.tight_layout()
    fig.savefig(f"{path}/figures/nn-results/{metric}_{trial}.png")

In [None]:
# Render and save every metric/trial combination, metric by metric.
for metric_name in ("accuracy", "recall", "precision"):
    for trial_id in (1, 2):
        comparison(metric_name, trial_id)

##### Recall vs. Class Counts

In [None]:
# Number of items in each class, one entry per class.
# NOTE(review): the same counts are used as x values for all three splits and
# presumably describe set 1 — confirm they match the data behind the "_2" logs.
s1_counts = [729, 660, 747, 721, 675]
versions = ["ver2", "ver3", "ver4", "ver5a", "ver5b", "ver6", "ver7"]
sets = ["train", "val", "test"]
n_classes = len(s1_counts)

# Sum the per-class recall over all model versions. With normalize="true" each
# row of the confusion matrix is divided by its row total, so the diagonal
# already holds recall = TP / (TP + FN), not raw true-positive counts.
recall_sums = np.zeros((len(sets), n_classes))
for version in versions:  # not named `model`, which other cells set at notebook scope
    for row, split in enumerate(sets):
        guesses = pd.read_csv(f"{path}/logs/nn-results/{version}_accuracy_{split}_2.txt")
        confusion = sklearn.metrics.confusion_matrix(guesses["True"], guesses["Predicted"], normalize="true")
        recall_sums[row] += np.diag(confusion)[:n_classes]

# Mean recall per split (rows) and class (columns), averaged over the versions.
# The name `tps` is kept from the original cell even though the values are recalls.
tps = (recall_sums / len(versions)).tolist()

# plot
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(12, 4))
panel_titles = ("Training Set", "Validation Set", "Test Set")
for ax, split_recall, title in zip((ax1, ax2, ax3), tps, panel_titles):
    ax.scatter(s1_counts, split_recall)
    ax.set_title(title)

# labels
ax1.set_ylabel("Recall")
ax2.set_xlabel("Items in Class")

##### Violin plot: accuracy vs. dataset

In [None]:
# Dataset names read by violin2(), which draws one violin per entry.
# NOTE: this rebinds `sets`, which comparison() reads as the split names
# ("train", "val", "test") — re-run that config cell before calling comparison() again.
sets = ["set1", "set2", "set3", "set4", "set5", "set6"]
# Splits evaluated by violin1(), which draws one violin per entry.
datasets = ["train", "test"]

In [None]:
def violin1(set, n_trials=10):
    """Violin plot of per-trial accuracy for one dataset, one violin per split.

    For every split in the notebook-level `datasets` list and every trial
    ``1..n_trials``, reads ``{path}/logs/set-results/{set}_{split}_{trial}.txt``
    (a CSV with "True" and "Predicted" columns) and computes overall accuracy.
    The figure is saved to ``{path}/figures/set-results/{set}_violin.png`` and
    then shown.

    Parameters
    ----------
    set : str
        Dataset name as used in the file names (e.g. "set9"). The parameter
        name shadows the builtin inside this function; it is kept so existing
        callers are unaffected.
    n_trials : int
        Number of trial files per split (default 10).
    """
    # One row per trial, one column per split. The width follows `datasets`
    # so the array cannot be too narrow for the loop below.
    arr = np.zeros((n_trials, len(datasets)))
    for col, split in enumerate(datasets):
        for trial in range(n_trials):
            preds = pd.read_csv(f"{path}/logs/set-results/{set}_{split}_{trial+1}.txt")
            confusion = sklearn.metrics.confusion_matrix(preds["True"], preds["Predicted"])
            # accuracy = correctly classified (diagonal) / all samples
            arr[trial, col] = np.trace(confusion) / np.sum(confusion)
    plt.violinplot(arr, showmedians=True)
    plt.title(f"Accuracy over {n_trials} Trials ({set.capitalize()})")
    plt.ylabel("Accuracy")
    plt.xlabel("Dataset (Train, Test = 1, 2)")
    # Save before show(): showing the figure may leave nothing to save afterwards.
    plt.savefig(f"{path}/figures/set-results/{set}_violin.png")
    plt.show()

In [None]:
def violin2(n_trials=10):
    """Side-by-side violin plots of per-trial accuracy for several datasets.

    For every dataset name in the notebook-level `sets` list and every trial
    ``1..n_trials``, reads ``{path}/logs/set-results/{name}_train_{trial}.txt``
    and ``{path}/logs/set-results/{name}_test_{trial}.txt`` (CSVs with "True"
    and "Predicted" columns) and computes overall accuracy. The left panel
    shows the training split and the right panel the test split, with one
    violin per dataset and a shared y axis. The figure is shown, not saved.

    Parameters
    ----------
    n_trials : int
        Number of trial files per dataset and split (default 10).
    """
    panels = (("train", "Training Set"), ("test", "Test Set"))

    # Read everything before creating the figure, so a missing file does not
    # leave an empty figure behind.
    accuracies = {}
    for split, _ in panels:
        acc = np.zeros((n_trials, len(sets)))  # rows: trials, columns: datasets
        for col, name in enumerate(sets):
            for trial in range(n_trials):
                preds = pd.read_csv(f"{path}/logs/set-results/{name}_{split}_{trial+1}.txt")
                confusion = sklearn.metrics.confusion_matrix(preds["True"], preds["Predicted"])
                # accuracy = correctly classified (diagonal) / all samples
                acc[trial, col] = np.trace(confusion) / np.sum(confusion)
        accuracies[split] = acc

    fig, axes = plt.subplots(1, 2, figsize=(8, 4), sharey=True)
    for ax, (split, title) in zip(axes, panels):
        ax.violinplot(accuracies[split], showmedians=True)
        ax.set_title(title, fontsize=10)
        ax.set_xlabel("Dataset Version")
    axes[0].set_ylabel("Accuracy")

    # NOTE(review): this title is hard-coded and only describes a two-entry
    # `sets`; update it when comparing a different selection.
    plt.suptitle("2 Channels (1) vs. 6 Channels (2)")

    plt.show()

In [None]:
# Accuracy distribution over the trial files of set9, one violin per split in `datasets`.
violin1("set9")

In [None]:
# Choose which datasets violin2() compares; it reads the notebook-level `sets`.
# NOTE(review): violin2() hard-codes the suptitle "2 Channels (1) vs. 6 Channels (2)",
# which presumably refers to this two-entry selection — confirm before switching
# to one of the commented alternatives.
# sets = ["set1", "set4", "set5", "set7"]
sets = ["set1", "set9"]
# sets = ["set1", "set7", "set9"]
violin2()