### Data visualisation

Define a visual representation for the experimental measurements of the kernel perceptron implementation, then convert them to TikZ using the `tikzplotlib` library. The only graphs needed are heatmaps and line graphs. Save the resulting .tex files (and the .png images for the heatmaps) in `OUTPUT`.

In [1]:
import sys
sys.path.append("../")

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tikzplotlib

import os
import shutil

from KMeans import RESULTS_LOCATION, REDUCTIONS
from utils import EPOCHS, DEGREES

OUTPUT = "./output"

In [3]:
# tikzplotlib has trouble rendering the tick labels of some plots,
# so they have to be supplied in LaTeX math notation.

def fix(labels):
    """Return the labels as a list of strings, each wrapped in ``$...$``."""
    return list(map("${}$".format, labels))

In [4]:
def heatmap(data, attribute, reduction, max_error, min_error):
    """Draw ``attribute`` as an epochs-by-degree heatmap and export it to TikZ.

    Parameters:
        data: DataFrame with ``epochs`` and ``degree`` columns plus the
            ``attribute`` column, holding exactly one row for every
            (epochs, degree) combination of ``EPOCHS`` x ``DEGREES``.
        attribute: name of the column to plot (e.g. ``"test_error"``).
        reduction: identifier used only to build the output file name.
        max_error: upper bound of the colour scale.
        min_error: lower bound of the colour scale.

    The figure is written to ``{OUTPUT}/{reduction}_{attribute}.tex`` and
    closed afterwards; nothing is returned.
    """
    # plt.cm.get_cmap was removed in matplotlib 3.9; plt.get_cmap is the
    # equivalent call that exists in both old and new releases.
    inferno = plt.get_cmap("inferno").reversed()

    # Order the rows explicitly instead of relying on the CSV layout, so that
    # the reshape yields one row per epoch value and one column per degree.
    # Like `lines`, this assumes EPOCHS and DEGREES are in ascending order.
    ordered = data.sort_values(["epochs", "degree"])
    grid = ordered[attribute].to_numpy().reshape((len(EPOCHS), len(DEGREES)))
    # Flip vertically so the smallest epoch count ends up on the bottom row.
    error = grid[::-1]

    plt.xticks(ticks=np.arange(len(DEGREES)), labels=DEGREES)
    plt.yticks(ticks=np.arange(len(EPOCHS)), labels=EPOCHS[::-1])

    hm = plt.imshow(error, cmap=inferno, vmax=max_error, vmin=min_error)
    # The colorbar takes its colormap from the mappable; a separate cmap
    # argument is ignored when a mappable is supplied, so none is passed.
    plt.colorbar(hm)

    plt.xlabel("Grado del polinomio")
    plt.ylabel("Epoche")

    tikzplotlib.save(f"{OUTPUT}/{reduction}_{attribute}.tex")
    plt.close()

In [5]:
def _finish_line_plot(x_values, x_label, y_label, path):
    """Label the current figure, export it to TikZ at ``path`` and close it."""
    # Ticks go at the x positions the curves were actually plotted at, so the
    # labels stay correct even when the values are not exactly 1..n.
    plt.xticks(ticks=list(x_values), labels=fix(x_values))
    plt.xlabel(x_label)

    plt.ylabel(y_label)
    plt.legend(loc="upper right")

    tikzplotlib.save(path)
    plt.close()


def lines(data, attribute, reduction, max_error, min_error):
    """Export two line charts of ``attribute`` to TikZ.

    The first chart plots ``attribute`` against the number of epochs with one
    curve per polynomial degree; the second plots it against the degree with
    one curve per epoch count.

    Parameters:
        data: DataFrame with ``epochs`` and ``degree`` columns plus the
            ``attribute`` column.
        attribute: name of the column to plot; also used (with underscores
            replaced by spaces and capitalised) as the y-axis label.
        reduction: identifier used only to build the output file names.
        max_error: currently unused; kept so the signature matches `heatmap`.
        min_error: currently unused; kept so the signature matches `heatmap`.

    The figures are written to ``{OUTPUT}/epochs_{reduction}_{attribute}.tex``
    and ``{OUTPUT}/degree_{reduction}_{attribute}.tex``; nothing is returned.
    """
    y_label = attribute.replace("_", " ").capitalize()

    # One curve per degree, error as a function of the number of epochs.
    # Sorting by "epochs" pairs the values with EPOCHS, which is therefore
    # assumed to be in ascending order.
    for d in DEGREES:
        error = data[data["degree"] == d].sort_values("epochs")[attribute]
        plt.plot(EPOCHS, error, "v-", label=f"grado {d}")

    _finish_line_plot(
        EPOCHS, "Epoche", y_label,
        f"{OUTPUT}/epochs_{reduction}_{attribute}.tex",
    )

    # One curve per epoch count, error as a function of the degree.
    # DEGREES is likewise assumed to be in ascending order.
    for e in EPOCHS:
        error = data[data["epochs"] == e].sort_values("degree")[attribute]
        plt.plot(DEGREES, error, "v-", label=f"{e} epoche")

    _finish_line_plot(
        DEGREES, "Grado del polinomio", y_label,
        f"{OUTPUT}/degree_{reduction}_{attribute}.tex",
    )

In [14]:
# Print summary statistics of the "training_time" column for each reduction.
for red in REDUCTIONS:
    csv_path = f".{RESULTS_LOCATION}/{red}-kmmkp.csv"
    tt = pd.read_csv(csv_path)["training_time"]

    lowest, highest = tt.min(), tt.max()
    average, spread = tt.mean(), tt.std()

    report = (
        f"Reduction: {red}\n"
        f"    min: {lowest:.4f},\n"
        f"    max: {highest:.4f},\n"
        f"    mean: {average:.4f},\n"
        f"    std: {spread:.4f}\n"
    )
    print(report)

Reduction: 200
    min: 0.0816,
    max: 0.5751,
    mean: 0.2728,
    std: 0.0993

Reduction: 1000
    min: 0.4595,
    max: 3.4207,
    mean: 1.6032,
    std: 0.6345

Reduction: 1500
    min: 0.5966,
    max: 5.6043,
    mean: 2.5554,
    std: 1.1576



In [95]:
# Start from an empty output directory so files from earlier runs do not linger.
if os.path.exists(OUTPUT):
    shutil.rmtree(OUTPUT)

os.mkdir(OUTPUT)

plt.rcParams['figure.figsize'] = [10, 8]

# Read every result file once; the frames are reused both for the shared
# colour-scale bounds and for the plots themselves.
all_results = {
    red: pd.read_csv(f".{RESULTS_LOCATION}/{red}-kmmkp.csv")
    for red in REDUCTIONS
}

# Take the bounds over all reductions rather than assuming that the largest
# error belongs to the smallest reduction and the smallest error to the
# largest one; this guarantees every heatmap fits the common colour scale.
max_test_error = max(df["test_error"].max() for df in all_results.values())
min_test_error = min(df["test_error"].min() for df in all_results.values())

max_training_error = max(df["training_error"].max() for df in all_results.values())
min_training_error = min(df["training_error"].min() for df in all_results.values())

for red, results in all_results.items():
    heatmap(results, "test_error", red, max_test_error, min_test_error)
    heatmap(results, "training_error", red, max_training_error, min_training_error)

    lines(results, "test_error", red, max_test_error, min_test_error)
    lines(results, "training_error", red, max_training_error, min_training_error)