### Data visualisation

Define a visual representation for the experimental measurements of the kernel perceptron implementation, then convert the plots to TikZ using the `tikzplotlib` library. The only graphs needed are heatmaps and line graphs. Save the resulting .tex files (and the .png files for the heatmaps) in `OUTPUT`.

In [10]:
import sys
# Add the parent directory to the module search path; presumably this is where
# the project modules imported below (KMeans, utils) live — verify against the repo layout.
sys.path.append("../")

In [11]:
import matplotlib.pyplot as plt
from matplotlib.ticker import StrMethodFormatter

import numpy as np
import pandas as pd
import tikzplotlib

import os
import shutil

from KMeans import RESULTS_LOCATION, REDUCTIONS
from utils import EPOCHS, DEGREES

# Directory that receives every file exported by the plotting functions below.
OUTPUT = "./output"

In [12]:
def fix(labels):
    """Wrap every label in ``$...$`` so it is written in LaTeX math notation.

    tikzplotlib has issues rendering plain tick labels in some plots, so the
    labels are passed to matplotlib already in LaTeX form.

    Args:
        labels: iterable of values to be used as tick labels.

    Returns:
        A list with one ``"$<label>$"`` string per input element.
    """
    return list(map("${}$".format, labels))

In [13]:
def heatmap(data, attribute, reduction, max_error, min_error, save=False):
    """Draw an epochs x degree heatmap of ``attribute`` and save or show it.

    Args:
        data: DataFrame with one row per (epochs, degree) pair; it must hold
            the columns ``"epochs"``, ``"degree"`` and ``attribute``.
        attribute: name of the column to visualise.
        reduction: reduction identifier, used only to build the output file name.
        max_error: upper bound of the colour scale (``vmax``).
        min_error: lower bound of the colour scale (``vmin``).
        save: when True the figure is exported to
            ``{OUTPUT}/{reduction}_{attribute}.tex`` through tikzplotlib and
            then closed; otherwise it is displayed.
    """
    # plt.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9;
    # pyplot.get_cmap is the stable way to look a colormap up by name.
    inferno = plt.get_cmap('inferno').reversed()

    # Sort explicitly so the reshape below does not depend on the row order of
    # the CSV. Like lines(), this assumes EPOCHS and DEGREES are ascending.
    ordered = data.sort_values(["epochs", "degree"])
    # Rows are epochs, columns are degrees; rows are flipped so the first
    # epoch value ends up at the bottom of the image.
    error = ordered[attribute].to_numpy().reshape((len(EPOCHS), len(DEGREES)))[::-1]

    plt.xticks(ticks=np.arange(len(DEGREES)), labels=fix(DEGREES))
    plt.yticks(ticks=np.arange(len(EPOCHS)), labels=fix(EPOCHS[::-1]))

    hm = plt.imshow(error, cmap=inferno, vmax=max_error, vmin=min_error)
    # The colorbar takes its colormap from the mappable, so no cmap is passed.
    plt.colorbar(hm, format="%0.2f")

    plt.xlabel("Grado del polinomio")
    plt.ylabel("Epoche")

    if save:
        tikzplotlib.save(f"{OUTPUT}/{reduction}_{attribute}.tex")
        plt.close()
    else:
        plt.show()

In [14]:
def _line_plot(data, attribute, series_col, series_values, series_label,
               x_col, x_values, xlabel, out_path, save):
    """Plot ``attribute`` against ``x_col`` with one line per ``series_col`` value."""
    # A bare string given to set_major_formatter is interpreted as a
    # str.format template, so "%0.2f" would be printed verbatim on every tick.
    plt.gca().yaxis.set_major_formatter(StrMethodFormatter("{x:0.2f}"))

    for value in series_values:
        # Sorting by the x column pairs each measurement with its x value
        # (x_values is assumed to be in ascending order).
        error = data[data[series_col] == value].sort_values(x_col)[attribute]
        plt.plot(x_values, error, "v-", label=series_label.format(value))

    # Put the ticks on the plotted x values so they always line up with the markers.
    plt.xticks(ticks=list(x_values), labels=fix(x_values))
    plt.xlabel(xlabel)

    plt.ylabel(attribute.replace("_", " ").capitalize())
    plt.legend(loc="upper right")

    if save:
        tikzplotlib.save(out_path)
        plt.close()
    else:
        plt.show()


def lines(data, attribute, reduction, max_error, min_error, save=False):
    """Draw the two line graphs of ``attribute`` and save or show them.

    The first graph plots ``attribute`` against the epochs with one line per
    polynomial degree; the second plots it against the degree with one line
    per number of epochs.

    Args:
        data: DataFrame holding the columns ``"epochs"``, ``"degree"`` and
            ``attribute``.
        attribute: name of the column to plot; it is also used, with
            underscores replaced by spaces and capitalised, as the y label.
        reduction: reduction identifier, used only to build the output file names.
        max_error: accepted for signature parity with ``heatmap``; not used.
        min_error: accepted for signature parity with ``heatmap``; not used.
        save: when True the figures are exported to
            ``{OUTPUT}/epochs_{reduction}_{attribute}.tex`` and
            ``{OUTPUT}/degree_{reduction}_{attribute}.tex`` through tikzplotlib
            and closed; otherwise they are displayed.
    """
    _line_plot(
        data, attribute,
        series_col="degree", series_values=DEGREES, series_label="grado {}",
        x_col="epochs", x_values=EPOCHS, xlabel="Epoche",
        out_path=f"{OUTPUT}/epochs_{reduction}_{attribute}.tex",
        save=save,
    )

    _line_plot(
        data, attribute,
        series_col="epochs", series_values=EPOCHS, series_label="{} epoche",
        x_col="degree", x_values=DEGREES, xlabel="Grado del polinomio",
        out_path=f"{OUTPUT}/degree_{reduction}_{attribute}.tex",
        save=save,
    )

Training error

In [15]:
# Print min / max / mean / std of the training error for every reduction.
for red in REDUCTIONS:
    training_error = pd.read_csv(f".{RESULTS_LOCATION}/{red}-kmmkp.csv")["training_error"]
    lowest, highest = training_error.min(), training_error.max()
    average, spread = training_error.mean(), training_error.std()
    print(
        f"Reduction: {red}\n"
        f"    min: {lowest:.4f},\n"
        f"    max: {highest:.4f},\n"
        f"    mean: {average:.4f},\n"
        f"    std: {spread:.4f}\n"
    )

Reduction: 200
    min: 0.0000,
    max: 0.2990,
    mean: 0.0590,
    std: 0.0793

Reduction: 1000
    min: 0.0000,
    max: 0.2440,
    mean: 0.0323,
    std: 0.0536

Reduction: 1500
    min: 0.0000,
    max: 0.2147,
    mean: 0.0318,
    std: 0.0524



Test error

In [16]:
# Print min / max / mean / std of the test error for every reduction.
for red in REDUCTIONS:
    test_error = pd.read_csv(f".{RESULTS_LOCATION}/{red}-kmmkp.csv")["test_error"]
    lowest, highest = test_error.min(), test_error.max()
    average, spread = test_error.mean(), test_error.std()
    print(
        f"Reduction: {red}\n"
        f"    min: {lowest:.4f},\n"
        f"    max: {highest:.4f},\n"
        f"    mean: {average:.4f},\n"
        f"    std: {spread:.4f}\n"
    )

Reduction: 200
    min: 0.1449,
    max: 0.3033,
    mean: 0.1806,
    std: 0.0411

Reduction: 1000
    min: 0.0870,
    max: 0.2218,
    mean: 0.1182,
    std: 0.0416

Reduction: 1500
    min: 0.0759,
    max: 0.2040,
    mean: 0.1063,
    std: 0.0404



Training time

In [17]:
# Print min / max / mean / std of the training time for every reduction.
for red in REDUCTIONS:
    training_time = pd.read_csv(f".{RESULTS_LOCATION}/{red}-kmmkp.csv")["training_time"]
    lowest, highest = training_time.min(), training_time.max()
    average, spread = training_time.mean(), training_time.std()
    print(
        f"Reduction: {red}\n"
        f"    min: {lowest:.4f},\n"
        f"    max: {highest:.4f},\n"
        f"    mean: {average:.4f},\n"
        f"    std: {spread:.4f}\n"
    )

Reduction: 200
    min: 0.0714,
    max: 0.6982,
    mean: 0.3777,
    std: 0.1964

Reduction: 1000
    min: 0.3693,
    max: 3.5176,
    mean: 1.9894,
    std: 1.0133

Reduction: 1500
    min: 0.6230,
    max: 6.5353,
    mean: 3.4515,
    std: 1.8373



In [18]:
# Start from an empty output directory so files from a previous run never linger.
if os.path.exists(OUTPUT):
    shutil.rmtree(OUTPUT)

os.mkdir(OUTPUT)

plt.rcParams['figure.figsize'] = [10, 8]

# Read every results file once; the frames are reused for the bounds and the plots.
results_by_reduction = {
    red: pd.read_csv(f".{RESULTS_LOCATION}/{red}-kmmkp.csv") for red in REDUCTIONS
}

# The heatmaps share one colour scale. Its bounds are computed over all
# reductions instead of assuming that the smallest reduction has the largest
# error and the largest reduction the smallest one.
all_results = pd.concat(list(results_by_reduction.values()), ignore_index=True)

max_test_error = all_results["test_error"].max()
min_test_error = all_results["test_error"].min()

max_training_error = all_results["training_error"].max()
min_training_error = all_results["training_error"].min()

for red, results in results_by_reduction.items():
    heatmap(results, "test_error", red, max_test_error, min_test_error, save=True)
    heatmap(results, "training_error", red, max_training_error, min_training_error, save=True)

    lines(results, "test_error", red, max_test_error, min_test_error, save=True)
    lines(results, "training_error", red, max_training_error, min_training_error, save=True)