In [None]:
import numpy as np
import pandas as pd
from means import Kmeans, DataGenerator
from sklearn.datasets import make_moons, make_circles, load_iris, load_wine
import matplotlib.pyplot as plt
import seaborn as sns

from tqdm import tqdm

In [None]:
# Load the benchmark results stored under "results_orig" and stack the two
# result sheets row-wise into a single frame.
file_path = "results_orig/clustering_performance_results_formatted.xlsx"
# Open the workbook as a context manager so its file handle is released as
# soon as both sheets are parsed instead of staying open in the kernel.
with pd.ExcelFile(file_path) as data:
    df = pd.concat([
        data.parse("Results1"),
        data.parse("Results2"),
    ])
df

In [None]:
# Load the benchmark results stored under "results" and stack the two result
# sheets row-wise into a single frame.
file_path = "results/clustering_performance_results_formatted.xlsx"
# Open the workbook as a context manager so its file handle is released as
# soon as both sheets are parsed instead of staying open in the kernel.
with pd.ExcelFile(file_path) as data:
    df = pd.concat([
        data.parse("Results1"),
        data.parse("Results2"),
    ])

# Datasets labelled "Random..." are shown as "Uniform..." from here on.
# Bracket assignment is used because attribute assignment only works when the
# column already exists and is easy to confuse with setting a plain attribute.
df["Dataset"] = df["Dataset"].str.replace("Random", "Uniform", regex=False)

# Restrict the table to the cluster counts of interest and exclude the
# Circles and Moons datasets.
df = df[df["K"].isin([3, 20, 100])]
df = df[~df["Dataset"].isin(['Circles', 'Moons'])]

def calc_speedup(table):
    """Express each method's distance evaluations relative to the Elkan row.

    Parameters
    ----------
    table : pandas.DataFrame
        Rows to compare with each other. Must provide the columns ``Method``
        and ``Distance_Evaluations`` and contain exactly one row whose
        ``Method`` equals ``"Elkan"``.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``Method`` and ``Speedup``, where
        ``Speedup`` is ``Distance_Evaluations`` divided by the value of the
        Elkan row. ``table`` itself is left unmodified.

    Raises
    ------
    AssertionError
        If ``table`` does not contain exactly one ``"Elkan"`` row.
    """
    elkan_evaluations = table.loc[table["Method"] == "Elkan", "Distance_Evaluations"]
    assert len(elkan_evaluations) == 1, (
        f"expected exactly one Elkan row, found {len(elkan_evaluations)}"
    )
    baseline = elkan_evaluations.iloc[0]
    # assign() works on a copy: the frame handed in (e.g. a group passed by
    # groupby.apply) must not be mutated from inside the applied function.
    with_ratio = table.assign(Speedup=table["Distance_Evaluations"] / baseline)
    return with_ratio[["Method", "Speedup"]]

# Evaluate calc_speedup separately for every (Dataset, K) combination and
# flatten the grouped result back into ordinary columns.
all_speedups = (
    df
    .groupby(["Dataset", "K"])
    .apply(calc_speedup, include_groups=False)
    .reset_index()
    # reset_index() turns the innermost, unnamed index level into a
    # "level_2" column, which is discarded here.
    .drop(columns="level_2")
)
all_speedups

In [None]:
# Keep only the rows belonging to the three Ptolemy variants.
is_ptolemy_variant = all_speedups["Method"].isin(
    ["Ptolemy_upper", "Ptolemy_lower", "Ptolemy"]
)
speedups = all_speedups[is_ptolemy_variant]
speedups

In [None]:
sns.set_palette("colorblind")

# The available page width is ~5 inches and little height is left, so the
# figure is allocated 5 x 3 inches, enlarged by `scale`.
scale = 2

# Mosaic of named panels: one per dataset plus a "legend" slot that only
# hosts the shared legend.
layout = [
        ["Iris", "Wine", "legend"],
        ["Gaussian_low","Gaussian_medium", "Gaussian_high"],
        ["Uniform_low","Uniform_medium", "Uniform_high"],
    ]

# Create exactly ONE figure. Previously a sized figure was created and then
# immediately replaced by a second, default-sized one, so figsize/dpi never
# reached the plotted figure and an empty figure was left open.
fig = plt.figure(
    figsize=(5 * scale, 3 * scale),
    dpi=300,
    layout="constrained",
)
ax_mosaic = fig.subplot_mosaic(
    layout,
    sharex=True,
)

# The built-in sharey option does not work as intended, so the y axis is
# shared manually: within every row of the layout, each panel after the first
# is linked to the first panel and has its y label and tick labels hidden.
for row in layout:
    first, *remaining = row
    for entry in remaining:
        if entry != "empty":
            panel = ax_mosaic[entry]
            panel.set_ylabel("")
            panel.tick_params(labelleft=False)
            panel.sharey(ax_mosaic[first])

def plot_single(data, name, legend=False):
    """Draw the Speedup-over-K curves of one dataset into its mosaic panel.

    Parameters
    ----------
    data : pandas.DataFrame
        Rows to plot; handed to ``sns.lineplot`` with ``K`` on the x axis,
        ``Speedup`` on the y axis and ``Method`` controlling both colour and
        line/marker style.
    name : str
        Key of the target panel in ``ax_mosaic``. It is also used as the
        panel title, with every underscore replaced by ``", "``.
    legend : bool, optional
        Forwarded to ``sns.lineplot`` as its ``legend`` argument.
    """
    panel = ax_mosaic[name]
    sns.lineplot(
        data=data,
        x="K",
        y="Speedup",
        hue="Method",
        style="Method",
        markers=True,
        legend=legend,
        ax=panel,
    )
    panel.set_title(", ".join(name.split("_")))

# One panel per dataset; only the "Wine" panel draws a legend, which serves
# as the source of the legend entries.
for name, dataset in speedups.groupby("Dataset"):
    plot_single(dataset, name, legend=(name == "Wine"))


# The "legend" slot of the mosaic is not a real plot: hide its axes and use
# it purely as a container for the legend.
ax_legend = ax_mosaic["legend"]
ax_legend.axis("off")

# Move the legend entries from the "Wine" panel into the legend slot.
wine_axis = ax_mosaic['Wine']
handles, labels = wine_axis.get_legend_handles_labels()
wine_axis.get_legend().remove()
ax_legend.legend(handles, labels, loc='center')

# Write the combined figure to disk before displaying it.
plt.savefig("results/combined_plot.pdf", format="pdf")
plt.show()

#sns.move_legend(legend, "upper left", bbox_to_anchor=(1, 1))


#for col in layout:
#    first = col[0]
#    ax_mosaic[first].set_ylim(bottom=0.6)