In [None]:
import json
import matplotlib.pyplot as plt

In [None]:
# Parse the loss log into `json_data`; later cells index it by model name.
with open("../data/loss_log.json", "r") as log_file:
    json_data = json.load(log_file)

In [None]:
models = ["proposed", "user2vec"]

# Log key -> label shown in plot legends.
train_loss_names = {"loss": "train-loss"}
test_loss_names = {
    "train-size=0": "test-loss (train-size=0)",
    "train-size=10": "test-loss (train-size=10)",
    "train-size=20": "test-loss (train-size=20)",
    "train-size=30": "test-loss (train-size=30)",
    "train-size=40": "test-loss (train-size=40)",
    "train-size=50": "test-loss (train-size=50)",
}

# loss_data[model]["<model> <label>"] -> list of values, one per entry of
# json_data[model], in log order. The train-loss label is inserted first,
# followed by the test-loss labels in the order listed above.
loss_data = {}

for model in models:
    curves = {}
    loss_data[model] = curves
    for epoch_data in json_data[model]:
        # Train loss is stored directly on the entry.
        for log_key, label in train_loss_names.items():
            curves.setdefault(model + " " + label, []).append(epoch_data[log_key])
        # Test losses are nested under the entry's "test_loss" mapping.
        for log_key, label in test_loss_names.items():
            curves.setdefault(model + " " + label, []).append(epoch_data["test_loss"][log_key])

In [None]:
import matplotlib.cm as cm

# Overlay every collected loss curve of every model on a single axis.
fig, ax = plt.subplots(figsize=(6, 4))

for model in models:
    # Red shades for the proposed model, blue shades for any other model.
    if model == "proposed":
        cmap = cm.Reds
    else:
        cmap = cm.Blues
    curves = loss_data[model]
    n_curves = len(curves)
    for idx, curve_name in enumerate(curves):
        # Colormap positions are confined to [0.5, 1.0), one step per curve.
        shade = cmap(0.5 + 0.5 * idx / n_curves)
        # Only the first 20 logged values of each curve are drawn.
        ax.plot(curves[curve_name][:20], label=curve_name, color=shade)

ax.set_xlabel("Epoch")
ax.set_ylabel("Loss")
fig.legend()

In [None]:
fig, ax = plt.subplots(figsize=(6, 4))

# Loss curves of the proposed model only: the train loss in a fixed blue,
# the test losses in progressively darker shades of orange.
model = "proposed"
curves = loss_data[model]
train_label = "proposed train-loss"
# Number of test curves; used to spread them over the whole colormap.
n_test_curves = sum(1 for name in curves if name != train_label)

test_rank = 0
for loss_name, losses in curves.items():
    if loss_name == train_label:
        color = "#274A78"
    else:
        test_rank += 1
        # Scale by the number of test curves so positions stay within (0, 1].
        # A fixed divisor of 5 put the sixth test curve at 1.2, which the
        # colormap draws with the same colour as 1.0, making the last two
        # curves indistinguishable.
        color = cm.Oranges(test_rank / n_test_curves)
    # Only the first 20 logged values of each curve are drawn.
    ax.plot(losses[:20], label=loss_name, color=color)

ax.set_ylabel("Loss")
ax.set_xlabel("Epoch")
fig.legend()
# NOTE(review): the figure is written under "data/" relative to the working
# directory, while the log is read from "../data/" -- confirm this directory
# is the intended target and that it exists.
fig.savefig("data/loss.svg", format="svg")

In [None]:
fig, ax = plt.subplots()

colors = {
    "proposed": "#EC6D51",
    "user2vec": "#274A78"
}

# For each model, plot the lowest test loss reached at each train-data size.
for model in models:
    xs, ys = [], []
    for size_key, display_name in test_loss_names.items():
        # Take the x value from the "train-size=N" key rather than from the
        # curve's position, so the "train-size=0" curve is plotted at x=0
        # (a position-based `i * 10` placed it at x=10 and shifted every
        # point one step to the right).
        xs.append(int(size_key.split("=", 1)[1]))
        # Minimum over all logged values of this test-loss curve.
        ys.append(min(loss_data[model][model + " " + display_name]))
    ax.plot(xs, ys, marker='o', label=model, color=colors[model])

ax.set_ylabel("Loss")
ax.set_xlabel("Train data size")
fig.legend()
# NOTE(review): figures are written under "data/" relative to the working
# directory, while the log is read from "../data/" -- confirm this directory
# is the intended target and that it exists.
fig.savefig("data/compare.svg", format="svg")
fig.savefig("data/compare.pdf", format="pdf")

In [None]:
# Hard-coded values, five per list; `proposed_gender` is defined here but not
# used by the loop below.
proposed = [
    0.13780289706080517,
    0.10975917064129039,
    0.09165382198989391,
    0.08783298873520912,
    0.07421393228813689,
]
doc2vec = [
    0.19013651333590773,
    0.15874055341044638,
    0.1320475473048839,
    0.12571821234961775,
    0.11140885534993511,
]
proposed_gender = [
    0.1454391172750199,
    0.11478135786316496,
    0.09466286221558744,
    0.089873733732136,
    0.07711116232453509,
]

# Print the element-wise difference proposed - doc2vec, one value per line.
for proposed_value, doc2vec_value in zip(proposed, doc2vec):
    print(proposed_value - doc2vec_value)