In [1]:
import numpy as np
import json
from pathlib import Path

# Per-user button-usage records, one JSON object per line.
result_file = Path("../../Anki-button-usage/button_usage.jsonl")

# Fail fast with a clear message. Previously a missing file just skipped the
# load, so the cell either crashed later with a NameError on `data` or, worse,
# silently reused a stale `data` left in the kernel by another cell.
if not result_file.exists():
    raise FileNotFoundError(f"Button usage file not found: {result_file}")

# The context manager closes the handle; iterating the file yields one line
# (one JSON record) at a time.
with open(result_file) as f:
    data = [json.loads(line) for line in f]

review_rating_probs = np.array([x["review_rating_prob"] for x in data])
user_ids = np.array([x["user"] for x in data])
# Variance across each record's `review_rating_prob` entries (axis=1).
variances = np.var(review_rating_probs, axis=1)
# nanmean: records whose variance is NaN do not affect the threshold.
avg_variance = np.nanmean(variances)
# Keep users whose variance is below half of the mean variance. NaN variances
# compare False and are therefore excluded.
# NOTE(review): presumably low variance means the user spreads answers across
# all four buttons -- confirm against the Anki-button-usage definition.
four_button_user_ids = user_ids[variances < avg_variance / 2]

In [2]:
def weighted_avg_and_std(values, weights):
    """
    Return the weighted average and the weighted sample standard deviation.

    The weights are in effect first normalized so that they sum to 1
    (and so they must not all be 0).

    The variance is corrected for bias with the effective sample size
    ``n_eff = (sum w)^2 / sum(w^2)``, i.e. it is multiplied by
    ``n_eff / (n_eff - 1)``.

    Parameters
    ----------
    values, weights : NumPy ndarrays with the same shape.

    Returns
    -------
    (average, std) : tuple
        ``std`` is NaN when ``n_eff == 1`` (a single sample, or a single
        non-zero weight), because the corrected variance is undefined there.
    """
    # Force 64-bit precision to avoid errors sometimes seen with narrower dtypes.
    weights = np.asarray(weights, dtype=np.float64)
    average = np.average(values, weights=weights)
    # Bevington, P. R., Data Reduction and Error Analysis for the Physical Sciences, 336 pp., McGraw-Hill, 1969
    # https://seismo.berkeley.edu/~kirchner/Toolkits/Toolkit_12.pdf
    n_eff = np.square(np.sum(weights)) / np.sum(np.square(weights))
    if n_eff == 1:
        # n_eff / (n_eff - 1) would divide by zero. The old code reached the
        # same NaN only through 1/0 -> inf and 0 * inf -> nan, emitting
        # RuntimeWarnings (or raising under np.errstate(all="raise")).
        return (average, np.float64(np.nan))
    variance = np.average((values - average) ** 2, weights=weights) * (
        n_eff / (n_eff - 1)
    )
    return (average, np.sqrt(variance))

In [None]:
# Build a set once so the per-row membership test is a hash lookup rather
# than a linear scan over the NumPy array.
four_button_users = set(np.asarray(four_button_user_ids).tolist())

# For each model, read its per-user results, keep only the selected users and
# print the mean and std of each metric under three weighting schemes.
for model in ["FSRS-6", "FSRS-6-binary"]:
    print(f"Model: {model}")
    m = []
    parameters = []
    sizes = []
    result_file = Path(f"../result/{model}.jsonl")
    with open(result_file, "r") as f:
        data = [json.loads(line) for line in f]
    for result in data:
        if result["user"] not in four_button_users:
            continue
        m.append(result["metrics"])
        sizes.append(result["size"])
        if "parameters" in result:
            # "parameters" is either a single list or a dict of lists;
            # flatten both forms into one list of parameter vectors.
            if isinstance(result["parameters"], list):
                parameters.append(result["parameters"])
            else:
                parameters.extend(result["parameters"].values())

    print(f"Total number of users: {len(sizes)}")
    print(f"Total number of reviews: {sum(sizes)}")
    for scale, size in (
        ("reviews", np.array(sizes)),
        ("log(reviews)", np.log(sizes)),
        ("users", np.ones_like(sizes)),
    ):
        print(f"Weighted average by {scale}:")
        for metric in ("LogLoss", "RMSE(bins)", "AUC"):
            metrics = np.array([item[metric] for item in m])
            # Mask out users whose metric is NaN. The mask is applied to
            # fresh names: overwriting `size` here (as before) left a
            # shortened weight vector for the next metric, which then failed
            # or misaligned as soon as a non-final metric contained a NaN.
            valid = ~np.isnan(metrics.astype(float))
            metrics = metrics[valid]
            valid_weights = size[valid]
            wmean, wstd = weighted_avg_and_std(metrics, valid_weights)
            print(f"{model} {metric} (mean±std): {wmean:.4f}±{wstd:.4f}")

    print()

Model: FSRS-6
Total number of users: 2570
Total number of reviews: 72278130
Weighted average by reviews:
FSRS-6 LogLoss (mean±std): 0.3278±0.1503
FSRS-6 RMSE(bins) (mean±std): 0.0511±0.0305
FSRS-6 AUC (mean±std): 0.7043±0.0773
Weighted average by log(reviews):
FSRS-6 LogLoss (mean±std): 0.3608±0.1611
FSRS-6 RMSE(bins) (mean±std): 0.0687±0.0410
FSRS-6 AUC (mean±std): 0.6994±0.0848
Weighted average by users:
FSRS-6 LogLoss (mean±std): 0.3637±0.1623
FSRS-6 RMSE(bins) (mean±std): 0.0712±0.0421
FSRS-6 AUC (mean±std): 0.6985±0.0862

Model: FSRS-6-binary
Total number of users: 2570
Total number of reviews: 72278130
Weighted average by reviews:
FSRS-6-binary LogLoss (mean±std): 0.3347±0.1517
FSRS-6-binary RMSE(bins) (mean±std): 0.0553±0.0312
FSRS-6-binary AUC (mean±std): 0.6736±0.0903
Weighted average by log(reviews):
FSRS-6-binary LogLoss (mean±std): 0.3688±0.1627
FSRS-6-binary RMSE(bins) (mean±std): 0.0728±0.0416
FSRS-6-binary AUC (mean±std): 0.6698±0.0953
Weighted average by users:
FSRS-6-b