In [1]:
import json
from pathlib import Path
import numpy as np


def print_transition_matrix(transition_matrix):
    """
    Print a transition matrix as a plain-text table on stdout.

    Rows are labelled with the previous rating and columns with the next
    rating, both numbered from 1. The header and separator are sized from the
    matrix's own column count, so the table stays aligned for any width.
    Every cell is printed with four decimal places.

    Args:
        transition_matrix: 2-D array-like of numbers
            (row = previous rating, column = next rating).

    Raises:
        ValueError: If ``transition_matrix`` is not two-dimensional.
    """
    matrix = np.asarray(transition_matrix)
    if matrix.ndim != 2:
        raise ValueError(
            f"transition_matrix must be 2-dimensional, got {matrix.ndim} dimension(s)"
        )
    n_cols = matrix.shape[1]

    print("(row = previous rating, column = next rating)")
    # Each header cell is 7 characters wide and joined by one space, matching
    # the 6-character values joined by two spaces in the data rows below.
    print("    | " + " ".join(f"   {j + 1}   " for j in range(n_cols)))
    print("----+" + "--------" * n_cols)
    for rating, row in enumerate(matrix, start=1):
        print(f" {rating}  | " + "  ".join(f"{value:.4f}" for value in row))
    # Emits two blank lines after the table (the "\n" plus print's own newline).
    print("\n")


def _normalize_rows(totals):
    """Return *totals* as a float array with each row divided by its own sum.

    Works for any 2-D shape. A row whose sum is zero yields NaN entries
    (NumPy emits a RuntimeWarning), the same as plain element-wise division.
    """
    counts = np.asarray(totals, dtype=float)
    return counts / counts.sum(axis=1, keepdims=True)


# Summarise per-collection button-usage statistics stored as JSON Lines
# (one JSON object per line). Nothing is printed when the file is absent.
result_file = Path("button_usage.jsonl")
if result_file.exists():
    # The context manager closes the handle promptly; blank lines (such as a
    # trailing newline at the end of the file) are skipped rather than being
    # handed to json.loads, which would raise on them.
    with result_file.open(encoding="utf-8") as fh:
        data = [json.loads(line) for line in fh if line.strip()]

    print(f"The number of collections: {len(data)}\n")

    # --- Review and card counts -------------------------------------------
    review_cnts = np.array([x["review_cnt"] for x in data])
    review_cnts_mean = np.mean(review_cnts).round(3)
    review_cnts_median = np.median(review_cnts).round(3)
    print(f"Review count mean: {review_cnts_mean}\n")
    print(f"Review count median: {review_cnts_median}\n")

    card_cnts = np.array([x["card_cnt"] for x in data])
    card_cnts_mean = np.mean(card_cnts).round(3)
    card_cnts_median = np.median(card_cnts).round(3)
    print(f"Card count mean: {card_cnts_mean}\n")
    print(f"Card count median: {card_cnts_median}\n")

    # Ratio of the two (already rounded) means, not the mean of per-record ratios.
    print(f"Reviews per card mean: {review_cnts_mean / card_cnts_mean:.3f}\n")

    # --- Costs (element-wise median across records) -------------------------
    learn_costs = np.array([x["learn_costs"] for x in data])
    review_costs = np.array([x["review_costs"] for x in data])

    learn_costs_median = np.median(learn_costs, axis=0).round(3).tolist()
    review_costs_median = np.median(review_costs, axis=0).round(3).tolist()
    print(f"Learn costs median: {learn_costs_median}\n")
    print(f"Review costs median: {review_costs_median}\n")

    # --- Rating probabilities (element-wise mean across records) ------------
    first_rating_probs = np.array([x["first_rating_prob"] for x in data])
    review_rating_probs = np.array([x["review_rating_prob"] for x in data])

    first_rating_probs_mean = np.mean(first_rating_probs, axis=0).round(3).tolist()
    # nanmean: NaN entries in review_rating_prob are ignored.
    review_rating_probs_mean = np.nanmean(review_rating_probs, axis=0).round(3).tolist()
    print(f"First rating prob mean: {first_rating_probs_mean}\n")
    print(f"Review rating prob mean: {review_rating_probs_mean}\n")

    # --- First-session statistics -------------------------------------------
    first_rating_offsets = np.array([x["first_rating_offset"] for x in data])
    first_session_lens = np.array([x["first_session_len"] for x in data])

    first_rating_offsets_median = (
        np.median(first_rating_offsets, axis=0).round(3).tolist()
    )
    first_session_lens_median = np.median(first_session_lens, axis=0).round(3).tolist()
    print(f"First rating offset median: {first_rating_offsets_median}\n")
    print(f"First session len median: {first_session_lens_median}\n")

    # --- Forget-session statistics (median over all values, no axis) --------
    forget_rating_offsets = np.array([x["forget_rating_offset"] for x in data])
    forget_session_lens = np.array([x["forget_session_len"] for x in data])

    forget_rating_offsets_median = np.median(forget_rating_offsets).round(3)
    forget_session_lens_median = np.median(forget_session_lens).round(3)
    print(f"Forget rating offset median: {forget_rating_offsets_median}\n")
    print(f"Forget session len median: {forget_session_lens_median}\n")

    # --- Short-term recall --------------------------------------------------
    short_term_recall = np.array([x["short_term_recall"] for x in data])
    # Drop every record that contains an exact 0 or an exact 1 in any position
    # before averaging.
    has_zero = (short_term_recall == 0).any(axis=1)
    has_one = (short_term_recall == 1).any(axis=1)
    short_term_recall = short_term_recall[~has_zero & ~has_one]
    short_term_recall_mean = np.mean(short_term_recall, axis=0).round(3).tolist()
    print(f"Short term recall mean: {short_term_recall_mean}\n")

    # --- Pooled transition matrices -----------------------------------------
    # Counts are summed over all records first, then each row is normalised so
    # that it sums to 1.
    learning_step_transition = np.array([x["learning_step_transition"] for x in data])
    learning_step_transition_total = np.sum(learning_step_transition, axis=0).tolist()
    transition_matrix = _normalize_rows(learning_step_transition_total)
    print("Learning step transition:")
    print_transition_matrix(transition_matrix)

    relearning_step_transition = np.array(
        [x["relearning_step_transition"] for x in data]
    )
    relearning_step_transition_total = np.sum(
        relearning_step_transition, axis=0
    ).tolist()
    transition_matrix = _normalize_rows(relearning_step_transition_total)
    print("Relearning step transition:")
    print_transition_matrix(transition_matrix)

    long_term_transition = np.array([x["long_term_transition"] for x in data])
    long_term_transition_total = np.sum(long_term_transition, axis=0).tolist()
    transition_matrix = _normalize_rows(long_term_transition_total)
    print("Long term transition:")
    print_transition_matrix(transition_matrix)

    # --- Long-term transition deviation -------------------------------------
    # Per record: column 0 is 1 - true_retention, and the remaining columns are
    # review_rating_prob scaled by true_retention.
    review_rating_chance = np.array(
        [
            [
                1 - x["true_retention"],
                *[y * x["true_retention"] for y in x["review_rating_prob"]],
            ]
            for x in data
        ]
    )
    # Per record: each row of long_term_transition divided by its row sum and
    # rounded to 3 decimals; zero counts stay 0 without dividing.
    long_term_transition_chance = np.array(
        [
            [
                [round(y / sum(row), 3) if y != 0 else 0 for y in row]
                for row in x["long_term_transition"]
            ]
            for x in data
        ]
    )
    # After repeat + transpose, element [n, i, j] equals
    # review_rating_chance[n, j], i.e. the same vector is laid out on each of
    # the 4 rows so it can be subtracted from every row.
    repeated_review_rating_chance = np.repeat(
        review_rating_chance[:, :, None], 4, axis=2
    ).transpose(0, 2, 1)
    # Pass the NaN replacement by keyword: the second positional parameter of
    # np.nan_to_num is `copy`, not the fill value.
    long_term_transition_differences = np.nan_to_num(
        long_term_transition_chance - repeated_review_rating_chance, nan=0.0
    )
    long_term_transition_adjusted = np.mean(
        long_term_transition_differences, axis=0
    ).tolist()
    transition_matrix = np.array(long_term_transition_adjusted, dtype=float)
    print("Long term transition deviation:")
    print(
        "Displays the average difference between users general chances of using a given rating,\n"
        "and the chances of a rating dependant on the previous rating."
    )
    print_transition_matrix(transition_matrix)

    # --- State/rating costs and aggregate cost figures ----------------------
    state_rating_costs = np.array([x["state_rating_costs"] for x in data])
    state_rating_costs_median = np.median(state_rating_costs, axis=0).round(2).tolist()
    print(f"State rating costs median: {state_rating_costs_median}\n")

    # Per-record sum of learn_costs weighted by first_rating_prob; median across records.
    print(
        f"learn cost median: {np.median(np.sum(learn_costs * first_rating_probs, axis=1)).round(4)}\n"
    )
    # Per-record sum of review_costs[1:] weighted by review_rating_prob;
    # nanmedian skips records whose sum is NaN.
    print(
        f"pass cost median: {np.nanmedian(np.sum(review_costs[:, 1:] * review_rating_probs, axis=1)).round(4)}\n"
    )
    print(f"fail cost median: {np.median(review_costs[:, 0]).round(4)}\n")
    print(f"initial pass prob: {np.mean(1 - first_rating_probs[:, 0]).round(4)}\n")

The number of collections: 10000

Review count mean: 72703.798

Review count median: 30899.5

Card count mean: 9561.975

Card count median: 4821.0

Reviews per card mean: 7.603

Learn costs median: [48.32, 38.48, 23.165, 11.19]

Review costs median: [33.16, 17.725, 12.485, 9.02]

First rating prob mean: [0.236, 0.103, 0.489, 0.173]

Review rating prob mean: [0.231, 0.626, 0.143]

First rating offset median: [-0.75, -0.19, -0.01, 0.0]

First session len median: [2.04, 1.43, 0.82, 0.0]

Forget rating offset median: -0.27

Forget session len median: 1.06

Short term recall mean: [0.742, 0.917, 0.962, 0.853]

Learning step transition:
(row = previous rating, column = next rating)
    |    1       2       3       4   
----+--------------------------------
 1  | 0.3686  0.0628  0.5108  0.0577
 2  | 0.0442  0.4553  0.4457  0.0549
 3  | 0.0519  0.0470  0.8462  0.0550


Relearning step transition:
(row = previous rating, column = next rating)
    |    1       2       3       4   
----+---------