In [1]:
import pandas as pd
import numpy as np

# Load the tab-separated short-term stability table into `df`.
df = pd.read_csv(
    "./short_term_stability.tsv",
    sep="\t",
)

# Report the (rows, columns) shape of the freshly loaded table.
print(df.shape)

(667, 8)


In [2]:
# r_history values that a user must have at least one row for in order to be kept
r_history_values = ["1", "2", "3", "1,2", "1,3", "3,2"]
required_histories = frozenset(r_history_values)

# Group rows by the "user" column, keep only the groups whose r_history column
# contains every required value, then reduce the surviving rows to the distinct
# user identifiers.
filtered_user_ids = (
    df.groupby("user")
    .filter(lambda user_rows: required_histories.issubset(user_rows["r_history"]))["user"]
    .unique()
)

# Restrict df to the rows that belong to the retained users
df = df.loc[df["user"].isin(filtered_user_ids)]

print(df.shape)

(400, 8)


In [3]:
# Print mean / median / std of the three metric columns for each single-rating
# r_history value ("1", "2", "3"), one labelled table per value.
print("Unit: seconds\n")

summary_columns = ["average_retention", "average_delta_t", "stability"]
summary_stats = ["mean", "median", "std"]

# (header printed above the table, r_history value selecting the rows)
first_rating_cases = (
    ("First Rating: Again", "1"),
    ("First Rating: Hard", "2"),
    ("First Rating: Good", "3"),
)

for header, rating in first_rating_cases:
    print(header)
    rating_rows = df.loc[df["r_history"] == rating, summary_columns]
    print(rating_rows.agg(summary_stats))

Unit: seconds

First Rating: Again
        average_retention  average_delta_t   stability
mean             0.750572      2335.406250   86.625000
median           0.799250       131.500000   42.500000
std              0.147190      7972.163186  120.517098
First Rating: Hard
        average_retention  average_delta_t      stability
mean             0.933462     66574.312500   51441.562500
median           0.957350       758.500000    2229.500000
std              0.069808    209469.800196  196175.351802
First Rating: Good
        average_retention  average_delta_t      stability
mean             0.972578      45517.34375  110721.593750
median           0.981750        820.50000   13623.500000
std              0.025519     214340.03358  441054.992066


In [4]:
# Print the 0%, 10%, ..., 100% quantiles of the three metric columns for each
# single-rating r_history value, in the order "1", "2", "3".
print("Unit: seconds\n")

quantile_columns = ["average_retention", "average_delta_t", "stability"]
decile_levels = np.linspace(0, 1, 11)  # 11 evenly spaced levels from 0 to 1

for rating in ("1", "2", "3"):
    rating_rows = df.loc[df["r_history"] == rating, quantile_columns]
    print(rating_rows.quantile(decile_levels))

Unit: seconds

     average_retention  average_delta_t  stability
0.0            0.40540             76.0        4.0
0.1            0.50391             85.5        7.6
0.2            0.64326             96.8       20.0
0.3            0.72831            106.2       26.2
0.4            0.76990            120.0       37.0
0.5            0.79925            131.5       42.5
0.6            0.80504            140.8       53.6
0.7            0.83319            154.7       88.3
0.8            0.85154            276.0      129.0
0.9            0.88663           1220.1      186.2
1.0            0.95770          36810.0      618.0
     average_retention  average_delta_t  stability
0.0            0.73030            103.0      143.0
0.1            0.84656            391.5      466.1
0.2            0.90618            463.0      728.8
0.3            0.92457            516.0     1323.2
0.4            0.94006            647.6     1541.6
0.5            0.95735            758.5     2229.5
0.6            0

In [5]:
# Summary statistics and decile table for rows whose r_history equals "1,3".
print("Unit: seconds\n")

# Select the matching rows once, restricted to the three metric columns.
history_1_3 = df.loc[
    df["r_history"] == "1,3",
    ["average_retention", "average_delta_t", "stability"],
]

print(history_1_3.agg(["mean", "median", "std"]))
# Quantiles at 0%, 10%, ..., 100%
print(history_1_3.quantile(np.linspace(0, 1, 11)))

Unit: seconds

        average_retention  average_delta_t      stability
mean             0.930116    119454.375000   51160.562500
median           0.948050       939.000000    3608.000000
std              0.066514    513638.659307  221435.156787
     average_retention  average_delta_t  stability
0.0            0.74080             79.0      149.0
0.1            0.84555            324.1      275.0
0.2            0.89488            570.6      611.6
0.3            0.91259            653.9     1277.8
0.4            0.93694            805.8     2587.6
0.5            0.94805            939.0     3608.0
0.6            0.95860           1434.8     5803.4
0.7            0.97602           3264.7    11185.5
0.8            0.98418          10125.2    20503.2
0.9            0.99379          60438.2    43436.6
1.0            1.00000        2854675.0  1259946.0


In [6]:
# Summary statistics and decile table for rows whose r_history equals "1,2".
print("Unit: seconds\n")

# Select the matching rows once, restricted to the three metric columns.
history_1_2 = df.loc[
    df["r_history"] == "1,2",
    ["average_retention", "average_delta_t", "stability"],
]

print(history_1_2.agg(["mean", "median", "std"]))
# Quantiles at 0%, 10%, ..., 100%
print(history_1_2.quantile(np.linspace(0, 1, 11)))

Unit: seconds

        average_retention  average_delta_t     stability
mean             0.859450      37710.81250   9970.562500
median           0.899500        630.50000    608.500000
std              0.128735     143518.68286  27433.451059
     average_retention  average_delta_t  stability
0.0            0.48760            158.0       33.0
0.1            0.69510            336.2       96.3
0.2            0.80908            415.4      203.2
0.3            0.83717            491.2      300.7
0.4            0.87066            534.2      323.8
0.5            0.89950            630.5      608.5
0.6            0.91964            796.0      939.4
0.7            0.93624           1526.8     1895.9
0.8            0.96072          11759.4     4053.8
0.9            0.97531          58251.5    20018.4
1.0            1.00000         805777.0   135929.0


In [7]:
# Summary statistics and decile table for rows whose r_history equals "3,2".
print("Unit: seconds\n")

# Select the matching rows once, restricted to the three metric columns.
history_3_2 = df.loc[
    df["r_history"] == "3,2",
    ["average_retention", "average_delta_t", "stability"],
]

print(history_3_2.agg(["mean", "median", "std"]))
# Quantiles at 0%, 10%, ..., 100%
print(history_3_2.quantile(np.linspace(0, 1, 11)))

Unit: seconds

        average_retention  average_delta_t     stability
mean             0.955897     83091.031250  100890.81250
median           0.992950      2020.500000   20788.50000
std              0.070868    273855.659436  257835.18126
     average_retention  average_delta_t  stability
0.0            0.74550             30.0      191.0
0.1            0.88122            306.4     1906.3
0.2            0.90750            689.0     4826.4
0.3            0.96183            813.5     8470.1
0.4            0.98068           1778.8    16319.0
0.5            0.99295           2020.5    20788.5
0.6            0.99928           5687.0    26406.4
0.7            1.00000          19146.9    41832.6
0.8            1.00000          29254.2    62256.0
0.9            1.00000          82613.1   298800.9
1.0            1.00000        1380147.0  1348462.0
