In [1]:
import pandas as pd
import numpy as np

# Load the tab-separated stability table from the working directory.
df = pd.read_csv(
    "./short_term_stability.tsv",
    sep="\t",
)

# Show (rows, columns) as a quick sanity check on the load.
print(df.shape)

(679, 9)


In [2]:
# A user is kept only if their rows contain every one of these r_history values.
r_history_values = ["1", "2", "3", "1,2", "1,3", "3,2"]


def _has_every_history(user_rows):
    """Return True when this user's rows include each value in r_history_values."""
    observed = user_rows["r_history"].values
    return all(history in observed for history in r_history_values)


# Run the check once per "user" group and collect the ids of the groups that pass.
filtered_user_ids = df.groupby("user").filter(_has_every_history)["user"].unique()

# Restrict df to the rows that belong to those users.
df = df[df["user"].isin(filtered_user_ids)]

print(df.shape)

(426, 9)


In [3]:
print("Unit: seconds\n")

# For each single-rating r_history value, print its heading followed by the
# mean / median / std of the three metric columns over the matching rows.
for heading, history in (
    ("First Rating: Again", "1"),
    ("First Rating: Hard", "2"),
    ("First Rating: Good", "3"),
):
    print(heading)
    matching = df.loc[
        df["r_history"] == history,
        ["average_retention", "median_delta_t", "stability"],
    ]
    print(matching.agg(["mean", "median", "std"]))

Unit: seconds

First Rating: Again
        average_retention  median_delta_t   stability
mean             0.743035      246.676471  202.647059
median           0.777150      114.000000   67.500000
std              0.146314      310.513623  340.372691
First Rating: Hard
        average_retention  median_delta_t      stability
mean             0.923953    28071.441176  148462.911765
median           0.953750      481.000000   15024.000000
std              0.075201   135149.469688  360681.209679
First Rating: Good
        average_retention  median_delta_t     stability
mean             0.962826    34414.117647  5.934497e+05
median           0.971000      760.500000  1.055380e+05
std              0.032451   158613.825968  1.256909e+06


In [4]:
print("Unit: seconds\n")

# Eleven evenly spaced probabilities from 0 to 1, i.e. min, the deciles, and max.
decile_points = np.linspace(0, 1, 11)

# Print the quantile table for r_history "1", then "2", then "3".
for history in ("1", "2", "3"):
    matching = df.loc[
        df["r_history"] == history,
        ["average_retention", "median_delta_t", "stability"],
    ]
    print(matching.quantile(decile_points))

Unit: seconds

     average_retention  median_delta_t  stability
0.0            0.39480            78.0        5.0
0.1            0.49495            83.3       12.5
0.2            0.68348            88.2       27.8
0.3            0.72364           101.7       39.9
0.4            0.75004           107.0       56.6
0.5            0.77715           114.0       67.5
0.6            0.80396           124.0       95.4
0.7            0.81786           135.6      147.9
0.8            0.84298           266.8      274.0
0.9            0.89179           772.6      609.4
1.0            0.95160          1189.0     1774.0
     average_retention  median_delta_t  stability
0.0            0.71720           103.0      245.0
0.1            0.85640           303.6     1282.7
0.2            0.86676           408.2     2253.6
0.3            0.92130           451.7     2734.6
0.4            0.93700           467.4     8298.0
0.5            0.95375           481.0    15024.0
0.6            0.96104           52

In [5]:
print("Unit: seconds\n")

# Metric columns for the rows whose r_history is exactly "1,3".
rows_1_3 = df.loc[
    df["r_history"] == "1,3",
    ["average_retention", "median_delta_t", "stability"],
]

# Summary statistics first, then quantiles at 0.0, 0.1, ..., 1.0.
print(rows_1_3.agg(["mean", "median", "std"]))
print(rows_1_3.quantile(np.linspace(0, 1, 11)))

Unit: seconds

        average_retention  median_delta_t      stability
mean             0.911762    64036.911765   97893.676471
median           0.917050      814.500000   12656.000000
std              0.069775   239638.951745  357581.096588
     average_retention  median_delta_t  stability
0.0            0.73390            75.0      249.0
0.1            0.81178           254.4      634.7
0.2            0.87462           593.6     2476.4
0.3            0.89058           695.4     5206.4
0.4            0.90292           747.6     6605.6
0.5            0.91705           814.5    12656.0
0.6            0.94128           909.6    21780.8
0.7            0.94994          1115.2    40773.0
0.8            0.97622          2494.6    53975.4
0.9            0.98898         85174.0   119328.7
1.0            1.00000       1309002.0  2089169.0


In [6]:
print("Unit: seconds\n")

# Metric columns for the rows whose r_history is exactly "1,2".
rows_1_2 = df.loc[
    df["r_history"] == "1,2",
    ["average_retention", "median_delta_t", "stability"],
]

# Summary statistics first, then quantiles at 0.0, 0.1, ..., 1.0.
print(rows_1_2.agg(["mean", "median", "std"]))
print(rows_1_2.quantile(np.linspace(0, 1, 11)))

Unit: seconds

        average_retention  median_delta_t     stability
mean             0.831676    34050.882353  20756.588235
median           0.856100      535.000000   1430.000000
std              0.125670   124158.015206  60801.567192
     average_retention  median_delta_t  stability
0.0            0.51380           120.0       51.0
0.1            0.64695           353.2      233.5
0.2            0.76114           391.2      333.6
0.3            0.79114           433.9      522.0
0.4            0.82952           471.0      815.0
0.5            0.85610           535.0     1430.0
0.6            0.89684           606.4     2289.8
0.7            0.91321           704.9     4246.1
0.8            0.93022           925.6    14225.0
0.9            0.96508         93312.0    59876.6
1.0            1.00000        712927.0   342208.0


In [7]:
print("Unit: seconds\n")

# Metric columns for the rows whose r_history is exactly "3,2".
rows_3_2 = df.loc[
    df["r_history"] == "3,2",
    ["average_retention", "median_delta_t", "stability"],
]

# Summary statistics first, then quantiles at 0.0, 0.1, ..., 1.0.
print(rows_3_2.agg(["mean", "median", "std"]))
print(rows_3_2.quantile(np.linspace(0, 1, 11)))

Unit: seconds

        average_retention  median_delta_t      stability
mean             0.941659    86317.264706  309934.441176
median           0.978850      968.000000   69605.500000
std              0.081959   302474.196264  774513.630766
     average_retention  median_delta_t  stability
0.0            0.71790            39.0      162.0
0.1            0.80831           330.8     5667.6
0.2            0.88720           548.0    10412.8
0.3            0.95812           750.8    20345.3
0.4            0.96358           817.8    40420.2
0.5            0.97885           968.0    69605.5
0.6            0.98818          1216.4    96166.4
0.7            0.99516          1521.1   135397.4
0.8            1.00000         28610.8   251215.6
0.9            1.00000         87755.6   707867.4
1.0            1.00000       1609473.0  4283660.0
