Skip to content

Commit

Permalink
Feat/use median in calculating recall cost, forget cost and learn cost
Browse files: browse the repository at this point in the history
  • Loading branch information
L-M-Sherlock committed Apr 25, 2024
1 parent e1ac427 commit b150209
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 31 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"

[project]
name = "FSRS-Optimizer"
version = "4.28.1"
version = "4.28.2"
readme = "README.md"
dependencies = [
"matplotlib>=3.7.0",
Expand Down
69 changes: 39 additions & 30 deletions src/fsrs_optimizer/fsrs_optimizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -597,16 +597,34 @@ def create_time_series(
)

self.recall_costs = np.zeros(3)
recall_card_revlog = recall_card_revlog[
(recall_card_revlog["review_duration"] > 0)
& (df["review_duration"] < 1200000)
]
recall_costs = recall_card_revlog.groupby(by="review_rating")[
"review_duration"
].mean()
].median()
self.recall_costs[recall_costs.index - 2] = recall_costs / 1000

self.state_sequence = np.array(df["review_state"])
self.duration_sequence = np.array(df["review_duration"])
self.state_sequence = np.array(
df[(df["review_duration"] > 0) & (df["review_duration"] < 1200000)][
"review_state"
]
)
self.duration_sequence = np.array(
df[(df["review_duration"] > 0) & (df["review_duration"] < 1200000)][
"review_duration"
]
)
self.learn_cost = round(
df[df["review_state"] == Learning]["review_duration"].sum()
/ len(df["card_id"].unique())
df[
(df["review_state"] == Learning)
& (df["review_duration"] > 0)
& (df["review_duration"] < 1200000)
]
.groupby("card_id")
.agg({"review_duration": "sum"})["review_duration"]
.median()
/ 1000,
1,
)
Expand Down Expand Up @@ -1185,34 +1203,25 @@ def find_optimal_retention(
verbose=True,
):
"""should not be called before predict_memory_states"""
recall_cost = 8
forget_cost = 25

state_block = dict()
state_count = dict()
state_duration = dict()

state_durations = dict()
last_state = self.state_sequence[0]
state_block[last_state] = 1
state_count[last_state] = 1
state_duration[last_state] = self.duration_sequence[0]
for i, state in enumerate(self.state_sequence[1:]):
state_count[state] = state_count.setdefault(state, 0) + 1
state_duration[state] = (
state_duration.setdefault(state, 0) + self.duration_sequence[i]
)
if state != last_state:
state_block[state] = state_block.setdefault(state, 0) + 1
state_durations[last_state] = [self.duration_sequence[0]]
for i, state in enumerate(self.state_sequence[1:], start=1):
if state not in state_durations:
state_durations[state] = []
if state == Review:
state_durations[state].append(self.duration_sequence[i])
else:
if state == last_state:
state_durations[state][-1] += self.duration_sequence[i]
else:
state_durations[state].append(self.duration_sequence[i])
last_state = state

recall_cost = round(state_duration[Review] / state_count[Review] / 1000, 1)

if Relearning in state_count and Relearning in state_block:
forget_cost = round(
state_duration[Relearning] / state_block[Relearning] / 1000
+ recall_cost,
1,
)
recall_cost = round(np.median(state_durations[Review]) / 1000, 1)
forget_cost = round(
np.median(state_durations[Relearning]) / 1000 + recall_cost, 1
)
if verbose:
tqdm.write(f"average time for failed reviews: {forget_cost}s")
tqdm.write(f"average time for recalled reviews: {recall_cost}s")
Expand Down

0 comments on commit b150209

Please sign in to comment.