[![](https://colab.research.google.com/assets/colab-badge.svg)
](https://colab.research.google.com/github/open-spaced-repetition/fsrs4anki/blob/main/fsrs4anki_optimizer.ipynb)

# FSRS4Anki v1.0.3 Optimizer

Upload your **Anki Deck Package (.apkg)** file or **Anki Collection Package (.colpkg)** file. You do not need to include media, but you must include scheduling information.

> If you use the latest version of Anki, please check the box `Support older Anki versions (slower/larger files)` when you export.

You can export it via `File -> Export...` or `Ctrl + E` in the main window of Anki.

Then replace the `filename` with yours in the next code cell. And set the `timezone` and `next_day_starts_at` which can be found in your preferences of Anki.

After that, just run all (`Runtime -> Run all` or `Ctrl + F9`) and wait a few minutes. You can see the optimal parameters in section **3 Result**. Copy them, replace the parameters in `fsrs4anki_scheduler.js`, and paste them into the custom scheduling of your deck options (requires Anki version >= 2.1.55).

**NOTE**: The default output is generated from my review logs. If your output is identical to mine, your notebook has probably not actually run.

In [None]:
# Here are some settings that you need to replace before running this optimizer.

# Name of the uploaded archive; it is opened as a zip file relative to the
# current directory.
filename = "ALL__Learning.apkg" 
# If you upload a deck file, replace it with your deck filename. E.g., ALL__Learning.apkg
# If you upload a collection file, replace it with your colpkg filename. E.g., collection-2022-09-18@13-21-58.colpkg

# Review timestamps are converted from UTC to this timezone.
timezone = 'Asia/Shanghai'  # Replace it with your timezone. I'm in China, so I use Asia/Shanghai.

# Reviews whose local hour is below this value are counted as the previous day.
next_day_starts_at = 4  # Replace it with your Anki's setting in Preferences -> Scheduling.

# Must be in YYYY-MM-DD form; it is parsed with the "%Y-%m-%d" format.
revlog_start_date = "2006-10-05"  # Replace it if you don't want the optimizer to use the review logs before a specific date.

## 1 Build dataset

### 1.1 Extract Anki collection & deck file

In [None]:
import zipfile
# Extract the collection file or deck file to get the .anki21 database.
# Everything in the archive is unpacked into the current working directory;
# the preprocessing cell then looks for collection.anki21 / collection.anki2 there.
with zipfile.ZipFile(f'./{filename}', 'r') as zip_ref:
    zip_ref.extractall('./')
    print("Extract successfully!")

Extract successfully!


In [None]:
import sqlite3
import time
import tqdm
import pandas as pd
import os
from datetime import timedelta, datetime
from tqdm import tqdm

### 1.2 Create time-series feature

The following code cell will extract the review logs from your Anki collection and preprocess them to a trainset which is saved in `revlog_history.tsv`.

 The time-series features are important in optimizing the model's parameters. For more detail, please see my paper: https://www.maimemo.com/paper/

In [None]:
# Locate the extracted collection database. A collection.anki21b file is not
# handled by this notebook: it is deleted and the user is asked to re-export.
if os.path.isfile("collection.anki21b"):
    os.remove("collection.anki21b")
    raise Exception("Please export the file with `support older Anki versions` if you use the latest version of Anki.")
elif os.path.isfile("collection.anki21"):
    con = sqlite3.connect("collection.anki21")
elif os.path.isfile("collection.anki2"):
    con = sqlite3.connect("collection.anki2")
else:
    raise Exception("Collection not exist!")

# Read the whole review log, and always release the database handle afterwards.
try:
    cur = con.cursor()
    res = cur.execute("SELECT * FROM revlog")
    revlog = res.fetchall()
finally:
    con.close()

df = pd.DataFrame(
    revlog,
    columns=['id', 'cid', 'usn', 'r', 'ivl', 'last_lvl', 'factor', 'time', 'type'],
)

# `id` and `cid` are compared against millisecond timestamps: drop rows dated
# in the future and rows before `revlog_start_date`.
# time.strptime is used instead of datetime.strptime because a later cell runs
# `import datetime`, which rebinds the name `datetime` to the module and would
# make this cell fail when it is re-run.
now_ms = time.time() * 1000
start_ms = time.mktime(time.strptime(revlog_start_date, "%Y-%m-%d")) * 1000
df = df[(df['cid'] <= now_ms) &
        (df['id'] <= now_ms) &
        (df['id'] >= start_ms)].copy()

# Convert the millisecond timestamps to timezone-aware datetimes.
df['create_date'] = pd.to_datetime(df['cid'] // 1000, unit='s')
df['create_date'] = df['create_date'].dt.tz_localize('UTC').dt.tz_convert(timezone)
df['review_date'] = pd.to_datetime(df['id'] // 1000, unit='s')
df['review_date'] = df['review_date'].dt.tz_localize('UTC').dt.tz_convert(timezone)
df.sort_values(by=['cid', 'id'], inplace=True, ignore_index=True)
df.to_csv("revlog.csv", index=False)
print("revlog.csv saved!")

# Only revlog entries of type 0 or 1 are used for training.
df = df[df['type'].isin([0, 1])].copy()

# Reviews made before `next_day_starts_at` o'clock belong to the previous day.
df['real_date'] = df['review_date'].map(lambda x: x - timedelta(days=1) if x.hour < next_day_starts_at else x)
df['real_date'] = df['real_date'].dt.floor('D')
df.drop(df[df['real_date'].dt.year < 2006].index, inplace=True)
# Keep only the first review of each card on each day.
df.drop_duplicates(['cid', 'real_date'], keep='first', inplace=True)

# Days elapsed since the previous row. The very first row has no predecessor,
# so diff() yields NaN there; fill it with 0 instead of letting dropna()
# silently discard that review. (The first row of every card is reset to 0 in
# get_feature anyway, so differences across card boundaries are harmless.)
df['delta_t'] = df['real_date'].diff().dt.days
df['delta_t'] = df['delta_t'].fillna(0)
df.dropna(inplace=True)  # still removes rows with missing values in other columns
df['delta_t'] = df['delta_t'].astype(dtype=int)

# Columns filled in per card by get_feature.
df['i'] = 1
df['r_history'] = ""
df['t_history'] = ""
# Column name -> positional index, used by get_feature for .iloc access.
col_idx = {key: i for i, key in enumerate(df.columns)}


# code from https://github.com/L-M-Sherlock/anki_revlog_analysis/blob/main/revlog_analysis.py
def get_feature(x):
    """Fill the per-card history columns of one card's review rows, in place.

    :param x: DataFrame holding the rows of a single card, in review order.
        Column positions are looked up through the module-level ``col_idx``.
    :return: the same DataFrame, where
        - ``delta_t`` of the first row is set to 0,
        - ``i`` of each later row is the previous row's ``i`` plus one,
        - ``t_history`` / ``r_history`` of each later row are the previous
          row's history with ``,<previous delta_t>`` / ``,<previous r>``
          appended (so every non-empty history starts with a comma).
    """
    n_rows = x.shape[0]
    if n_rows == 0:
        return x

    # The first review of a card has no preceding interval.
    x.iloc[0, col_idx['delta_t']] = 0
    if n_rows == 1:
        return x

    i_col = col_idx['i']
    dt_col = col_idx['delta_t']
    rating_col = col_idx['r']
    t_hist_col = col_idx['t_history']
    r_hist_col = col_idx['r_history']

    # Each row is derived from the (already updated) row just before it.
    for pos in range(1, n_rows):
        prev = pos - 1
        x.iloc[pos, i_col] = x.iloc[prev, i_col] + 1
        x.iloc[pos, t_hist_col] = f"{x.iloc[prev, t_hist_col]},{x.iloc[prev, dt_col]}"
        x.iloc[pos, r_hist_col] = f"{x.iloc[prev, r_hist_col]},{x.iloc[prev, rating_col]}"
    return x


# Register progress_apply on pandas objects, then build the history columns
# card by card.
tqdm.pandas()
df = df.groupby('cid', as_index=False).progress_apply(get_feature)

# get_feature prefixes every appended entry with a comma, so any non-empty
# history starts with one; strip that leading separator.
for history_col in ("t_history", "r_history"):
    df[history_col] = df[history_col].map(lambda h: h if len(h) <= 1 else h[1:])

df.to_csv('revlog_history.tsv', sep="\t", index=False)
print("Trainset saved!")

revlog.csv saved!


100%|██████████| 5166/5166 [00:57<00:00, 90.28it/s] 


Trainset saved!


In [None]:
import math
import sys
import torch
import datetime
import numpy as np
import matplotlib.pyplot as plt
from torch import nn
from sklearn.utils import shuffle

The default parameters of FSRS.

In [None]:
# Starting values for the trainable FSRS parameters. The FSRS model reads them
# when it is constructed, and the training cell overwrites these names with
# the learned values once optimization has finished.
defaultDifficulty = 5
defaultStability = 2
increaseFactor = 3
difficultyDecay = -0.7
stabilityDecay = -0.2
retrievabilityFactor = 1
lapsesBase = -0.3

## 2 Optimize parameter

### 2.1 Define the model

FSRS is a time-series model for predicting memory states.

In [None]:
class FSRS(nn.Module):
    """Recurrent memory model: one call to ``forward`` consumes one review.

    Trainable parameters:
        f_s: initial stability (one element).
        f_d: initial difficulty (one element).
        s_w: five weights, in order: increaseFactor, difficultyDecay,
            stabilityDecay, retrievabilityFactor, lapsesBase.
    """

    def __init__(self):
        super().__init__()
        # init stability
        self.f_s = nn.Parameter(torch.FloatTensor([defaultStability]))
        # init difficulty
        self.f_d = nn.Parameter(torch.FloatTensor([defaultDifficulty]))
        self.s_w = nn.Parameter(torch.FloatTensor([
            increaseFactor,
            difficultyDecay,
            stabilityDecay,
            retrievabilityFactor,
            lapsesBase,
        ]))
        # Sentinel state passed in for a card that has no memory state yet.
        self.zero = torch.FloatTensor([0.0])

    def forward(self, x, s, d, l):
        '''
        Advance the memory state by one review.

        :param x: [review interval, review response]
        :param s: stability
        :param d: difficulty
        :param l: lapses
        :return: tuple (next stability, next difficulty, next lapses)
        '''
        interval, rating = x[0], x[1]
        # relu(2 - rating) is 1 for rating 1 and 0 for ratings >= 2.
        lapse = torch.relu(2 - rating)

        if torch.equal(s, self.zero):
            # first learn, init memory states
            return (
                self.f_s[0] * 0.25 * torch.pow(2, rating - 1),
                self.f_d[0] - rating + 3,
                lapse,
            )

        # r = 0.9 ** (interval / s)
        r = torch.exp(np.log(0.9) * interval / s)
        # Multiplier applied to the stability when the review was not a lapse.
        growth = (1 + torch.exp(self.s_w[0]) * torch.pow(d + 0.1, self.s_w[1]) *
                  torch.pow(s, self.s_w[2]) *
                  (torch.exp((1 - r) * self.s_w[3]) - 1))
        # Exactly one of the two terms is non-zero for integer ratings:
        # the grown stability, or the post-lapse stability.
        next_s = (1 - lapse) * s * growth + \
            lapse * self.f_s[0] * torch.exp(self.s_w[4] * l)
        next_d = torch.relu(d + r - 0.25 * torch.pow(2, rating - 1) + 0.1)
        next_l = l + lapse
        return next_s, next_d, next_l

    def loss(self, s, t, r):
        """Binary cross-entropy between outcome ``r`` and 0.9 ** (t / s).

        :param s: stability
        :param t: elapsed interval
        :param r: observed outcome (the training loop passes 0 or 1)
        :return: the negative log-likelihood of ``r``
        """
        log_p_recall = np.log(0.9) * t / s
        log_p_forget = torch.log(1 - torch.exp(np.log(0.9) * t / s))
        return - (r * log_p_recall + (1 - r) * log_p_forget)


class WeightClipper(object):
    """Callable that clamps the FSRS parameters of a module into fixed ranges.

    Meant to be handed to ``module.apply(...)`` after an optimizer step.
    Attributes that the module does not have are skipped.
    """

    def __init__(self, frequency=1):
        # Stored only; __call__ does not read it.
        self.frequency = frequency

    def __call__(self, module):
        if hasattr(module, 'f_s'):
            module.f_s.data = module.f_s.data.clamp(0.1, 10)
        if hasattr(module, 'f_d'):
            module.f_d.data = module.f_d.data.clamp(1, 10)
        if hasattr(module, 's_w'):
            # (lower, upper) bound for each entry of s_w, by position.
            bounds = (
                (0.01, 10),   # increaseFactor
                (-1, -0.01),  # difficultyDecay
                (-1, -0.01),  # stabilityDecay
                (0.01, 10),   # retrievabilityFactor
                (-1, -0.01),  # lapsesBase
            )
            weights = module.s_w.data
            for pos, (lower, upper) in enumerate(bounds):
                weights[pos] = weights[pos].clamp(lower, upper)
            module.s_w.data = weights


def lineToTensor(line):
    """Turn a pair of comma-separated history strings into a review tensor.

    :param line: sequence whose first item is the interval history and whose
        second item is the rating history, e.g. ``("0,3,6", "3,3,1")``.
    :return: tensor with one row per rating; column 0 holds the interval and
        column 1 the rating, both parsed with ``int``.
    """
    intervals = line[0].split(',')
    ratings = line[1].split(',')
    tensor = torch.zeros(len(ratings), 2)
    for row, rating in enumerate(ratings):
        tensor[row][0] = int(intervals[row])
        tensor[row][1] = int(rating)
    return tensor

### 2.2 Train the model

The `revlog_history.tsv` generated before will be used for training the FSRS model.

Training takes approximately (the number of logs / 10000) minutes to optimize the parameters.

In [None]:
model = FSRS()
clipper = WeightClipper()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)

# Read the history columns as strings explicitly, so that a file whose
# histories all happen to be single numbers is not type-inferred as numeric
# (lineToTensor calls .split(',') on them).
dataset = pd.read_csv("./revlog_history.tsv", sep='\t', index_col=None,
                      dtype={'t_history': str, 'r_history': str})
# Train only on rows that have a history (i > 1) and a positive elapsed time.
# .copy() makes the filtered frame independent before a column is added to it.
dataset = dataset[(dataset['i'] > 1) & (dataset['delta_t'] > 0)].copy()
dataset['tensor'] = dataset.progress_apply(
    lambda x: lineToTensor((x['t_history'], x['r_history'])), axis=1)
print("Tensorized!")

n_epoch = 1
# Log about ten times per epoch; never 0, which would make the modulo below
# raise ZeroDivisionError on datasets with fewer than ten rows.
print_len = max(1, dataset.shape[0] // 10)
# Rating 1 is treated as a failed recall, ratings 2-4 as a successful one.
recall_label = {1: 0, 2: 1, 3: 1, 4: 1}
# Index of the most recent progress report (-1: none yet).
last_report = -1

model.train()
for k in range(n_epoch):
    dataset = shuffle(dataset, random_state=2022 + k)
    epoch_len = len(dataset)
    for i, (_, row) in enumerate(tqdm(dataset.iterrows(), total=epoch_len, desc="train",file=sys.stdout, colour="red")):
        optimizer.zero_grad()
        # Replay the card's history one review at a time, starting from the
        # "no memory state yet" sentinel.
        state = (model.zero, model.zero, model.zero)
        for input_t in row['tensor']:
            state = model(input_t, *state)
        loss = model.loss(state[0], row['delta_t'], recall_label[row['r']])
        if np.isnan(loss.data.item()):
            # Exception Case
            print(row)
            continue
        loss.backward()
        optimizer.step()
        # Keep the parameters inside their allowed ranges after every step.
        model.apply(clipper)

        step = k * epoch_len + i
        if step % print_len == 0:
            tqdm.write(f"\niteration: {step + 1}")
            for name, param in model.named_parameters():
                tqdm.write(f"{name}: {param}")
            last_report = step // print_len

# Build the checkpoint from the final state. state_dict() hands out references
# rather than copies, so snapshots taken inside the loop did not reliably
# describe any single training step; saving here keeps the file consistent
# with the parameters reported below.
checkpoint = {
    "net": model.state_dict(),
    "optimizer": optimizer.state_dict(),
    "iteration": last_report
}

end = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
torch.save(checkpoint, f'./model-{end}.pth')

# Publish the learned values under the same names as the defaults, rounded to
# four decimals, for the result cell.
defaultStability = round(model.f_s.item(), 4)
defaultDifficulty = round(model.f_d.item(), 4)
increaseFactor, difficultyDecay, stabilityDecay, retrievabilityFactor, lapsesBase = [
    round(value, 4) for value in model.s_w.data.tolist()]

print("\nTraining finished!")

100%|██████████| 56910/56910 [00:08<00:00, 7024.21it/s]


Tensorized!

iteration: 1
f_s: Parameter containing:
tensor([1.9999], requires_grad=True)
f_d: Parameter containing:
tensor([4.9999], requires_grad=True)
s_w: Parameter containing:
tensor([ 3.0001, -0.6999, -0.1999,  1.0001, -0.2999], requires_grad=True)

iteration: 5692
f_s: Parameter containing:
tensor([2.0368], requires_grad=True)
f_d: Parameter containing:
tensor([4.9003], requires_grad=True)
s_w: Parameter containing:
tensor([ 3.0831, -0.6272, -0.1495,  1.0813, -0.2776], requires_grad=True)

iteration: 11383
f_s: Parameter containing:
tensor([2.0937], requires_grad=True)
f_d: Parameter containing:
tensor([4.8334], requires_grad=True)
s_w: Parameter containing:
tensor([ 3.1344, -0.5846, -0.1229,  1.1321, -0.2554], requires_grad=True)

iteration: 17074
f_s: Parameter containing:
tensor([2.1703], requires_grad=True)
f_d: Parameter containing:
tensor([4.7928], requires_grad=True)
s_w: Parameter containing:
tensor([ 3.1569, -0.5707, -0.1132,  1.1538, -0.2170], requires_grad=True)

iter

## 3 Result

After running, copy your optimal FSRS parameters from the output of the next code cell.

The scheduler code of FSRS4Anki is at https://github.com/open-spaced-repetition/fsrs4anki/blob/main/fsrs4anki_scheduler.js

In [None]:
# Print the trained parameters as JavaScript `const` declarations, ready to
# replace the corresponding lines in fsrs4anki_scheduler.js.
print(f"const defaultDifficulty = {defaultDifficulty};")
print(f"const defaultStability = {defaultStability};")
print(f"const difficultyDecay = {difficultyDecay};")
print(f"const stabilityDecay = {stabilityDecay};")
print(f"const retrievabilityFactor = {retrievabilityFactor};")
print(f"const increaseFactor = {increaseFactor};")
print(f"const lapsesBase = {lapsesBase};")

const defaultDifficulty = 4.6179;
const defaultStability = 2.5636;
const difficultyDecay = -0.5913;
const stabilityDecay = -0.1382;
const retrievabilityFactor = 1.1951;
const increaseFactor = 3.201;
const lapsesBase = -0.0562;


You can see the memory states and intervals generated by FSRS, assuming you press `Good` at every review on the due date scheduled by FSRS.

In [None]:
# Used in the preview below to turn a stability into the next interval:
# interval = ln(requestRetention) / ln(0.9) * stability.
requestRetention = 0.9  # recommended setting: 0.8 ~ 0.9

class Collection:
    """Thin wrapper that replays a review history through the trained model."""

    def __init__(self):
        # Uses the module-level `model` produced by the training cell.
        self.model = model

    def states(self, t_history, r_history):
        """Return the memory state after the last review of a history.

        :param t_history: comma-separated intervals, e.g. ``"0,3,6"``
        :param r_history: comma-separated ratings, e.g. ``"3,3,3"``
        :return: the model's output tuple for the final review
        """
        with torch.no_grad():
            reviews = lineToTensor((t_history, r_history))
            # Start from the "no memory state yet" sentinel.
            memory = (self.model.zero, self.model.zero, self.model.zero)
            for review in reviews:
                memory = self.model(review, *memory)
            return memory

# Simulate 15 consecutive reviews that are all rated 3, each taking place
# after the interval computed from the previous memory state.
my_collection = Collection()
t_history = "0"
r_history = "3"  # the first rating of the new card
print("stability, difficulty, lapses")
for step in range(15):
    states = my_collection.states(t_history, r_history)
    print(states)
    # Interval = ln(requestRetention) / ln(0.9) * stability, rounded to whole days.
    next_t = round(float(np.log(requestRetention)/np.log(0.9) * states[0]))
    t_history += f',{next_t}'
    r_history += ",3"
print(t_history)

stability, difficulty, lapses
(tensor(2.5636), tensor(4.6179), tensor(0.))
(tensor(5.8480), tensor(4.6019), tensor(0.))
(tensor(11.7162), tensor(4.5994), tensor(0.))
(tensor(22.3811), tensor(4.5971), tensor(0.))
(tensor(40.2613), tensor(4.5988), tensor(0.))
(tensor(70.2326), tensor(4.5994), tensor(0.))
(tensor(118.7977), tensor(4.5997), tensor(0.))
(tensor(195.5733), tensor(4.5995), tensor(0.))
(tensor(313.6113), tensor(4.5993), tensor(0.))
(tensor(490.7700), tensor(4.5992), tensor(0.))
(tensor(751.1704), tensor(4.5992), tensor(0.))
(tensor(1126.7081), tensor(4.5992), tensor(0.))
(tensor(1659.5579), tensor(4.5992), tensor(0.))
(tensor(2403.5154), tensor(4.5991), tensor(0.))
(tensor(3427.1533), tensor(4.5991), tensor(0.))
0,3,6,12,22,40,70,119,196,314,491,751,1127,1660,2404,3427
