# HEAD

In [1]:
%reload_ext autoreload
%autoreload 1
%aimport commons
%aimport articles_parser
%aimport generate_session
import json
from random import Random
from pathlib import Path
from supabase import create_client, Client
from dotenv import dotenv_values
import commons
import articles_parser
import generate_session

class K:
  """Notebook-wide constants."""
  # Directory the notebook reads "test-1.txt" from and writes "test-1.json" to.
  # NOTE(review): hard-coded, machine-specific absolute path -- adjust it (or
  # make it configurable) before running this notebook on another machine.
  root: Path = Path(r"C:\Users\barco\OneDrive\docu\sugarbear")

# Generate Example

In [13]:
# Read the article source explicitly as UTF-8 instead of relying on the
# platform default encoding (which is not UTF-8 on Windows); the content is
# presumably non-ASCII, like the Chinese fixtures used by the tests in this
# notebook. read_text() also closes the file, which open(...).read() did not.
articles = articles_parser.parseArticles(
  (K.root / "test-1.txt").read_text(encoding="utf-8"))
# Build the words base from the parsed articles; it is passed to every
# generateQuizArticle call when the example session is generated.
wordsBase = commons.WordsBase.from_articles(articles)
def gen_options(index: int):
  """Build the GenerateOptions for one generation pass.

  `index` is used as the random seed, so each pass over the articles is
  reproducible while different passes get different seeds.
  """
  # Relative weights per quiz kind: ordering is weighted twice as heavily as
  # filling or selection.
  kind_weights = {
    commons.QuizKind.filling: .5,
    commons.QuizKind.selection: .5,
    commons.QuizKind.ordering: 1.,
  }
  return generate_session.GenerateOptions(
    preferred_density_per_chars=.01,
    allow_weak_segmentation=True,
    quiz_kind_weights=kind_weights,
    random_seed=index)
# Generate every article once per seed, seed-major (all articles for seed 0,
# then all articles for seed 2). This is the same order the previous
# sum(list_of_lists, []) produced, without the quadratic list re-copying.
example = [
  generate_session.generateQuizArticle(
    article, gen_options(seed), wordsBase)
  for seed in [0, 2]
  for article in articles
]
# ensure_ascii=False writes non-ASCII characters verbatim, so the file encoding
# must be pinned to UTF-8; the platform default (e.g. cp1252 on Windows) can
# raise UnicodeEncodeError or produce a file that is not valid UTF-8 JSON.
with open(K.root / "test-1.json", 'w', encoding="utf-8") as fp:
  json.dump(example, fp, indent=2, ensure_ascii=False)

# Publish

In [14]:
# Credentials come from the local env file so they never appear in the notebook.
config = dotenv_values("../.env.local")
# Look the two required settings up in the same order as before: a missing
# SUPABASE_URL is reported first, then a missing service-role key.
supabase_url, service_role_key = (
  config[setting]
  for setting in ("SUPABASE_URL", "SUPABASE_SERVICE_ROLE_KEY"))
supabase: Client = create_client(supabase_url, service_role_key)

In [15]:
# Start from a clean slate: delete the `directories` rows named "session" whose
# parent_id is null, then create the folder again through the RPC.
(
  supabase.from_("directories")
  .delete()
  .is_("parent_id", None)
  .eq("name", "session")
  .execute()
)
data = supabase.rpc(
  "insert_folders_quick",
  dict(names=["session"], parent_id=None),
).execute().data
print(f"Session creation: {data}")
# Keep the id of the new folder; the uploaded pages use it as their parent.
folder_id = data[0]["id"]

Session creation: [{'name': 'session', 'id': 228}]


In [16]:
def _as_document(name, payload):
  """Wrap `payload` as a document row: a name plus its pretty-printed JSON."""
  return {
    'name': name,
    'content': json.dumps(payload, indent=2, ensure_ascii=False),
  }

# One document per generated page, named by its index, followed by a "meta"
# document that records how many pages there are.
pages = [_as_document(str(index), page) for index, page in enumerate(example)]
pages.append(_as_document("meta", { 'pages': len(example) }))

upload_args = {
  "documents": pages,
  "parent_id": folder_id,
}
print(supabase.rpc("insert_documents_quick", upload_args).execute().data)

[{'name': '0', 'id': 229}, {'name': '1', 'id': 230}, {'name': '2', 'id': 231}, {'name': '3', 'id': 232}, {'name': '4', 'id': 233}, {'name': '5', 'id': 234}, {'name': '6', 'id': 235}, {'name': '7', 'id': 236}, {'name': '8', 'id': 237}, {'name': '9', 'id': 238}, {'name': '10', 'id': 239}, {'name': '11', 'id': 240}, {'name': '12', 'id': 241}, {'name': '13', 'id': 242}, {'name': '14', 'id': 243}, {'name': '15', 'id': 244}, {'name': 'meta', 'id': 245}]


# Unit Tests

## Parsing

In [2]:
# test shuffle
def x():
  """Exercise generate_session._shuffle with one, several and wildcard answers.

  Each call passes the answer words, three distractor words and a seeded
  Random; the result is the shuffled word list plus the answers expressed as
  indices into that list.
  """
  def spell(words, order):
    # Turn an answer (a list of indices) back into the words it points at.
    return [words[pos] for pos in order]

  # single answer
  target = list("abc")
  words, answers = generate_session._shuffle(target, list("xyz"), Random(1))
  print("got:", words, answers)
  assert len(answers) == 1
  assert len(words) == 6
  assert spell(words, answers[0]) == target

  # test multi answers
  target = list("abc")
  alternatives = [[0, 2, 1], [0, 1, 2], [0, 2, 1]]
  words, answers = generate_session._shuffle(
    target, list("xyz"), Random(1), alternatives)
  print("got:", words, answers)
  assert len(answers) == 4
  assert len(words) == 6
  for which in (0, 2):
    assert spell(words, answers[which]) == target
  for which in (1, 3):
    assert spell(words, answers[which]) == list("acb")

  # test variable answer
  target = list("abc")
  alternatives = [[0, 2, 1], [1, 'x', 'x']]
  words, answers = generate_session._shuffle(
    target, list("xyz"), Random(0), alternatives)
  print("got:", words, answers)
  assert len(answers) == 3
  assert len(words) == 6
  assert spell(words, answers[0]) == target
  assert spell(words, answers[1]) == list("acb")
  # Only the leading index of the wildcard answer is remapped; the 'x'
  # placeholders must come through unchanged.
  head = answers[2][0]
  assert isinstance(head, int) and words[head] == 'b'
  assert answers[2][1:] == ['x', 'x']
x()

got: ['c', 'x', 'z', 'a', 'y', 'b'] [[3, 5, 0]]
got: ['c', 'x', 'z', 'a', 'y', 'b'] [[3, 5, 0], [3, 0, 5], [3, 5, 0], [3, 0, 5]]
got: ['y', 'c', 'b', 'a', 'z', 'x'] [[3, 2, 1], [3, 1, 2], [2, 'x', 'x']]


In [3]:
# test basic parsing
def x():
    """Parse a line with one two-segment quiz and check text, segments and span."""
    parsed = articles_parser.parseLine("基本[测/试]")
    for quiz in parsed.quizzes:
        print(quiz)
    print(parsed.text)
    # The markup is stripped from the plain text.
    assert parsed.text == "基本测试"
    assert parsed.quizzes[0].segments[0].separator_level == 0
    assert parsed.quizzes[0].segments[1].content == "试"
    # The quiz covers characters 2..4 of the stripped text.
    span = (parsed.quizzes[0].text_start, parsed.quizzes[0].text_end)
    assert span == (2, 4)
x()

QuizItem(segments=[QuizItemSegment(content='测', separator_level=0), QuizItemSegment(content='试', separator_level=0)], exclusive_id=None, link_id=None, only_kinds=[], text_start=2, text_end=4, alternative_answers=[])
基本测试


In [4]:
# test exclusive
def x():
    """Check exclusive ids, both explicit (==m00) and assigned through nesting."""
    # Explicit ids: every quiz except the plain "和" one carries id "00".
    parsed = articles_parser.parseLine(
        "[嵌[==m00 套]][和][==m00 互斥]")
    for quiz in parsed.quizzes:
        print(quiz)
    print(parsed.text)
    for quiz in parsed.quizzes:
        if quiz.text() != "和":
            assert quiz.exclusive_id == "00"
        else:
            assert quiz.exclusive_id is None

    # Nested quizzes without an explicit id: all of them must share one
    # non-None exclusive id.
    parsed = articles_parser.parseLine(
        "[[嵌套]自动[[互斥]]]")
    for quiz in parsed.quizzes:
        print(quiz)
    print(parsed.text)
    shared_id = parsed.quizzes[0].exclusive_id
    assert shared_id is not None
    assert all(quiz.exclusive_id == shared_id for quiz in parsed.quizzes)
x()

QuizItem(segments=[QuizItemSegment(content='嵌', separator_level=0), QuizItemSegment(content='套', separator_level=0)], exclusive_id='00', link_id=None, only_kinds=[], text_start=0, text_end=2, alternative_answers=[])
QuizItem(segments=[QuizItemSegment(content='套', separator_level=0)], exclusive_id='00', link_id=None, only_kinds=[], text_start=1, text_end=2, alternative_answers=[])
QuizItem(segments=[QuizItemSegment(content='和', separator_level=0)], exclusive_id=None, link_id=None, only_kinds=[], text_start=2, text_end=3, alternative_answers=[])
QuizItem(segments=[QuizItemSegment(content='互斥', separator_level=0)], exclusive_id='00', link_id=None, only_kinds=[], text_start=3, text_end=5, alternative_answers=[])
嵌套和互斥
QuizItem(segments=[QuizItemSegment(content='嵌套', separator_level=0), QuizItemSegment(content='自动', separator_level=0), QuizItemSegment(content='互斥', separator_level=0)], exclusive_id=2334, link_id=None, only_kinds=[], text_start=0, text_end=6, alternative_answers=[])
QuizItem

In [5]:
# test linking
def x():
    """Quizzes tagged ==l00 get link_id "00"; the untagged quiz gets none."""
    parsed = articles_parser.parseLine(
        "[==l00 这个]和[==l00 这/个]同时[出]现")
    for quiz in parsed.quizzes:
        print(quiz)
    print(parsed.text)
    # The first two quizzes carry the tag, the third does not.
    assert parsed.quizzes[0].link_id == "00"
    assert parsed.quizzes[1].link_id == "00"
    assert parsed.quizzes[2].link_id is None
x()

QuizItem(segments=[QuizItemSegment(content='这个', separator_level=0)], exclusive_id=None, link_id='00', only_kinds=[], text_start=0, text_end=2, alternative_answers=[])
QuizItem(segments=[QuizItemSegment(content='这', separator_level=0), QuizItemSegment(content='个', separator_level=0)], exclusive_id=None, link_id='00', only_kinds=[], text_start=3, text_end=5, alternative_answers=[])
QuizItem(segments=[QuizItemSegment(content='出', separator_level=0)], exclusive_id=None, link_id=None, only_kinds=[], text_start=7, text_end=8, alternative_answers=[])
这个和这个同时出现


In [6]:
# test type limits
def x():
    """A ==kfo tag limits the quiz to the filling and ordering kinds."""
    parsed = articles_parser.parseLine("限定[==kfo 类型]呢")
    for quiz in parsed.quizzes:
        print(quiz)
    print(parsed.text)
    allowed_kinds = parsed.quizzes[0].only_kinds
    assert allowed_kinds == [
        commons.QuizKind.filling, commons.QuizKind.ordering]
x()

QuizItem(segments=[QuizItemSegment(content='类型', separator_level=0)], exclusive_id=None, link_id=None, only_kinds=[<QuizKind.filling: 'f'>, <QuizKind.ordering: 'o'>], text_start=2, text_end=4, alternative_answers=[])
限定类型呢


In [7]:
# test alternative answers
def x():
    """Check that ==a tags are parsed into alternative answer orders.

    Digits become integer positions; an 'x' stays a literal 'x' placeholder.
    """
    cases = [
        # several comma-separated alternative orders
        ("[==a0132,0213,0231 甲/乙/丙/丁/的顺序]不那么重要",
         [[0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1]]),
        # one alternative made of placeholders plus a fixed last position
        ("[==axxxx4 甲/乙/丙/丁/的顺序]不那么重要",
         [['x', 'x', 'x', 'x', 4]]),
    ]
    for line, expected in cases:
        parsed = articles_parser.parseLine(line)
        for quiz in parsed.quizzes:
            print(quiz)
        print(parsed.text)
        assert parsed.quizzes[0].alternative_answers == expected
x()

QuizItem(segments=[QuizItemSegment(content='甲', separator_level=0), QuizItemSegment(content='乙', separator_level=0), QuizItemSegment(content='丙', separator_level=0), QuizItemSegment(content='丁', separator_level=0), QuizItemSegment(content='的顺序', separator_level=0)], exclusive_id=None, link_id=None, only_kinds=[], text_start=0, text_end=7, alternative_answers=[[0, 1, 3, 2], [0, 2, 1, 3], [0, 2, 3, 1]])
甲乙丙丁的顺序不那么重要
QuizItem(segments=[QuizItemSegment(content='甲', separator_level=0), QuizItemSegment(content='乙', separator_level=0), QuizItemSegment(content='丙', separator_level=0), QuizItemSegment(content='丁', separator_level=0), QuizItemSegment(content='的顺序', separator_level=0)], exclusive_id=None, link_id=None, only_kinds=[], text_start=0, text_end=7, alternative_answers=[['x', 'x', 'x', 'x', 4]])
甲乙丙丁的顺序不那么重要


In [8]:
# generic test
def x():
    """Parse a realistic sentence mixing nesting, exclusive ids and separator levels."""
    parsed = articles_parser.parseLine(
        "猫（拉丁学名：Felis silvestris catus），是[==m00 食肉目[猫科/猫属]]的[脊索//动物]。猫体型小，体色由[[==ks 蓝灰]//色]到棕黄色，体型瘦削，身长0.3-0.5米")
    for quiz in parsed.quizzes:
        print(quiz)
    print(parsed.text)
    # The nested [猫科/猫属] quiz inherits the exclusive id of its ==m00 parent.
    nested = parsed.quizzes[1]
    assert nested.segments[1].content == "猫属"
    assert nested.exclusive_id == "00"
    # A double slash yields a segment with separator level 1.
    assert parsed.quizzes[2].segments[1].separator_level == 1
x()

QuizItem(segments=[QuizItemSegment(content='食肉目', separator_level=0), QuizItemSegment(content='猫科', separator_level=0), QuizItemSegment(content='猫属', separator_level=0)], exclusive_id='00', link_id=None, only_kinds=[], text_start=32, text_end=39, alternative_answers=[])
QuizItem(segments=[QuizItemSegment(content='猫科', separator_level=0), QuizItemSegment(content='猫属', separator_level=0)], exclusive_id='00', link_id=None, only_kinds=[], text_start=35, text_end=39, alternative_answers=[])
QuizItem(segments=[QuizItemSegment(content='脊索', separator_level=0), QuizItemSegment(content='动物', separator_level=1)], exclusive_id=None, link_id=None, only_kinds=[], text_start=40, text_end=44, alternative_answers=[])
QuizItem(segments=[QuizItemSegment(content='蓝灰', separator_level=0), QuizItemSegment(content='色', separator_level=1)], exclusive_id=2335, link_id=None, only_kinds=[], text_start=53, text_end=56, alternative_answers=[])
QuizItem(segments=[QuizItemSegment(content='蓝灰', separator_level=0)], 

## Generating

In [11]:
# test basic
def x():
    """Generate one article, first with filling-only weights, then selection-only.

    With only filling weighted, the ==ks quiz is left as plain text; with only
    selection weighted, both quizzes are emitted.
    """
    source = (
        "==1标题\n"
        "这是[一\\只/猫\\咪]\n"
        "这是[==ks 另一只]")
    article = articles_parser.parseArticles(source)[0]
    options = generate_session.GenerateOptions(
        preferred_density_per_chars=1,
        quiz_kind_weights={commons.QuizKind.filling: 1.})
    words_base = commons.WordsBase([list("1234")])

    # filling only
    generated = generate_session.generateQuizArticle(
        article, options, words_base)
    print(generated)
    assert generated["text"] == "这是${一只猫咪}\n这是另一只\n"
    assert len(generated["quizzes"]) == 1
    only_quiz = generated["quizzes"][0]
    assert only_quiz["answers"] == [[0, 1]]
    assert only_quiz["entries"] == ['一\\只', '猫\\咪']

    # selection only: reuse the same options object with new weights
    options.quiz_kind_weights = {commons.QuizKind.selection: 1.}
    generated = generate_session.generateQuizArticle(
        article, options, words_base)
    print(generated)
    assert generated["text"] == "这是${一只猫咪}\n这是${另一只}\n"
    second_quiz = generated["quizzes"][1]
    chosen = second_quiz["answers"][0][0]
    assert second_quiz["entries"][chosen] == "另一只"
x()

{'title': '标题', 'text': '这是${一只猫咪}\n这是另一只\n', 'quizzes': [{'kind': 'filling', 'answers': [[0, 1]], 'entries': ['一\\只', '猫\\咪']}]}
{'title': '标题', 'text': '这是${一只猫咪}\n这是${另一只}\n', 'quizzes': [{'kind': 'selection', 'answers': [[0]], 'entries': ['一只猫咪', '1234']}, {'kind': 'selection', 'answers': [[0]], 'entries': ['另一只', '1234']}]}


In [12]:
# alternative answers
def x():
    """Check how an ==a10 alternative order shows up for each quiz kind."""
    source = (
        "==1标题\n"
        "这是[==a10 一\\只/猫\\咪]\n"
        "这是[==ks 另一只]")
    article = articles_parser.parseArticles(source)[0]
    options = generate_session.GenerateOptions(
        preferred_density_per_chars=1,
        quiz_kind_weights={commons.QuizKind.filling: 1.})
    words_base = commons.WordsBase([list("1234")])

    # filling: the original order and the alternative are both accepted
    generated = generate_session.generateQuizArticle(
        article, options, words_base)
    print(generated)
    assert generated["text"] == "这是${一只猫咪}\n这是另一只\n"
    assert len(generated["quizzes"]) == 1
    assert generated["quizzes"][0]["answers"] == [[0, 1], [1, 0]]

    # selection: the correct entry is the joined text in its original order
    options.quiz_kind_weights = {commons.QuizKind.selection: 1.}
    generated = generate_session.generateQuizArticle(
        article, options, words_base)
    print(generated)
    assert generated["text"] == "这是${一只猫咪}\n这是${另一只}\n"
    first_quiz = generated["quizzes"][0]
    chosen = first_quiz["answers"][0][0]
    assert first_quiz["entries"][chosen] == "一只猫咪"

    # ordering: answer 0 spells the original order, answer 1 the alternative
    options.quiz_kind_weights = {commons.QuizKind.ordering: 1.}
    generated = generate_session.generateQuizArticle(
        article, options, words_base)
    print(generated)
    first_quiz = generated["quizzes"][0]
    for answer_no, reading in enumerate(["一只猫咪", "猫咪一只"]):
        spelled = ''.join(
            first_quiz["entries"][i] for i in first_quiz["answers"][answer_no])
        assert spelled == reading
x()

{'title': '标题', 'text': '这是${一只猫咪}\n这是另一只\n', 'quizzes': [{'kind': 'filling', 'answers': [[0, 1], [1, 0]], 'entries': ['一\\只', '猫\\咪']}]}
{'title': '标题', 'text': '这是${一只猫咪}\n这是${另一只}\n', 'quizzes': [{'kind': 'selection', 'answers': [[0]], 'entries': ['一只猫咪', '1234']}, {'kind': 'selection', 'answers': [[0]], 'entries': ['另一只', '1234']}]}
{'title': '标题', 'text': '这是${一只猫咪}\n这是另一只\n', 'quizzes': [{'kind': 'ordering', 'answers': [[0, 1], [1, 0]], 'entries': ['一只', '猫咪', '4', '2']}]}
