In [1]:
from exllamav2 import(
    ExLlamaV2,
    ExLlamaV2Config,
    ExLlamaV2Cache,
    ExLlamaV2Tokenizer
)

from exllamav2.generator import (
    ExLlamaV2BaseGenerator,
    ExLlamaV2Sampler
)

# Local directory holding the model files to load.
# (The directory name suggests an 8.0 bpw EXL2 quantization of
# vntl-7b-v0.3.1 -- inferred from the path only.)
model_dir = "/home/robbie/sdiff/exllamav2/vntl-7b-v0.3.1-hf-8.0bpw-exl2"

# Point the config at the model directory; prepare() is called before the
# config is handed to the model and tokenizer below.
config = ExLlamaV2Config()
config.model_dir = model_dir
config.prepare()

model = ExLlamaV2(config)

# The cache is created with lazy=True and then passed to load_autosplit(),
# which loads the model.
# NOTE(review): presumably lazy=True defers cache allocation so that
# load_autosplit can place it while splitting across devices -- confirm
# against the exllamav2 documentation.
cache = ExLlamaV2Cache(model, lazy = True)
model.load_autosplit(cache)

tokenizer = ExLlamaV2Tokenizer(config)

# Initialize generator
# The generator ties model, cache and tokenizer together; later cells call
# generator.generate_simple(...) and tokenizer.encode(...).

generator = ExLlamaV2BaseGenerator(model, cache, tokenizer)
generator.warmup()

In [2]:
import sqlite3
# Open the SQLite database used by the later cells (they query the `lines`
# and `translations` tables). The path is relative, so it is resolved against
# the notebook's current working directory.
db = sqlite3.connect("../blume.db")

In [13]:
import re

# Maps the raw speaker value stored in the database to the English speaker
# name used in the prompt. A value of None means "emit no speaker tag"
# (used for the empty speaker).
names = {
    '#Name[1]': 'Mary',
    '': None,
    '？？？': '???',
}

# Fixed prompt prefix: the BOS marker, the <<START>> tag and the character
# metadata block. Translation history and the current line are appended
# after this text when the prompt is assembled.
prompt_start = """<s><<START>>
Name: Mary (メアリ) | Gender: Female
Name: Daniela Brancusi (ダニエラ・ブランクーシ) | Gender: Female
Name: Victor Friedrich (ヴィクトル・フリードリヒ) | Gender: Male
Name: Auguste Muller (オーギュスト・ミュラー) | Gender: Male
Name: Ilya Cantemir (イリヤ・カンテミール) | Gender: Female
Name: Richard Cantemir (リチャード・カンテミール) | Gender: Male | Aliases: Onii-chan (お兄ちゃん)
Name: Jacob Cantemir (ヤコブ・カンテミール) | Gender: Male | Aliases: mayor (村長)
Name: Virginia Moreno (バージニア・モレノ) | Gender: Female
Name: Klaus (クラウス) | Gender: Male
Name: Stefan (ステファン) | Gender: Male
Name: Relm (レルム) | Gender: Male
Name: Gerald Villbervint (ジェラルド・ヴィルベルヴィント) | Gender: Male
Name: Leo (レオ) | Gender: Male
Name: Gilbert (ギルベルト) | Gender: Male
"""

# Matches an optional leading "[speaker]: " tag and captures everything after
# it in group 1 (re.S lets the capture span newlines).
# Written as a raw string so that "\[" and "\]" reach the regex engine as-is
# instead of being parsed as (invalid) string escape sequences.
exp = re.compile(r"(?:\[(?:.+?)\]: )?(.+)", re.S)

# Rolling history of already-translated prompt fragments, oldest first.
seen = []

# Sampling configuration passed to generator.generate_simple() further down.
settings = ExLlamaV2Sampler.Settings()
settings.temperature = 0.6
# top_k override is currently disabled; left here as a toggle.
#settings.top_k = 1
settings.top_p = 0.9

# `session` tags the rows read from / written to the `translations` table;
# `scriptid` selects which script's rows in `lines` are processed.
session = 'vntl-20240206'
scriptid = 104

# Pre-flight check: every distinct speaker of this script must have an entry
# in `names`. Report each missing one and stop the cell before any
# translation work starts.
missing_speakers = [
    speaker
    for (speaker,) in db.cursor().execute(
        "SELECT DISTINCT speaker FROM lines WHERE scriptid = ?", (scriptid,)
    )
    if speaker not in names
]
for speaker in missing_speakers:
    print("idk", speaker)

unknown = bool(missing_speakers)
if unknown:
    # KeyboardInterrupt is used purely to abort the notebook cell.
    raise KeyboardInterrupt

# Translate every line of `scriptid`, reusing translations already stored for
# `session` and generating (then persisting) the missing ones. Each processed
# line is appended to `seen` so later prompts carry it as context.

# Token limits for the prompt and for the generated continuation.
# NOTE(review): 3840 + 256 = 4096, presumably the model's context window --
# confirm against the model config.
MAX_PROMPT_TOKENS = 3840
MAX_NEW_TOKENS = 256

# Placeholder used by the script data for the protagonist's name, and the
# Japanese name substituted for it before the text is shown to the model.
PLAYER_PLACEHOLDER = "#Name[1]"
PLAYER_NAME_JP = "メアリ"

# Unused in this cell; kept in case another cell reads it.
n = 0

# LEFT JOIN so that lines without a stored translation still come back, with
# translation = NULL (None).
# NOTE(review): there is no ORDER BY, so rows arrive in whatever order SQLite
# yields them -- confirm that this matches script order, since the context in
# `seen` depends on it.
query = (
    "SELECT lines.address, lines.speaker, lines.line, translations.translation "
    "FROM lines LEFT JOIN translations "
    "ON lines.scriptid = translations.scriptid "
    "AND lines.address = translations.address "
    "AND translations.session = ? "
    "WHERE lines.scriptid = ?"
)

for address, jpspeaker, line, translation in db.cursor().execute(query, (session, scriptid)):
    line = line.replace(PLAYER_PLACEHOLDER, PLAYER_NAME_JP)

    # Look up the English name with the raw speaker value (the keys of
    # `names` are raw), then turn both speakers into "[name]: " tags.
    enspeaker = names[jpspeaker]
    enspeaker = "" if enspeaker is None else "[{}]: ".format(enspeaker)

    # Apply the same placeholder substitution to the speaker tag as to the
    # line text, so the model never sees the raw "#Name[1]" marker.
    jpspeaker = jpspeaker.replace(PLAYER_PLACEHOLDER, PLAYER_NAME_JP)
    jpspeaker = "" if jpspeaker == "" else "[{}]: ".format(jpspeaker)

    # One shared header for both the cached and the generated path, so the
    # history entries always use the same, well-formed "<<ENGLISH>>" tag.
    header = "<<JAPANESE>>\n{}{}\n<<ENGLISH>> (fidelity = high)\n{}".format(jpspeaker, line, enspeaker)

    if translation is not None:
        # Already translated in this session: only add it to the context.
        nucleus = header + translation + "\n"
        seen.append(nucleus)
        print(enspeaker + translation)
        continue

    nucleus = header
    # Drop the oldest history entries until the prompt fits the token budget.
    # (If the history is empty and the prompt is still too long, pop(0)
    # raises IndexError.)
    while True:
        prompt = ''.join([prompt_start, *seen, nucleus])
        inputs = tokenizer.encode(prompt, encode_special_tokens=True)[0]
        if len(inputs) <= MAX_PROMPT_TOKENS:
            break
        seen.pop(0)

    # generate_simple returns the prompt followed by the continuation; slice
    # the prompt off to keep only the new text.
    # NOTE(review): the "- 3" presumably compensates for the 3-character
    # "<s>" marker not being echoed in the decoded output -- confirm.
    tl = generator.generate_simple(prompt, settings, MAX_NEW_TOKENS, seed=0, encode_special_tokens=True)[len(prompt)-3:].strip()
    nucleus += tl + "\n"
    seen.append(nucleus)
    print(enspeaker + tl)

    # Persist immediately so an interrupted run can resume from the database.
    db.cursor().execute("INSERT OR REPLACE INTO translations VALUES(?, ?, ?, ?)", (session, scriptid, address, tl))
    db.commit()

idk ヴォルマー
idk エミリオ
idk ディメトリオ
idk オリヴィア
idk コンラッド


KeyboardInterrupt: 