In [1]:
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

# Model identifier handed to both from_pretrained calls below.
model_name = "lmg-anon/vntl-7b-v0.3.1-hf"

# bitsandbytes settings: load weights in 4-bit using the "nf4" quantization
# type, with double quantization enabled and bfloat16 as the compute dtype.
nf4_config = BitsAndBytesConfig(
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    load_in_4bit=True,
)

# Tokenizer and quantized model come from the same identifier;
# device_map="auto" leaves weight placement to the library.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=nf4_config,
    device_map="auto",
)

Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

In [2]:
import sqlite3
# Open the SQLite database that the translation cell reads script lines from.
# The path is relative, so it resolves against the kernel's working directory.
db = sqlite3.connect("../blume.db")

In [3]:
import re

# Maps the speaker label stored in the database to the Japanese name that is
# written as a "[name]: " prefix in the prompt. None means the line is emitted
# without any speaker prefix.
names = {
    'Mary': 'メアリ',
    'Narrator': None,
    '???': '？？？',
    'Daniela': 'ダニエラ',
    'Victor': 'ヴィクトル',
    'Auguste': 'オーギュスト',
    'Ilia': 'イリア',
    'Richard': 'リチャード',
    'Jacob': 'ヤコブ'
}

# Fixed header placed at the start of every prompt: one metadata line per
# character (romanized name, Japanese name, gender, optional aliases).
prompt_start = """<<START>>
Name: Mary (メアリ) | Gender: Female
Name: Daniela Brancusi (ダニエラ・ブランクーシ) | Gender: Female
Name: Victor Friedrich (ヴィクトル・フリードリヒ) | Gender: Male
Name: Auguste Muller (オーギュスト・ミュラー) | Gender: Male
Name: Ilia Cantemir (イリア・カンテミール) | Gender: Female
Name: Richard Cantemir (リチャード・カンテミール) | Gender: Male | Aliases: Onii-chan (お兄ちゃん)
Name: Jacob Cantemir (ヤコブ・カンテミール) | Gender: Male | Aliases: mayor (村長)
"""

# Skips an optional leading "[speaker]: " tag and captures everything after it
# as group 1. re.S lets "." match newlines so multi-line text is captured whole.
# Written as a raw string so "\[" and "\]" reach the regex engine verbatim
# instead of being treated as (invalid) Python string escapes.
exp = re.compile(r"(?:\[.+?\]: )?(.+)", re.S)

# Token budgets. The prompt is trimmed to MAX_PROMPT_TOKENS and generation is
# capped at MAX_NEW_TOKENS.
# NOTE(review): 3840 + 256 = 4096, presumably the model's context window -- confirm.
MAX_PROMPT_TOKENS = 3840
MAX_NEW_TOKENS = 256

# Rolling history of already-translated blocks (Japanese line + English
# translation), oldest first. It is replayed in each prompt as context.
seen = []

for address, speaker, line in db.cursor().execute("SELECT address, speaker, line FROM lines WHERE scriptid = 100"):
    # Substitute the player-name placeholder with the protagonist's name.
    line = line.replace("#Name[1]", "メアリ")

    # Speaker prefix; a None entry in `names` means the line carries no prefix.
    # An unknown speaker label raises KeyError.
    jpspeaker = names[speaker]
    jpspeaker = "" if jpspeaker is None else "[" + jpspeaker + "]: "

    nucleus = "<<JAPANESE>>\n" + jpspeaker + line + "\n<<ENGLISH>> (fidelity = high)\n"

    # Drop the oldest history blocks until the prompt fits the token budget.
    while True:
        prompt = ''.join([prompt_start, *seen, nucleus])
        # Send the inputs to wherever the model lives instead of hard-coding a GPU.
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        prompt_len = inputs["input_ids"].shape[1]
        # Stop trimming once the prompt fits, or when no history is left to
        # drop (avoids popping from an empty list on a very long single line).
        if prompt_len <= MAX_PROMPT_TOKENS or not seen:
            break
        seen.pop(0)

    # The length cap belongs on generate(); previously `max_tokens` was passed
    # to batch_decode, where it did not limit generation.
    outputs = model.generate(**inputs, max_new_tokens=MAX_NEW_TOKENS)

    # Decode only the tokens after the prompt rather than slicing the decoded
    # text by len(prompt), which relies on decode(encode(prompt)) == prompt.
    generated = tokenizer.decode(outputs[0, prompt_len:], skip_special_tokens=True).strip()

    # Strip an optional "[speaker]: " tag; an empty generation yields "".
    found = exp.match(generated)
    tl = found[1] if found else ""

    # Remember this line together with its translation as context for later lines.
    nucleus += tl + "\n"
    seen.append(nucleus)
    print(tl)
    # Persisting results is currently disabled:
    #db.cursor().execute("INSERT OR REPLACE INTO translations VALUES('vntl-20240201', 100, ?, ?)", (address, tl))
    #db.commit()

「...」
I prayed to God this morning as well.
Yes. I didn't know what would happen next.
Or perhaps she'd closed her eyes and ears to everything she knew...
「...Fwaaah.」
(Aaaah! Not in front of God!)
She quickly corrected her posture.
Perhaps it was because she'd stayed up late the night before, but she was sleepy this morning.
(I needed to focus on my prayer.)
「...Mary? Where are you, Mary?」
(Oh, that voice...?)
「Ah, Mary. I see you're here.」
「Yes, Dani.」
As I'd expected. Dani had been looking for me.
She was a very beautiful woman. She was the second most revered person in this church, and that's why she knew so much about the church's teachings.
But she didn't seem to be very strict about the rules. Of course, she wasn't doing anything truly wrong.
She'd let me share some of the tasty sweets from the village with her, and she'd let me take a bath for longer than I was supposed to...
She was the kind of person who worried about our well-being and made sure we had time to relax.
My role