In [32]:
import os
import ipywidgets as widgets
import pandas as pd
import numpy as np
import re
from transformers import TextDataset, DataCollatorForLanguageModeling
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from transformers import Trainer, TrainingArguments
from transformers import PreTrainedTokenizerFast, GPT2LMHeadModel, GPT2TokenizerFast, GPT2Tokenizer


In [2]:
trained_model_folder = "./out"
language_pair_staging_folder = "../data/magic_token_folder/"

In [33]:
if os.path.exists(trained_model_folder) and os.path.isdir(trained_model_folder):
    # Get a list of all subfolders in language_pair_staging_folder
    subfolders = [folder for folder in os.listdir(trained_model_folder) if os.path.isdir(os.path.join(trained_model_folder, folder))]

    # Filter subfolders that end with "_model"
    model_folders = [folder for folder in subfolders if folder.endswith("_model") or folder.endswith( "_model_step" ) ]

    # Print or use the list of model folders
    print("Folders ending with '_model':", model_folders)
else:
    print(f"The folder '{trained_model_folder}' does not exist or is not a directory.")

Folders ending with '_model': ['bsb_model_step', 'greek_model', 'greek_model_step', 'target_model_step', 'bsb_model', 'target_model', 'hebrew_model']


# Inference
copying code from https://www.kaggle.com/code/changyeop/how-to-fine-tune-gpt-2-for-beginners/notebook#Step-3.-Inference

In [23]:
def load_model(model_path):
    model = GPT2LMHeadModel.from_pretrained(model_path)
    return model


def load_tokenizer(tokenizer_path):
    tokenizer = GPT2Tokenizer.from_pretrained(tokenizer_path)
    return tokenizer


def generate_text(sequence, max_length, model_path ):
    model = load_model(model_path)
    tokenizer = load_tokenizer(model_path)
    ids = tokenizer.encode(f'{sequence}', return_tensors='pt')
    print( f"Sequence ids {ids}" )
    final_outputs = model.generate(
        ids,
        do_sample=True,
        max_length=max_length,
        pad_token_id=model.config.eos_token_id,
        top_k=50,
        top_p=0.95,
    )
    print(tokenizer.decode(final_outputs[0], skip_special_tokens=False))

In [34]:
selected_model_dropdown = Dropdown(options=model_folders)
print( "Select which model to test" )
display(selected_model_dropdown)

Select which model to test


Dropdown(options=('bsb_model_step', 'greek_model', 'greek_model_step', 'target_model_step', 'bsb_model', 'targ…

In [30]:
generate_text( "[MAT 1:1_a][MAT 1:1_b]", 100, os.path.join( trained_model_folder, selected_model_dropdown.value ) )

Sequence ids tensor([[96547, 96548]])
[MAT 1:1_a] [MAT 1:1_b] οὐκ  ἐστιν  λέγουσιν  ὅτι  ἐν  τῷ  Θεῷ  ἐσχάτων· ἀμελφοί, ἔχω  ἐστιν. καὶ  οἱ


In [28]:
generate_text( "[MAT 1:1_a][MAT 1:1_b]", 100, os.path.join( trained_model_folder, selected_model_dropdown.value ) )

Sequence ids tensor([[96547, 96548]])
[MAT 1:1_a] [MAT 1:1_b] He also sent his daughter, a beautiful virgin from Jerusalem, to the city where the king had sent him. She was twenty-three years old, and the daughter of King Darius, the daughter of Zadok the Jezreelite. She stayed there with the servants of King Darius, the master of the temple. When Queen Esther saw that his daughter was with the king, she was greatly alarmed. And the maidservant who stayed with the king said to the king,


In [31]:
generate_text( "[MAT 1:1_a][MAT 1:1_b]", 100, os.path.join( trained_model_folder, selected_model_dropdown.value ) )

Sequence ids tensor([[96547, 96548]])
[MAT 1:1_a] [MAT 1:1_b] So they made a covenant of peace with their fathers—one lawgiver at the base of Pisgah, one shepherd at Shechem, and one at the ridge of Heshbon. And it was faithfully followed. [2SA 15:30_a] [JOB 14:7_b] Then the sons of Jacob swore an oath to [GEN 30:1_a] [EZR 10:17_b] their fathers, saying: ‘I will never again oppress you or make you a stench to the Israelites.’ And they did so as their fathers had commanded, until the day that


In [35]:
generate_text( "[MAT 1:1_a][MAT 1:1_b]", 100, os.path.join( trained_model_folder, selected_model_dropdown.value ) )

Sequence ids tensor([[96547, 96548]])
[MAT 1:1_a] [MAT 1:1_b] Yisa woroghe, “Anin woro nari unan kifoghe,” Ndin bellu minu nnuf ugo na nan nkoli me wadin suwe.” Udiya ghe gono kadura Kutelle anin woro nono kadura me. Ndin sozu ghe. Nanya kubi Refugees, anin woro nono kadura me,”nmini
