In [None]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel
from lora import LoRA

# Load the pre-trained GPT-2 base model and its tokenizer from the 'gpt2' checkpoint.
tokenizer = GPT2Tokenizer.from_pretrained('gpt2')
model = GPT2LMHeadModel.from_pretrained('gpt2')

# Wrap the base model in two LoRA objects and load a separate fine-tuned
# checkpoint into each of them.
# NOTE(review): both wrappers receive the same `model` instance. If LoRA
# modifies the wrapped model in place, loading the second checkpoint could
# also affect lora_a -- confirm against the `lora` module, and load a
# separate base model per wrapper if that is the case.
lora_a = LoRA(model)
lora_a.load_pretrained('lora_finetuned_gpt2_a')

lora_b = LoRA(model)
lora_b.load_pretrained('lora_finetuned_gpt2_b')

In [None]:
from sage import sage

def get_target_response(question: str, model, tokenizer, **generate_kwargs) -> str:
    """Generate a model's response to a question and return only the new text.

    Args:
        question: Prompt text to feed to the model.
        model: Object exposing a Hugging Face-style ``generate(**inputs)``
            method.
        tokenizer: Tokenizer used to encode the prompt and decode the output.
        **generate_kwargs: Optional extra keyword arguments forwarded
            unchanged to ``model.generate`` (for example ``max_new_tokens``).
            When omitted, ``generate`` is called exactly as before.

    Returns:
        The decoded continuation of the first returned sequence, without the
        prompt tokens and without special tokens.
    """
    inputs = tokenizer(question, return_tensors='pt')
    # Decoder-only models such as GPT-2 return the prompt tokens followed by
    # the newly generated tokens. Remember the prompt length so the echoed
    # question can be stripped and is not scored/stored as part of the answer.
    prompt_length = inputs['input_ids'].shape[-1]
    outputs = model.generate(**inputs, **generate_kwargs)
    # Only the first sequence of the batch is decoded.
    generated_ids = outputs[0][prompt_length:]
    return tokenizer.decode(generated_ids, skip_special_tokens=True)


def create_training_data(questions, model_a, model_b, tokenizer, threshold):
    """Collect (question, response) pairs whose score exceeds ``threshold``.

    For every question a response is generated from both models and each
    response is scored with ``sage.score(..., use_rots=True)``. Model A's
    response is kept when its score is above the threshold; otherwise model
    B's response is kept when its score is above the threshold; otherwise the
    question contributes nothing.

    Args:
        questions: Iterable of prompts.
        model_a: First candidate model, tried first during selection.
        model_b: Second candidate model, used only when A's score fails.
        tokenizer: Tokenizer passed through to ``get_target_response``.
        threshold: Score a response must strictly exceed to be kept.

    Returns:
        List of ``(question, response)`` tuples, at most one per question.
    """
    selected_pairs = []

    for prompt in questions:
        # Both responses are generated and scored up front, regardless of
        # which one ends up being selected.
        reply_a = get_target_response(prompt, model_a, tokenizer)
        reply_b = get_target_response(prompt, model_b, tokenizer)

        score_a = sage.score(prompt, reply_a, use_rots=True)
        score_b = sage.score(prompt, reply_b, use_rots=True)

        # Candidates are checked in priority order; the first one above the
        # threshold wins.
        # NOTE(review): A is preferred even when B scores higher -- confirm
        # this is intended.
        for reply, score in ((reply_a, score_a), (reply_b, score_b)):
            if score > threshold:
                selected_pairs.append((prompt, reply))
                break

    return selected_pairs



In [None]:
# Prompts for which candidate responses are generated from both LoRA models.
questions = ['What makes us human?']
# Score a response must exceed to be kept as training data.
threshold = 0.8
training_data = create_training_data(questions, lora_a, lora_b, tokenizer, threshold)

# Train the model.
# The plan is to alternate training between the two models; for now only
# lora_a is trained on the selected data.
# TODO: decide how to alternate -- some kind of threshold is needed for when
# to switch from one model to the other.
# NOTE(review): assumes LoRA.train accepts a list of (question, response)
# tuples -- confirm against the `lora` module.
lora_a.train(training_data)
# lora_b.train(training_data)



In [None]:
# Save both LoRA models under the '_p2_' names, which differ from the
# checkpoint paths loaded at the top of the notebook.
# NOTE: lora_b is saved here even though its train call is currently
# commented out in the training cell.
lora_a.save_pretrained('lora_finetuned_gpt2_p2_a')
lora_b.save_pretrained('lora_finetuned_gpt2_p2_b')

In [None]:
# TODO: build our own test set to evaluate the model.
# Human evaluation may also be an option.