In [2]:
import torch
import torch.nn.functional as F
from torch.optim import Adam
from transformers import T5ForConditionalGeneration, T5Tokenizer
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Load the T5 tokenizer and models.
# A single checkpoint name feeds all three loads so the tokenizer vocabulary
# always matches the models it is paired with; change it here only.
T5_CHECKPOINT = "t5-small"

tokenizer = T5Tokenizer.from_pretrained(T5_CHECKPOINT)
# Two separate from_pretrained calls: each translation direction gets its own
# model object, both initialised from the same checkpoint.
english_to_lingo_model = T5ForConditionalGeneration.from_pretrained(T5_CHECKPOINT)
lingo_to_english_model = T5ForConditionalGeneration.from_pretrained(T5_CHECKPOINT)

# Define the environment that rewards Lingo translations
class LingoTranslationEnvironment:
    """Environment that scores English -> Lingo -> English round trips.

    The tokenizer and both translation models are stored on the instance.
    ``get_reward`` itself does not use them; it only needs the two vectors
    and the Lingo string passed to it.
    """

    def __init__(
        self,
        tokenizer,
        english_to_lingo_model,
        lingo_to_english_model,
        length_penalty=0.1,
    ):
        """Store the collaborators and the per-token length penalty.

        Args:
            tokenizer: Tokenizer object, kept as ``self.tokenizer``.
            english_to_lingo_model: Model kept as
                ``self.english_to_lingo_model``.
            lingo_to_english_model: Model kept as
                ``self.lingo_to_english_model``.
            length_penalty: Amount subtracted from the reward for every
                whitespace-separated token of the Lingo text.  Defaults to
                0.1, the value that used to be hard-coded.
        """
        self.tokenizer = tokenizer
        self.english_to_lingo_model = english_to_lingo_model
        self.lingo_to_english_model = lingo_to_english_model
        self.length_penalty = length_penalty

    @staticmethod
    def _as_float_vector(values, name):
        """Return *values* as a tensor that cosine similarity can consume."""
        if isinstance(values, str):
            # The ``*_text`` parameter names invite passing raw strings; fail
            # with an actionable message instead of torch's opaque error.
            raise TypeError(
                f"{name} must be a numeric vector (e.g. an embedding), "
                "not a str"
            )
        # as_tensor accepts lists, arrays and tensors alike and does not emit
        # the copy-construct warning that torch.tensor(tensor) does.
        vector = torch.as_tensor(values)
        if not (vector.is_floating_point() or vector.is_complex()):
            # Integer/bool input (e.g. token ids) is promoted so the
            # similarity is computed in floating point.
            vector = vector.to(torch.get_default_dtype())
        return vector

    def get_reward(self, english_text, lingo_text, back_to_english_text):
        """Return round-trip similarity minus a Lingo length penalty.

        Despite their names, ``english_text`` and ``back_to_english_text``
        must be numeric vectors of equal length (presumably sentence
        embeddings -- TODO confirm against the caller).  ``lingo_text`` is
        the Lingo string; its whitespace-separated tokens are counted.

        Args:
            english_text: Vector for the original English input.
            lingo_text: Lingo translation as a string.
            back_to_english_text: Vector for the back-translated English.

        Returns:
            float: ``cosine_similarity - length_penalty * token_count``.

        Raises:
            TypeError: If either vector argument is a raw string.
        """
        original = self._as_float_vector(english_text, "english_text")
        round_trip = self._as_float_vector(
            back_to_english_text, "back_to_english_text"
        )

        # Add a batch dimension so the similarity is taken along dim=1.
        similarity = F.cosine_similarity(
            original.unsqueeze(0),
            round_trip.unsqueeze(0),
        )

        # Shorter Lingo output is rewarded: every token costs length_penalty.
        token_count = len(lingo_text.split())
        reward = similarity - self.length_penalty * token_count

        return reward.item()