In [None]:
from utils.dataloader import load_temusnews_data_raw
import textgrad as tg

# Load the raw records; each record is later indexed with "full_text" and
# "short_text" by the training and evaluation cells.
data = load_temusnews_data_raw()

# Positional hold-out: the leading 70% of the records (in loaded order) are
# used for prompt optimisation, the remaining tail for evaluation.
split_ratio = 0.7
split_point = int(split_ratio * len(data))
train_data, test_data = data[:split_point], data[split_point:]

# Show one record as a quick sanity check of the loaded structure.
example = data[3]
example

In [2]:
# Running log of the per-example losses; the training loop appends to it.
losses_list = []

# The system prompt is the only variable created with requires_grad=True and
# the only parameter handed to the optimizer below.
system_prompt_string = tg.Variable(
    "Summarize the following article: \n",
    requires_grad=True,
    role_description="Instruction and style how to summarize the news article. Should be at most 50 words.",
)

# Register gpt-4o as the backward engine, overriding any engine set earlier.
# NOTE(review): the optimizer is built without an explicit engine, so it
# presumably relies on this call having run first - keep the order.
tg.set_backward_engine("gpt-4o", override=True)

# Forward model: gpt-4o driven by the trainable system prompt.
model = tg.BlackboxLLM("gpt-4o", system_prompt=system_prompt_string)

optimizer = tg.TGD(parameters=[system_prompt_string])

In [None]:
def train_single(data, system_prompt_string, optimizer, model, losses_list, n_epochs=1):
    """Run the prompt-optimisation loop one record at a time.

    For each epoch and each record in ``data`` the function:
      1. wraps the record's ``"full_text"`` in a non-trainable ``tg.Variable``,
      2. calls ``model`` on it and labels the answer with a role description,
      3. builds a ``tg.TextLoss`` from an instruction that embeds the article
         and the record's ``"short_text"`` and applies it to the answer,
      4. calls ``backward()`` on the loss, then ``optimizer.step()`` and
         ``optimizer.zero_grad()``,
      5. appends the loss to ``losses_list`` and prints a progress report.

    Args:
        data: Iterable of records indexable by ``"full_text"`` and
            ``"short_text"``.
        system_prompt_string: Only printed by this function after every step
            (presumably the variable being tuned by ``optimizer``).
        optimizer: Object exposing ``step()`` and ``zero_grad()``.
        model: Callable applied to the article variable; its result must
            provide ``set_role_description``.
        losses_list: List that receives one loss object per processed record
            (mutated in place).
        n_epochs: Number of full passes over ``data``.

    Returns:
        None.
    """
    for epoch in range(1, n_epochs + 1):
        for step, record in enumerate(data, start=1):
            article = record["full_text"]

            # The article is an input only, so it carries no gradient.
            summary = model(
                tg.Variable(
                    article,
                    role_description="String containing company monthly news that we want to summarize",
                    requires_grad=False,
                )
            )
            summary.set_role_description(
                "A summary of the news article by the director of the company"
            )

            # Instruction for the textual loss: article, reference summary,
            # and the criteria the generated summary is judged on.
            rubric = "".join(
                (
                    f"Here's a string containing news: {article}. ",
                    f"Here is a string containing the summary of the director of the company: {record['short_text']}. ",
                    "You should evaluate the summaries to this news article in terms of syntax, style and personality. ",
                    "It should be as they were written by the same person.",
                )
            )

            loss = tg.TextLoss(rubric)(summary)
            loss.backward()
            optimizer.step()
            optimizer.zero_grad()
            losses_list.append(loss)

            # Visually separated progress report for this step.
            print(
                " ========= ",
                f"Epoch: {epoch}, Iteration {step}",
                " --------- ",
                f"Loss: {loss}",
                " --------- ",
                f"Prompt: {system_prompt_string}",
                " ========= ",
                sep="\n",
            )


# Optimise the system prompt with three passes over the training split.
train_single(
    data=train_data,
    system_prompt_string=system_prompt_string,
    optimizer=optimizer,
    model=model,
    losses_list=losses_list,
    n_epochs=3,
)

In [None]:
def eval_model(model, data):
    """Print article, reference summary and generated summary for each record.

    Args:
        model: Callable applied to a ``tg.Variable`` wrapping the article; the
            returned object's ``value`` attribute is printed as the generated
            summary.
        data: Iterable of records indexable by ``"full_text"`` and
            ``"short_text"``.

    Returns:
        None; the comparison is written to standard output only.
    """
    divider = "--------------------------------"
    for position, record in enumerate(data, start=1):
        article = record["full_text"]
        reference = record["short_text"]

        # The article is an input only, so it carries no gradient.
        generated = model(
            tg.Variable(
                article,
                role_description="String containing some news that we want to summarize",
                requires_grad=False,
            )
        )

        # A single print with a newline separator emits the same text as one
        # print per line; the trailing "\n" argument yields the blank lines
        # that separate consecutive examples.
        print(
            "================================",
            f"Example {position}",
            f"News: {article}",
            divider,
            f"Summary: {reference}",
            divider,
            f"Generated summary: {generated.value}",
            divider,
            "\n",
            sep="\n",
        )


# Compare generated and reference summaries on the training split.
eval_model(model=model, data=train_data)

In [None]:
# Compare generated and reference summaries on the held-out test split.
eval_model(model=model, data=test_data)