In [14]:
import json
from pathlib import Path

import pandas as pd
import random
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

In [15]:
tokenizer = AutoTokenizer.from_pretrained("/nfs/public/hf/models/meta-llama/Meta-Llama-3-8B-Instruct")
model = AutoModelForCausalLM.from_pretrained("/nfs/public/hf/models/meta-llama/Meta-Llama-3-8B-Instruct",
    # token='hf_xxx'
    device_map="auto",
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    local_files_only=True
)

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.
Loading checkpoint shards: 100%|██████████| 4/4 [00:04<00:00,  1.15s/it]


In [16]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
model.to(device)

tokenizer.pad_token = tokenizer.eos_token
tokenizer.pad_token_id = tokenizer.eos_token_id
tokenizer.padding_side = 'left'

In [17]:
def load_data(
        template: str | Path,
    ) -> dict:

    if isinstance(template, Path):
        path = template

    else:
        path = Path('..', 'resources', 'data', f'{template}.json')

    if not path.exists():
        raise FileNotFoundError(f'The template {template} does not exist.')
    with path.open(encoding='utf-8') as f:
        data = json.load(f)

    return data

In [18]:
def assemble_messages(data, system_content):

    inputs = []
    for example in data.values():

        user_content = ""
        user_content += f"Question: {example['question']}\n\nProcess: "
        for step in example['explanation']:
            user_content += f"{step['step']}. {step['explanation']}. "

        messages = [
            {
                "role": "system",
                "content": system_content
            },
            {
                "role": "user",
                "content": user_content
            }
        ]

        inputs.append(messages)

    return inputs

In [19]:
def generate_response(inputs):

    input_messages = tokenizer.apply_chat_template(inputs, padding=True, tokenize=True, return_tensors="pt", add_generation_prompt=True).to(device)
    outputs = model.generate(input_messages, max_new_tokens=256)
    responses = [tokenizer.decode(output, skip_special_tokens=True) for output in outputs]

    return responses

In [20]:
def save_results(path, system_content, data, responses):

    for k, r in zip(data, responses):

        # check if the key already exists
        if 'llm-selection-summary' not in data[k]:
            data[k]['llm-selection-summary'] = []

        result = {
            'system_prompt': system_content,
            'response': r
        }
        data[k]['llm-selection-summary'].append(result)

    with path.open(mode='w', encoding='utf-8') as f:
        json.dump(data, f, indent=4)

In [21]:
system_content = f"Here is a question and the process that was used to solve it. Select the steps of that process that are most important for inclusion in a summary explanation of that process. Then, rewrite these steps into a short summary. Output only this short summary paragraph and nothing else."

BASE_DIR = Path('..', 'resources', 'data')
templates = BASE_DIR.iterdir()

for template in templates:
    data = load_data(template)
    inputs = assemble_messages(data, system_content)
    responses = generate_response(inputs)
    save_results(template, system_content, data, responses)

The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.
The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:128001 for open-end gene

In [27]:
print(responses[8])

system

Here is a question and the process that was used to solve it. Select the steps of that process that are most important for inclusion in a summary explanation of that process. Then, rewrite these steps into a short summary. Output only this short summary paragraph and nothing else.user

Question: In 2020, which country in Central Asia had the lowest energy consumption?

Process: 1. A list of countries in Central Asia was needed. 2. 5 countries in Central Asia, including Uzbekistan, Turkmenistan and Kazakhstan, were found. 3. The energy consumption for each of these countries in 2020 was needed for a comparison. 4. Data for each country's energy consumption in 2020 was found. 5. The answer was found by comparing the values to each other.assistant

Here is the most important steps of the process:

1. A list of countries in Central Asia was needed.
2. The energy consumption for each of these countries in 2020 was found.

Summary: In 2020, Turkmenistan had the lowest energy consumpt