In [4]:
class LLMParticipant:
    """Run a Hugging Face causal language model over a JSON file of questions.

    The participant reads its settings from a ``config`` dict, loads the
    questions, tokenizer and model, builds one prompt per question, generates
    a response for each prompt in batches and writes the responses (together
    with the config) to ``outputs/<input file stem>/<ISO timestamp>.json``.
    Every run is also recorded in ``log.json`` in the working directory.

    The model family is the part of ``config['MODEL']`` before the first
    ``/``; it decides how the prompt is assembled and tokenized.
    """

    # Families that receive separate system and user chat messages.
    SYSTEM_ROLE_FAMILIES = ('',)
    # Families that receive one user message with the system text prepended.
    USER_ONLY_FAMILIES = ('mistralai', 'microsoft')
    # Families that receive a plain string prompt (no chat template).
    RAW_PROMPT_FAMILIES = ('google',)

    def __init__(self, config: dict) -> None:
        """Store the configuration, load the questions, then load tokenizer and model.

        Args:
            config: Must contain the keys ``MODEL``, ``TEMPERATURE``,
                ``BATCH_SIZE``, ``EXAMPLE``, ``FORMAT_VERSION``,
                ``SYSTEM_CONTENT`` and ``INPUTS``. The whole dict is saved
                alongside the results.

        Raises:
            KeyError: If one of the required keys is missing.
        """
        self.config = config
        self.MODEL = config['MODEL']
        self.model_family = self.MODEL.split('/')[0]
        self.TEMPERATURE = config['TEMPERATURE']
        self.BATCH_SIZE = config['BATCH_SIZE']
        self.EXAMPLE = config['EXAMPLE']
        self.FORMAT_VERSION = config['FORMAT_VERSION']
        self.SYSTEM_CONTENT = config['SYSTEM_CONTENT']
        self.INPUTS = config['INPUTS']

        # Read the questions first so a bad input path fails before the
        # (slow) model download/load starts.
        self.data = self.load_input_questions()

        print(f"Loading model {self.MODEL}... ", end="")
        self.tokenizer = AutoTokenizer.from_pretrained(
            self.MODEL,
            trust_remote_code=True,
        )
        self.model = AutoModelForCausalLM.from_pretrained(
            self.MODEL,
            device_map="auto",
            trust_remote_code=True,
            torch_dtype=torch.bfloat16,
        )
        print("done.")

        # device_map="auto" has already placed the weights; moving the model
        # again with .to() is unsupported for dispatched models. Inputs are
        # sent to the device the model reports instead.
        self.device = self.model.device

        # Batched generation needs a pad token and left padding so that every
        # prompt ends directly before the newly generated tokens.
        self.tokenizer.pad_token = self.tokenizer.eos_token
        self.tokenizer.pad_token_id = self.tokenizer.eos_token_id
        self.tokenizer.padding_side = 'left'

    def load_input_questions(self) -> dict:
        """Return the parsed content of the JSON file at ``self.INPUTS``.

        The other methods expect a mapping from question id to a record with
        the keys ``question``, ``explanation`` (a list of
        ``{'step', 'explanation'}`` items) and ``template_id``.
        """
        with Path(self.INPUTS).open(encoding='utf-8') as f:
            return json.load(f)

    def _unsupported_family_error(self) -> ValueError:
        """Build the error raised when ``self.model_family`` has no prompt format."""
        supported = (
            self.SYSTEM_ROLE_FAMILIES
            + self.USER_ONLY_FAMILIES
            + self.RAW_PROMPT_FAMILIES
        )
        return ValueError(
            f"Unsupported model family {self.model_family!r}; "
            f"expected one of {list(supported)}"
        )

    def _build_prompt(self, user_content: str):
        """Wrap one user prompt in the format required by the model family.

        Returns a list of chat messages for chat-template families and a plain
        string for raw-prompt families.

        Raises:
            ValueError: If the model family is not one of the known families.
        """
        if self.model_family in self.SYSTEM_ROLE_FAMILIES:
            return [
                {"role": "system", "content": self.SYSTEM_CONTENT},
                {"role": "user", "content": user_content},
            ]
        if self.model_family in self.USER_ONLY_FAMILIES:
            # No system message is sent for these families; the instructions
            # are prepended to the user turn instead.
            return [
                {"role": "user", "content": self.SYSTEM_CONTENT + ' ' + user_content},
            ]
        if self.model_family in self.RAW_PROMPT_FAMILIES:
            return f'{self.SYSTEM_CONTENT} {user_content}'
        raise self._unsupported_family_error()

    def assemble_messages(self, data: dict) -> list:
        """Build one prompt per record in ``data``, in iteration order.

        Args:
            data: Mapping of question id to a record with ``question`` and
                ``explanation`` (list of ``{'step', 'explanation'}``).

        Returns:
            A list with one entry per record: a list of chat-message dicts
            for chat-template families, or a string for raw-prompt families.

        Raises:
            ValueError: If the model family is not supported.
        """
        inputs = []
        for example in data.values():
            steps = "".join(
                f"{step['step']}. {step['explanation']}. "
                for step in example['explanation']
            )
            # The prompt ends with an opening bracket so the model continues
            # the list of selected step numbers.
            user_content = (
                f"Question: {example['question']} Process: "
                + steps
                + "List of selected steps for inclusion in summary: ["
            )
            inputs.append(self._build_prompt(user_content))

        return inputs

    def apply_template_generate_response(self, inputs) -> list[str]:
        """Tokenize ``inputs`` in batches of ``BATCH_SIZE`` and generate responses.

        Args:
            inputs: Prompts as produced by :meth:`assemble_messages`.

        Returns:
            One decoded string per input, in input order. Each string is the
            decoded full output sequence, i.e. it contains the prompt text
            followed by the newly generated text.

        Raises:
            ValueError: If the model family is not supported.
        """
        responses = []

        for start in range(0, len(inputs), self.BATCH_SIZE):
            batch = inputs[start:start + self.BATCH_SIZE]

            if self.model_family in self.SYSTEM_ROLE_FAMILIES + self.USER_ONLY_FAMILIES:
                # return_dict=True also yields the attention mask, which is
                # needed because padding uses the eos token.
                encoded = self.tokenizer.apply_chat_template(
                    batch,
                    padding=True,
                    return_tensors="pt",
                    tokenize=True,
                    add_generation_prompt=True,
                    return_dict=True,
                )
            elif self.model_family in self.RAW_PROMPT_FAMILIES:
                encoded = self.tokenizer(
                    batch,
                    padding=True,
                    truncation=True,
                    return_tensors="pt",
                )
            else:
                raise self._unsupported_family_error()

            encoded = encoded.to(self.device)

            generation_kwargs = {"max_new_tokens": 256}
            if self.TEMPERATURE > 0:
                generation_kwargs["do_sample"] = True
                generation_kwargs["temperature"] = self.TEMPERATURE
            else:
                # Sampling requires a strictly positive temperature; a
                # temperature of 0 is treated as greedy decoding.
                generation_kwargs["do_sample"] = False

            outputs = self.model.generate(
                input_ids=encoded['input_ids'],
                attention_mask=encoded['attention_mask'],
                **generation_kwargs,
            )
            responses += self.tokenizer.batch_decode(outputs, skip_special_tokens=True)

        return responses

    def save_results(self, results: list[dict]) -> None:
        """Write ``results`` and the config to a timestamped file and update the log.

        The results go to ``outputs/<stem of INPUTS>/<ISO timestamp>.json``
        (the directory is created if needed). ``log.json`` in the working
        directory maps each run's timestamp to its config; it is created on
        the first run and kept sorted by timestamp.
        """
        output_filename = datetime.datetime.now().isoformat()
        output_path = Path("outputs", Path(self.INPUTS).stem, f"{output_filename}.json")
        log = Path('log.json')
        print(f"Saving results in {output_path}... ", end="")

        outputs = {
            "config": self.config,
            "results": results,
        }

        output_path.parent.mkdir(parents=True, exist_ok=True)
        with output_path.open('w', encoding='utf-8') as f:
            json.dump(outputs, f, indent=4)
        print("done.")

        # Start a fresh log when none exists yet instead of failing after the
        # expensive generation step has already finished.
        if log.exists():
            with log.open('r', encoding='utf-8') as f:
                log_data = json.load(f)
        else:
            log_data = {}

        log_data[output_filename] = self.config
        # sort log
        log_data = dict(sorted(log_data.items()))

        with log.open('w', encoding='utf-8') as f:
            json.dump(log_data, f, indent=4)

    def participate(self):
        """Run the full pipeline: build prompts, generate, save, and return results.

        Returns:
            A list of ``{'question_id', 'template_id', 'response'}`` dicts,
            one per question, in the order of ``self.data``.
        """
        print(f"Assembling messages for {len(self.data)} inputs... ", end="")
        inputs = self.assemble_messages(self.data)
        print("done.")
        print(f"Generating responses for {len(inputs)} inputs... ")
        responses = self.apply_template_generate_response(inputs)
        print("Done.")

        # Responses come back in prompt order, which is the key order of self.data.
        outputs = [
            {
                "question_id": question_id,
                "template_id": self.data[question_id]['template_id'],
                "response": response,
            }
            for question_id, response in zip(self.data.keys(), responses)
        ]

        self.save_results(outputs)

        return outputs


In [5]:
# Run configuration. Everything tunable lives here; the whole dict is passed to
# LLMParticipant and stored next to the results.
MODEL = 'microsoft/Phi-3-small-128k-instruct'
TEMPERATURE = 0.3
BATCH_SIZE = 20
EXAMPLE = '[1, 2, 3, 4]'
FORMAT_VERSION = 2
# EXAMPLE is interpolated by the f-string itself; no extra .format() call is
# needed (it would fail if EXAMPLE ever contained curly braces).
# NOTE(review): the wording "from a list the steps" and "summary paragraph"
# looks unintended for a list-selection task, but the text is left untouched
# because changing it would change what the model is asked.
SYSTEM_CONTENT = f"Your role is to select, from a list the steps, only those that are most important for inclusion in a summary explanation of that process. Format your output as a list, for example {EXAMPLE}. Output only this short summary paragraph and nothing else."
INPUTS = '../resources/data/select.json'
DESCRIPTION = "testing"

config = {
    "MODEL": MODEL,
    "TEMPERATURE": TEMPERATURE,
    "BATCH_SIZE": BATCH_SIZE,
    "EXAMPLE": EXAMPLE,
    "FORMAT_VERSION": FORMAT_VERSION,
    "SYSTEM_CONTENT": SYSTEM_CONTENT,
    "INPUTS": INPUTS,
    "DESCRIPTION": DESCRIPTION,
}

In [6]:
# Build the participant: the constructor reads the questions from
# config["INPUTS"] and loads the tokenizer and model named by config["MODEL"].
# It relies on Path, json, torch, AutoTokenizer and AutoModelForCausalLM having
# been imported by an earlier cell; the NameError recorded below is what a
# missing import produces.
P = LLMParticipant(config=config)

NameError: name 'Path' is not defined

In [None]:
# Run the whole pipeline (build prompts, generate, save to disk) and keep the
# returned records in a variable for later cells.
results = P.participate()
# Preview a few records instead of echoing every prompt-plus-response string
# into the cell output; the full set is in `results` and in the saved file.
results[:3]

Assembling messages for 400 inputs... done.
Generating responses for 400 inputs... 


You are not running the flash-attention implementation, expect numerical differences.


Done.
Saving results in outputs/select/2024-05-30T15:54:27.080799.json... done.


[{'question_id': '6ab9291eb34f4edc',
  'template_id': 'B1',
  'response': "Your role is to select, from a list the steps, only those that are most important for inclusion in a summary explanation of that process. Format your output as a list, for example [1, 2, 3, 4]. Output only this short summary paragraph and nothing else. Question: In Latin America and the Caribbean, which country had the lowest GDP in 2020? Process: 1. A list of countries in Latin America and the Caribbean was needed. 2. 49 countries in Latin America and the Caribbean, including Guadeloupe, Chile and Cuba, were found. 3. The GDP for each of these countries in 2020 was needed for a comparison. 4. Data for each country's GDP in 2020 was found. 5. The answer was found by comparing the values to each other. List of selected steps for inclusion in summary: [ 3, 4, 5"},
 {'question_id': '1503b5f8a99e44f0',
  'template_id': 'B1',
  'response': "Your role is to select, from a list the steps, only those that are most impor