In [None]:
import json
from vllm import LLM, SamplingParams
from tqdm import tqdm


In [None]:

# --- Sampling settings (handed to SamplingParams in a later cell) ---
STOP_TOKENS = ["###"]  # generation stops when this string is produced
MAX_TOKENS = 1024      # upper bound on generated tokens per completion
TOP_P = 0.8
TEMPERATURE = 1

# --- Engine settings (handed to LLM(...) as tensor_parallel_size) ---
TENSOR_PARALLEL_SIZE = 2

In [None]:
# Prompt layout fed to the model. `{input}` is the only placeholder; any other
# keyword given to `.format(...)` is ignored by str.format.
# The text starts with a blank line and ends right after the response header,
# so the model's completion begins on the line following "### Response:".
PROMPT_TEMPLATE = (
    "\n"
    "### Input:\n"
    "{input}\n"
    "\n"
    "### Response:\n"
)


In [None]:
# Build the prompt list from a JSONL file (one JSON object per line).
#
# For every usable record:
#   * a missing/empty "instruction" is normalised to "",
#   * "input" is pretty-printed when it is itself a JSON document, otherwise
#     it is used as-is,
#   * the rendered prompt is queued PROMPT_COPIES times in a row, and the
#     record is stored once in `source_data`.
#
# Lines that cannot be used are skipped (never fatal) and reported at the end
# instead of being dropped silently.
#
# NOTE(review): DATA_PATH is not assigned in the cells shown here — confirm it
# is defined before this cell runs.
# NOTE(review): json.dumps defaults to ensure_ascii=True, so non-ASCII text in
# a JSON "input" reaches the prompt as \uXXXX escapes while non-JSON inputs
# pass through untouched — confirm this matches what the model expects.
PROMPT_COPIES = 10      # consecutive copies of each prompt placed in `prompts`
MAX_REPORTED_SKIPS = 5  # cap on how many skipped lines are listed individually

prompts = []
source_data = []
skipped_lines = []  # (line number, reason) for every non-blank unusable line

with open(DATA_PATH, 'r', encoding='utf-8') as f:
    for line_no, line in enumerate(f, start=1):
        line = line.strip()
        if not line:
            continue  # blank lines are not records

        try:
            data_item = json.loads(line)
        except json.JSONDecodeError as exc:
            skipped_lines.append((line_no, f"invalid JSON ({exc})"))
            continue

        # A record must be a JSON object carrying an "input" field; anything
        # else used to abort the whole cell with AttributeError/KeyError.
        if not isinstance(data_item, dict) or 'input' not in data_item:
            skipped_lines.append((line_no, "not a JSON object with an 'input' field"))
            continue

        if not data_item.get("instruction"):
            data_item["instruction"] = ""

        raw_input = data_item['input']
        try:
            pretty_input = json.dumps(json.loads(raw_input), indent=4)
        except (json.JSONDecodeError, TypeError):
            # Not a JSON string (or not a string at all): use the value unchanged.
            pretty_input = raw_input

        prompt = PROMPT_TEMPLATE.format(
            instruction=data_item['instruction'],
            input=pretty_input
        )
        prompts.extend([prompt] * PROMPT_COPIES)
        source_data.append(data_item)

if skipped_lines:
    print(f"Skipped {len(skipped_lines)} unusable line(s) while loading records:")
    for line_no, reason in skipped_lines[:MAX_REPORTED_SKIPS]:
        print(f"  line {line_no}: {reason}")
    if len(skipped_lines) > MAX_REPORTED_SKIPS:
        print(f"  ... and {len(skipped_lines) - MAX_REPORTED_SKIPS} more")


In [None]:
# Load the model once and describe how completions are sampled.
#
# NOTE(review): MODEL_PATH is not assigned in the cells shown here — confirm it
# is defined before this cell runs.
GPU_MEMORY_UTILIZATION = 0.8  # value passed to LLM(...) as gpu_memory_utilization

llm = LLM(
    model=MODEL_PATH,
    tensor_parallel_size=TENSOR_PARALLEL_SIZE,
    gpu_memory_utilization=GPU_MEMORY_UTILIZATION,
)

# One completion per request. The prompt list already carries every prompt
# several times, and only the first completion of each request is read
# afterwards, so asking for more completions per request (previously n=10)
# only generated text that was thrown away.
sampling_params = SamplingParams(
    n=1,
    temperature=TEMPERATURE,
    top_p=TOP_P,
    max_tokens=MAX_TOKENS,
    stop=STOP_TOKENS,
)


In [None]:
# Submit the whole prompt list in one call, using the shared sampling settings.
# The returned sequence is consumed request by request when results are built.
outputs = llm.generate(prompts, sampling_params)


In [None]:
# Pair every generation with the record that produced its prompt, write the
# pairs as JSONL, then print the generated texts.
#
# `prompts` holds the copies of each record's prompt consecutively, so
# `outputs` is a whole multiple of `source_data` in length and output i belongs
# to record i // copies_per_item. The multiple is derived from the lengths
# rather than hard-coded, so it stays correct if the number of copies changes.
# This relies on llm.generate returning outputs in prompt order.
copies_per_item = len(outputs) // len(source_data) if source_data else 0
if copies_per_item * len(source_data) != len(outputs):
    raise ValueError(
        f"cannot align {len(outputs)} outputs with {len(source_data)} source records"
    )

results = []
for i, output in enumerate(outputs):
    source_item = source_data[i // copies_per_item]
    generated_text = output.outputs[0].text.strip()
    # Build a fresh dict per result: mutating and re-appending the shared
    # source record made every row identical (all carrying the last text).
    results.append({**source_item, 'generated_output': generated_text})

with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
    for result in tqdm(results, desc=""):
        f.write(json.dumps(result, ensure_ascii=False) + '\n')

for result in results:
    print(result['generated_output'])