In [None]:
import os
import json
import time
from tqdm import tqdm
from transformers import AutoModelForCausalLM, AutoTokenizer
#from instruction import construct_instruction
#from process_response import process_response
from extract_response import extract_response
from unsloth import FastLanguageModel 
from unsloth import is_bfloat16_supported
import torch
from trl import SFTTrainer
from transformers import TrainingArguments
from datasets import load_dataset
from tqdm import tqdm
import time
import pandas as pd
import re
import json
import csv
from datasets import load_dataset
from datasets import Dataset

# Seed value, presumably intended for reproducible runs.
# NOTE(review): nothing in the visible cells reads this name or seeds any RNG with it — confirm it is used.
random_seed = 100

import re 

def extract_value(pattern, text):
    """Return the stripped text of the first capture group of ``pattern`` in ``text``.

    Args:
        pattern: Regular expression (string or compiled pattern) containing at
            least one capture group.
        text: String to search.

    Returns:
        The first group's text with surrounding whitespace removed, or ``""``
        when the pattern does not match or when the first group took no part
        in the match (e.g. an optional group).

    Raises:
        IndexError: If ``pattern`` matches but defines no capture group.
    """
    match = re.search(pattern, text)
    if match is None:
        return ""
    # A group that did not participate in the match is reported as None;
    # treat it as "no value" instead of failing on None.strip().
    captured = match.group(1)
    return captured.strip() if captured is not None else ""


In [None]:
def _trim_to_last_sentence(text):
    """Cut ``text`` after its last '.'; text without any '.' is returned unchanged."""
    head, dot, _ = text.rpartition('.')
    return head + dot if dot else text


def _is_blank_response(response):
    """Return True when every value of the parsed response mapping is an empty string."""
    return all(value == '' for value in response.values())


def zeroshot_aug_generate(aug_dataset, modelname, model_generate_dir, args):
    """Generate one parsed response per example using an augmented zero-shot prompt.

    For each example the prompt is built from ``example['content']`` (cut after
    its last '.') and ``example['aug_content']``. The decoded model output is
    passed through ``extract_response`` and ``process_response``. When every
    value of the parsed result is an empty string, generation is retried up to
    ``max_attempts`` more times.

    NOTE(review): ``process_response`` is not imported in the visible cells
    (its import is commented out) — confirm it is defined before running.

    Args:
        aug_dataset: Iterable of mappings with ``'content'`` and
            ``'aug_content'`` keys.
        modelname: Model identifier passed to ``FastLanguageModel.from_pretrained``.
        model_generate_dir: Path of the JSON file the collected responses are
            written to (a file path, despite the name).
        args: Mapping read for ``"max_attempts"``, ``"max_seq_length"`` and
            ``"max_new_tokens"``.

    Returns:
        List with one ``process_response`` result per example, in input order.
    """
    start_time = time.time()
    # A missing/None retry budget means "no retries" rather than a TypeError
    # from comparing an int with None.
    max_attempts = max(args.get("max_attempts") or 0, 0)
    max_seq_length = args.get("max_seq_length")
    max_new_tokens = args.get("max_new_tokens")

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=modelname,
        max_seq_length=max_seq_length,
        dtype=None,
        load_in_4bit=True,
    )
    FastLanguageModel.for_inference(model)

    instruction = ""
    # Loop-invariant template, defined once. The text is used verbatim at runtime.
    prompt_format = """
                ### Instruction:
                {}
                ### Input:
                @@Input
                {}
                ### Augument:
                * @Augment is for find relate words to determine words in @Input
                * It is not allowed to extract words from here
                {}
                ### Response:
                """

    responses_primary = []
    for example in tqdm(aug_dataset, desc="Processing examples"):
        input_text = _trim_to_last_sentence(example['content'])
        augment = example['aug_content']

        # No EOS token is appended: at inference time it would mark the prompt
        # as an already-finished sequence instead of asking for a continuation.
        prompt = prompt_format.format(instruction, input_text, augment)
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to("cuda")

        response = {}
        # One initial attempt plus up to max_attempts retries (bounded, so no infinite loop).
        for attempt in range(max_attempts + 1):
            # Retries enable sampling; repeating the same default-decoding call
            # on the same prompt is not expected to yield a different answer.
            retry_kwargs = {"do_sample": True} if attempt > 0 else {}
            generated_ids = model.generate(
                inputs['input_ids'],
                attention_mask=inputs['attention_mask'],
                max_new_tokens=max_new_tokens,  # generation budget for the answer
                eos_token_id=tokenizer.eos_token_id,
                pad_token_id=tokenizer.pad_token_id,
                **retry_kwargs,
            )
            decoded = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
            primary = extract_response(decoded)
            print(primary)
            response = process_response(primary)
            if not _is_blank_response(response):
                break
        else:
            # Reached only when no attempt produced a non-empty response.
            print("达到最大尝试次数，仍未获得有效响应。")
        responses_primary.append(response)

    elapsed_time = time.time() - start_time
    print(f"Total time taken: {elapsed_time} seconds")

    output_file = model_generate_dir
    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(responses_primary, f, indent=4)
    print("大模型生成的json保存成功!!|!")
    return responses_primary

In [None]:
def zeroshot_generate(aug_dataset, modelname, args, instruction=None):
    """Generate one parsed response per example using a plain zero-shot prompt.

    The prompt is built from ``instruction`` and ``example['content']``; the
    augmentation and response slots of the template are left empty. The decoded
    model output is passed through ``extract_response`` and ``process_response``.

    NOTE(review): ``process_response`` is not imported in the visible cells
    (its import is commented out) — confirm it is defined before running.

    Args:
        aug_dataset: Iterable of mappings with a ``'content'`` key.
        modelname: Model identifier passed to ``FastLanguageModel.from_pretrained``.
        args: Mapping read for ``"max_seq_length"``, ``"max_new_tokens"`` and,
            when ``instruction`` is not given, ``"instruction"``.
        instruction: Dataset-specific instruction text. Defaults to
            ``args["instruction"]`` if present, otherwise an empty string.

    Returns:
        List with one ``process_response`` result per example, in input order.
    """
    start_time = time.time()
    max_seq_length = args.get("max_seq_length")
    max_new_tokens = args.get("max_new_tokens")

    # The previous `instruction = instruction` made the name local and read it
    # before assignment (UnboundLocalError on every call); it is now a parameter.
    if instruction is None:
        instruction = args.get("instruction", "")

    # Load the model once, outside the loop, to avoid reloading it per example.
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=modelname,
        max_seq_length=max_seq_length,
        dtype=None,
        load_in_4bit=True,
    )
    FastLanguageModel.for_inference(model)

    # Loop-invariant template, defined once. The text is used verbatim at runtime.
    prompt_format = """
                ### Instruction:
                {}
                ### Input:
                @@Input
                {}
                ### Augument:
                * @Augment is for find relate words to determine words in @Input
                * It is not allowed to extract words from here
                {}
                ### Response:{}
                """

    responses_primary = []
    for example in tqdm(aug_dataset, desc="Processing examples"):
        input_text = example['content']

        # No EOS token is appended: at inference time it would mark the prompt
        # as an already-finished sequence instead of asking for a continuation.
        prompt = prompt_format.format(instruction, input_text, "", "")
        inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to("cuda")

        generated_ids = model.generate(
            inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=max_new_tokens,
            eos_token_id=tokenizer.eos_token_id,
            pad_token_id=tokenizer.pad_token_id,
        )

        decoded = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
        primary = extract_response(decoded)
        responses_primary.append(process_response(primary))

    elapsed_time = time.time() - start_time
    print(f"Total time taken: {elapsed_time} seconds")

    # Hand the collected results back to the caller.
    return responses_primary


In [None]:
import json

# Generation settings read by the zero-shot functions via args.get(...).
args = {
    "sample_num": 500,
    "max_attempts": 2,
    "max_seq_length": 2048,
    # The key previously had a leading space (" max_new_tokens"), so
    # args.get("max_new_tokens") returned None and the limit was never applied.
    "max_new_tokens": 200,
}
fourbit_models = [
    #"unsloth/mistral-7b-v0.3-bnb-4bit",
    #"unsloth/mistral-7b-instruct-v0.3-bnb-4bit",
    "unsloth/mistral-7b-bnb-4bit",
    #"unsloth/llama-3-8b-bnb-4bit",
    #"unsloth/llama-3-8b-Instruct-bnb-4bit",
]

# Context manager so the dataset file handle is closed deterministically.
with open("/local/home/sumyao/ysmpubmed/Models/NERmodels/merged_auged_ebm.txt", encoding="utf-8") as f:
    aug_data = json.load(f)

for modelname in fourbit_models:
    model_generate_dir = modelname.replace("unsloth/", "") + "_zeroaug_generated.txt"
    responses_primary = zeroshot_generate(aug_dataset=aug_data, modelname=modelname, args=args)
    # Persist each model's output; the path was computed but never used, so
    # results from earlier models were overwritten in memory and lost.
    with open(model_generate_dir, "w", encoding="utf-8") as f:
        json.dump(responses_primary, f, indent=4)

In [None]:
import json

# Single-example smoke test: load the model, build one prompt, generate and
# show both the extracted text and the parsed response.
# NOTE(review): `process_response` is not imported in the visible cells (its
# import is commented out) — confirm it is defined before running this cell.
model, tokenizer = FastLanguageModel.from_pretrained(
    model_name="unsloth/mistral-7b-bnb-4bit",  # or llama-8b
    max_seq_length=2048,
    dtype=None,
    load_in_4bit=True,
)
FastLanguageModel.for_inference(model)

# Context manager so the dataset file handle is closed deterministically.
with open("/local/home/sumyao/ysmpubmed/Models/NERmodels/merged_auged_ebm.txt", encoding="utf-8") as f:
    aug_data = json.load(f)
input_text = aug_data[2].get("content")

# Dataset-specific instruction. The previous `instruction = instruction` raised
# NameError on a fresh kernel; keep an existing value if one was defined
# earlier, otherwise fall back to an empty instruction.
# TODO: set the real instruction for the dataset under test.
instruction = globals().get("instruction", "")
prompt_format = """
                ### Instruction:
                {}
                ### Input:
                @@Input
                {}
                ### Augument:
                * @Augment is for find relate words to determine words in @Input
                * It is not allowed to extract words from here
                {}
                ### Response:{}
                """
# Kept as a module-level name in case other cells reference it; it is NOT
# appended to the inference prompt, where it would mark the sequence as finished.
EOS_TOKEN = tokenizer.eos_token
prompt = prompt_format.format(instruction, input_text, "", "")

inputs = tokenizer([prompt], return_tensors="pt", padding=True, truncation=True).to("cuda")

# Generation settings.
generated_ids = model.generate(
    inputs['input_ids'],
    attention_mask=inputs['attention_mask'],
    max_new_tokens=1200,
    eos_token_id=tokenizer.eos_token_id,
    pad_token_id=tokenizer.pad_token_id,
)

response = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
primary = extract_response(response)
print(primary)
response = process_response(primary)

response