In [1]:
import os
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
os.environ["CUDA_VISIBLE_DEVICES"]="6"
import sys
sys.path.append("../")
from tqdm import tqdm
import json
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
from langchain import HuggingFacePipeline
from pprint import pprint
from agent.custom_parser import CustomMRKLOutputParser2, CustomMRKLOutputParser, CustomMRKLOutputParser3
from copy import deepcopy
from utils import load_openapi_spec, escape
from agent.tools import Tool, GetDetailsTool, tool_projection
from agent.tools import CustomInvalidTool
from transformers import StoppingCriteria
import torch
from langchain.schema import AgentAction, AgentFinish

In [2]:
# Load the evaluation items from disk. A context manager is used so the file
# handle is closed deterministically instead of being left to the garbage
# collector. Downstream cells index each item as item[0] / item[1]
# (presumably step texts and per-step flags -- confirm against the data file).
with open('../data/train_data_train_part', 'r') as data_file:
    data = json.load(data_file)

In [3]:
# Directory of the fine-tuned checkpoint that is evaluated in this notebook.
checkpoint_dir = 'vicuna-7b-toolalpaca_train_on_train_part/checkpoint-54'

# Tokenizer and model are both read from the same checkpoint directory.
# NOTE(review): trust_remote_code=True lets the checkpoint supply its own
# Python code -- only keep it for checkpoints you trust.
tokenizer = AutoTokenizer.from_pretrained(
    checkpoint_dir,
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    checkpoint_dir,
    trust_remote_code=True,
)
# Cast the loaded weights to half precision.
model = model.half()

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [4]:
class StopSequenceCriteria(StoppingCriteria):
    """Stop generation once the decoded text ends with one of the stop strings.

    Args:
        stop_sequences: A single stop string, or an iterable of stop strings.
        tokenizer: Object exposing ``decode(token_ids) -> str``; used to turn
            the token ids produced so far back into text.

    Note:
        Only the first row of ``input_ids`` is decoded and checked, and the
        whole row is decoded on every call.
    """

    def __init__(self, stop_sequences, tokenizer):
        # A bare string is shorthand for a one-element list of stop strings.
        self.stop_sequences = (
            [stop_sequences] if isinstance(stop_sequences, str) else stop_sequences
        )
        self.tokenizer = tokenizer

    def __call__(self, input_ids, scores, **kwargs) -> bool:
        """Return True when the first row's decoded text ends with a stop string.

        ``scores`` and ``kwargs`` are accepted but not used.
        """
        first_row_ids = input_ids.tolist()[0]
        text_so_far = self.tokenizer.decode(first_row_ids)
        for marker in self.stop_sequences:
            if text_so_far.endswith(marker):
                return True
        return False

# Text-generation pipeline around the loaded checkpoint. Sampling is disabled,
# each call may produce at most 1024 new tokens, and generation is halted as
# soon as the decoded text ends with the observation marker.
generator = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    device='cuda',
    do_sample=False,
    max_new_tokens=1024,
    stopping_criteria=[
        StopSequenceCriteria(
            stop_sequences='ASSISTANT Observation:',
            tokenizer=tokenizer,
        )
    ],
)

# LangChain wrapper so the pipeline can be driven through llm.generate(...).
llm = HuggingFacePipeline(pipeline=generator)

In [5]:
# For every item, regenerate each flagged step with the model and collect the
# result. item[0] is iterated as a sequence of text steps and item[1] is indexed
# in parallel as truthy/falsy flags (presumably "model-written step" markers --
# confirm against the data file).
generations = []
for item in tqdm(data):
    curr_generation = deepcopy(item)
    train_flags = item[1]

    # One prompt per flagged step: the concatenation of all steps before it.
    prompts = []
    history = ''
    for position, step_text in enumerate(item[0]):
        if train_flags[position]:
            prompts.append(history)
        history += step_text

    res = llm.generate(prompts)

    # The k-th flagged step receives the k-th generation; anything from the
    # observation marker onwards is cut off.
    next_output = 0
    for position in range(len(curr_generation[0])):
        if not curr_generation[1][position]:
            continue
        generated_text = res.generations[next_output][0].text
        curr_generation[0][position] = generated_text.split('\nASSISTANT Observation:')[0]
        next_output += 1

    generations.append(curr_generation)

In [9]:
# Persist the regenerated items. The context manager guarantees the output is
# flushed and the handle closed as soon as the dump finishes, rather than
# whenever the anonymous file object happens to be collected.
with open('../data/test_generations_train_model_trained_on_half_train.json', 'w') as out_file:
    json.dump(generations, out_file)

In [6]:
# Sanity check: number of items collected by the generation loop.
len(generations)

2261