In [None]:
#!pwd

In [None]:
#!pip install -qU transformers accelerate einops langchain xformers bitsandbytes

In [None]:
#!pip install torch
#!pip install torchvision

In [None]:
#pip install bitsandbytes

In [None]:
#pip install tokenizer

In [None]:
#!pip install -qU transformers accelerate einops langchain xformers bitsandbytes

In [None]:
#pip install transformers

In [20]:
from torch import cuda, bfloat16
import pickle
import transformers
from langchain import PromptTemplate,  LLMChain


model_id = 'meta-llama/Llama-2-7b-chat-hf'
device = f'cuda:{cuda.current_device()}' if cuda.is_available() else 'cpu'

In [3]:
# set quantization configuration to load large model with less GPU memory
# this requires the `bitsandbytes` library
bnb_config = transformers.BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type='nf4',
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=bfloat16,
    load_in_8bit_fp32_cpu_offload=True,
)

# begin initializing HF items, need auth token for these
hf_auth = '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'
model_config = transformers.AutoConfig.from_pretrained(
    model_id,
    token=hf_auth
)

In [4]:
try:

    model = transformers.AutoModelForCausalLM.from_pretrained(
        "/scratch/lora.n/STS_Project/l2tune",
        #"/scratch/lora.n/LLaMa2/config.py",
        #from_tf = True,
        cache_dir='/scratch/lora.n/LLaMa2',
        trust_remote_code=True,
        config=model_config,
        quantization_config=bnb_config,
        device_map='auto',
        token=hf_auth
    )


except Exception as e:
    print("ERROR:", e)

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

In [7]:
model.eval()
print(f"Model loaded on {device}")

Model loaded on cuda:0


In [8]:
tokenizer = transformers.AutoTokenizer.from_pretrained(
    model_id,
    token=hf_auth
)

In [9]:
stop_list = ['\nHuman:', '\n```\n']

stop_token_ids = [tokenizer(x)['input_ids'] for x in stop_list]
stop_token_ids

[[1, 29871, 13, 29950, 7889, 29901], [1, 29871, 13, 28956, 13]]

In [10]:
import torch

stop_token_ids = [torch.LongTensor(x).to(device) for x in stop_token_ids]
stop_token_ids

[tensor([    1, 29871,    13, 29950,  7889, 29901], device='cuda:0'),
 tensor([    1, 29871,    13, 28956,    13], device='cuda:0')]

In [11]:
from transformers import StoppingCriteria, StoppingCriteriaList

# define custom stopping criteria object
class StopOnTokens(StoppingCriteria):
    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        for stop_ids in stop_token_ids:
            if torch.eq(input_ids[0][-len(stop_ids):], stop_ids).all():
                return True
        return False

stopping_criteria = StoppingCriteriaList([StopOnTokens()])

In [14]:
from langchain.llms import HuggingFacePipeline
pipe = transformers.pipeline(model=model, tokenizer=tokenizer, torch_dtype=torch.bfloat16, return_full_text=True,  
task='text-generation', stopping_criteria=stopping_criteria,  temperature=0.8, max_new_tokens=600, repetition_penalty=1.1)

llm = HuggingFacePipeline(pipeline=pipe)

In [15]:
B_INST, E_INST = "[INST]", "[/INST]"
B_SYS, E_SYS = "<>\n", "\n<>\n\n"

MAIN LOOP HERE

In [16]:
def get_prompt(instruction, new_system_prompt):
    SYSTEM_PROMPT = B_SYS + new_system_prompt + E_SYS
    prompt_template = B_INST + SYSTEM_PROMPT + instruction + E_INST
    return prompt_template

In [17]:
def GameRepeat(context, code):
    #initialize game and scenario
    scen = open(context +'.txt').read()
    game = open(code  +'.txt').read()
    
    #initialize system message
    sys_msg = B_SYS + "Consider the proposed scenario and act as if you were taking part in it. \n"+ scen + E_SYS
    #initalize user message
    cmds = B_INST + " Respond to the following using exactly one letter to denote your choice. Your answer must either consist of the letter 'C' for strategy C or 'D' for strategy D." + E_INST
    human_msg = cmds + "\nUser: {user_input}"
       
    template = get_prompt(human_msg, sys_msg)
    prompt = PromptTemplate(input_variables=["user_input"], template=template)
    lm = LLMChain(llm=llm,prompt=prompt,verbose=False, memory=None)
    
    stratlist =[]
    for i in range(0,300):
        rz = lm.predict(user_input = game)
        stratlist.append(rz)
    return stratlist

In [26]:
scenarios = ['IR', 'biz', 'friendsharing', 'environment', 'team'] 
games = ['prison', 'delight', 'staghunt', 'snowdrift']

results_7b = {}

for scenario in scenarios:
    for game in games:
        gamma = GameRepeat(scenario, game)
        results_7b.update({scenario+"_"+game:gamma})

with open('7b-fixed-results.txt', 'wb') as file:
    pickle.dump(results_7b, file)

ValueError: Missing some input keys: {'input'}

In [27]:
!jupyter nbconvert project2.ipynb --to script

[NbConvertApp] Converting notebook project2.ipynb to script
[NbConvertApp] Writing 5173 bytes to project2.py
