In [1]:
# (Re)load the autoreload extension and use mode 2, so edits to the imported
# project modules are picked up without restarting the kernel.
%reload_ext autoreload
%autoreload 2
# Render matplotlib figures inline in the cell output.
%matplotlib inline

In [2]:
import os
from llama import Workflow, Llama
from llama.util import find_free_port

# Describe a one-process job through the environment: rank 0 of a world of
# size 1, with the master address on localhost.
os.environ.update({"RANK": "0", "WORLD_SIZE": "1", "MASTER_ADDR": "localhost"})
# The master port is looked up only after the three variables above are set.
os.environ["MASTER_PORT"] = str(find_free_port())

# Keyword arguments for Workflow.build, collected in one place so every knob
# of this run is visible at a glance.
build_options = {
    # Checkpoint directory and the tokenizer file that lives inside it.
    'ckpt_dir': '/scratch4/jeisner1/tjbai/llama_8b',
    'tokenizer_path': '/scratch4/jeisner1/tjbai/llama_8b/tokenizer.model',
    # Size limits.
    'max_seq_len': 8192,
    'max_batch_size': 8,
    'model_parallel_size': 1,
    'max_nodes': 100,
    # LoRA settings.
    'use_lora': True,
    'lora_rank': 32,
    'lora_alpha': 64,
    'lora_dropout': 0.05,
}
workflow = Workflow.build(**build_options)

# Second handle built from the workflow's own model and tokenizer; later cells
# call `llama.chat_completion` on it.
llama = Llama(workflow.model, workflow.tokenizer)

> initializing model parallel with size 1
> initializing ddp with size 1
> initializing pipeline with size 1
Converting to LoRA
Loaded in 16.92 seconds


In [3]:
# import json

# with open('/home/tbai4/llama3/data/triviaqa/unfiltered-web-train.json') as f:
#     data = json.load(f)
#     problems = data['Data']
#     problems = problems[:len(problems)]

# with open('/home/tbai4/llama3/data/triviaqa/unfiltered-web-dev.json') as f:
#     data = json.load(f)
#     problems = data['Data']
#     problems = problems[:len(problems)//2] # hold out half

## Accuracy prior to fine-tuning

In [None]:
import json
import random
from collections import defaultdict
from tqdm import tqdm
from llama.workflows.qa import ask_parallel, eval_system_prompt, format_eval_user, parse_items

# Put the model in evaluation mode before generating anything.
workflow.model.eval()

# TriviaQA dev split; the question records live under the 'Data' key.
with open('/home/tbai4/llama3/data/triviaqa/unfiltered-web-dev.json') as f:
    data = json.load(f)
    problems = data['Data']

for num_questions in [16]:
    # Phase 1 -- generation: for each seed, draw `num_questions` problems and
    # answer them in a single ask_parallel call on a freshly reset workflow.
    answers = []
    for seed in tqdm(range(50), desc='Generating'):
        workflow.reset()
        random.seed(seed)  # makes the sampled subset reproducible per trial
        subset = random.sample(problems, k=num_questions)
        answer = ask_parallel(workflow, subset, annotate=True)
        answers.append((subset, workflow.tokenizer.decode(answer['output_tokens'])))

    # Phase 2 -- grading: the adapter is switched off so the judging calls
    # below run without it.
    # NOTE(review): the adapter is never switched back on in this cell, so a
    # second entry in the `num_questions` list (or a re-run of the cell) would
    # generate with the adapter disabled -- confirm whether that is intended.
    workflow.model.reshape_cache(num_questions)
    workflow.model.set_adapter_state(enabled=False)
    correct = defaultdict(int)  # list position -> number of trials judged correct
    for subset, answer in tqdm(answers, desc='Evaluating'):
        individual_answers = parse_items(answer)
        # zip() truncates to the shorter sequence, so only questions that have
        # a parsed answer are graded.
        dialogs = [
            [{'role': 'system', 'content': eval_system_prompt},
             {'role': 'user', 'content': format_eval_user(s, a)}]
            for s, a in zip(subset, individual_answers)
        ]
        if not dialogs:
            # Nothing could be parsed for this trial: skip it instead of
            # sending an empty batch to chat_completion.
            continue

        # One prefill per dialog; sized from `dialogs` so the two lists cannot
        # drift apart.
        resps = llama.chat_completion(
            dialogs,
            content_prefills=['{"correct": "'] * len(dialogs),
        )

        for i, r in enumerate(resps):
            if 'true' in r['generation']['content'].lower():
                correct[i] += 1

    # Positions that were never judged correct do not appear in the output.
    print(sorted(correct.items()))

## N=2, sequential (baseline)

In [20]:
import random
from tqdm import tqdm
from pathlib import Path
from llama.workflows.qa import ask_sequential

# The baseline is generated with the adapter switched off.
workflow.model.set_adapter_state(enabled=False)

# Markdown lines for the report; joined with newlines when written out.
output = []
for seed in tqdm(range(30)):
    random.seed(seed)  # reproducible pair of questions for each trial
    subset = random.sample(problems, k=2)
    output.append(f"\n## Trial {seed}\n")

    result = ask_sequential(workflow, subset)
    decoded = workflow.tokenizer.decode(result['output_tokens'])

    final_position = len(subset) - 1
    for position, (problem, generated) in enumerate(zip(subset, parse_items(decoded))):
        output += [
            f"### Question {position+1}\n",
            f"**Question:** {problem['Question']}\n",
            f"**Ground Truth:** {problem['Answer']['Value']}\n",
            f"**Generated:** {generated}\n",
        ]
        # Horizontal rule between questions, but not after the last one.
        if position < final_position:
            output.append("\n---\n")

# Left as the cell's final expression so the character count returned by
# write_text is displayed.
Path('/home/tbai4/llama3/dumps/sequential_dev_n2.md').write_text('\n'.join(output))

100%|██████████| 30/30 [00:44<00:00,  1.50s/it]


13163

## N=2, Parallel

In [38]:
import random
from tqdm import tqdm
from pathlib import Path

# (output filename, annotate flag, compact flag) for each parallel variant.
configs = [
   ('parallel_base_n2.md', False, False),
   ('parallel_annotated_n2.md', True, False),
   ('parallel_linearized_n2.md', False, True),
   ('parallel_annotated_linearized_n2.md', True, True)
]

# NOTE(review): this cell does not set the adapter state itself; it runs with
# whatever state an earlier cell left behind -- confirm that is intended.
for filename, annotate, compact in configs:
    # Markdown lines for this variant's report.
    output = []
    for seed in tqdm(range(30), desc=f"annotate={annotate}, compact={compact}"):
        random.seed(seed)  # same seeds as the sequential dump
        output.append(f"\n## Trial {seed}\n")
        subset = random.sample(problems, k=2)
        result = ask_parallel(workflow, subset, annotate=annotate, compact=compact)
        # Decode the generated tokens before parsing, as the other cells that
        # call parse_items do; the raw result is not text.
        decoded = workflow.tokenizer.decode(result['output_tokens'])

        for i, (s, generated) in enumerate(zip(subset, parse_items(decoded))):
            output.extend([
                f"### Question {i+1}\n",
                f"**Question:** {s['Question']}\n",
                f"**Ground Truth:** {s['Answer']['Value']}\n",
                f"**Generated:** {generated}\n"
            ])
            # Separator between questions. This has to sit inside the
            # per-question loop: placed after it, `i` is always the last index
            # (so no rule is ever written) or unbound when nothing was parsed.
            if i < len(subset) - 1:
                output.append("\n---\n")

    Path(f'/home/tbai4/llama3/dumps/triviaqa/{filename}').write_text('\n'.join(output))

annotate=False, compact=False: 100%|██████████| 30/30 [00:17<00:00,  1.74it/s]
annotate=True, compact=False: 100%|██████████| 30/30 [00:17<00:00,  1.75it/s]
annotate=False, compact=True: 100%|██████████| 30/30 [00:16<00:00,  1.81it/s]
annotate=True, compact=True: 100%|██████████| 30/30 [00:18<00:00,  1.64it/s]


## Exploratory

In [12]:
# Reference run: one system prompt followed by four consecutive user turns,
# sent through the plain chat-completion interface.
system_prompt = "Answer ALL of the user's questions. Answer with an numbered list. Do not include extraneous text."
question_texts = [
    'Which city does David Soul come from?',
    'Who was President when the first Peanuts cartoon was published?',
    'From which country did Angola achieve independence in 1975?',
    'Who won Super Bowl XX?',
]

dialog = [{'role': 'system', 'content': system_prompt}]
dialog += [{'role': 'user', 'content': text} for text in question_texts]

# A single dialog is submitted, so the single result is at index 0.
res = llama.chat_completion(dialogs=[dialog])

print(res[0]['generation']['content'])

1. Chicago
2. Richard Nixon
3. Portugal
4. Chicago Bears


In [66]:
from operator import itemgetter as get

# Start from an empty workflow, as the other exploratory workflow cells do, so
# the result does not depend on nodes left behind by previously run cells.
workflow.reset()

# Same conversation as the chat_completion cell: a system prompt followed by
# four user turns, inserted as ONE node with no parents.
messages = [
    {'role': 'system', 'content': 'Answer ALL of the user\'s questions. Answer with an numbered list. Do not include extraneous text.'},
    {'role': 'user', 'content': 'Which city does David Soul come from?'},
    {'role': 'user', 'content': 'Who was President when the first Peanuts cartoon was published?'},
    {'role': 'user', 'content': 'From which country did Angola achieve independence in 1975?'},
    {'role': 'user', 'content': 'Who won Super Bowl XX?'}
]

# One entry is inserted, so exactly one node is unpacked.
[prompt] = workflow.insert([{'messages': messages, 'parent_ids': []}])

# Single assistant task with an empty prefill whose only parent is the prompt
# node.
step_result = workflow.step(
    [{
        'header': ('assistant', None),
        'prefill': '',
        'parent_ids': [prompt['id']]
    }]
)
# One task was submitted, so 'tokens' holds exactly one token sequence.
[response] = get('tokens')(step_result)

print(workflow.tokenizer.decode(response))

1. Chicago
2. Richard Nixon
3. Portugal
4. Chicago Bears


In [76]:
from operator import itemgetter as get

# Begin with an empty workflow.
workflow.reset()

# Root node: the system prompt on its own, with no parents.
system_message = {
    'role': 'system',
    'content': "Answer ALL of the user's questions. Answer with an numbered list. Do not include extraneous text.",
}
[prompt] = workflow.insert([{'messages': [system_message], 'parent_ids': []}])

# Each question becomes its own node hanging off the system prompt. Note that
# the "Question N" labels are inserted in the order 1, 4, 3, 2.
labelled_questions = (
    'Question 1: Which city does David Soul come from?',
    'Question 4: From which country did Angola achieve independence in 1975?',
    'Question 3: Who won Super Bowl XX?',
    'Question 2: Who was President when the first Peanuts cartoon was published?',
)
questions = workflow.insert([
    {
        'messages': [{'role': 'user', 'content': text}],
        'parent_ids': [prompt['id']],
    }
    for text in labelled_questions
])

# One assistant task whose parents are the system prompt plus every question
# node, stepped with compact=True.
assistant_task = {
    'header': ('assistant', None),
    'prefill': '',
    'parent_ids': [prompt['id'], *(node['id'] for node in questions)],
}
[response] = workflow.step(tasks=[assistant_task], compact=True)['tokens']

print(workflow.tokenizer.decode(response))

1. Gerald Ford


In [83]:
from operator import itemgetter as get

# Begin with an empty workflow.
workflow.reset()

# Root node: the system prompt on its own, with no parents.
system_message = {
    'role': 'system',
    'content': "Answer ALL of the user's questions. Answer with an numbered list. Do not include extraneous text.",
}
[prompt] = workflow.insert([{'messages': [system_message], 'parent_ids': []}])

# Two sibling nodes under the system prompt, each carrying two user turns.
question_groups = (
    (
        'Which city does David Soul come from?',
        'From which country did Angola achieve independence in 1975?',
    ),
    (
        'Who won Super Bowl XX?',
        'Who was President when the first Peanuts cartoon was published?',
    ),
)
questions = workflow.insert([
    {
        'messages': [{'role': 'user', 'content': text} for text in group],
        'parent_ids': [prompt['id']],
    }
    for group in question_groups
])

# One assistant task whose parents are the system prompt plus both question
# nodes; no `compact` argument is passed here.
assistant_task = {
    'header': ('assistant', None),
    'prefill': '',
    'parent_ids': [prompt['id'], *(node['id'] for node in questions)],
}
[response] = workflow.step(tasks=[assistant_task])['tokens']

print(workflow.tokenizer.decode(response))

1. Chicago
2. United States
