In [1]:
%reload_ext autoreload
%autoreload 2

In [2]:
import os
from llama import Workflow, Llama
from llama.util import find_free_port, load_ckpt

os.environ["RANK"] = "0"
os.environ["WORLD_SIZE"] = "1"
os.environ["MASTER_ADDR"] = "localhost"
os.environ["MASTER_PORT"] = str(find_free_port())

workflow = Workflow.build(
    ckpt_dir='/scratch4/jeisner1/tjbai/llama_8b',
    tokenizer_path='/scratch4/jeisner1/tjbai/llama_8b/tokenizer.model',
    max_seq_len=8*8192,
    max_batch_size=1,
    model_parallel_size=1,
    max_nodes=100,
)

llama = Llama(workflow.model, workflow.tokenizer)



> initializing model parallel with size 1
> initializing ddp with size 1
> initializing pipeline with size 1
Loaded in 16.69 seconds


In [5]:
import json
from llama.workflows.tot import load_math_problems, eval_solutions

llama.model.reshape_cache(4)

problems = load_math_problems('/home/tbai4/llama3/data/MATH', split='test')
with open('/home/tbai4/llama3/dumps/mad/postft_eval_test.json') as f:
    solutions = json.load(f)
    print(len(solutions))
    
outputs = eval_solutions(
    llama,
    [s['outputs']['decision']['Answer'] for s in solutions if isinstance(s['outputs']['decision'], dict)],
    problems,
)

with open('/home/tbai4/llama3/dumps/mad/choreo_ft_correct.json', 'w') as f:
    json.dump(outputs, f)

500


  0%|          | 17/5000 [00:02<14:09,  5.86it/s]


KeyboardInterrupt: 

In [3]:
import json
from llama.workflows.tot import load_math_problems, eval_solutions

with open('/home/tbai4/llama3/dumps/mad/postft_eval.json') as f:
    data = json.load(f)
    print(len(data))

llama.model.reshape_cache(4)

solutions = []
for d in data:
    outputs = d.get('outputs', {})
    if 'decision' in outputs and outputs['decision'] is not None:
        solutions.append(outputs['decision']['Answer'])
    else:
        solutions.append('')
        
problems = load_math_problems('/home/tbai4/llama3/data/MATH', split='val')

outputs = eval_solutions(
    llama,
    solutions,
    problems[:len(solutions)]
)

sum(outputs)

240


100%|██████████| 240/240 [00:46<00:00,  5.17it/s]


In [28]:
with open('/home/tbai4/llama3/dumps/mad/math_baseline_e2e.json') as f:
    data = json.load(f)
    
solutions = []
for d in data:
    outputs = d.get('outputs', {})
    if 'decision' in outputs and isinstance(outputs['decision'], dict):
        solutions.append(outputs['decision']['Answer'])
    else:
        solutions.append('')
    
problems = load_math_problems('/home/tbai4/llama3/data/MATH', split='train')[:500]

outputs = eval_solutions(
    llama,
    solutions,
    problems[:len(solutions)]
)
asdfasdf
sum(outputs)

100%|██████████| 500/500 [01:55<00:00,  4.32it/s]


194

In [3]:
load_ckpt(workflow, '/scratch4/jeisner1/tjbai/checkpoints/mad/lora_step-899.pt')

In [32]:
import json
from tqdm import tqdm
from llama.workflows.mad import mad_baseline, mad_cached
from llama.workflows.tot import load_math_problems
from llama.workflows.simple import math_direct

problems = load_math_problems('/home/tbai4/llama3/data/MATH', split='val')[:100]

workflow.reset()
outputs = mad_cached(
    workflow=workflow,
    problem=problems[0]['problem'],
    max_rounds=3,
    debug=True,
)

In [5]:
s = '''
{"Preference": "No", "Supported Side": "", "Reason": "Both sides agree on the non-deterministic behavior of the function and the final answer, but the negative side does not provide a clear preference for the affirmative side's analysis. The debate will continue to the next round.", "Answer": ""}
'''

from llama.workflows.mad import parse_decision

parse_decision(s)

'\n{"Preference": "No", "Supported Side": "", "Reason": "Both sides agree on the non-deterministic behavior of the function and the final answer, but the negative side does not provide a clear preference for the affirmative side\'s analysis. The debate will continue to the next round.", "Answer": ""}\n'

In [3]:
import json
with open('tmp') as f:
    sample = json.load(f)
    print(len(sample['outputs']['mod_tokens']))

2


In [4]:
from llama.workflows.finetune import MadTrainer

trainer = MadTrainer(
    workflow=workflow,
    output_dir='/scratch4/jeisner1/tjbai/checkpoints/bsm/',
    learning_rate=1e-5,
)

Training 54.5M / 8.1B parameters


In [None]:
from tqdm import tqdm

for i in tqdm(range(100)):
    loss, metrics = trainer.step(sample, debug=False)
    loss.backward()
    trainer.optimizer.step()
    trainer.optimizer.zero_grad()    
    if (i+1) % 5 == 0:
        print(metrics)

In [5]:
loss = trainer.step(sample, debug=True)

Selected chunk: initial (weights: {'initial': 2, 'rounds_first_half': 3, 'rounds_second_half': 3, 'final': 1})
Total loss value: 4.3199
Selected chunk: initial (loss: 0.4768067002296448)


In [9]:
loss[0].backward()

In [None]:
import json
from tqdm import tqdm
from llama.workflows.mad_iterative import math_simple_baseline
from llama.workflows.tot import load_math_problems

# MATH dataset
problems = load_math_problems('/home/tbai4/llama3/data/MATH', split='val')

# baseline with reflection on MATH
solutions = []
for problem in tqdm(problems):
    workflow.reset()
    solutions.append(math_simple_baseline(
        workflow=workflow,
        problem=problem['problem'],
        enable_reflection=True,
        debug=False,
    ))
    
with open('math_baseline_with_reflection.json', 'w') as f:
    json.dump(solutions, f)

# baseline without reflection on MATH
solutions = []
for problem in tqdm(problems):
    workflow.reset()
    solutions.append(math_simple_baseline(
        workflow=workflow,
        problem=problem['problem'],
        enable_reflection=False,
        debug=False,
    ))

with open('math_baseline_without_reflection.json', 'w') as f:
    json.dump(solutions, f)

In [None]:
import json
from tqdm import tqdm
from llama.workflows.mad_iterative import math_mad_cached, math_simple_baseline, load_ciar
from llama.workflows.tot import load_math_problems

# CIAR dataset
problems = load_ciar('/home/tbai4/llama3/data/CIAR', start=0, end=50)

# MAD cached on CIAR
solutions = []
for problem in tqdm(problems):
    workflow.reset()
    solutions.append(math_mad_cached(
        workflow=workflow,
        problem=problem['question'],
        max_rounds=3,
    ))
with open('improved_ciar_cached.json', 'w') as f:
    json.dump(solutions, f)

# baseline with reflection on CIAR
solutions = []
for problem in tqdm(problems):
    workflow.reset()
    solutions.append(math_simple_baseline(
        workflow=workflow,
        problem=problem['question'],
        enable_reflection=True,
    ))
with open('ciar_baseline_with_reflection.json', 'w') as f:
    json.dump(solutions, f)

# baseline without reflection on CIAR
solutions = []
for problem in tqdm(problems):
    workflow.reset()
    solutions.append(math_simple_baseline(
        workflow=workflow,
        problem=problem['question'],
        enable_reflection=False,
    ))
with open('ciar_baseline_without_reflection.json', 'w') as f:
    json.dump(solutions, f)

In [None]:
from tqdm import tqdm
from llama.workflows.mad_iterative import mad_baseline, mad_cached, simple_baseline, load_translations

translations = load_translations('/home/tbai4/llama3/data/commonmt', start=0, end=100)

for translation in tqdm(translations):
    workflow.reset()
    simple_baseline(workflow, translation['chinese'], debug=False, enable_reflection=True)
    
for translation in tqdm(translations):
    workflow.reset()
    simple_baseline(workflow, translation['chinese'], debug=False, enable_reflection=False)
    
for translation in tqdm(translations):
    workflow.reset()
    mad_baseline(workflow, translation['chinese'], agents=['Alice', 'Bob'], max_rounds=3, debug=False)
    
for translation in tqdm(translations):
    workflow.reset()
    simple_baseline(workflow, translation['chinese'], agents=['Alice', 'Bob'], max_rounds=3, debug=False)