In [1]:
import replicate
from pathlib import Path

## Train

In [3]:
models = {
    "llama2-7b": "meta/llama-2-7b:77dde5d6c56598691b9008f7d123a18d98f40e4b4978f8a72215ebfc2553ddd8"
}


Before training, need to create the destination model: https://replicate.com/create

The hardware setting here is for inference; for training, the model will use `8x A40 (Large)` automatically.

In [None]:
model_to_train = "llama2-7b"

training = replicate.trainings.create(
  version=models[model_to_train],
  input={
    "train_data": "https://gist.githubusercontent.com/organisciak/463f2f872dac7ae39629bd94c45f208d/raw/1b592ed942880e71dfa609c16c1450c6ad8f57b5/finetune-gt_main2_prepared_train.jsonl",
    "num_train_epochs": 3,
    "validation_data": "https://gist.githubusercontent.com/organisciak/463f2f872dac7ae39629bd94c45f208d/raw/1b592ed942880e71dfa609c16c1450c6ad8f57b5/finetune-gt_main2_prepared_val.jsonl"
  },
  destination="organisciak/ocsai-llama2-70b"
)

print(training)

# Evaluate

In [3]:
import os
import replicate
import json
import asyncio
from tqdm.auto import tqdm

import logging
logging.basicConfig(level=logging.ERROR)
logger = logging.getLogger(__name__)

model = "organisciak/ocsai-llama2-70b:b00751d00cca65ff9213aea7d4fc79b9f91d2af25c5f097bd2d9fd29cc952218"

with open('../data/ocsai1/finetune-gt_main2_prepared_test.jsonl') as f:
    test_data = [json.loads(line) for line in f]

  from .autonotebook import tqdm as notebook_tqdm


## Run asynchronously.

This need Python 3.11+

Run in batches, to save intermediate progress.

In [8]:
async def process_batch(batch):
    async with asyncio.TaskGroup() as tg:
        tasks = []
        for ex in batch:
            inputdict = {"prompt": ex['prompt'] + '\n', "temperature": 0.01}
            rep_run = replicate.async_run(model, input=inputdict)
            tasks.append(tg.create_task(rep_run))

    results = await asyncio.gather(*tasks)
    return ["".join(result).strip() for result in results]


batch_size = 10
all_results = []
for i in tqdm(range(0, len(test_data), batch_size)):
    batch = test_data[i:i + batch_size]
    batch_results = await process_batch(batch)
    all_results.extend(batch_results)

  0%|          | 0/303 [00:00<?, ?it/s]INFO:httpx:HTTP Request: POST https://api.replicate.com/v1/predictions "HTTP/1.1 201 Created"
INFO:httpx:HTTP Request: GET https://api.replicate.com/v1/models/organisciak/ocsai-llama2-70b/versions/b00751d00cca65ff9213aea7d4fc79b9f91d2af25c5f097bd2d9fd29cc952218 "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.replicate.com/v1/predictions "HTTP/1.1 201 Created"
INFO:httpx:HTTP Request: POST https://api.replicate.com/v1/predictions "HTTP/1.1 201 Created"
INFO:httpx:HTTP Request: POST https://api.replicate.com/v1/predictions "HTTP/1.1 201 Created"
INFO:httpx:HTTP Request: POST https://api.replicate.com/v1/predictions "HTTP/1.1 201 Created"
INFO:httpx:HTTP Request: POST https://api.replicate.com/v1/predictions "HTTP/1.1 201 Created"
INFO:httpx:HTTP Request: POST https://api.replicate.com/v1/predictions "HTTP/1.1 201 Created"
INFO:httpx:HTTP Request: POST https://api.replicate.com/v1/predictions "HTTP/1.1 201 Created"
INFO:httpx:HTTP Request

CancelledError: 

# Or not async:

In [4]:
all_results = []
for ex in tqdm(test_data):
    inputdict = {"prompt": ex['prompt'] + '\n', "temperature": 0.01}
    try:
        result = replicate.run(model, input=inputdict)
        result = "".join(result).strip()
    except KeyboardInterrupt:
        break
    except:
        logging.exception(f"Error with {ex['prompt']}")
        result = "ERROR"
    all_results.append(result)

  0%|          | 0/3030 [00:00<?, ?it/s]

100%|██████████| 3030/3030 [1:07:40<00:00,  1.34s/it]


## Basic Scoring Function

Keeping here for reference.

In [36]:
item = 'lightbulb'
response = 'toy'

prompt_template = "AUT Prompt:{}\nResponse:{}\nScore:\n\n"

def score_llama(item, response,
                model="organisciak/ocsai-llama2-70b:b00751d00cca65ff9213aea7d4fc79b9f91d2af25c5f097bd2d9fd29cc952218"):
    
    output = replicate.run(
        model,
        input={
            "debug": False,
            "top_p": 1,
            "prompt": prompt_template.format(item, response),
            "temperature": 0.01,
            "return_logits": False,
            "max_new_tokens": 3,
            "min_new_tokens": -1,
            "repetition_penalty": 1
        }, 
    )
    result = "".join(output).strip()
    try:
        result = int(result) / 10
    except:
        print("Error casting to int, returning as is:", result)
    return result
score_llama(item, response)

2.1

## Evaluate

In [81]:
# evaluate
import pandas as pd
from scipy.stats import pearsonr
rows = []
for score, ex in zip(all_results, test_data):
    ex['predicted'] = int(score) / 10
    #ex['target'] = int(ex['completion']) / 10
    rows.append(ex)
df = pd.DataFrame(rows)
df['gptprompt'] = df.prompt
df['prompt'] = df.gptprompt.apply(lambda x: x.split('AUT Prompt:')[1].split('\n')[0].strip())
df['response'] = df.gptprompt.apply(lambda x: x.split('Response:')[1].split('\n')[0].strip())
pearsonr(df.target, df.score)

PearsonRResult(statistic=0.6906771628853364, pvalue=0.0)

In [82]:
testdata = pd.read_parquet('../data/ocsai1/gt_main2_testdata.parquet').drop(columns=['predicted'])
# clean for merging
testdata['prompt'] = testdata['prompt'].str.strip()
testdata['response'] = testdata['response'].str.strip()
testdata = testdata.merge(df.drop(columns=['target']), on=['prompt', 'response'])
print(testdata.shape)

# save in same format as old data
s = 'gt_main2'
finetuned_size = '7b'
finetuned_proportion = 1
testdata['model'] = f"llama-{finetuned_size}"
testdata['proportion'] = finetuned_proportion
testdata['type'] = 'uses'
fname  = f'llama-ft-{finetuned_size}-{finetuned_proportion}.csv'

#testdata['predicted'] = testdata.predicted_raw.str.strip().str.replace('[\-\:/]','', regex=True).apply(lambda x:x.split(' ')[0])
#testdata['predicted'] = pd.to_numeric(testdata['predicted'], errors='coerce').div(10)
returncols = ['id', 'model', 'type', 'participant', 'prompt', 'target', 'predicted', 'src', 'total_tokens', 'proportion']
output = testdata[returncols]
base_dir = Path('../../')
print("Saving to", (base_dir / 'Data' / 'evaluation' / s / fname))
output.to_csv(base_dir / 'Data' / 'evaluation' / s / fname)

(3032, 19)
Saving to ../../Data/evaluation/gt_main2/llama-ft-7b-1.csv
