# Creating the dataset

In [1]:
import json
from pathlib import Path
import tempfile
import sys; sys.path.append("..")

import ray

from app.config import ROOT_DIR
from app.util import stratify_split

with open(Path(ROOT_DIR, "experiments/evaluations/gpt-4/llama-2-70b-gtebase.json")) as f:
    data = json.load(f)

In [2]:
ds = ray.data.from_items([{"question": result["question"], "targets": 0 if result["score"] < 4 else 1} for result in data["results"]])
train_ds, val_ds = stratify_split(ds, stratify="targets", test_size=0.3)
ds.show()

2023-08-30 21:17:12,693	INFO worker.py:1431 -- Connecting to existing Ray cluster at address: 10.0.41.167:6379...
2023-08-30 21:17:12,700	INFO worker.py:1612 -- Connected to Ray cluster. View the dashboard at [1m[32mhttps://session-iq4d2ux1mdavtyqs5xdnlk2vcv.i.anyscaleuserdata-staging.com [39m[22m
2023-08-30 21:17:12,703	INFO packaging.py:346 -- Pushing file package 'gcs://_ray_pkg_aa5eb8d5da2d1d69c8f39ffa7a820268.zip' (0.38MiB) to Ray cluster...
2023-08-30 21:17:12,704	INFO packaging.py:359 -- Successfully pushed file package 'gcs://_ray_pkg_aa5eb8d5da2d1d69c8f39ffa7a820268.zip'.
2023-08-30 21:17:12,938	INFO dataset.py:2357 -- Tip: Use `take_batch()` instead of `take() / show()` to return records in pandas or numpy batch format.


{'question': 'I’m struggling a bit with Ray Data type conversions when I do map_batches. Any advice?', 'targets': 1}
{'question': 'How does autoscaling work in a Ray Serve application?', 'targets': 1}
{'question': 'can i create my own ray image with custom python version', 'targets': 0}
{'question': 'how do I get the address of a ray node', 'targets': 0}
{'question': 'are you based on GPT-4?', 'targets': 1}
{'question': 'why it takes 10 mins for you to answer my question?', 'targets': 0}
{'question': 'Does Ray support NCCL?', 'targets': 0}
{'question': 'could you give me an example of using this library for data-parallel training of CNNs on Ray?', 'targets': 0}
{'question': 'Is Ray integrated with DeepSpeed?', 'targets': 0}
{'question': "what will happen if I use AsyncIO's await to wait for a Ray future like `await x.remote()`", 'targets': 0}
{'question': 'How would you compare Spark, Ray, Dask?', 'targets': 0}
{'question': 'why would ray overload a node w/ more task that the resources

In [3]:
with open(Path(ROOT_DIR, "datasets", "routing.json")) as f:
    dataset = json.load(f)

ds = ray.data.from_items(dataset)
train_ds, val_ds = stratify_split(ds, stratify="targets", test_size=0.3)
ds.show()

{'question': 'how can I use leela chess zero for a similar two player board game called breakthrough?', 'targets': 1}
{'question': 'what is num_samples in tune?', 'targets': 1}
{'question': "What's the difference between learner worker and local worker?", 'targets': 1}
{'question': 'I have a two player board game that I would like to learn by self-play using alphazero. How can I do this', 'targets': 1}
{'question': 'if I am inside of a anyscale cluster how do I get my cluster-env-build-id', 'targets': 0}
{'question': 'how do I run a task in ray?', 'targets': 1}
{'question': 'how to use ray to do distributed xgboost training on k8s', 'targets': 1}
{'question': "Is there a way to send work to Ray where the head worker doesn't execute the job, only the workers?", 'targets': 1}
{'question': "I'm trying to write a policy which randomly chooses only from the valid actions. In my environment's observations, I list the valid actions in an array of bools. By the time it reaches my policy's comp

In [4]:
import numpy as np
import torch
import torch.nn as nn
from transformers import BertModel, BertTokenizer

In [5]:
llm = BertModel.from_pretrained("bert-base-uncased", return_dict=False)
embedding_dim = llm.config.hidden_size

Downloading (…)lve/main/config.json:   0%|          | 0.00/570 [00:00<?, ?B/s]

Downloading model.safetensors:   0%|          | 0.00/440M [00:00<?, ?B/s]

In [22]:
# Sample
tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", return_dict=False)
text = "Transfer learning with transformers for text classification."
batch = tokenizer([text], return_tensors="np", padding="longest")
batch = {k: torch.tensor(v) for k, v in batch.items()}  # convert to torch tensors
seq, pool = llm(input_ids=batch["input_ids"], attention_mask=batch["attention_mask"])
np.shape(seq), np.shape(pool)

(torch.Size([1, 10, 768]), torch.Size([1, 768]))

In [6]:
class FinetunedLLM(nn.Module):
    def __init__(self, llm, dropout_p, embedding_dim, num_classes):
        super(FinetunedLLM, self).__init__()
        self.llm = llm
        self.dropout = torch.nn.Dropout(dropout_p)
        self.fc1 = torch.nn.Linear(embedding_dim, num_classes)

    def forward(self, batch):
        ids, masks = batch["ids"], batch["masks"]
        seq, pool = self.llm(input_ids=ids, attention_mask=masks)
        z = self.dropout(pool)
        z = self.fc1(z)
        return z
    
    @torch.inference_mode()
    def predict(self, batch):
        self.eval()
        z = self(inputs)
        y_pred = torch.argmax(z, dim=1).cpu().numpy()
        return y_pred
    
    @torch.inference_mode()
    def predict_proba(self, batch):
        self.eval()
        z = self(batch)
        y_probs = F.softmax(z).cpu().numpy()
        return y_probs

In [7]:
model = FinetunedLLM(llm=llm, dropout_p=0.5, embedding_dim=embedding_dim, num_classes=2)
print (model.named_parameters)

<bound method Module.named_parameters of FinetunedLLM(
  (llm): BertModel(
    (embeddings): BertEmbeddings(
      (word_embeddings): Embedding(30522, 768, padding_idx=0)
      (position_embeddings): Embedding(512, 768)
      (token_type_embeddings): Embedding(2, 768)
      (LayerNorm): LayerNorm((768,), eps=1e-12, elementwise_affine=True)
      (dropout): Dropout(p=0.1, inplace=False)
    )
    (encoder): BertEncoder(
      (layer): ModuleList(
        (0-11): 12 x BertLayer(
          (attention): BertAttention(
            (self): BertSelfAttention(
              (query): Linear(in_features=768, out_features=768, bias=True)
              (key): Linear(in_features=768, out_features=768, bias=True)
              (value): Linear(in_features=768, out_features=768, bias=True)
              (dropout): Dropout(p=0.1, inplace=False)
            )
            (output): BertSelfOutput(
              (dense): Linear(in_features=768, out_features=768, bias=True)
              (LayerNorm): Layer

In [8]:
def preprocess(batch):
    tokenizer = BertTokenizer.from_pretrained("bert-base-uncased", return_dict=False)
    encoded_inputs = tokenizer(batch["question"].tolist(), return_tensors="np", padding="longest")
    return {"ids": encoded_inputs["input_ids"], "masks": encoded_inputs["attention_mask"], "targets": batch["targets"]}

train_ds = train_ds.map_batches(preprocess)
val_ds = val_ds.map_batches(preprocess)

train_ds.count()

2023-08-30 21:17:37,494	INFO streaming_executor.py:93 -- Executing DAG InputDataBuffer[Input] -> AllToAllOperator[Sort] -> AllToAllOperator[MapBatches(group_fn)->MapBatches(_filter_split)->RandomShuffle] -> TaskPoolMapOperator[MapBatches(preprocess)]
2023-08-30 21:17:37,495	INFO streaming_executor.py:94 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=True, actor_locality_enabled=True, verbose_progress=False)
2023-08-30 21:17:37,495	INFO streaming_executor.py:96 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


- Sort 1:   0%|          | 0/200 [00:00<?, ?it/s]

Sort Sample 2:   0%|          | 0/200 [00:00<?, ?it/s]

Shuffle Map 3:   0%|          | 0/200 [00:00<?, ?it/s]

Shuffle Reduce 4:   0%|          | 0/200 [00:00<?, ?it/s]

- MapBatches(group_fn)->MapBatches(_filter_split)->RandomShuffle 5:   0%|          | 0/200 [00:00<?, ?it/s]

Shuffle Map 6:   0%|          | 0/200 [00:00<?, ?it/s]

Shuffle Reduce 7:   0%|          | 0/200 [00:00<?, ?it/s]

Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

Sort Sample 0:   0%|          | 0/200 [00:00<?, ?it/s]

Downloading (…)solve/main/vocab.txt: 100%|██████████| 232k/232k [00:00<00:00, 7.45MB/s]
Downloading (…)okenizer_config.json: 100%|██████████| 28.0/28.0 [00:00<00:00, 8.59kB/s]


419

In [9]:
from ray.train.torch import get_device

def pad_array(arr, dtype=np.int32):
    max_len = max(len(row) for row in arr)
    padded_arr = np.zeros((arr.shape[0], max_len), dtype=dtype)
    for i, row in enumerate(arr):
        padded_arr[i][:len(row)] = row
    return padded_arr

def collate_fn(batch):
    batch["ids"] = pad_array(batch["ids"])
    batch["masks"] = pad_array(batch["masks"])
    dtypes = {"ids": torch.int32, "masks": torch.int32, "targets": torch.int64}
    tensor_batch = {}
    for key, array in batch.items():
        tensor_batch[key] = torch.as_tensor(array, dtype=dtypes[key], device=get_device())
    return tensor_batch

In [10]:
from ray import train
from ray.train import Checkpoint, CheckpointConfig, DataConfig, RunConfig, ScalingConfig
from ray.train.torch import TorchTrainer
import torch.nn.functional as F

In [11]:
def train_step(ds, batch_size, model, num_classes, loss_fn, optimizer):
    """Train step."""
    model.train()
    loss = 0.0
    ds_generator = ds.iter_torch_batches(batch_size=batch_size, collate_fn=collate_fn)
    for i, batch in enumerate(ds_generator):
        optimizer.zero_grad()  # reset gradients
        z = model(batch)  # forward pass
        targets = F.one_hot(batch["targets"], num_classes=num_classes).float()  # one-hot (for loss_fn)
        J = loss_fn(z, targets)  # define loss
        J.backward()  # backward pass
        optimizer.step()  # update weights
        loss += (J.detach().item() - loss) / (i + 1)  # cumulative loss
    return loss

In [12]:
def eval_step(ds, batch_size, model, num_classes, loss_fn):
    """Eval step."""
    model.eval()
    loss = 0.0
    y_trues, y_preds = [], []
    ds_generator = ds.iter_torch_batches(batch_size=batch_size, collate_fn=collate_fn)
    with torch.inference_mode():
        for i, batch in enumerate(ds_generator):
            z = model(batch)
            targets = F.one_hot(batch["targets"], num_classes=num_classes).float()  # one-hot (for loss_fn)
            J = loss_fn(z, targets).item()
            loss += (J - loss) / (i + 1)
            y_trues.extend(batch["targets"].cpu().numpy())
            y_preds.extend(torch.argmax(z, dim=1).cpu().numpy())
    return loss, np.vstack(y_trues), np.vstack(y_preds)

In [13]:
# Training loop
def train_func(config):
    # Hyperparameters
    dropout_p = config["dropout_p"]
    lr = config["lr"]
    lr_factor = config["lr_factor"]
    lr_patience = config["lr_patience"]
    num_epochs = config["num_epochs"]
    batch_size = config["batch_size"]
    num_classes = config["num_classes"]

    # Get datasets
    # set_seeds()
    train_ds = train.get_dataset_shard("train")
    val_ds = train.get_dataset_shard("val")

    # Model
    llm = BertModel.from_pretrained("allenai/scibert_scivocab_uncased", return_dict=False)
    model = FinetunedLLM(llm=llm, dropout_p=dropout_p, embedding_dim=llm.config.hidden_size, num_classes=num_classes)
    model = train.torch.prepare_model(model)

    # Training components
    loss_fn = nn.BCEWithLogitsLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=lr_factor, patience=lr_patience)

    # Training
    batch_size_per_worker = batch_size // train.get_context().get_world_size()
    for epoch in range(num_epochs):
        # Step
        train_loss = train_step(train_ds, batch_size_per_worker, model, num_classes, loss_fn, optimizer)
        val_loss, _, _ = eval_step(val_ds, batch_size_per_worker, model, num_classes, loss_fn)
        scheduler.step(val_loss)

        # Checkpoint
        metrics = dict(epoch=epoch, lr=optimizer.param_groups[0]["lr"], train_loss=train_loss, val_loss=val_loss)
        with tempfile.TemporaryDirectory() as tmpdir:
            torch.save(model.state_dict(), os.path.join(tmpdir, "model.pt"))
            train.report(metrics, checkpoint=Checkpoint.from_directory(tmpdir))

In [14]:
# Train loop config
train_loop_config = {
    "dropout_p": 0.5,
    "lr": 1e-5,
    "lr_factor": 0.8,
    "lr_patience": 3,
    "num_epochs": 30,
    "batch_size": 32,
    "num_classes": 2,
}

In [15]:
# Scaling config
scaling_config = ScalingConfig(
    num_workers=1,
    use_gpu=True,
    resources_per_worker={"CPU": 10, "GPU": 1},
    _max_cpu_fraction_per_node=0.8,
)

In [16]:
# Run config
checkpoint_config = CheckpointConfig(num_to_keep=1, checkpoint_score_attribute="val_loss", checkpoint_score_order="min")
run_config = RunConfig(name="llm", checkpoint_config=checkpoint_config, local_dir="~/ray_results")



In [17]:
# Trainer
trainer = TorchTrainer(
    train_func,
    train_loop_config=train_loop_config,
    scaling_config=scaling_config,
    run_config=run_config,
    datasets={"train": train_ds.materialize(), "val": val_ds.materialize()},
)

2023-08-30 21:18:12,198	INFO streaming_executor.py:93 -- Executing DAG InputDataBuffer[Input] -> AllToAllOperator[Sort] -> AllToAllOperator[MapBatches(group_fn)->MapBatches(_filter_split)->RandomShuffle] -> TaskPoolMapOperator[MapBatches(preprocess)]
2023-08-30 21:18:12,199	INFO streaming_executor.py:94 -- Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=None), locality_with_output=False, preserve_order=True, actor_locality_enabled=True, verbose_progress=False)
2023-08-30 21:18:12,199	INFO streaming_executor.py:96 -- Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


- Sort 1:   0%|          | 0/200 [00:00<?, ?it/s]

Sort Sample 2:   0%|          | 0/200 [00:00<?, ?it/s]

Shuffle Map 3:   0%|          | 0/200 [00:00<?, ?it/s]

Shuffle Reduce 4:   0%|          | 0/200 [00:00<?, ?it/s]

- MapBatches(group_fn)->MapBatches(_filter_split)->RandomShuffle 5:   0%|          | 0/200 [00:00<?, ?it/s]

Shuffle Map 6:   0%|          | 0/200 [00:00<?, ?it/s]

Shuffle Reduce 7:   0%|          | 0/200 [00:00<?, ?it/s]

Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

Sort Sample 0:   0%|          | 0/200 [00:00<?, ?it/s]

In [18]:
%%time
# Train
results = trainer.fit()

0,1
Current time:,2023-08-30 21:20:47
Running for:,00:02:24.02
Memory:,9.8/62.1 GiB

Trial name,status,loc,iter,total time (s),epoch,lr,train_loss
TorchTrainer_6cb07_00000,TERMINATED,10.0.41.167:6798,30,138.48,29,2.09715e-06,0.0638379


[2m[36m(TorchTrainer pid=6798)[0m Starting distributed worker processes: ['6893 (10.0.41.167)']
[2m[36m(RayTrainWorker pid=6893)[0m Setting up process group for: env:// [rank=0, world_size=1]
Downloading (…)lve/main/config.json: 100%|██████████| 385/385 [00:00<00:00, 118kB/s]
Downloading pytorch_model.bin:   0%|          | 0.00/442M [00:00<?, ?B/s]
Downloading pytorch_model.bin:   2%|▏         | 10.5M/442M [00:00<00:07, 60.1MB/s]
[2m[36m(SplitCoordinator pid=6984)[0m Auto configuring locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e']
Downloading pytorch_model.bin:   9%|▉         | 41.9M/442M [00:00<00:02, 153MB/s] 
Downloading pytorch_model.bin:  17%|█▋        | 73.4M/442M [00:00<00:01, 188MB/s]
Downloading pytorch_model.bin:  24%|██▎       | 105M/442M [00:00<00:01, 207MB/s] 
Downloading pytorch_model.bin:  31%|███       | 136M/442M [00:00<00:01, 217MB/s]
Downloading pytorch_model.bin:  38%|███▊      | 168M/442M [00:00<00:01, 225MB/s]
Downloading

(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

[2m[36m(SplitCoordinator pid=6984)[0m Executing DAG InputDataBuffer[Input] -> OutputSplitter[split(1, equal=True)]
[2m[36m(SplitCoordinator pid=6984)[0m Execution config: ExecutionOptions(resource_limits=ExecutionResources(cpu=None, gpu=None, object_store_memory=2000000000.0), locality_with_output=['f345f4a29a524c2dda718e763a8b6278b481b372979fd2d18f46080e'], preserve_order=False, actor_locality_enabled=True, verbose_progress=False)
[2m[36m(SplitCoordinator pid=6984)[0m Tip: For detailed progress reporting, run `ray.data.DataContext.get_current().execution_options.verbose_progress = True`


(pid=6984) Running 0:   0%|          | 0/200 [00:00<?, ?it/s]

2023-08-30 21:20:47,468	INFO tune.py:1146 -- Total run time: 144.20 seconds (144.01 seconds for the tuning loop).


CPU times: user 3.84 s, sys: 1.75 s, total: 5.59 s
Wall time: 2min 24s


In [19]:
results.metrics_dataframe

Unnamed: 0,epoch,lr,train_loss,val_loss,timestamp,time_this_iter_s,should_checkpoint,done,training_iteration,trial_id,date,time_total_s,pid,hostname,node_ip,time_since_restore,iterations_since_restore
0,0,1e-05,0.549363,0.518476,1693455518,12.171986,True,False,1,6cb07_00000,2023-08-30_21-18-38,12.171986,6798,ip-10-0-41-167,10.0.41.167,12.171986,1
1,1,1e-05,0.502609,0.519532,1693455522,4.334957,True,False,2,6cb07_00000,2023-08-30_21-18-43,16.506943,6798,ip-10-0-41-167,10.0.41.167,16.506943,2
2,2,1e-05,0.500884,0.518813,1693455527,4.319682,True,False,3,6cb07_00000,2023-08-30_21-18-47,20.826625,6798,ip-10-0-41-167,10.0.41.167,20.826625,3
3,3,1e-05,0.493299,0.524068,1693455531,4.372862,True,False,4,6cb07_00000,2023-08-30_21-18-51,25.199487,6798,ip-10-0-41-167,10.0.41.167,25.199487,4
4,4,8e-06,0.48231,0.529211,1693455536,4.328253,True,False,5,6cb07_00000,2023-08-30_21-18-56,29.52774,6798,ip-10-0-41-167,10.0.41.167,29.52774,5
5,5,8e-06,0.486419,0.539902,1693455540,4.366602,True,False,6,6cb07_00000,2023-08-30_21-19-00,33.894342,6798,ip-10-0-41-167,10.0.41.167,33.894342,6
6,6,8e-06,0.468147,0.529105,1693455544,4.359318,True,False,7,6cb07_00000,2023-08-30_21-19-04,38.25366,6798,ip-10-0-41-167,10.0.41.167,38.25366,7
7,7,8e-06,0.461753,0.54134,1693455549,4.350897,True,False,8,6cb07_00000,2023-08-30_21-19-09,42.604557,6798,ip-10-0-41-167,10.0.41.167,42.604557,8
8,8,6e-06,0.430996,0.56578,1693455553,4.34821,True,False,9,6cb07_00000,2023-08-30_21-19-13,46.952767,6798,ip-10-0-41-167,10.0.41.167,46.952767,9
9,9,6e-06,0.423773,0.574876,1693455557,4.340979,True,False,10,6cb07_00000,2023-08-30_21-19-17,51.293746,6798,ip-10-0-41-167,10.0.41.167,51.293746,10


# Trying RAG score

In [203]:
from app.query import QueryAgent

records = data["results"]

agent = QueryAgent()

for record in records:
    embedding = np.array(agent.embedding_model.embed_query(record["question"]))
    with agent.conn.cursor() as cur:
        cur.execute("SELECT * FROM document ORDER BY embedding <-> %s LIMIT 5", (embedding,))
        rows = cur.fetchall()
    record["corr"] = np.dot(rows[0][3], embedding)

In [221]:
import pandas as pd
df = pd.DataFrame.from_records(records)
# df["num"] = df["question"].str.len()
# df.corr(numeric_only=True)
pd.set_option('display.max_rows', len(df))
df.sort_values(by=["corr"])


Unnamed: 0,question,generated_answer,reference_answer,score,reasoning,sources,corr
5,why it takes 10 mins for you to answer my ques...,"Based on the provided context, it's likely tha...",,2.0,The AI response seems well-explained with appr...,[https://docs.ray.io/en/master/ray-core/api/do...,0.832438
4,are you based on GPT-4?,"Based on the provided context, it seems that G...",,4.0,The generated answer does a good job addressin...,[https://docs.ray.io/en/master/ray-air/example...,0.848736
42,if there are O(millions) of keys that all have...,It is generally not recommended to spin up 1 a...,"In reference to the context provided, creating...",5.0,"The generated answer accurately, clearly, and ...",[https://docs.ray.io/en/master/data/examples/r...,0.849179
172,What are the reasons for spikes in node CPU ut...,The reasons for spikes in node CPU utilization...,Spikes in node CPU utilization can be caused b...,4.5,The generated answer is quite informative and ...,[https://docs.ray.io/en/master/cluster/faq.htm...,0.849213
107,Can I specify multiple working directories?,"Yes, you can specify multiple working director...","No, you can only specify a single working dire...",1.0,The generated answer is incorrect and does not...,[https://docs.ray.io/en/master/tune/faq.html#h...,0.857076
178,What is the working of `PowerOfTwoChoicesRepli...,The `PowerOfTwoChoicesReplicaScheduler` is a s...,,4.8,"The generated answer is well elaborated, with ...",[https://docs.ray.io/en/master/_modules/ray/_p...,0.86332
128,How to update files in working directory ?,"To update files in the working directory, you ...","To update files in the working directory, you ...",1.0,The generated answer is completely misleading ...,[https://docs.ray.io/en/master/serve/api/index...,0.86339
169,What is the reason actors change their state t...,The reason actors change their state to unheal...,Actors change their state to 'unhealthy' or 'D...,3.75,While the generated answer does attempt to ans...,[https://docs.ray.io/en/master/_modules/ray/rl...,0.864036
71,can’t pickle SSLContext objects,"Based on the provided context, it seems that t...",SSLContext objects can't be pickled by default...,4.0,The generated answer is very detailed and comp...,[https://docs.ray.io/en/master/ray-core/object...,0.865827
15,"can you create a tweet thread from chapter 8, ...",Sure! Here's a possible tweet thread based on ...,,4.0,The generated answer is an accurately detailed...,[https://docs.ray.io/en/master/ray-air/example...,0.866004


[2m[1m[36m(autoscaler +8h18m39s)[0m [workspace snapshot] New snapshot created successfully (size: 35.40 MB).
[2m[1m[36m(autoscaler +8h23m39s)[0m [workspace snapshot] New snapshot created successfully (size: 35.40 MB).
[2m[1m[36m(autoscaler +8h28m39s)[0m [workspace snapshot] New snapshot created successfully (size: 35.40 MB).
[2m[1m[36m(autoscaler +9h3m44s)[0m [workspace snapshot] New snapshot created successfully (size: 35.40 MB).
[2m[1m[36m(autoscaler +9h8m44s)[0m [workspace snapshot] New snapshot created successfully (size: 35.40 MB).
[2m[1m[36m(autoscaler +9h13m44s)[0m [workspace snapshot] New snapshot created successfully (size: 35.40 MB).
[2m[1m[36m(autoscaler +9h18m45s)[0m [workspace snapshot] New snapshot created successfully (size: 35.40 MB).
[2m[1m[36m(autoscaler +9h23m50s)[0m [workspace snapshot] New snapshot created successfully (size: 35.40 MB).
[2m[1m[36m(autoscaler +9h28m50s)[0m [workspace snapshot] New snapshot created successfully (si

In [185]:
import os
os.environ["DB_CONNECTION_STRING"]="dbname=postgres user=postgres host=localhost password=postgres"
agent = QueryAgent()
records = [{"question": result["question"], "targets": 0 if result["score"] < 4 else 1} for result in data["results"]]
record = records[0]
embedding = np.array(agent.embedding_model.embed_query(record["question"]))
with agent.conn.cursor() as cur:
    cur.execute("SELECT * FROM document ORDER BY embedding <-> %s LIMIT 5", (embedding,))
    rows = cur.fetchall()
np.dot(rows[0][3], embedding)

0.9256965196597495

# Self-evaluation for models

In [4]:
import time

import openai

def generate_response(llm, system_content, assistant_content, user_content, max_retries=3, retry_interval=60):
    retry_count = 0
    while retry_count < max_retries:
        try:
            response = openai.ChatCompletion.create(
                    model=llm,
                    messages=[
                        {"role": "system", "content": system_content},
                        {"role": "assistant", "content": assistant_content},
                        {"role": "user", "content": user_content},
                    ])
            return response["choices"][-1]["message"]["content"]
        except Exception as e:
            print (e)
            time.sleep(retry_interval)  # default is per-minute rate limits
            retry_count += 1
    return ""

In [7]:
import requests
import tempfile
from urllib.parse import urlparse
from bs4 import BeautifulSoup

def extract_text(content, custom_html_tag=None):
    soup = BeautifulSoup(content)

    # default tags
    html_tags = [
        ("div", {"role": "main"}),
        ("main", {"id": "main-content"}),
    ]

    if custom_html_tag is not None:
        html_tags.append(custom_html_tag)

    text = None

    # reversed order. check the custom one first
    for tag, attrs in html_tags[::-1]:
        text = soup.find(tag, attrs)
        # if found, break
        if text is not None:
            break

    if text is not None:
        text = text.get_text()
    else:
        text = ""
    # trim empty lines
    return "\n".join([t for t in text.split("\n") if t])

llm = "meta-llama/Llama-2-70b-chat-hf"

def score_own_answer(record):
    query = record["question"]
    source = record["source"]
    # source = "https://docs.ray.io/en/master/data/transforming-data.html#configuring-batch-format"
    if source.startswith("https://docs.ray.io"):
        url = urlparse(source)
        response = requests.get(source)
        custom_html_tag = ("section", {"id": url.fragment}) if url.fragment else None
        context = extract_text(response.content, custom_html_tag)
    else:
        context = ""

    system_content = "Answer the {query} using the additional {context} provided.".format(query=query, context=context)
    assistant_content = ""
    user_content = ""

    answer = generate_response(llm, system_content, assistant_content, user_content)

    system_content += "\n " + answer + "\n Your job is to rate the quality of the answer above.\n Your score has to be between 1 and 5.\n You must return your response in a line with only the score.\n Do not return answers in any other format."

    record["rating"] = generate_response(llm, system_content, assistant_content, user_content)

with open(Path(ROOT_DIR, "datasets/eval-dataset-v1.jsonl")) as f:
    records = [json.loads(line) for line in f]

for record in records:
    score_own_answer(record)
    print(record)

No API key provided. You can set your API key in code using 'openai.api_key = <API-KEY>', or you can set the environment variable OPENAI_API_KEY=<API-KEY>). If your API key is stored in a file, you can point the openai module at it with 'openai.api_key_path = <PATH>'. You can generate API keys in the OpenAI web interface. See https://platform.openai.com/account/api-keys for details.


KeyboardInterrupt: 