# Introduction

This notebook shows how we created the VLM fine-tuning dataset for our pipeline. It uses the data generated by running:
- `synthetic/query_writing.py` -> used to create query writing examples
- `agents/data_gen_agent.py` -> used to create re-ranker and answer generation examples

We used the `public_test` split of the `crag-mm-2025/crag-mm-single-turn-public (v0.1.2)` dataset to create the task 2 (multi-source augmentation) fine-tuning dataset. This notebook demonstrates the dataset creation steps using a subset of examples from that dataset.

# Eval Utils

LLM-as-a-judge setup used for curation.

In [1]:
import concurrent.futures
import os
import time

import pandas as pd
from datasets import Dataset
from datasets import Image as HFImage
from datasets import load_dataset
from openai import AzureOpenAI
from pydantic import BaseModel
from tqdm.auto import tqdm

from pydantic import BaseModel
from openai import AzureOpenAI, OpenAI
import concurrent.futures

# Schema the judge model has to fill in; it is handed to the structured-output
# `parse` call as `response_format` in `attempt_api_call`.
class CRAGTurnEvaluationResult(BaseModel):
    """Structured output model for CRAG turn evaluation results."""
    # True when the judge considers the prediction correct w.r.t. the ground truth.
    accuracy: bool
        
def get_system_message() -> str:
    """Build the instruction prompt given to the LLM judge.

    Returns:
        The system prompt: a role statement, the three grading rules, and the
        required JSON output format (a single boolean ``accuracy`` field).
    """
    role = (
        "You are an expert evaluator for question answering systems. "
        "Your task is to determine if a prediction correctly answers a question based on the ground truth."
    )
    rules = [
        "1. The prediction is correct if it captures all the key information from the ground truth.",
        "2. The prediction is correct even if phrased differently as long as the meaning is the same.",
        "3. The prediction is incorrect if it contains incorrect information or is missing essential details.",
    ]
    output_format = "Output a JSON object with a single field 'accuracy' whose value is true or false."

    # The empty entry produces the blank line between the role statement and the rules.
    return "\n".join([role, "", "Rules:", *rules, output_format])

def attempt_api_call(client, model_name, messages, max_retries=3, retry_delay=1.0):
    """Call the structured-output chat endpoint, retrying on any failure.

    Args:
        client: Client object exposing ``beta.chat.completions.parse``.
        model_name: Name of the judge model to query.
        messages: Chat messages to send.
        max_retries: Total number of attempts before giving up.
        retry_delay: Seconds to wait between two consecutive attempts.

    Returns:
        The ``parsed`` payload of the first choice, or ``None`` when every
        attempt raised.
    """
    for attempt in range(max_retries):
        try:
            completion = client.beta.chat.completions.parse(
                model=model_name,
                messages=messages,
                response_format=CRAGTurnEvaluationResult,
            )
            return completion.choices[0].message.parsed
        except Exception as e:
            # Deliberately broad: judging is best-effort and the caller treats
            # a ``None`` result as "no verdict".
            if attempt == max_retries - 1:
                print(f"[red]Failed after {max_retries} attempts: {str(e)}[/red]")
            else:
                error_message = f"API call failed on attempt {attempt + 1}/{max_retries}: {str(e)}"
                print(f"[yellow]{error_message}, retrying...[/yellow]")
                # Wait only when another attempt follows; sleeping after the
                # final failure would just delay the caller.
                time.sleep(retry_delay)
    return None

def evaluate_response(query, ground_truth, agent_response, eval_model_name = "gpt-4o", client=None):
    """Grade one predicted answer against the ground truth.

    An exact (case- and whitespace-insensitive) match is accepted without
    calling the judge. A response containing "i don't know" is reported as a
    miss and is not sent to the judge either. Everything else is graded by the
    LLM judge.

    Args:
        query: The question that was asked.
        ground_truth: Reference answer.
        agent_response: Predicted answer to grade.
        eval_model_name: Judge model name.
        client: Optional client to use for the judge call. When omitted a new
            ``OpenAI()`` client is created, as before; passing one lets callers
            share a single client across many evaluations.

    Returns:
        Dict with ``is_exact_match``, ``is_correct``, ``is_miss``,
        ``is_semantically_correct`` and ``api_response`` (the judge output as
        a dict, or ``None`` when the judge was not called or failed).
    """
    is_idk = "i don't know" in agent_response.lower()
    is_exact_match = agent_response.strip().lower() == ground_truth.strip().lower()

    # An exact match is trivially semantically correct; otherwise both flags
    # stay False unless the judge says otherwise.
    is_semantically_correct = is_exact_match
    is_correct = is_exact_match
    api_response = None

    if not is_idk and not is_exact_match:
        judge_client = client if client is not None else OpenAI()

        messages = [
            {"role": "system", "content": get_system_message()},
            {"role": "user", "content": f"Question: {query}\nGround truth: {ground_truth}\nPrediction: {agent_response}\n"},
        ]

        api_response = attempt_api_call(judge_client, eval_model_name, messages)

        # A failed judge call (None) leaves the answer marked as incorrect.
        if api_response:
            is_semantically_correct = api_response.accuracy
            is_correct = is_semantically_correct

    return {
        "is_exact_match": is_exact_match,
        "is_correct": is_correct,
        "is_miss": is_idk,
        "is_semantically_correct": is_semantically_correct,
        "api_response": api_response.model_dump() if api_response else None,
    }

def evaluate_response_parallel(responses, max_workers=8):
    """Judge every response concurrently and write the verdicts back in place.

    Args:
        responses: List of dicts providing 'query', 'answer' and 'predicted_answer'.
        max_workers: Size of the thread pool.

    Each successfully judged dict gains (or has overwritten) the keys
    'eval_is_correct', 'eval_is_semantically_correct', 'eval_is_miss' and
    'eval_api_response'. A response whose evaluation raises is reported on
    stdout and left untouched. Nothing is returned.
    """

    def _judge(item):
        verdict = evaluate_response(item['query'], item['answer'], item['predicted_answer'])
        item['eval_is_correct'] = verdict['is_correct']
        item['eval_is_semantically_correct'] = verdict['is_semantically_correct']
        item['eval_is_miss'] = verdict['is_miss']
        item["eval_api_response"] = verdict['api_response']
        return item

    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as pool:
        # future -> position of the response it is working on (for error reporting)
        index_by_future = {}
        for position, item in enumerate(responses):
            index_by_future[pool.submit(_judge, item)] = position

        with tqdm(total=len(responses), desc="Evaluating responses") as progress:
            for finished in concurrent.futures.as_completed(index_by_future):
                try:
                    finished.result()
                except Exception as exc:
                    print(f'Response {index_by_future[finished]} generated an exception: {exc}')
                finally:
                    # The bar advances for failures too, so it always reaches the total.
                    progress.update(1)

# Tokenizer

In [35]:
from transformers import AutoTokenizer

# Tokenizer whose chat template is used to render the SFT prompts in the cells below.
# `trust_remote_code=True` was dropped: it lets Python code shipped in the Hub
# repository run locally, and this checkpoint is expected to load with the
# tokenizer classes bundled in `transformers`.
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-11B-Vision-Instruct")

# Read Data Agent Outputs

Read the outputs generated from running `python data_gen.py --output_dir data/kddcup25_datagen_outputs`

In [2]:
import glob
import numpy as np

from tqdm.auto import tqdm
from collections import Counter, defaultdict
from datasets import load_dataset, load_from_disk, concatenate_datasets

In [6]:
# Collect every dataset shard written by the data-generation step. Sorting keeps
# the concatenation order (and therefore the row indices stored below) stable
# across runs, because glob gives no ordering guarantee.
dataset_dirs = sorted(glob.glob("../data/kddcup25_datagen_outputs/*/"))
if not dataset_dirs:
    raise FileNotFoundError(
        "No data-gen outputs found under ../data/kddcup25_datagen_outputs/ - "
        "run the data generation step first."
    )

ds_list = [load_from_disk(d) for d in dataset_dirs]
ds = concatenate_datasets(ds_list)

# interaction_id -> list of (row index in `ds`, example) pairs
interaction_groups = defaultdict(list)

for i, example in enumerate(tqdm(ds, total=len(ds))):
    interaction_groups[example['interaction_id']].append((i, example))

print(f"Found {len(interaction_groups)} unique interaction_ids")

# A group counts as solved when at least one of its examples was judged correct.
total_groups = len(interaction_groups)
groups_with_correct = sum(
    any(member['eval_is_correct'] for _, member in examples)
    for examples in interaction_groups.values()
)

print(f"Groups with at least one correct answer: {groups_with_correct}")
print(f"Total groups: {total_groups}")
# Guarded so an empty input reports 0.0% instead of dividing by zero.
percentage = groups_with_correct / total_groups * 100 if total_groups else 0.0
print(f"Percentage: {percentage:.1f}%")

  0%|          | 0/24925 [00:00<?, ?it/s]

Found 1092 unique interaction_ids
Groups with at least one correct answer: 675
Total groups: 1092
Percentage: 61.8%


# Curation

- Double-check the correctness of examples where context relevance is high but the answer was judged wrong.
- Re-judge these with GPT-4o instead of GPT-4o-mini.

In [8]:
double_check_relevance_th = 0.8

# Candidates for a second opinion: judged wrong, yet the context scored as highly relevant.
# The dicts collected here are the same objects held in `interaction_groups`,
# so in-place updates to them are visible through the groups as well.
re_eval_examples = [
    ex
    for instances in interaction_groups.values()
    for _, ex in instances
    if not ex['eval_is_correct'] and ex['context_relevance'] > double_check_relevance_th
]

len(re_eval_examples)

223

In [9]:
# Re-judge the flagged examples; the verdict keys on each dict are overwritten in place.
evaluate_response_parallel(re_eval_examples, max_workers=8)
# number of cases where correctness judgement changed (all of them were incorrect before)
sum(1 for x in re_eval_examples if x['eval_is_correct'])

Evaluating responses:   0%|          | 0/223 [00:00<?, ?it/s]

59

# Process & filter data

In [11]:
import random

In [12]:
RANDOM_SEED = 42  # fixed seed: the random sub-sampling below is repeatable across runs
random.seed(RANDOM_SEED)

rank_cutoff = 8 # don't include context beyond this rank for fine-tuning dataset
rel_skip_th = 0.8 # don't mark a context as negative above this threshold even if it didn't lead to correct answer
negative_ratio = 10 # maximum number of irrelevant context for re-ranker per query

# Ranker examples per solved query: groups larger than `rel_sample_trigger`
# are sub-sampled down to `rel_sample_size`.
# NOTE(review): a group of exactly 6 keeps all 6 while a group of 7 is cut to 5 -
# confirm this gap is intended.
rel_sample_trigger = 6
rel_sample_size = 5

main_task_examples = []
relevance_task_examples = []
n_contexts = []

for instances in tqdm(interaction_groups.values(), total=len(interaction_groups)):
    contexts = []
    good_contexts = []
    good_answers = []
    rel_examples = []
    nneg = 0

    # Most relevant context first; only the top `rank_cutoff` entries are used
    # (assumes rank_cutoff >= 1 and a non-empty group).
    instances = sorted(instances, key=lambda x: x[1]['context_relevance'], reverse=True)

    for _, ex in instances[:rank_cutoff]:
        contexts.append(ex['search_content'])

        if ex['eval_is_correct']:
            good_contexts.append(ex['search_content'])
            good_answers.append(ex['predicted_answer'])
        elif ex['context_relevance'] > rel_skip_th:
            # Highly relevant but the answer was wrong: too ambiguous to use
            # as a ranker negative, so it yields no ranker example.
            continue
        else:
            nneg += 1
            if nneg > negative_ratio:
                continue

        rel_examples.append({
            'session_id': ex['session_id'],
            'interaction_id': ex['interaction_id'],
            'query': ex['query'],
            'message_history': ex['message_history'],
            'context': ex['search_content'],
            'is_relevant': "Yes" if ex['eval_is_correct'] else "No",
        })

    #---
    if good_contexts:
        answer = ex['answer'] # all answers in a group are same, so we can use the last one to get the answer
        if len(rel_examples) > rel_sample_trigger:
            rel_examples = random.sample(rel_examples, rel_sample_size)
        relevance_task_examples.extend(rel_examples)
    else:
        # No context led to a correct answer: train the model to abstain, and
        # keep a single ranker example for the query.
        answer = "I don't know"
        if rel_examples:
            relevance_task_examples.extend(random.sample(rel_examples, 1))

    n_contexts.append(len(contexts))

    # `ex` is the last example visited above; the fields read from it here are
    # taken to be shared by the whole group.
    main_task_examples.append({
        'session_id': ex['session_id'],
        'interaction_id': ex['interaction_id'],
        'turn_idx': ex['turn_idx'],
        'query': ex['query'],
        'message_history': ex['message_history'],
        'answer': answer,
        'context_list': contexts,
        'good_context_list': good_contexts,
        'good_answer_list': good_answers,
    })

  0%|          | 0/1092 [00:00<?, ?it/s]

In [14]:
# Size check: (answer-generation examples, ranker examples)
len(main_task_examples), len(relevance_task_examples)

(1092, 3551)

In [15]:
# Label balance of the ranker examples
Counter(ex['is_relevant'] for ex in relevance_task_examples)

Counter({'No': 2026, 'Yes': 1525})

In [16]:
# True = query has a real answer, False = target answer is the abstention "I don't know"
Counter(ex['answer'] != "I don't know" for ex in main_task_examples)

Counter({True: 615, False: 477})

# Process query writing examples

In [22]:
import glob
import json

# Sorted so the load order does not depend on the filesystem listing order.
paths = sorted(glob.glob("../data/kddcup25_synthetic/query_writing/*.json"))
query_list = []

for path in paths:
    # Read as UTF-8 explicitly rather than relying on the platform default encoding.
    with open(path, 'r', encoding='utf-8') as f:
        query_list.append(json.load(f))

print(f"Loaded {len(query_list)} files")
# interaction_id -> web search queries written for that interaction
mapping = {x['interaction_id']: x['web_queries'] for x in query_list}

Loaded 16 files


In [26]:
# mapping

In [27]:
dataset_type = "single-turn"
dataset_split = "public_test"
repo_name = f"crag-mm-2025/crag-mm-{dataset_type}-public"

# Load the pinned revision, pick the split, and drop the image column
# (only session ids and turns are read from this dataset below).
all_splits = load_dataset(repo_name, revision="v0.1.2")
dataset = all_splits[dataset_split].remove_columns(['image'])
len(dataset)

Using the latest cached version of the dataset since crag-mm-2025/crag-mm-single-turn-public couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'default' at /lustre/fsw/portfolios/datascience/users/rabiswas/.cache/huggingface/datasets/crag-mm-2025___crag-mm-single-turn-public/default/0.0.0/e3a061380b9e5c7cab76f6001173872fe9149081 (last modified on Fri Jun 13 04:15:12 2025).


1936

In [29]:
q_gen_task_examples = []

for ex in dataset:
    turns = ex['turns']
    iid = turns['interaction_id'][0]  # only the first turn of each session is used
    if iid not in mapping:
        continue  # no search queries were loaded for this interaction

    q_gen_task_examples.append({
        'session_id': ex['session_id'],
        'interaction_id': iid,
        'query': turns['query'][0],
        'search_queries': mapping[iid],
    })

# SFT Dataset

Multi-task dataset for VLM finetuning

In [30]:
# Accumulator shared by the three task cells below, each of which appends to it.
# Re-run this cell before re-running any of them, otherwise examples are duplicated.
processed_examples = []

# Task 1: Answer Generation

In [31]:
# System prompt for the answer-generation ('qa') examples. The text becomes part
# of every rendered training prompt, so any edit changes the dataset.
QA_SYSTEM_PROMPT = """Answer the question factually. Instructions:
- Be specific and avoid assumptions
- Answer based only on what you can directly observe in the image
- Use additional context only if it's directly relevant and increases confidence
- If uncertain or the image doesn't contain the requested information, say "I don't know"

Provide a direct, factual answer in 1 sentence. If unsure, just say "I don't know" rather than guessing."""

In [37]:
from copy import deepcopy

RANDOM_SEED = 42  # fixed seed: the random context injection below is repeatable
random.seed(RANDOM_SEED)

MAX_CONTEXT = 8   # maximum number of context snippets placed in a prompt
n_replace, n_found = 0, 0
UPSAMPLE = 2      # maximum number of 'qa' examples emitted per query

for ex in tqdm(main_task_examples):
    curr = []
    query = ex['query']
    message_history = ex['message_history']
    messages = [{"role": "system", "content": QA_SYSTEM_PROMPT}]

    if message_history:
        messages = messages + message_history

    good_candidates = ex['good_context_list']
    # Slicing already produces a new list, so the stored context list is never modified.
    final_contexts = ex['context_list'][:MAX_CONTEXT]

    if any(c in good_candidates for c in final_contexts):
        n_found += 1
    elif good_candidates:
        # None of the kept contexts is a known-good one: inject one or two of
        # them, appending while there is room and overwriting random slots otherwise.
        # NOTE(review): `context_list` is capped at the rank cutoff (8) when it
        # is built and the good contexts are a subset of it, so with
        # MAX_CONTEXT = 8 this branch looks unreachable - confirm.
        n_replace += 1
        nc = min(len(good_candidates), random.choice([1, 2]))
        pos_list = random.sample(range(MAX_CONTEXT), k=nc)

        for idx, pos in enumerate(pos_list):
            if len(final_contexts) < MAX_CONTEXT:
                final_contexts.append(good_candidates[idx])
            else:
                final_contexts[pos] = good_candidates[idx]

    # build context
    context = "\n\n".join([f"[Info {i+1}] {c}" for i, c in enumerate(final_contexts)])

    if len(context.strip()) > 0:
        context = f"\n\nHere is some additional information that may help you answer:\n\n{context}"
        messages.append({"role": "user", "content": context})

    messages.append({"role": "user", "content": [{"type": "image"}]})
    messages.append({"role": "user", "content": f"Question: {query}"})

    formatted_prompt = tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False)

    this_example = {
        'session_id': ex['session_id'],
        'interaction_id': ex['interaction_id'],
        'prompt': formatted_prompt,
        'answer': ex['answer'],
        'task': 'qa'
    }
    curr.append(this_example)

    # Up-sample: reuse the same prompt with a model-written correct answer
    # (lower-cased), considering only answers that end with a period, until
    # `UPSAMPLE` examples exist for this query.
    for ans in ex['good_answer_list']:
        if ans.endswith('.'):
            curr.append({**this_example, 'answer': ans.lower()})

            if len(curr) >= UPSAMPLE:
                break

    processed_examples.extend(curr)

  0%|          | 0/1092 [00:00<?, ?it/s]

# Task 2: Ranker

In [39]:
# System prompt for the re-ranker ('ranker') examples: a Yes/No judgement on
# whether a single retrieved context is sufficient to answer the question.
RANKER_SYSTEM_PROMPT = """Determine whether the provided context contains sufficient information to answer the given question about the image.

Instructions:
- Examine both the image and the additional context provided
- Answer "Yes" if the context contains enough relevant information to answer the question accurately
- Answer "No" if the context is insufficient, irrelevant, or contradicts what's visible in the image
- Focus on whether the context helps answer the specific question asked
- Consider the context as supplementary to what you can observe in the image

Respond with only "Yes" or "No"."""

In [40]:
for ex in tqdm(relevance_task_examples):
    messages = [{"role": "system", "content": RANKER_SYSTEM_PROMPT}]

    if ex['message_history']:
        messages = messages + ex['message_history']

    # Turn order: candidate context, then the image placeholder, then the question.
    # NOTE(review): "quesiton" below is a typo, kept verbatim because this text
    # is part of the training prompt - fix it only together with whatever code
    # builds the same prompt at inference time.
    messages.extend([
        {"role": "user", "content": f"\n\nAdditional Context:\n\n{ex['context']}"},
        {"role": "user", "content": [{"type": "image"}]},
        {"role": "user", "content": f"Question: {ex['query']}. Does the additional context offer enough information to answer the quesiton? (Yes/No)"},
    ])

    processed_examples.append({
        'session_id': ex['session_id'],
        'interaction_id': ex['interaction_id'],
        'prompt': tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False),
        'answer': ex['is_relevant'],
        'task': 'ranker'
    })

  0%|          | 0/3551 [00:00<?, ?it/s]

# Task 3: Query Gen

In [42]:
# System prompt for the query-writing ('query_gen') examples: the model is asked
# for 8 web search queries and must not answer the question itself.
Q_GEN_SYSTEM_PROMPT = """You will be shown an image and a question about that image. Your task is to generate 8 short, diverse and effective web search queries that a person could type into a search engine (like Google) to find information that would help answer the question.
Each query should explore a **different** angle or approach to maximize coverage. The search engine will NOT see the image. So your search queries must be based only on what *you* can observe or infer from the image and the question — not something the search engine can figure out from the image itself.
Do not answer the question. Do not explain anything. Just return 8 search queries as a numbered list. Be specific and concise in the queries. Focus on retrieving information that would be helpful to understand the image or answer the question."""

for ex in tqdm(q_gen_task_examples):
    # No message history is attached for this task: system prompt, image
    # placeholder, then the question.
    messages = [
        {"role": "system", "content": Q_GEN_SYSTEM_PROMPT},
        {"role": "user", "content": [{"type": "image"}]},
        {"role": "user", "content": f"Question: {ex['query']}\n\nNow provide 8 useful search queries."},
    ]

    processed_examples.append({
        'session_id': ex['session_id'],
        'interaction_id': ex['interaction_id'],
        'prompt': tokenizer.apply_chat_template(messages, add_generation_prompt=True, tokenize=False),
        'answer': ex['search_queries'],
        'task': 'query_gen'
    })

# Summary over all three tasks accumulated so far
task_counts = Counter(item['task'] for item in processed_examples)
print(f"Final task distribution: {task_counts}")
print(f"Total processed examples: {len(processed_examples)}")

  0%|          | 0/16 [00:00<?, ?it/s]

Final task distribution: Counter({'ranker': 3551, 'qa': 1621, 'query_gen': 16})
Total processed examples: 5188


# HF Dataset

In [43]:
import io
from datasets import Dataset, Image as HFImage

# Column-oriented view of the examples: one list per field, rows in original order.
# The field names are taken from the first example.
dataset_dict = {key: [] for key in processed_examples[0]}

for example in tqdm(processed_examples, desc="Processing examples"):
    for key in example:
        dataset_dict[key].append(example[key])

Processing examples:   0%|          | 0/5188 [00:00<?, ?it/s]

In [44]:
import gc
from datasets import Dataset, Image as HFImage
from tqdm.auto import tqdm

gc.collect()

batch_size = 128
total_examples = len(processed_examples)
num_batches = (total_examples + batch_size - 1) // batch_size  # ceiling division

print(f"Processing {total_examples} examples in {num_batches} batches of {batch_size}")

datasets_to_concat = []

# Convert in batches and concatenate, releasing each batch's intermediates as we go.
# NOTE(review): `answer` holds `search_queries` for the query_gen rows; if those
# are lists rather than strings the column type would differ between tasks - confirm.
for batch_idx in tqdm(range(num_batches), desc="Processing batches"):
    start_idx = batch_idx * batch_size
    end_idx = min(start_idx + batch_size, total_examples)
    batch_examples = processed_examples[start_idx:end_idx]

    batch_dict = {}
    for key in batch_examples[0].keys():
        batch_dict[key] = [example[key] for example in batch_examples]

    batch_dataset = Dataset.from_dict(batch_dict)
    datasets_to_concat.append(batch_dataset)

    del batch_dict, batch_examples
    gc.collect()

dataset = concatenate_datasets(datasets_to_concat)

# The examples assembled above carry no "image" field (images are saved as a
# separate dataset further down), so declare the image feature only when such a
# column actually exists instead of casting a column that is not there.
if "image" in dataset.column_names:
    dataset = dataset.cast_column("image", HFImage())

Processing 5188 examples in 41 batches of 128


Processing batches:   0%|          | 0/41 [00:00<?, ?it/s]

In [46]:
# Final size of the HF dataset and the per-task example counts
task_counts = Counter(item['task'] for item in processed_examples)
print(f"Created dataset with {len(dataset)} examples")
print(f"Task distribution: {task_counts}")

Created dataset with 5188 examples
Task distribution: Counter({'ranker': 3551, 'qa': 1621, 'query_gen': 16})


# Save & Upload

In [47]:
# Write the multi-task SFT dataset to disk; `save_dir` is reused by the upload cell below.
save_dir = "../data/kddcup/datamix_demo"
dataset.save_to_disk(save_dir)

Saving the dataset (0/1 shards):   0%|          | 0/5188 [00:00<?, ? examples/s]

In [48]:
import kagglehub

def upload_dataset(handle: str, local_dir: str) -> None:
    """Upload `local_dir` under the dataset handle `handle` via `kagglehub.dataset_upload`."""
    kagglehub.dataset_upload(handle, local_dir)

# NOTE: this uploads on every execution of the cell, including "Run All".
upload_dataset("conjuring92/kddcup25-mix-demo", save_dir)

Uploading Dataset https://www.kaggle.com/datasets/conjuring92/kddcup25-mix-demo ...
Starting upload for file ../data/kddcup/datamix_demo/state.json


Uploading: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 327/327 [00:00<00:00, 363B/s]

Upload successful: ../data/kddcup/datamix_demo/state.json (327B)
Starting upload for file ../data/kddcup/datamix_demo/data-00000-of-00001.arrow



Uploading: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 24.5M/24.5M [00:02<00:00, 10.5MB/s]

Upload successful: ../data/kddcup/datamix_demo/data-00000-of-00001.arrow (23MB)
Starting upload for file ../data/kddcup/datamix_demo/dataset_info.json



Uploading: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 506/506 [00:00<00:00, 628B/s]

Upload successful: ../data/kddcup/datamix_demo/dataset_info.json (506B)





Your dataset instance has been created.
Files are being processed...
See at: https://www.kaggle.com/datasets/conjuring92/kddcup25-mix-demo


# Save images

Save images separately for efficient processing and data loading.

In [49]:
import sys
import gc

sys.path.insert(0, '../')

from crag_image_loader import ImageLoader
from datasets import load_dataset, load_from_disk
import matplotlib.pyplot as plt

In [50]:
# Images are taken from the "validation" split of the same pinned dataset revision.
comp_ds = load_dataset("crag-mm-2025/crag-mm-single-turn-public", revision="v0.1.2")["validation"]
comp_ds

Using the latest cached version of the dataset since crag-mm-2025/crag-mm-single-turn-public couldn't be found on the Hugging Face Hub
Found the latest cached dataset configuration 'default' at /lustre/fsw/portfolios/datascience/users/rabiswas/.cache/huggingface/datasets/crag-mm-2025___crag-mm-single-turn-public/default/0.0.0/e3a061380b9e5c7cab76f6001173872fe9149081 (last modified on Fri Jun 13 04:15:12 2025).


Dataset({
    features: ['session_id', 'image', 'image_url', 'turns', 'answers'],
    num_rows: 1938
})

In [51]:
# session_id -> image for that session
session_to_image = {}

# Iterating the tqdm wrapper already advances the bar once per example, so no
# manual `update` call is made (it would count every example twice).
for example in tqdm(comp_ds, desc="Loading images"):
    image_url = example['image_url']

    if image_url is not None:
        img = ImageLoader(image_url).get_image()
    else:
        # No URL: fall back to the image stored in the dataset row, resized to 960x1280.
        img = example['image'].resize((960, 1280))

    session_to_image[example['session_id']] = img

# Entries may be None (presumably when the loader could not fetch an image), hence the filtered count.
print(f"Successfully loaded {sum(1 for img in session_to_image.values() if img is not None)} images")
print(f"Total sessions: {len(session_to_image)}")

Loading images:   0%|          | 0/1938 [00:00<?, ?it/s]

Successfully loaded 1938 images
Total sessions: 1938


In [52]:
from datasets import Dataset, Image as HFImage, concatenate_datasets
from tqdm.auto import tqdm

batch_size = 100
session_items = list(session_to_image.items())
total_items = len(session_items)
num_batches = (total_items + batch_size - 1) // batch_size  # ceiling division

print(f"Processing {total_items} items in {num_batches} batches of {batch_size}")

datasets_to_concat = []

# Build the image dataset batch by batch, collecting garbage after each batch.
for batch_idx in tqdm(range(num_batches), desc="Processing batches"):
    start_idx = batch_idx * batch_size
    # Slicing clamps at the end of the list, so the last batch may be shorter.
    batch_items = session_items[start_idx:start_idx + batch_size]

    batch_dataset = Dataset.from_dict({
        'session_id': [sid for sid, _ in batch_items],
        'image': [picture for _, picture in batch_items]
    })
    datasets_to_concat.append(batch_dataset)

    del batch_items
    gc.collect()

image_dataset = concatenate_datasets(datasets_to_concat).cast_column('image', HFImage())

print(f"Created dataset with {len(image_dataset)} examples")

Processing 1938 items in 20 batches of 100


Processing batches:   0%|          | 0/20 [00:00<?, ?it/s]

Created dataset with 1938 examples


In [53]:
# Write the session_id -> image dataset to disk. Note that `save_dir` is rebound
# here and no longer points at the SFT dataset directory.
save_dir = "../data/kddcup/validation_image_dataset"
image_dataset.save_to_disk(save_dir)
# upload_dataset("conjuring92/kddcup25-cup-validation-images", save_dir)

Saving the dataset (0/5 shards):   0%|          | 0/1938 [00:00<?, ? examples/s]