# QueryGym Docker Quick Start

This notebook demonstrates basic QueryGym usage within the Docker container.

## Prerequisites
- The Docker container is running with its volumes mounted (this notebook writes to `outputs/`)
- The `OPENAI_API_KEY` environment variable is set

In [None]:
# Load QueryGym and report the environment this notebook is running in
import os

import querygym as qg

# Only report whether a key is present; the key itself is never printed.
api_key_status = "Yes" if os.getenv("OPENAI_API_KEY") else "No"

print(f"QueryGym version: {qg.__version__}")
print(f"API key configured: {api_key_status}")

## 1. Create Sample Queries

In [None]:
# Build a small in-memory query set from (qid, text) pairs
query_specs = [
    ("1", "what is covid-19"),
    ("2", "how does machine learning work"),
    ("3", "best practices for docker containers"),
]
sample_queries = [qg.QueryItem(qid=qid, text=text) for qid, text in query_specs]

# Echo the queries back so the reader can see what will be reformulated
print(f"Created {len(sample_queries)} sample queries")
for query in sample_queries:
    print(f"  {query.qid}: {query.text}")

## 2. Create a Reformulator

In [None]:
# Build a reformulator that uses the GenQR method.
# The settings live in one dict so they are easy to tweak in a single place.
reformulator_config = {
    "method": "genqr",
    "model": "gpt-3.5-turbo",  # or "gpt-4" for better quality
    "temperature": 0.7,
}
reformulator = qg.create_reformulator(**reformulator_config)

print(f"Created reformulator: {reformulator.__class__.__name__}")

## 3. Reformulate a Single Query

In [None]:
# Run the reformulator on the first sample query only
first_query = sample_queries[0]
result = reformulator.reformulate(first_query)

# Print each field of the result next to its label, in a fixed order
report_fields = (
    ("Original query:", "original"),
    ("Reformulated query:", "reformulated"),
    ("Metadata:", "metadata"),
)
for label, attribute in report_fields:
    print(label, getattr(result, attribute))

## 4. Batch Reformulation

In [None]:
# Run the reformulator over the whole sample set in one call
results = reformulator.reformulate_batch(sample_queries)

print(f"Reformulated {len(results)} queries:\n")
for item in results:
    # One print per record; the trailing empty string yields the blank
    # separator line between records.
    print(
        f"QID {item.qid}:",
        f"  Original: {item.original}",
        f"  Reformulated: {item.reformulated}",
        "",
        sep="\n",
    )

## 5. Save Results

In [None]:
# Save reformulated queries to the outputs directory.
# The path is defined once so the file written and the message printed
# cannot drift apart.
output_path = "outputs/reformulated_queries.tsv"

# Make sure the target directory exists before saving, in case
# save_queries does not create missing parent directories itself.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

# Each result becomes a QueryItem whose text is the reformulated query.
# Keyword arguments match how QueryItem is constructed for the sample queries.
output_queries = [qg.QueryItem(qid=r.qid, text=r.reformulated) for r in results]
qg.DataLoader.save_queries(output_queries, output_path)

print(f"Saved reformulated queries to: {output_path}")

## 6. Explore Available Methods

In [None]:
# Show every reformulation method that BaseReformulator reports
from querygym.core.base import BaseReformulator

methods = BaseReformulator.list_methods()
method_names = sorted(methods)  # sorted for a stable, readable listing

print(f"Available methods ({len(methods)}):")
for name in method_names:
    print(f"  - {name}")

## 7. Explore Prompt Bank

In [None]:
# Enumerate the prompts in the prompt bank along with their descriptions
prompt_bank = qg.PromptBank()
prompts = prompt_bank.list()

print(f"Available prompts ({len(prompts)}):")
ordered_prompt_ids = sorted(prompts)
for prompt_id in ordered_prompt_ids:
    # Fall back to a placeholder when the metadata has no description entry
    description = prompt_bank.get_meta(prompt_id).get('description', 'No description')
    print(f"  - {prompt_id}: {description}")

## 8. Check PySerini Installation

In [None]:
# Verify PySerini is available and report its version.
# Only the import is inside the try block, so an unrelated error while
# looking up the version is not mistaken for a missing install.
try:
    import pyserini
except ImportError:
    print("✗ PySerini is not available")
else:
    from importlib import metadata

    # Prefer the module attribute when the package exposes one; otherwise
    # ask the installed distribution metadata instead of raising
    # AttributeError.
    pyserini_version = getattr(pyserini, "__version__", None)
    if pyserini_version is None:
        try:
            pyserini_version = metadata.version("pyserini")
        except metadata.PackageNotFoundError:
            pyserini_version = "unknown"

    print("✓ PySerini is installed")
    print(f"  Version: {pyserini_version}")

## 9. Check PyTorch & GPU

In [None]:
# Check PyTorch and GPU availability.
# The import is guarded so that a missing PyTorch install is reported as a
# message rather than aborting the notebook with a traceback.
try:
    import torch
except ImportError:
    print("✗ PyTorch is not available")
else:
    print(f"PyTorch version: {torch.__version__}")

    # Query CUDA once and reuse the answer for both the report and the branch
    cuda_available = torch.cuda.is_available()
    print(f"CUDA available: {cuda_available}")

    if cuda_available:
        print(f"CUDA version: {torch.version.cuda}")
        print(f"GPU count: {torch.cuda.device_count()}")
        # Only the first device (index 0) is named here
        print(f"GPU name: {torch.cuda.get_device_name(0)}")
    else:
        print("Running on CPU (no GPU detected)")

## Next Steps

- Load your own queries from the `data/` directory
- Try different reformulation methods
- Experiment with different LLM models and parameters
- Check the [documentation](https://querygym.readthedocs.io/) for more examples