In [1]:
# Attach the user's Google Drive to this Colab runtime's filesystem.
from google.colab import drive

DRIVE_MOUNT_POINT = "/content/drive"
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [2]:
# Quietly (-q) install or upgrade (-U) the evaluation stack: ragas, datasets, the two
# LangChain packages, chromadb, sentence-transformers and transformers.
# NOTE(review): no versions are pinned here, so a later re-run may resolve different releases.
!pip -q install -U ragas datasets langchain-community langchain-core chromadb sentence-transformers transformers

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.0/52.0 kB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m466.5/466.5 kB[0m [31m24.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m520.7/520.7 kB[0m [31m33.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m92.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m502.2/502.2 kB[0m [31m28.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m21.5/21.5 MB[0m [31m70.9 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.4/10.4 MB[0m [31m94.3 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m278.2/278.2 kB[0m [31m29.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [3]:
# Pin requests and four OpenTelemetry packages (sdk, OTLP HTTP exporter, OTLP common, proto)
# to fixed versions, using two separate pip invocations.
# NOTE(review): the recorded output of this cell still reports dependency conflicts
# (requests 2.32.4 vs. langchain-community 0.4.1, and 1.39.1 OpenTelemetry packages vs. the
# 1.38.0 HTTP exporter). A later cell repeats these pins in a single command and also pins
# opentelemetry-api and the gRPC exporter.
!pip -q install "requests==2.32.4"
!pip -q install \
  "opentelemetry-sdk==1.38.0" \
  "opentelemetry-exporter-otlp-proto-http==1.38.0" \
  "opentelemetry-exporter-otlp-proto-common==1.38.0" \
  "opentelemetry-proto==1.38.0"

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/64.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m64.8/64.8 kB[0m [31m4.2 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-community 0.4.1 requires requests<3.0.0,>=2.32.5, but you have requests 2.32.4 which is incompatible.
opentelemetry-exporter-otlp-proto-http 1.38.0 requires opentelemetry-exporter-otlp-proto-common==1.38.0, but you have opentelemetry-exporter-otlp-proto-common 1.39.1 which is incompatible.
opentelemetry-exporter-otlp-proto-http 1.38.0 requires opentelemetry-proto==1.38.0, but you have opentelemetry-proto 1.39.1 which is incompatible.
opentelemetry-exporter-otlp-proto-http 1.38.0 requires opentelemetry-sdk~=1.38.0, but you have opentelemetry-sdk 1.39.1 which is incompa

In [5]:
# Pin requests plus the whole OpenTelemetry 1.38.0 set (api, sdk, proto and the OTLP
# common / HTTP / gRPC exporters) in one pip invocation.
# NOTE(review): an earlier cell's output states that langchain-community 0.4.1 requires
# requests>=2.32.5 — confirm that the 2.32.4 pin is intentional.
!pip -q install "requests==2.32.4" \
  "opentelemetry-api==1.38.0" \
  "opentelemetry-sdk==1.38.0" \
  "opentelemetry-proto==1.38.0" \
  "opentelemetry-exporter-otlp-proto-common==1.38.0" \
  "opentelemetry-exporter-otlp-proto-http==1.38.0" \
  "opentelemetry-exporter-otlp-proto-grpc==1.38.0"

## RAGAS eval from traces (no re-retrieval)

In [9]:
import os, json, torch
import chromadb
from datasets import Dataset

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langchain_core.language_models.llms import LLM
from typing import Optional, List

from ragas import evaluate
try:
    # new-style (preferred)
    from ragas.metrics.collections import faithfulness, answer_relevancy, context_precision
except Exception:
    # fallback for older ragas
    from ragas.metrics import faithfulness, answer_relevancy, context_precision

from langchain_community.embeddings import HuggingFaceEmbeddings

# Project root on the mounted Drive; the trace file and the Chroma store both live
# under its results/ folder.
BASE_DIR = "/content/drive/MyDrive/agentic-rag-telecom-thesis"
_RESULTS_DIR = BASE_DIR + "/results"
TRACE_PATH = _RESULTS_DIR + "/traces/baseline_traces.jsonl"
CHROMA_DIR = _RESULTS_DIR + "/chroma_baseline"

# ---- connect to the on-disk Chroma store and fetch the baseline collection
client = chromadb.PersistentClient(path=CHROMA_DIR)
col = client.get_collection(name="doc2dial_baseline")

# ---- load traces
# One JSON object per line. The file is read inside a `with` block so the handle is
# closed deterministically, and blank lines (e.g. a trailing newline) are skipped
# instead of crashing json.loads.
MAX_TRACES = 50  # upper bound on the number of traces that get evaluated
with open(TRACE_PATH, "r", encoding="utf-8") as trace_file:
    rows = [json.loads(line) for line in trace_file if line.strip()]
N = min(MAX_TRACES, len(rows))
rows = rows[:N]

# Per-trace fields, kept as parallel lists in the same order as `rows`.
questions = [r["query"] for r in rows]
answers   = [r["prediction"] for r in rows]
gts       = [r["reference"] for r in rows]

# ---- contexts from trace ids (deterministic)
# For every trace, rebuild the Chroma ids ("<doc_id>__<chunk_id>") of the chunks it
# retrieved and look their texts up in the collection, so nothing is re-retrieved.
contexts = []
for r in rows:
    ids = [f'{x["doc_id"]}__{x["chunk_id"]}' for x in r["retrieved"]]
    if not ids:
        # Nothing was retrieved for this trace; keep the row aligned with an empty
        # context list rather than sending an empty id filter to Chroma.
        contexts.append([])
        continue
    got = col.get(ids=ids, include=["documents"])
    # Re-key by the ids Chroma returned so the documents follow the retrieval order
    # recorded in the trace instead of whatever order the store hands them back in
    # (rank order matters for rank-sensitive metrics such as context precision).
    doc_by_id = dict(zip(got["ids"], got["documents"]))
    contexts.append([doc_by_id[i] for i in ids if i in doc_by_id])

# Bundle the four parallel lists (one entry per trace) into a Hugging Face Dataset.
ds = Dataset.from_dict(
    dict(
        question=questions,
        answer=answers,
        contexts=contexts,
        ground_truth=gts,
    )
)


ERROR:chromadb.telemetry.product.posthog:Failed to send telemetry event ClientStartEvent: module 'chromadb' has no attribute '__version__'


In [10]:
from pydantic import PrivateAttr
import torch
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
from langchain_core.language_models.llms import LLM
from typing import Optional, List

class FlanT5Local(LLM):
    """LangChain ``LLM`` that runs a Hugging Face seq2seq checkpoint in-process.

    The tokenizer and model named by ``model_name`` are loaded once in
    ``__init__``; the model is moved to CUDA when ``torch.cuda.is_available()``
    and stays on the CPU otherwise.
    """

    # Hugging Face model id handed to ``from_pretrained``.
    model_name: str = "google/flan-t5-base"
    # Passed to ``generate`` as ``max_new_tokens`` on every call.
    max_new_tokens: int = 256

    # Runtime-only state, declared as pydantic private attributes.
    _device: str = PrivateAttr()
    _tok = PrivateAttr()
    _model = PrivateAttr()

    @property
    def _llm_type(self) -> str:
        """Return the identifier string for this LLM implementation."""
        return "flan_t5_local"

    def __init__(self, **kwargs):
        """Initialise the base class, then load the tokenizer and model weights.

        Args:
            **kwargs: forwarded unchanged to the base-class constructor
                (e.g. ``model_name`` or ``max_new_tokens`` overrides).
        """
        super().__init__(**kwargs)
        self._device = "cuda" if torch.cuda.is_available() else "cpu"
        self._tok = AutoTokenizer.from_pretrained(self.model_name)
        self._model = AutoModelForSeq2SeqLM.from_pretrained(self.model_name).to(self._device)

    def _call(self, prompt: str, stop: Optional[List[str]] = None, **kwargs) -> str:
        """Generate a completion for ``prompt``.

        Args:
            prompt: input text; tokenized with truncation at 2048 tokens.
            stop: optional stop strings; the output is cut at the first
                occurrence of each one, applied in list order.
            **kwargs: accepted for interface compatibility and ignored.

        Returns:
            The decoded generation with special tokens removed and surrounding
            whitespace stripped.
        """
        inputs = self._tok(prompt, return_tensors="pt", truncation=True, max_length=2048).to(self._device)
        # Inference only: no gradients are needed.
        with torch.no_grad():
            out = self._model.generate(**inputs, max_new_tokens=self.max_new_tokens)
        text = self._tok.decode(out[0], skip_special_tokens=True).strip()
        for s in stop or ():
            # str.split("") raises ValueError("empty separator"), so empty stop
            # strings are skipped instead of aborting the call.
            if s:
                text = text.split(s)[0]
        return text.strip()

# Build the local model and report which device it was placed on.
llm = FlanT5Local()
print(f"LLM device: {llm._device}")

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


config.json: 0.00B [00:00, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json: 0.00B [00:00, ?B/s]

model.safetensors:   0%|          | 0.00/990M [00:00<?, ?B/s]

Loading weights:   0%|          | 0/282 [00:00<?, ?it/s]



generation_config.json:   0%|          | 0.00/147 [00:00<?, ?B/s]

LLM device: cuda


In [12]:
# ---- local LLM for RAGAS
# Reuse the FlanT5Local instance if one is already bound to `llm`; constructing a new
# one reloads the full set of weights. Rebuild only when `llm` is missing or has been
# rebound to something else.
if not isinstance(globals().get("llm"), FlanT5Local):
    llm = FlanT5Local()

# ---- embeddings for RAGAS
emb = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# ---- wrapper compatibility (works across ragas versions)
# Only a missing wrapper class triggers the fallback to the raw LangChain object.
# Errors raised while constructing a wrapper are real problems and must surface, so
# the handlers catch ImportError rather than every Exception.
try:
    from ragas.llms import LangchainLLMWrapper
except ImportError:
    llm_for_ragas = llm
else:
    llm_for_ragas = LangchainLLMWrapper(llm)

try:
    from ragas.embeddings import LangchainEmbeddingsWrapper
except ImportError:
    emb_for_ragas = emb
else:
    emb_for_ragas = LangchainEmbeddingsWrapper(emb)

Loading weights:   0%|          | 0/282 [00:00<?, ?it/s]



Loading weights:   0%|          | 0/103 [00:00<?, ?it/s]

[1mBertModel LOAD REPORT[0m from: sentence-transformers/all-MiniLM-L6-v2
Key                     | Status     |  | 
------------------------+------------+--+-
embeddings.position_ids | UNEXPECTED |  | 

[3mNotes:
- UNEXPECTED[3m	:can be ignored when loading from different task/architecture; not ok if you expect identical arch.[0m
  llm_for_ragas = LangchainLLMWrapper(llm)
  emb_for_ragas = LangchainEmbeddingsWrapper(emb)


ValueError: Collections metrics only support modern InstructorLLM. Found: LangchainLLMWrapper. Use: llm_factory('gpt-4o-mini', client=openai_client)

In [18]:
!pip -q install -U ragas openai

import os
from openai import AsyncOpenAI
from ragas import evaluate
from ragas.llms import llm_factory
from ragas.embeddings import OpenAIEmbeddings
from ragas.metrics import Faithfulness, AnswerRelevancy, ContextPrecision
os.environ["OPENAI_API_KEY"] = userdata.get("OPENAI_API_KEY")
client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])

llm = llm_factory("gpt-4o-mini", client=client)
emb = OpenAIEmbeddings(client=client, model="text-embedding-3-small")  # signature per docs :contentReference[oaicite:2]{index=2}

metrics = [
    Faithfulness(llm=llm),
    AnswerRelevancy(llm=llm, embeddings=emb),
    ContextPrecision(llm=llm),
]

res = evaluate(dataset=ds, metrics=metrics, llm=llm, embeddings=emb, show_progress=True)  # evaluate API :contentReference[oaicite:3]{index=3}
df = res.to_pandas()
means = df[["faithfulness","answer_relevancy","context_precision"]].mean().to_dict()
print("RAGAS means:", means)

out_csv = "/content/drive/MyDrive/agentic-rag-telecom-thesis/results/baseline_ragas.csv"
df.to_csv(out_csv, index=False)
print("Saved:", out_csv)

  from ragas.metrics import Faithfulness, AnswerRelevancy, ContextPrecision
  from ragas.metrics import Faithfulness, AnswerRelevancy, ContextPrecision
  from ragas.metrics import Faithfulness, AnswerRelevancy, ContextPrecision


Evaluating:   0%|          | 0/150 [00:00<?, ?it/s]

ERROR:ragas.executor:Exception raised in Job[13]: AttributeError('OpenAIEmbeddings' object has no attribute 'embed_query')
ERROR:ragas.executor:Exception raised in Job[10]: AttributeError('OpenAIEmbeddings' object has no attribute 'embed_query')
ERROR:ragas.executor:Exception raised in Job[4]: AttributeError('OpenAIEmbeddings' object has no attribute 'embed_query')
ERROR:ragas.executor:Exception raised in Job[7]: AttributeError('OpenAIEmbeddings' object has no attribute 'embed_query')
ERROR:ragas.executor:Exception raised in Job[1]: AttributeError('OpenAIEmbeddings' object has no attribute 'embed_query')
ERROR:ragas.executor:Exception raised in Job[16]: AttributeError('OpenAIEmbeddings' object has no attribute 'embed_query')
ERROR:ragas.executor:Exception raised in Job[25]: AttributeError('OpenAIEmbeddings' object has no attribute 'embed_query')
ERROR:ragas.executor:Exception raised in Job[19]: AttributeError('OpenAIEmbeddings' object has no attribute 'embed_query')
ERROR:ragas.executo

RAGAS means: {'faithfulness': 0.6354166666666666, 'answer_relevancy': nan, 'context_precision': 0.32347222220798216}
Saved: /content/drive/MyDrive/agentic-rag-telecom-thesis/results/baseline_ragas.csv


In [20]:
from openai import AsyncOpenAI
from ragas.embeddings import OpenAIEmbeddings
from ragas.metrics import AnswerRelevancy

# Re-score AnswerRelevancy only, on at most the first 20 rows of the existing `ds`.
ds_small = ds.select(range(min(20, len(ds))))

# Async OpenAI client, built from the key already stored in the environment.
client = AsyncOpenAI(api_key=os.environ["OPENAI_API_KEY"])

# NOTE(review): the recorded output of this cell shows every job failing with
# "'OpenAIEmbeddings' object has no attribute 'embed_query'" and a NaN mean, so this
# embeddings object does not work with this metric as written. The next cell replaces
# it with a hand-written adapter that defines embed_query / embed_documents.
emb2 = OpenAIEmbeddings(model="text-embedding-3-small", client=client)

# If the line above errors with unexpected 'client', use this instead:
# emb2 = OpenAIEmbeddings(model="text-embedding-3-small", openai_client=client)

metric = [AnswerRelevancy(llm=llm, embeddings=emb2)]

# Run with a single worker and one-sample batches to avoid tokens-per-minute spikes.
from ragas.run_config import RunConfig
rc = RunConfig(max_workers=1)

res2 = evaluate(dataset=ds_small, metrics=metric, llm=llm, embeddings=emb2, run_config=rc, batch_size=1, show_progress=True)
df2 = res2.to_pandas()
# Prints nan when no row received a score (as in the recorded run).
print("answer_relevancy mean:", df2["answer_relevancy"].mean())

  from ragas.metrics import AnswerRelevancy


Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]

Batch 1/20:   0%|          | 0/1 [00:00<?, ?it/s]

ERROR:ragas.executor:Exception raised in Job[0]: AttributeError('OpenAIEmbeddings' object has no attribute 'embed_query')
ERROR:ragas.executor:Exception raised in Job[1]: AttributeError('OpenAIEmbeddings' object has no attribute 'embed_query')
ERROR:ragas.executor:Exception raised in Job[2]: AttributeError('OpenAIEmbeddings' object has no attribute 'embed_query')
ERROR:ragas.executor:Exception raised in Job[3]: AttributeError('OpenAIEmbeddings' object has no attribute 'embed_query')
ERROR:ragas.executor:Exception raised in Job[4]: AttributeError('OpenAIEmbeddings' object has no attribute 'embed_query')
ERROR:ragas.executor:Exception raised in Job[5]: AttributeError('OpenAIEmbeddings' object has no attribute 'embed_query')
ERROR:ragas.executor:Exception raised in Job[6]: AttributeError('OpenAIEmbeddings' object has no attribute 'embed_query')
ERROR:ragas.executor:Exception raised in Job[7]: AttributeError('OpenAIEmbeddings' object has no attribute 'embed_query')
ERROR:ragas.executor:Exc

answer_relevancy mean: nan


In [21]:
from openai import OpenAI
import numpy as np
from ragas.metrics import AnswerRelevancy
from ragas.run_config import RunConfig

# Score only the first 20 examples (or all of them, if fewer) to avoid rate limits.
subset_size = min(20, len(ds))
ds_small = ds.select(range(subset_size))

# Synchronous OpenAI client, authenticated from the environment.
client = OpenAI(api_key=os.environ["OPENAI_API_KEY"])

class OpenAIEmbeddingsAdapter:
    """Expose OpenAI embeddings through `embed_documents` / `embed_query`.

    Args:
        model: embedding model name sent with every request.
        batch: maximum number of texts per API request; must be at least 1.
        client: synchronous OpenAI-style client (anything with
            ``embeddings.create(model=..., input=...)``). When omitted, the
            module-level ``client`` is looked up at call time, as before.
            Passing it explicitly protects the adapter from other cells
            rebinding that global name to a different kind of object.

    Raises:
        ValueError: if ``batch`` is smaller than 1.
    """

    def __init__(self, model="text-embedding-3-small", batch=64, client=None):
        if batch < 1:
            raise ValueError("batch must be >= 1")
        self.model = model
        self.batch = batch
        self.client = client

    def _embed_batch(self, texts):
        """Embed one batch of texts; vectors are returned in response order."""
        # `client` on the right-hand side is the module-level fallback.
        api = self.client if self.client is not None else client
        resp = api.embeddings.create(model=self.model, input=texts)
        return [d.embedding for d in resp.data]

    def embed_documents(self, texts):
        """Embed a list of texts, `batch` at a time; returns [] for no texts."""
        out = []
        for i in range(0, len(texts), self.batch):
            out.extend(self._embed_batch(texts[i:i + self.batch]))
        return out

    def embed_query(self, text):
        """Embed a single text and return its vector."""
        return self._embed_batch([text])[0]

# Score answer relevancy on the small sample using the adapter, with a single worker
# and one-sample batches.
emb_ok = OpenAIEmbeddingsAdapter()
metric = [AnswerRelevancy(llm=llm, embeddings=emb_ok)]
rc = RunConfig(max_workers=1)

res2 = evaluate(
    dataset=ds_small,
    metrics=metric,
    llm=llm,
    embeddings=emb_ok,
    run_config=rc,
    batch_size=1,
    show_progress=True,
)
df2 = res2.to_pandas()

# NaN-aware mean: rows without a score are ignored.
relevancy_mean = float(np.nanmean(df2["answer_relevancy"]))
print(f"answer_relevancy mean: {relevancy_mean}")

  from ragas.metrics import AnswerRelevancy


Evaluating:   0%|          | 0/20 [00:00<?, ?it/s]

Batch 1/20:   0%|          | 0/1 [00:00<?, ?it/s]



answer_relevancy mean: 0.22787599059731728
