In [1]:
import asyncio
import logging

from ragas.llms import llm_factory

from utils.evaluation import evaluate_rag_sample

# Configure the root logger so INFO-and-above records are emitted.
logging.basicConfig(level=logging.INFO)
# Logger named after the current module, used for this notebook's own messages.
logger = logging.getLogger(name=__name__)

In [9]:
import sys
import os

# Change this path to the folder that contains the `utils` package.
# When the RAGAS_PROJECT_ROOT environment variable is set it takes precedence,
# so the notebook can run on another machine without editing this cell.
_DEFAULT_PROJECT_ROOT = r"D:\File_Kuliah\Colleges\GITHUB\ragas-evaluation-local-tools"
PROJECT_ROOT = os.environ.get("RAGAS_PROJECT_ROOT", _DEFAULT_PROJECT_ROOT)

# A missing directory would otherwise only surface later as a confusing
# ModuleNotFoundError on the project-local imports.
if not os.path.isdir(PROJECT_ROOT):
    print("WARNING: project root does not exist:", PROJECT_ROOT)

# Prepend so the project's packages are searched first; the membership check
# keeps the cell idempotent when it is re-run in the same kernel.
if PROJECT_ROOT not in sys.path:
    sys.path.insert(0, PROJECT_ROOT)

print("Project root set to:", PROJECT_ROOT)

Project root set to: D:\File_Kuliah\Colleges\GITHUB\ragas-evaluation-local-tools


In [11]:
from utils.chunking.fixed_size_chunking import fixed_size_chunking
from utils.io import load_csv_dataset
from utils.preprocessing import clean_dataframe, dataframe_to_ragas_dataset
from utils.evaluation import run_ragas_evaluation
from utils.visualization import plot_metric_bar
from utils.logging import log_experiment

from config.ragas_config import RAGAS_METRICS

ModuleNotFoundError: No module named 'ragas.metrics.collections'

In [None]:
# Read the entire source document into memory as UTF-8 text.
with open("data/raw_document.txt", mode="r", encoding="utf-8") as document_file:
    raw_text = document_file.read()


In [None]:
# Split the raw document with the project's fixed-size chunker.
# NOTE(review): whether chunk_size/overlap are counted in characters or tokens
# depends on utils.chunking.fixed_size_chunking — confirm there.
chunks = fixed_size_chunking(raw_text, chunk_size=512, overlap=50)

# Report how many chunks were produced, then display the first two for a quick look.
print(f"Total chunks: {len(chunks)}")
chunks[:2]


In [None]:
import pandas as pd

# Build one evaluation row per chunk: the same placeholder question and answer
# are paired with each chunk as that row's single-element context list.
n_samples = len(chunks)

data = {
    "question": ["Apa tujuan utama sistem ini?"] * n_samples,
    "contexts": [[chunk] for chunk in chunks],
    # Must be repeated per row: pandas raises ValueError when the lists in a
    # dict-of-lists have different lengths (the original had a single answer).
    "answer": ["Jawaban dari model RAG"] * n_samples,
}

df = pd.DataFrame(data)
# Project helpers: clean the frame, then convert it to the dataset object
# that run_ragas_evaluation consumes.
df_clean = clean_dataframe(df)
ragas_dataset = dataframe_to_ragas_dataset(df_clean)


In [None]:
# Evaluate the prepared dataset against the metrics declared in config.ragas_config.
result = run_ragas_evaluation(dataset=ragas_dataset, metrics_config=RAGAS_METRICS)

# Convert the evaluation result via its to_pandas() method and display it
# (presumably a DataFrame of per-sample metric scores — confirm against ragas).
result_df = result.to_pandas()
result_df


In [None]:
# Visualise the metric scores with the project's bar-chart helper.
plot_metric_bar(result_df)

# Chunking settings recorded with this run; keep them in sync with the values
# passed to fixed_size_chunking earlier in the notebook.
experiment_parameters = {"chunk_size": 512, "overlap": 50}

# Record this run's configuration and metric results via the project's experiment logger.
log_experiment(
    output_path="logs/experiments.csv",
    experiment_name="baseline_fixed_chunking",
    chunking_strategy="fixed_size",
    parameters=experiment_parameters,
    metric_results=result_df,
)
