#### データソースを準備

In [None]:
from trafilatura import fetch_url, extract

url = 'https://www.shugiin.go.jp/internet/itdb_annai.nsf/html/statics/shiryo/dl-constitution.htm'
filename = 'kenpo.txt'

# Download the page. fetch_url yields None when the download fails, so report
# that explicitly instead of letting it surface as an extraction problem.
document = fetch_url(url)
if document is None:
    raise RuntimeError(f"Failed to download {url}")

# Extract the main text. Stop here on failure: the following cell reads
# `filename`, and continuing would either fail on a missing file or silently
# reuse a file written by an earlier run.
text = extract(document)
if text is None:
    raise RuntimeError("No text could be extracted from the document.")

# Save the extracted text as UTF-8 so the loader in the next cell can read it.
with open(filename, 'w', encoding='utf-8') as f:
    f.write(text)


#### チャンク分割

In [None]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import CharacterTextSplitter

# Read the text file saved earlier into LangChain documents.
loader = TextLoader(filename, encoding="utf-8")
raw_docs = loader.load()

# Split on newlines, sizing chunks with the tiktoken encoder
# (chunk_size=100, no overlap between neighbouring chunks).
text_splitter = CharacterTextSplitter.from_tiktoken_encoder(
    separator="\n", chunk_size=100, chunk_overlap=0
)
docs = text_splitter.split_documents(raw_docs)

# Sanity check: number of chunks, then the first chunk.
print(len(docs), docs[0], sep="\n")


#### Embeddingとベクトルストアの作成

In [None]:
import os

from dotenv import load_dotenv
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma

# Run load_dotenv() before any os.environ lookup below.
load_dotenv()

# Load the OpenAI embedding model; the deployment name comes from the environment.
embeddings = OpenAIEmbeddings(
    deployment=os.environ["DEPLOYMENT_NAME_EMBEDDINGS"],
)

# Build a Chroma vector store from the chunked documents and the embedding model.
db = Chroma.from_documents(documents=docs, embedding=embeddings)

In [17]:
from ragas.metrics import (
    answer_relevancy,
    context_precision,
    context_recall,
    faithfulness,
)
from ragas.metrics.critique import harmfulness

# Metrics passed to the evaluation, in the order they are listed here.
metrics = [faithfulness, answer_relevancy, context_recall, context_precision, harmfulness]

#### ragasをAzure OpenAIで使用する

In [13]:
import os

from dotenv import load_dotenv
from langchain.chat_models import AzureChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from ragas.llms import LangchainLLM

# Run load_dotenv() before any os.environ lookup below.
load_dotenv()

# Assemble the Azure OpenAI chat model from environment settings.
azure_model = AzureChatOpenAI(
    deployment_name=os.environ["DEPLOYMENT_NAME"],
    model=os.environ["MODEL_NAME"],
    openai_api_base=os.environ["OPENAI_API_BASE"],
    openai_api_type=os.environ["OPENAI_API_TYPE"],  # "azure"
)

# Assemble the embedding model against the same API base and type.
azure_embeddings = OpenAIEmbeddings(
    deployment=os.environ["DEPLOYMENT_NAME_EMBEDDINGS"],
    model=os.environ["MODEL_NAME_EMBEDDINGS"],
    openai_api_base=os.environ["OPENAI_API_BASE"],
    openai_api_type=os.environ["OPENAI_API_TYPE"],
)

# Wrap the azure_model instance so ragas can use it.
ragas_azure_model = LangchainLLM(azure_model)

# Point the answer_relevancy metric at the Azure-backed LLM and embeddings.
answer_relevancy.llm = ragas_azure_model
answer_relevancy.embeddings = azure_embeddings

In [21]:
# Point every metric at the Azure-backed LLM and embedding model.
# Plain attribute assignment replaces the direct __setattr__ dunder calls:
# it has the same effect and matches how answer_relevancy is patched earlier.
for metric in metrics:
    metric.llm = ragas_azure_model
    metric.embeddings = azure_embeddings

In [18]:
# data
from datasets import load_dataset

fiqa_eval = load_dataset("explodinggradients/fiqa", "ragas_eval")
fiqa_eval

DatasetDict({
    baseline: Dataset({
        features: ['question', 'ground_truths', 'answer', 'contexts'],
        num_rows: 30
    })
})

In [22]:
from ragas import evaluate

# Evaluate only the first 3 rows of the "baseline" split.
eval_subset = fiqa_eval["baseline"].select(range(3))
result = evaluate(eval_subset, metrics=metrics)

# Display the scores.
result

evaluating with [faithfulness]


  0%|          | 0/1 [00:00<?, ?it/s]

100%|██████████| 1/1 [00:16<00:00, 16.08s/it]
  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


evaluating with [answer_relevancy]


100%|██████████| 1/1 [00:04<00:00,  4.63s/it]
  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


evaluating with [context_recall]


100%|██████████| 1/1 [00:11<00:00, 11.80s/it]
  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


evaluating with [context_precision]


100%|██████████| 1/1 [00:01<00:00,  1.41s/it]
  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


evaluating with [harmfulness]


100%|██████████| 1/1 [00:03<00:00,  3.33s/it]
  block_group = [InMemoryTable(cls._concat_blocks(list(block_group), axis=axis))]
  table = cls._concat_blocks(blocks, axis=0)


{'ragas_score': 0.0000, 'faithfulness': 0.8667, 'answer_relevancy': 0.9152, 'context_recall': 0.6250, 'context_precision': 0.0000, 'harmfulness': 0.0000}

In [23]:
# Convert the evaluation result to a DataFrame and preview the first rows.
df = result.to_pandas()
df.head(n=5)

Unnamed: 0,question,contexts,answer,ground_truths,faithfulness,answer_relevancy,context_recall,context_precision,harmfulness
0,How to deposit a cheque issued to an associate...,[Just have the associate sign the back and the...,\nThe best way to deposit a cheque issued to a...,[Have the check reissued to the proper payee.J...,1.0,0.937405,0.875,0.0,0
1,Can I send a money order from USPS as a business?,[Sure you can. You can fill in whatever you w...,"\nYes, you can send a money order from USPS as...",[Sure you can. You can fill in whatever you w...,0.8,0.884969,1.0,0.0,0
2,1 EIN doing business under multiple business n...,[You're confusing a lot of things here. Compan...,"\nYes, it is possible to have one EIN doing bu...",[You're confusing a lot of things here. Compan...,0.8,0.923236,0.0,0.0,0
