# Mistral test


In [1]:
import pandas as pd
from mistralai import Mistral

from mistral_key import key

# Credentials (imported from the local `mistral_key` module) and the model
# name used by every chat call in this notebook.
api_key = key
model = "mistral-small-2409"

client = Mistral(api_key=api_key)

# Reference text the model is instructed to answer from.
context = "Shipping costs 5€ in the EU, 10€ outside of the EU in Europe, 15€ to the US and Canada and 20€ everywhere else."

# Sample customer query used to try the client out.
question = 'How much is shipping to Somalia?'

# Prompt that embeds the context and the query and asks for a brief,
# context-only answer.
prompt = f"""
Context information is below.
---------------------
{context}
---------------------
You are a shop assistant. Be polite with the customer. Don't give any explanation. Reply briefly.
Given the context information and not prior knowledge, answer the query. 
Query: {question}
Answer:
"""

# Send the prompt as a single user turn and show the text of the first choice.
user_message = {"role": "user", "content": prompt}
chat_response = client.chat.complete(model=model, messages=[user_message])
ans = chat_response.choices[0].message.content
print(ans)

Shipping to Somalia is 20€.


In [10]:
# Display the whole response object returned by the chat call above.
chat_response

ChatCompletionResponse(id='1e3111fd63984f548de394238b473119', object='chat.completion', model='mistral-small-2409', usage=UsageInfo(prompt_tokens=111, completion_tokens=11, total_tokens=122), created=1728046422, choices=[ChatCompletionChoice(index=0, message=AssistantMessage(content='Shipping to Somalia is 20€.', tool_calls=None, prefix=False, role='assistant'), finish_reason='stop')])

In [11]:
# Report the answering model and the prompt/completion token counts, one per line.
usage = chat_response.usage
print(chat_response.model, usage.prompt_tokens, usage.completion_tokens, sep="\n")



mistral-small-2409
111
11


In [3]:
# Template for the relevance-evaluation prompt. It has its own name because
# `generate_question` reads the global `prompt_template`; if this cell assigned
# that same name, re-running it would silently switch question generation to
# this evaluation prompt (both templates use {question} and {answer}, so
# str.format would not raise).
eval_prompt_template = """
    You are an expert evaluator for a Retrieval-Augmented Generation (RAG) system.
    Your task is to analyze the relevance of the generated answer to the given question.
    Based on the relevance of the generated answer, you will classify it
    as "NON_RELEVANT", "PARTLY_RELEVANT", or "RELEVANT".

    Here is the data for evaluation:

    Question: {question}
    Generated Answer: {answer}

    Please analyze the content and context of the generated answer in relation to the question
    and provide your evaluation in parsable JSON without using code blocks:

    {{
      "Relevance": "NON_RELEVANT" | "PARTLY_RELEVANT" | "RELEVANT",
      "Explanation": "[Provide a brief explanation for your evaluation]"
    }}
    """.strip()

# Evaluate the answer obtained for the sample question earlier in the notebook.
prompt_eval = eval_prompt_template.format(question=question, answer=ans)
chat_response_eval = client.chat.complete(
    model=model,
    messages=[
        {
            "role": "user",
            "content": prompt_eval,
        },
    ]
)
# Raw text of the evaluator's reply; the next cell parses it with json.loads.
eval_ans = chat_response_eval.choices[0].message.content
print(eval_ans)

{
      "Relevance": "PARTLY_RELEVANT",
      "Explanation": "The generated answer provides a specific cost for shipping to Somalia, which directly addresses the question. However, the currency (€) used might not be relevant to all users, as they may expect the cost in a different currency. Therefore, the answer is partly relevant."
    }


In [5]:
import json
# Parse the evaluator's reply to check that it is valid JSON.
json.loads(eval_ans)

{'Relevance': 'PARTLY_RELEVANT',
 'Explanation': 'The generated answer provides a specific cost for shipping to Somalia, which directly addresses the question. However, the currency (€) used might not be relevant to all users, as they may expect the cost in a different currency. Therefore, the answer is partly relevant.'}

# Ground truth generation


In [15]:
questions_path = "data/questions.json"
import json
import hashlib

# Load the FAQ records. The encoding is stated explicitly because open()
# otherwise falls back to the platform's default codec, which is not UTF-8 on
# every system, while JSON exchanged between systems is UTF-8.
with open(questions_path, 'r', encoding='utf-8') as file:
    questions = json.load(file)

questions

{'questions': [{'question': 'How can I create an account?',
   'answer': "To create an account, click on the 'Sign Up' button on the top right corner of our website and follow the instructions to complete the registration process."},
  {'question': 'What payment methods do you accept?',
   'answer': 'We accept major credit cards, debit cards, and PayPal as payment methods for online orders.'},
  {'question': 'How can I track my order?',
   'answer': "You can track your order by logging into your account and navigating to the 'Order History' section. There, you will find the tracking information for your shipment."},
  {'question': 'What is your return policy?',
   'answer': 'Our return policy allows you to return products within 30 days of purchase for a full refund, provided they are in their original condition and packaging. Please refer to our Returns page for detailed instructions.'},
  {'question': 'Can I cancel my order?',
   'answer': 'You can cancel your order if it has not bee

In [23]:
import hashlib

# The FAQ records are stored under the top-level 'questions' key of the loaded JSON.
docs = questions['questions']


def generate_document_id(doc):
    """Derive a short, deterministic identifier for a FAQ record.

    The identifier is the first 8 hexadecimal characters of the MD5 digest of
    the record's question joined with "-" to the first 10 characters of its
    answer.

    Args:
        doc: Mapping with 'question' and 'answer' entries.

    Returns:
        An 8-character lowercase hexadecimal string.
    """
    fingerprint = "{}-{}".format(doc['question'], doc['answer'][:10])
    return hashlib.md5(fingerprint.encode()).hexdigest()[:8]


# Attach the derived id to every FAQ record (this mutates the dicts in `docs`).
for doc in docs:
    doc.update(id=generate_document_id(doc))

# Write the id-tagged records to CSV without the DataFrame index.
docs_frame = pd.DataFrame(docs)
docs_frame.to_csv('data/questions-raw.csv', index=False)


In [94]:
# Prompt template for generating customer questions from one FAQ record.
# `generate_question` fills {question} and {answer} via str.format, and the
# generation loop parses the model's reply with json.loads.
prompt_template = """
Emulate a client who is planning to buy at our shop.
Formulate 5 questions this client might ask based on a FAQ record. The record
should contain the answer to the questions, and the questions should be complete and not too short.
If possible, use as fewer words as possible from the record. 

The record:

question: {question}
answer: {answer}

Provide the output in parsable list:

["question1", "question2", ...]
""".strip()

In [95]:
def generate_question(doc):
    """Ask the chat model to write customer questions for one FAQ record.

    Fills the module-level ``prompt_template`` with the entries of ``doc``
    (passed as keyword arguments to ``str.format``), sends the result as a
    single user message through the module-level ``client`` and ``model``,
    and returns the text content of the first choice, unparsed.

    Args:
        doc: Mapping whose entries supply the template's placeholders.

    Returns:
        The raw text of the model's reply.
    """
    user_message = {
        "role": "user",
        "content": prompt_template.format(**doc),
    }
    reply = client.chat.complete(model=model, messages=[user_message])
    return reply.choices[0].message.content

In [97]:
import time
from tqdm.auto import tqdm

# Set to an integer to try the loop on the first N records only; None
# processes every record (slicing with None keeps the whole list).
MAX_DOCS = None
# Seconds to wait after each request (presumably to stay within an API rate
# limit — TODO confirm).
REQUEST_PAUSE_S = 6

# Maps a record id to the parsed reply for that record.
gen_questions = {}
# Ids of records whose reply was not valid JSON, so they can be retried.
failed_doc_ids = []

for doc in tqdm(docs[:MAX_DOCS]):
    q = generate_question(doc)
    try:
        gen_questions[doc['id']] = json.loads(q)
    except json.JSONDecodeError:
        # One malformed reply should not abort the remaining requests.
        failed_doc_ids.append(doc['id'])
    time.sleep(REQUEST_PAUSE_S)

if failed_doc_ids:
    print(f"Could not parse the reply for {len(failed_doc_ids)} record(s): {failed_doc_ids}")

100%|██████████| 80/80 [10:43<00:00,  8.04s/it]


In [98]:
# Flatten gen_questions (record id -> parsed reply, expected to be a list of
# questions) into (doc_id, question) pairs.
# A comprehension keeps its loop variables local, so the notebook-level
# `questions` (the loaded FAQ data) and `question` (the sample query) are not
# overwritten and the earlier cells stay re-runnable.
final = [
    (doc_id, generated_question)
    for doc_id, doc_questions in gen_questions.items()
    for generated_question in doc_questions
]


In [99]:
import pandas as pd

# One row per (source record id, generated question) pair.
df_questions = pd.DataFrame(
    data=final,
    columns=['doc_id', 'question'],
)
# Export is intentionally left disabled:
# df_questions.to_csv("data/ground_truth.csv", index=False)


In [88]:
# Display the table of generated questions.
df_questions

Unnamed: 0,doc_id,question
0,a5825c73,How do I set up an account on your website?
1,a5825c73,Where can I find the 'Sign Up' button?
2,a5825c73,What steps should I follow to complete the reg...
3,a5825c73,Is there a specific place on the website where...
4,a5825c73,Can you guide me through the account creation ...
5,db025979,What forms of payment are accepted for online ...
6,db025979,Which credit cards can I use to pay for my order?
7,db025979,Can I pay using my debit card on your website?
8,db025979,Is PayPal an option for paying on your site?
9,db025979,What are the acceptable payment methods for on...
