In [1]:
import pandas as pd
import minsearch as minsearch
import os
OPENAI_API_KEY = os.environ["OPENAI_API_KEY"]
from openai import OpenAI
from tqdm.auto import tqdm

# Retrieval Evaluation

In [2]:
df = pd.read_csv("stoic_zen_document.csv")
df.insert(0,'id',df.index)

documents = df.to_dict(orient="records")
print("lenght of the documents:", len(documents))

index = minsearch.Index(
    text_fields=["category", "question", "answer"],
    keyword_fields=['id',"ideology"]
)

index.fit(documents)

lenght of the documents: 820


<minsearch.Index at 0x7f400a30c090>

In [3]:
df_questions = pd.read_csv("ground_truth_retrieval.csv")
ground_truth = df_questions.to_dict(orient='records')

In [4]:
def hit_rate(relevance_total):
    cnt = 0

    for line in relevance_total:
        if True in line:
            cnt = cnt + 1

    return cnt / len(relevance_total)

def mrr(relevance_total):
    total_score = 0.0

    for line in relevance_total:
        for rank in range(len(line)):
            if line[rank] == True:
                total_score = total_score + 1 / (rank + 1)

    return total_score / len(relevance_total)

def minsearch_search(query, ideology):
    boost = {'question': 3.0, 'category': 0.5}

    results = index.search(
        query=query,
        filter_dict={'ideology': ideology},
        boost_dict=boost,
        num_results=5
    )

    return results

def evaluate(ground_truth, search_function):
    relevance_total = []

    for q in tqdm(ground_truth):
        doc_id = q['id']
        results = search_function(q)
        relevance = [d['id'] == doc_id for d in results]
        relevance_total.append(relevance)

    return {
        'hit_rate': hit_rate(relevance_total),
        'mrr': mrr(relevance_total),
    }

In [5]:
ground_truth

[{'id': 0,
  'question': 'What does it mean for the world to have past eternity according to certain philosophers?',
  'ideology': 'stoicism'},
 {'id': 0,
  'question': "How do Aristotle's views on the eternity of the world compare to those of the Stoics?",
  'ideology': 'stoicism'},
 {'id': 0,
  'question': "Which philosophical schools are associated with the belief in the world's eternal existence?",
  'ideology': 'stoicism'},
 {'id': 0,
  'question': 'What is the significance of the orthodox Neoplatonist perspective on the eternity of the world?',
  'ideology': 'stoicism'},
 {'id': 0,
  'question': 'Can you provide examples of arguments that support the idea of an eternal world from these philosophers?',
  'ideology': 'stoicism'},
 {'id': 1,
  'question': "What specific link do Alexander's Ethical Problems provide between early commentary and later medieval interpretations of ethics?",
  'ideology': 'stoicism'},
 {'id': 1,
  'question': "How do Alexander's essays address major theme

In [6]:
evaluate(ground_truth, lambda q: minsearch_search(q['question'],q['ideology']))

  0%|          | 0/4100 [00:00<?, ?it/s]

{'hit_rate': 0.8043902439024391, 'mrr': 0.6790121951219511}

# Finding the best parameters

In [10]:
df_validation = df_questions[:100]
df_test = df_questions[100:]

In [None]:
import random

def simple_optimize(param_ranges, objective_function, n_iterations=10):
    best_params = None
    best_score = float('-inf')  # Assuming we're minimizing. Use float('-inf') if maximizing.

    for _ in range(n_iterations):
        # Generate random parameters
        current_params = {}
        for param, (min_val, max_val) in param_ranges.items():
            if isinstance(min_val, int) and isinstance(max_val, int):
                current_params[param] = random.randint(min_val, max_val)
            else:
                current_params[param] = random.uniform(min_val, max_val)
        
        # Evaluate the objective function
        current_score = objective_function(current_params)
        
        # Update best if current is better
        if current_score > best_score:  # Change to > if maximizing
            best_score = current_score
            best_params = current_params
    
    return best_params, best_score

# Ingestion

In [39]:
pd.set_option('display.max_colwidth', None)  # Show all columns
df = pd.read_csv("stoic_zen_document.csv")
df.insert(0,'id',df.index)

In [40]:
documents = df.to_dict(orient="records")
print("lenght of the documents:", len(documents))

index = minsearch.Index(
    text_fields=["category", "question", "answer"],
    keyword_fields=['id',"ideology"]
)

index.fit(documents)

lenght of the documents: 820


<minsearch.Index at 0x7f48a6394e50>

# Rag Flow

In [41]:
client = OpenAI()

def minsearch_search(query, ideology):
    boost = {'question': 3.0, 'category': 0.5}

    results = index.search(
        query=query,
        filter_dict={'ideology': ideology},
        boost_dict=boost,
        num_results=5
    )

    return results
    
def llm(prompt):
    response = client.chat.completions.create(
        model='gpt-4o-mini',
        messages=[{"role": "user", "content": prompt}]
    )
    
    return response.choices[0].message.content

def build_prompt(query, search_results):
    prompt_template = """
    You're a philosophy teaching assistant. Answer the QUESTION based on the CONTEXT from the FAQ database.
    Use only the facts from the CONTEXT when answering the QUESTION. Provide a real life quote proving your point,stating the author as well.
    
    QUESTION: {question}
    
    CONTEXT: 
    {context}
    """.strip()

    entry_template = """
    category: {category}
    question: {question}
    answer: {answer}
    ideology: {ideology}
    """.strip()

    context = ""
    
    for doc in search_results:
        context = context + entry_template.format(**doc) + "\n\n"
    
    prompt = prompt_template.format(question=query, context=context).strip()
    return prompt

def rag(query,ideology):
    
    search_results = minsearch_search(query,ideology)
    prompt = build_prompt(query, search_results)
    answer = llm(prompt)

    return answer

In [42]:
query = "what are the cardinal basic virtues of stoicism?"
ideology = "stoicism"

answer = rag(query,ideology)
print(answer)

The cardinal basic virtues of Stoicism are courage, justice, and moderation. According to Stoicism, these virtues are unified and stem from a comprehensive understanding of what is good and bad across various circumstances. 

A relevant quote that illustrates this idea comes from the Roman Stoic philosopher Seneca, who said, "A good character, when established, is the foundation of all virtue." This emphasizes the unity and foundational nature of virtue in Stoic thought.
