In [1]:
from langchain.evaluation import load_evaluator, load_evaluators
from langchain_community.llms.ollama import Ollama

# Two handles on the same local Ollama model: `eval_llm` drives the grader
# below; `student_llm` is not used in this cell (later cells pipe a prompt
# into it to produce the answers being graded).
eval_llm = Ollama(model="llama3")
student_llm = Ollama(model="llama3")

# "qa" evaluator backed by eval_llm.
evals = load_evaluator("qa", llm=eval_llm)
scores = { "CORRECT": 0, "INCORRECT": 0, "UNKNOWN": 0 }

# Smoke test: grade one obviously correct answer.
result = evals.invoke({
    "query": "What is the capital of France?",
    "answer": "Paris",
    "result": "The capital of france is Paris"})
results = result["results"]

# The grader's raw text can echo the question and the student answer before
# its verdict ("QUESTION: ... STUDENT ANSWER: ... GRADE: ..."), so searching
# the whole string could pick up "CORRECT"/"INCORRECT" from the echoed text.
# Only the part after the last "GRADE:" marker is inspected; when the marker
# is absent, rpartition returns the whole text, so nothing is lost.
verdict = results.rpartition("GRADE:")[2]

# "INCORRECT" must be tested first because it contains "CORRECT".
if "INCORRECT" in verdict:
    scores["INCORRECT"] += 1
elif "CORRECT" in verdict:
    scores["CORRECT"] += 1
else:
    scores["UNKNOWN"] += 1

print(scores)


{'CORRECT': 1, 'INCORRECT': 0, 'UNKNOWN': 0}
{'query': 'What is the capital of France?', 'answer': 'Paris', 'result': 'The capital of france is Paris', 'results': 'QUESTION: What is the capital of France?\nSTUDENT ANSWER: The capital of france is Paris\nTRUE ANSWER: Paris\nGRADE: CORRECT'}


In [2]:
import pandas as pd
# Read the local parquet file; columns 1..28 are treated as the emotion
# indicator columns.
df = pd.read_parquet("./go_emotions_train.parquet")
emotions = df.columns[1:29]
print(emotions)


def _active_emotions(row):
    """Return the names of the emotion columns equal to 1 in `row`, joined by ", "."""
    flagged = [name for name in emotions if row[name] == 1]
    return ", ".join(flagged)


# One comma-separated label string per row, built from the 1/0 indicator columns.
df["emotions"] = df[emotions].apply(_active_emotions, axis=1)

Index(['admiration', 'amusement', 'anger', 'annoyance', 'approval', 'caring',
       'confusion', 'curiosity', 'desire', 'disappointment', 'disapproval',
       'disgust', 'embarrassment', 'excitement', 'fear', 'gratitude', 'grief',
       'joy', 'love', 'nervousness', 'optimism', 'pride', 'realization',
       'relief', 'remorse', 'sadness', 'surprise', 'neutral'],
      dtype='object')


In [60]:
from langchain_core.prompts import PromptTemplate
# Instruction given to the student model; "{text}" is the placeholder that is
# filled with the text to analyse.
prompt = (
    "You are an Emotional AI, capable of detecting emotions in text.\n"
    "Please read the following text and tell me what emotions you detect in it.\n"
    "{text}\n"
)

# Pipe the rendered prompt into the student model.
emotion_template = PromptTemplate.from_template(prompt)
chain = emotion_template | student_llm

In [62]:
# Grade the first 1000 rows with the QA evaluator and tally the verdicts.
# NOTE(review): the "student" answer passed here is the ground-truth label
# string itself, so this loop appears to measure how often the grader accepts
# the reference answer rather than anything about student_llm - confirm intent.
scores = { "CORRECT": 0, "INCORRECT": 0, "UNKNOWN": 0 }
for _, row in df.head(1000).iterrows():
    result = evals.invoke({
        "query": prompt.replace("{text}", row["text"]),
        "answer": 'The true emotions are:' + row["emotions"],
        "result":  row["emotions"]
    })
    results = result["results"]

    # The grader's raw text can echo the question (which embeds arbitrary
    # dataset text) and the student answer before its verdict, so only the
    # part after the last "GRADE:" marker is inspected. When the marker is
    # absent, rpartition returns the whole text.
    verdict = results.rpartition("GRADE:")[2]

    # "INCORRECT" must be tested first because it contains "CORRECT".
    if "INCORRECT" in verdict:
        print(result)  # show rejected cases for inspection
        scores["INCORRECT"] += 1
    elif "CORRECT" in verdict:
        scores["CORRECT"] += 1
    else:
        print(result)  # show outputs with no recognisable grade
        scores["UNKNOWN"] += 1

print(scores)

KeyboardInterrupt: 

In [64]:
# Grade the student model's answers for the first 100 rows against the
# labelled emotions and tally the evaluator's verdicts.
scores = { "CORRECT": 0, "INCORRECT": 0, "UNKNOWN": 0 }
for _, row in df.head(100).iterrows():
    result = evals.invoke({
        "query": prompt.replace("{text}", row["text"]),
        "answer": 'The true emotions are:' + row["emotions"],
        # Answer produced by the student model for this row's text.
        "result": chain.invoke({"text": row["text"]})
    })
    results = result["results"]

    # The grader's raw text can echo the question (which embeds arbitrary
    # dataset text) and the student's free-form answer before its verdict, so
    # only the part after the last "GRADE:" marker is inspected. When the
    # marker is absent, rpartition returns the whole text.
    verdict = results.rpartition("GRADE:")[2]

    # "INCORRECT" must be tested first because it contains "CORRECT".
    if "INCORRECT" in verdict:
        scores["INCORRECT"] += 1
    elif "CORRECT" in verdict:
        scores["CORRECT"] += 1
    else:
        scores["UNKNOWN"] += 1

print(scores)