In [1]:
import dspy
from decouple import config
from typing import Literal, List

# The API key is looked up by name through decouple's `config` (imported above)
# instead of being written into the notebook.
OPENROUTER_API_KEY = config("OPENROUTER_API_KEY")

In [2]:
# Language model used for every DSPy call in this notebook:
# Qwen 2.5 72B Instruct, reached through the OpenRouter endpoint.
lm = dspy.LM(
    "openrouter/qwen/qwen-2.5-72b-instruct",
    api_base="https://openrouter.ai/api/v1",
    api_key=OPENROUTER_API_KEY,
)

# Hand the model to DSPy's global configuration.
dspy.configure(lm=lm)

In [3]:
class Answer(dspy.Signature):
    """Answer a question based on the provided documents."""

    # NOTE(review): the docstring above is presumably forwarded to the LM as the
    # task instruction — confirm against the DSPy docs before rewording it.

    # --- inputs ---
    # The natural-language question to answer.
    question: str = dspy.InputField()
    # In this notebook, the "Document N: ..." listing built while loading the data.
    context: str = dspy.InputField()

    # --- outputs ---
    # List of answers; this is the field `evaluation_metric` compares against
    # the gold and wrong answers.
    correct_answers: List[str] = dspy.OutputField()
    # Free-text justification; not used by the metric in this notebook.
    explanation: str = dspy.OutputField()

In [4]:
# Predictor built from the `Answer` signature; it is the baseline program
# called in the "Baseline evaluation" section.
answer = dspy.Predict(Answer)

### Processing data into dspy examples

In [5]:
import ujson

# NOTE(review): absolute, machine-specific path — consider deriving it from a
# configurable data directory so the notebook runs on other machines.
file_path = "/Users/app/Documents/ramdocs/RAMDocs/src/data/split_data/RAMDocs_test_train.jsonl"

# Parse the JSON Lines file (one JSON object per line) into a list of flat
# dicts, one per question.
data = []
# Decode explicitly as UTF-8 so parsing does not depend on the platform's
# locale-default encoding.
with open(file_path, encoding="utf-8") as f:
    for raw_line in f:
        # Tolerate blank lines (e.g. a trailing newline at end of file)
        # instead of failing inside the JSON parser.
        if not raw_line.strip():
            continue

        record = ujson.loads(raw_line)
        documents = record['documents']

        # Flatten the retrieved documents into one numbered text block,
        # "Document 1: ...\nDocument 2: ...", which is passed to the LM as `context`.
        context = "\n".join(
            f"Document {i}: {doc['text']}"
            for i, doc in enumerate(documents, start=1)
        )

        data.append({
            'question': record['question'],
            'context': context,
            # The raw documents and labels are kept alongside the inputs so
            # they remain available for evaluation.
            "documents": documents,
            "disambig_entity": record['disambig_entity'],
            "gold_answers": record['gold_answers'],
            "wrong_answers": record['wrong_answers'],
        })

In [6]:
# Peek at the first parsed record to check the fields built in the loading cell.
data[0]

{'question': 'What is the profession of C. Mayer?',
 'context': 'Document 1: Christa Mayer Christa Mayer is a German operatic mezzo-soprano. She is particularly known for her portrayal of Erda in Richard Wagner\'s "Ring Cycle"; a role which she has performed several times at the Bayreuth Festival and recorded on the BBC Legends Record Label. Christa Mayer was born in Sulzbach-Rosenberg, Germany in Bavaria. After graduating from the Bavarian Academy of Singing, she pursued further studies at the Munich Academy of Music where she was a pupil of tenor Thomas Moser. She won several notable singing competitions, including prizes in the ARD International Music Competition in Munich and the International Robert Schumann\nDocument 2: Christa Mayer is a German professional basketball player. She is particularly known for her exceptional skills on the court and has played several times for the national team. Christa Mayer was born in Sulzbach-Rosenberg, Germany in Bavaria. After graduating from 

In [7]:
# Wrap every record in a dspy.Example and declare `question` and `context` as
# the input fields; the other keys (gold_answers, wrong_answers, ...) stay on
# the example for use by the metric.
# NOTE: this rebinds `data`, so the cell is meant to run once per kernel session.
data = [
    dspy.Example(**record).with_inputs('question', 'context')
    for record in data
]

# Keep one fixed example around for the spot checks in later cells.
example = data[2]
example

Example({'question': 'What is the population of Sandusky Township, Ohio?', 'context': "Document 1: Sandusky Township, Crawford County, Ohio Sandusky Township is one of the sixteen townships of Crawford County, Ohio, United States. As of the 2010 census the population was 459. Located in the eastern part of the county, it borders the following townships: No municipalities are located in Sandusky Township. Sandusky Township was named from the Sandusky River, which flows through its southern part. Statewide, other Sandusky Townships are located in Richland and Sandusky counties. The township is governed by a three-member board of trustees, who are elected in November of odd-numbered years to a four-year term beginning on the following January\nDocument 2: from Wikimedia project Spanish Wikipedia located in the administrative territorial entity Crawford County 1 reference imported from Wikimedia project English Wikipedia coordinate location 40°51'5 '' N, 82°49'22 '' W 1 reference imported 

In [8]:
import random

# Shuffle with a fixed seed so a fresh-kernel run always yields the same split.
# NOTE: the shuffle mutates `data` in place, so re-running this cell without
# rebuilding `data` produces a different ordering (and a different split).
random.Random(0).shuffle(data)

# First 200 examples are for training; the rest (capped at index 1000) are for
# development. The recorded output (200, 50) shows the cap is not reached.
trainset = data[:200]
devset = data[200:1000]

len(trainset), len(devset)

(200, 50)

### Baseline evaluation

In [9]:
# Run the baseline predictor on the example's input fields only
# (`question` and `context`, as declared via `with_inputs`).
pred = answer(**example.inputs())

In [13]:
# Re-display the example that was fed to the predictor, for comparison with `pred`.
example

Example({'question': 'What is the population of Sandusky Township, Ohio?', 'context': "Document 1: Sandusky Township, Crawford County, Ohio Sandusky Township is one of the sixteen townships of Crawford County, Ohio, United States. As of the 2010 census the population was 459. Located in the eastern part of the county, it borders the following townships: No municipalities are located in Sandusky Township. Sandusky Township was named from the Sandusky River, which flows through its southern part. Statewide, other Sandusky Townships are located in Richland and Sandusky counties. The township is governed by a three-member board of trustees, who are elected in November of odd-numbered years to a four-year term beginning on the following January\nDocument 2: from Wikimedia project Spanish Wikipedia located in the administrative territorial entity Crawford County 1 reference imported from Wikimedia project English Wikipedia coordinate location 40°51'5 '' N, 82°49'22 '' W 1 reference imported 

In [10]:
# Show the model's answers and explanation for the example.
pred

Prediction(
    correct_answers=['1,234'],
    explanation='The population of Sandusky Township, Ohio, as of the 2020 census, was 1,234. This information is provided in Document 2 and Document 3, which both reference the 2020 United States Census.'
)

In [18]:
import unicodedata
import re
import string

def normalize_text(s):
    """Canonicalize an answer string for exact-match comparison.

    Steps, applied in this order:
      1. Unicode NFD decomposition (combining marks are kept, not stripped,
         so an accented letter still differs from its unaccented form).
      2. Lowercasing.
      3. Removal of every character in ``string.punctuation`` (ASCII only).
      4. Replacement of the standalone words "a", "an", "the" with a space.
      5. Collapsing of whitespace runs and trimming of both ends.

    Args:
        s: The string to normalize.

    Returns:
        The normalized string.
    """
    decomposed = unicodedata.normalize("NFD", s)
    lowered = decomposed.lower()

    # Delete ASCII punctuation in a single pass.
    punctuation_table = str.maketrans("", "", string.punctuation)
    unpunctuated = lowered.translate(punctuation_table)

    # Articles become a space here; the final split/join removes the surplus.
    without_articles = re.sub(r"\b(a|an|the)\b", " ", unpunctuated)

    return " ".join(without_articles.split())

def evaluation_metric(example, pred, trace=None):
    """Strict exact-match metric over normalized answer strings.

    A prediction scores 1 only when it contains every gold answer and none of
    the known wrong answers; otherwise it scores 0. All strings are compared
    after `normalize_text`.

    Args:
        example: Object exposing `gold_answers` and `wrong_answers`
            (iterables of strings).
        pred: Object exposing `correct_answers`: an iterable of strings, a
            single string (treated as one answer), or None (treated as no
            answers).
        trace: Unused. Accepted, with a default, so the metric can also be
            called with a third positional argument.

    Returns:
        int: 1 if all gold answers and no wrong answers are present, else 0.
    """
    raw_answers = pred.correct_answers
    if raw_answers is None:
        # A missing output counts as "no answers given" rather than raising.
        raw_answers = []
    elif isinstance(raw_answers, str):
        # A bare string is one answer; iterating it would compare characters.
        raw_answers = [raw_answers]

    llm_answers = {normalize_text(item) for item in raw_answers}
    gold_answers = {normalize_text(item) for item in example.gold_answers}
    wrong_answers = {normalize_text(item) for item in example.wrong_answers}

    # Check that the prediction contains all gold answers and no wrong answers.
    all_gold_included = gold_answers <= llm_answers
    no_wrong_included = llm_answers.isdisjoint(wrong_answers)

    return int(all_gold_included and no_wrong_included)

# Sanity-check the metric on the single baseline prediction made earlier.
evaluation_metric(example, pred)

0