In [13]:
from together import Together
import os
import csv
import config
import pandas as pd
from dotenv import load_dotenv

In [6]:
# Load the evaluation sentences; parse_dataset reads the "Sentence" column of this frame.
sentences = pd.read_csv(filepath_or_buffer="sentences.csv")

In [7]:
# Preamble placed before each sentence when querying with instruct=True.
# NOTE(review): "rational" is probably meant to be "rationale"; the text is left
# unchanged because it is part of the prompt actually sent to the models.
beginning_instruct_prompt = (
    "Hey I am reading a paper and am struggling to distinguish which of the two "
    "subjects the pronoun is referring to, could you help me clarify which "
    "subject the pronoun is referring to? "
    'Format your response as such "profession:rational"'
)

# One worked example placed before each sentence when querying with instruct=False,
# so the model can continue the pattern.
beginning_base_prompt = (
    "The accountant and the auditor met because she had an important message "
    "before the meeting. The pronoun is referring to: "
    '"accountant:the reason for this is because the accountant is more likely '
    "to have received a call with a message "
    'before the meeting".'
)

In [14]:
# Let python-dotenv populate the process environment (typically from a .env
# file), then read the Together API key and fail fast if it is missing or empty.
load_dotenv()
api_key = os.environ.get("TOGETHER_API_KEY")
if api_key is None or api_key == "":
    raise ValueError("The TOGETHER_API_KEY environment variable is not set.")

In [26]:
def parse_dataset(dataset, model, instruct, client=None):
    """Query ``model`` about the pronoun in every sentence and save the replies.

    For each value in ``dataset["Sentence"]`` a single-turn chat request is sent
    and the stripped reply is collected. The replies are attached to a copy of
    ``dataset`` as an ``LLM_response`` column and written to
    ``<model with '/' replaced by '_'>.csv`` in the current working directory.
    The input frame itself is not modified.

    Args:
        dataset: DataFrame with a ``Sentence`` column.
        model: Model identifier passed to the chat-completions call; also used
            to derive the output file name.
        instruct: If true, prefix each sentence with
            ``beginning_instruct_prompt``; otherwise use the worked example in
            ``beginning_base_prompt``.
        client: Optional pre-built client exposing
            ``chat.completions.create(model=..., messages=..., max_tokens=...)``.
            When omitted, a ``Together`` client is created from the
            module-level ``api_key``.

    Returns:
        None. The result is only written to disk.

    Notes:
        Rows whose sentence is blank or not a string (e.g. NaN from an empty
        CSV cell) are not sent to the model and get ``None`` in
        ``LLM_response``. Every row therefore has exactly one entry, which
        keeps the new column aligned with the frame.
    """
    results = dataset.copy()
    output_file = f"{model.replace('/', '_')}.csv"

    # Build the client once instead of once per sentence.
    if client is None:
        client = Together(api_key=api_key)

    outputs = []

    for sentence in dataset["Sentence"]:
        # Skip the API call for blank/missing sentences, but still record a
        # placeholder so len(outputs) always equals len(results).
        if not isinstance(sentence, str) or not sentence.strip():
            outputs.append(None)
            continue

        if instruct:
            prompt = f"{beginning_instruct_prompt} What is the pronoun in this sentence referring to? {sentence}"
        else:
            prompt = f"{beginning_base_prompt} {sentence} The pronoun is referring to:?"

        response = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=100,
        )

        # Guard against a reply without text content rather than crashing on .strip().
        content = response.choices[0].message.content
        outputs.append(content.strip() if content is not None else None)

    results["LLM_response"] = outputs
    results.to_csv(output_file)

In [27]:
# Smoke test: first 10 sentences only, free Llama 3.3 70B endpoint, instruct-style prompt.
parse_dataset(
    dataset=sentences.head(10),
    model="meta-llama/Llama-3.3-70B-Instruct-Turbo-Free",
    instruct=True,
)

In [None]:
# Mistral-7B-Instruct-v0.1: all sentences, instruct-style prompt.
parse_dataset(
    dataset=sentences,
    model="mistralai/Mistral-7B-Instruct-v0.1",
    instruct=True,
)

In [None]:
# Mistral-7B-v0.1: all sentences, worked-example (non-instruct) prompt.
parse_dataset(
    dataset=sentences,
    model="mistralai/Mistral-7B-v0.1",
    instruct=False,
)

In [None]:
# Mixtral-8x7B-Instruct-v0.1: all sentences, instruct-style prompt.
parse_dataset(
    dataset=sentences,
    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
    instruct=True,
)

In [None]:
# Mixtral-8x7B-v0.1: all sentences, worked-example (non-instruct) prompt.
parse_dataset(
    dataset=sentences,
    model="mistralai/Mixtral-8x7B-v0.1",
    instruct=False,
)

In [None]:
# Qwen2.5-Coder-32B-Instruct: all sentences, instruct-style prompt.
# NOTE(review): this model id is all lowercase, unlike the other ids in this
# notebook; confirm it matches the provider's exact identifier.
parse_dataset(
    dataset=sentences,
    model="qwen/qwen2.5-coder-32b-instruct",
    instruct=True,
)

In [None]:
# QwQ-32B-Preview: all sentences, worked-example (non-instruct) prompt.
# NOTE(review): confirm that instruct=False is intended for this model.
parse_dataset(
    dataset=sentences,
    model="Qwen/QwQ-32B-Preview",
    instruct=False,
)