In [1]:
import importlib

# Logging: records of severity ERROR and above go to the file test_quiz.log.
import logging

logging.basicConfig(
    filename='test_quiz.log',
    level=logging.ERROR,
)
# Named logger for the 'ollama' channel.
logger = logging.getLogger('ollama')

In [2]:
# Sentence encoder: load the pretrained model identified by
# 'sentence-transformers/sentence-t5-base'.
from sentence_transformers import SentenceTransformer

st5_model = SentenceTransformer(
    'sentence-transformers/sentence-t5-base'
)

# Expose the model's bound `encode` method under the name `embedder`.
embedder = st5_model.encode

In [3]:
# Set up LLM
import os
import sys

# Put the grandparent and parent directories (resolved against the current
# working directory) at the front of the import path -- presumably so that
# `concordia` and the local example components can be imported; confirm
# against the repository layout.
# '../..' is handled first and each insert goes to index 0, so '..' ends up
# ahead of '../..' when both are newly added.
for relative_dir in ('../..', '..'):
    path = os.path.abspath(relative_dir)
    if path not in sys.path:
        sys.path.insert(0, path)

from concordia.language_model import ollama_model

# Language model used by every later cell. An alternative that was tried
# here: model_name='mixtral'.
model = ollama_model.OllamaLanguageModel(model_name='llama2:13b')

In [None]:
import custom_components.build_quiz_agents as build_agents
# Reload so that edits made to the module since it was first imported are used.
importlib.reload(build_agents)

# Ask the model for names, 'men' first and then 'women'; `names` keeps the
# two results in that order.
names = [
    build_agents.generate_names(model, group)
    for group in ('men', 'women')
]
m_names, f_names = names

# Show how many names came back for each group.
print(*(len(group_names) for group_names in names))

In [7]:
# Reload first so the builder calls below use the current module source.
# `build_agents` itself is bound by the name-generation cell.
importlib.reload(build_agents)

# Hand-picked names in the same [men, women] layout as the generated list;
# this rebinds `names`, replacing whatever the generation cell produced.
names = [
    ['Tom', 'Bob'],
    ['Jane', 'Alice'],
]

# Scenario label; it is passed to the agent builder and later selects the quiz.
scenario = 'personality'

clock = build_agents.build_clock(2024)
agent = build_agents.build_random_agent(model, names, clock, scenario)


Creating backstory for Tom using the following information:
Age: 61
Current year: 2024
Traits: openness: very low; conscientiousness: very low; extraversion: very high; agreeableness: somewhat high; neuroticism: somewhat low
Gender: male



In [11]:
# Build the quiz prompt for `agent` according to `scenario`, then run the
# quiz metric on it.
#
# Reads:  `scenario`, `agent`, `model` (bound by earlier cells).
# Binds:  `measurements`, `test_context`, `file_path`, `context`,
#         `quiz_metric`, `results`.
# Raises: ValueError if `scenario` is not one of the supported quizzes.
from concordia.utils import measurements as measurements_lib
import examples.custom_components.personality_quiz_metric as qm
# Reload so that edits made to the metric module since its first import are used.
importlib.reload(qm)

measurements = measurements_lib.Measurements()

# Pronouns used in the prompts; any gender other than "female" gets the
# masculine forms, exactly as the inline conditionals did before.
if agent.gender == 'female':
    subject_pronoun, object_pronoun, possessive_pronoun = 'she', 'her', 'her'
else:
    subject_pronoun, object_pronoun, possessive_pronoun = 'he', 'him', 'his'

# The trivia and MCAT quizzes share one prompt and differ only in how the
# quiz is described and which question file is read.
guessing_quizzes = {
    'trivia': ('a pop culture trivia quiz', './quizzes/trivia_questions.json'),
    'mcat': ('an MCAT preparatory quiz', './quizzes/mcat_questions.json'),
}

# Each prompt fragment ends with a space where another sentence follows;
# adjacent string literals are concatenated with nothing in between, so a
# missing trailing space glues two sentences together ("fun.Here ...").
if scenario == 'personality':
    test_context = (
        '\n'
        f'{agent.name} is taking a personality quiz just for fun. '
        f'Here are a number of personality traits that may or may not apply to {agent.name}. '
        f'Please indicate the extent to which {agent.name} would agree or disagree with that statement. '
        f'{agent.name} should rate the extent to which the pair of traits applies to '
        f'{object_pronoun}, even if one characteristic applies more strongly than the other.'
        '\n'
    )
    file_path = './quizzes/big_five_questions.json'
elif scenario in guessing_quizzes:
    quiz_description, file_path = guessing_quizzes[scenario]
    test_context = (
        '\n'
        f"{agent.name} is taking {quiz_description} just for fun. "
        f"If {agent.name} doesn't know the answer, {subject_pronoun} will still guess "
        f"and begin {possessive_pronoun} answer with a single choice. "
        f"How would {agent.name} answer the following question?"
        '\n'
    )
else:
    # Fail loudly: without this branch an unknown scenario would silently
    # reuse `test_context` / `file_path` left over from an earlier run of
    # this cell, or hit a NameError on a fresh kernel.
    raise ValueError(
        f"Unknown scenario {scenario!r}; expected 'personality', 'trivia' or 'mcat'."
    )

# The agent's backstory comes first, followed by the quiz instructions.
context = agent.backstory + test_context

quiz_metric = qm.QuizMetric(
    model, agent, file_path, measurements=measurements, verbose=True
)
results = quiz_metric.observe(context, return_data=True)




Answer: a
Sample:  Based on the information provided, I would rate Tom as follows:

(a) 3: Disagree a little - Extraverted, enthusiastic. While Tom is outgoing and social, he may not be extremely extraverted or enthusiastic all the time.

(b) 7: Agree strongly - Values harmony and cooperation in relationships. Tom's agreeableness suggests that he values harmony and cooperation in his relationships, and this is a strong aspect of his personality.

(c) 5: Agree a little - Prefer structured activities and predictable routines. While Tom may enjoy structured activities and predictable routines, he may not prefer them exclusively.

(d) 1: Disagree strongly - Open-minded and curious about new ideas and experiences. Tom's low level of openness suggests that he may not be open-minded or curious about new ideas and experiences.

(e) 2: Disagree moderately - Enjoys trying new things and taking risks. While Tom may enjoy socializing and providing service, he may not be inclined to try new things 

In [35]:
# Write the quiz results of every run to one CSV file, one row per result.
#
# Reads `all_results` and `run_number`; neither is bound in the cells shown
# alongside this one, so both are expected to come from an earlier cell.
# `all_results[i]` is iterated as a collection of comma-separated strings and
# `run_number[i]` is the run label stored in the last column of each row.
import csv

# NOTE(review): the file name says "trivia" while the agent cell sets
# scenario = 'personality' -- confirm the name matches the data being saved.
# newline='' is required by the csv module (otherwise extra blank rows appear
# on Windows); the explicit encoding keeps the output locale-independent.
with open('./data/llama2_13b_trivia_1924_agents.csv', 'w', newline='', encoding='utf-8') as newfile:
    writer = csv.writer(newfile, delimiter=',', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(['agent_id', 'agent_name', 'traits', 'question', 'answer', 'correct', 'run'])
    # `run_results` rather than `results`, so the `results` returned by the
    # quiz cell is not overwritten when this cell runs.
    for i, run_results in enumerate(all_results):
        # Index lookup is kept (instead of zip) because the container type of
        # `run_number` is not visible here.
        run = run_number[i]
        for result in run_results:
            # NOTE(review): a plain split assumes no field contains a comma;
            # a comma inside a question or answer would shift the columns.
            row = [item.strip() for item in result.split(",")]
            row.append(run)
            writer.writerow(row)