In [67]:
import numpy as np
from typing import List, Tuple
import os
import openai
import re

import openai
from openai import OpenAI
import evaluate

# Read the API key from the environment rather than relying on a `key`
# variable that no visible cell defines (a fresh kernel would raise NameError).
# `key` stays a notebook-level name because a later cell passes it to OpenAI(...).
key = os.environ["OPENAI_API_KEY"]
openai.api_key = key



In [73]:
def train_test_split(member: str, test_size: float = 0.2, seed: int = 42, data_path: str = '/work/users/s/m/smerrill/Albemarle/dataset') -> Tuple[list, np.ndarray, np.ndarray]:
    """
    Load the pre-split dataset files for one board member.

    No random splitting is performed here: the train/test partition is read
    from files on disk. Synthetic data, where a file exists for the member,
    is always appended to the training set.

    Parameters:
    - member: The name identifier for the board member.
    - test_size: Validated to lie strictly between 0 and 1; otherwise unused
      (kept for interface compatibility).
    - seed: Unused (kept for interface compatibility).
    - data_path: Base directory for the dataset files.

    Returns:
    - A tuple (train_data, test_data, train_completion_data) where train_data
      is a list (real entries followed by synthetic ones) and the other two
      are the arrays loaded from '<member>_test.npy' and
      '<member>_train_completion.npy'.

    Raises:
    - ValueError: if `member` is unknown or `test_size` is not in (0, 1).
    """
    # Members that ship with a 'synth_<member>.npy' file vs. those that do not.
    members_with_synth = ('kateacuff', 'ellenosborne', 'grahampaige', 'judyle')
    members_without_synth = ('katrinacallsen', 'davidoberg', 'jonnoalcaro')

    # Validate arguments before touching the filesystem so bad input fails fast.
    if member not in members_with_synth + members_without_synth:
        raise ValueError(f"Unknown member: {member}")

    if not 0 < test_size < 1:
        raise ValueError("test_size must be a float between 0 and 1.")

    def _load(filename, **kwargs):
        # Load one .npy file relative to data_path.
        return np.load(os.path.join(data_path, filename), **kwargs)

    real_data = _load(f'{member}_train.npy')
    synth_data = _load(f'synth_{member}.npy') if member in members_with_synth else []
    # NOTE(review): allow_pickle=True executes pickled content; only use with trusted files.
    test_data = _load(f'{member}_test.npy', allow_pickle=True)
    train_completion_data = _load(f'{member}_train_completion.npy', allow_pickle=True)

    train_data = list(real_data) + list(synth_data)
    return train_data, test_data, train_completion_data


def convert_to_chat_completion(text):
    """Turn a header-tagged prompt string into a list of chat messages.

    The text is split on `<|start_header_id|>ROLE<|end_header_id|>` markers;
    each ROLE and the text that follows it become one
    `{"role": ..., "content": ...}` dict. Anything before the first header
    is discarded. `<|begin_of_text|>` and `<|eot_id|>` markers are removed
    from the content, and both role and content are whitespace-stripped.
    """
    header_pattern = r'<\|start_header_id\|>(.*?)<\|end_header_id\|>'
    # With one capture group, re.split yields [prefix, role, body, role, body, ...].
    pieces = re.split(header_pattern, text)

    def _clean(body):
        # Strip whitespace, drop the special markers, then strip again.
        body = body.strip()
        for marker in ('<|begin_of_text|>', '<|eot_id|>'):
            body = body.replace(marker, '')
        return body.strip()

    return [
        {"role": speaker.strip(), "content": _clean(body)}
        for speaker, body in zip(pieces[1::2], pieces[2::2])
    ]


def compute_metrics(generated_texts, reference_texts):
    """Score generated texts against references with BLEU, ROUGE and BERTScore.

    Uses the notebook-level `bleu`, `rouge` and `bertscore` metric objects,
    which must be defined before this function is called.

    Returns a 4-tuple:
    (bleu result, rouge result, bertscore result, mean of bertscore 'f1').
    """
    # Each reference is wrapped in its own single-item list for BLEU.
    bleu_out = bleu.compute(
        predictions=generated_texts,
        references=[[ref] for ref in reference_texts],
    )
    rouge_out = rouge.compute(predictions=generated_texts, references=reference_texts)
    bert_out = bertscore.compute(
        predictions=generated_texts,
        references=reference_texts,
        lang="en",
    )

    # Collapse the per-example F1 values into a single mean.
    f1_values = bert_out['f1']
    mean_f1 = sum(f1_values) / len(f1_values)

    return bleu_out, rouge_out, bert_out, mean_f1

In [41]:
# Load the training, test and train-completion data for member 'kateacuff'
# from the default data_path.
train_data, test_data, train_completion_data = train_test_split('kateacuff')

In [88]:

# Persona prompt for 'kateacuff': role description, tone guidance, one
# in-context example, and a trailing cue asking for the next `kateacuff:` line.
# It is sent with role "system" but is appended AFTER the conversation turns
# (see below), so it is the last message the model sees.
system_message = { "role": "system",
  "content": (
    "You are completing a transcript of a public Albemarle County School Board meeting.\n\n"
    "Your task is to continue the dialogue in the voice of **Dr. Kate Acuff**, a long-serving and thoughtful School Board member who represents the Jack Jouett Magisterial District. She brings a deep background in law, public health, and education policy, and she is a strong advocate for mental health services, equitable education, and teacher support.\n\n"
    "Dr. Acuff's tone is professional, reflective, respectful, and policy-focused. She often expresses appreciation for thoughtful processes, values student input, and is attentive to the long-term impacts of Board decisions.\n\n"
    "Please complete the next line in the dialogue, starting with `kateacuff:` and continuing naturally based on the conversation so far.\n\n"
    "**In-context example**:\n"
    "Previous messages:\n"
    "speaker 1: Thank you all for coming here today. I believe we have a presentations to kick things off unless anyone has other more important matters.\n"
    "speaker 2: No I think we can go ahead and begin with the presentation, kate will you be presenting today?\n"
    "kateacuff: Yes I will presenting on the health and saftey of our schools."

    "\n\n"
      
    "Now continue previous dialogue as:\n"
    "  kateacuff:"
  )
}

# Parse the first test prompt into chat messages and drop the final parsed
# message ([:-1]) -- presumably the trailing turn to be generated; TODO confirm.
messages = convert_to_chat_completion(test_data[0]['prompt'])[:-1]
# Put the persona prompt at the end of the message list.
messages.append(system_message)

In [89]:
# Display the assembled message list for inspection.
messages

[{'role': 'user',
  'content': "davidoberg: Patrick, can you just bring up the map for 2.2 again real quick? Sure can, yep.\npatrickmclaughlin: Here you go. And Mr. Oberg, that comprises the neighborhoods of Gray Rock, Wayland's Grant and Bargaman Park in this larger area of Western Ridge, Wickham Pond. Oops, sorry, I thought it was sharing, but it's not. So is it coming through now? Yes. Great. So that comprises the neighborhoods of Gray Rock here, number one, Wayland's Grant and Bargaman Park. number two, and Western Ridge Wickham Pond. That also has some additional neighborhoods in there. I think Foothill Crossing is one of the newer ones that's over there, but that whole area that we've dubbed Western Ridge Wickham Pond.\nunknownspeaker: So there are no islands being created.\npotnekclou: That is correct. There are no islands. There was one scenario that we put before the advisory committee that had an island there. And that was outside of the guiding principles that were provided 

In [48]:
# Client reused by the generation cells; `key` must already be defined
# (load it from the environment rather than pasting it into the notebook).
client = OpenAI(api_key=key)

# One trial request with the current `messages` list
# (other model names tried: gpt-4-turbo, gpt-3.5-turbo).
response = client.chat.completions.create(messages=messages, model="gpt-4o")

print(response.choices[0].message.content)

In [52]:
# Number of examples in the loaded test set.
len(test_data)

20

In [55]:
# Generate one gpt-4o completion per test example, collecting the reference
# completion for each example alongside it.
generated_text40, reference_text = [], []

for item in test_data:
    # Conversation turns minus the last parsed message, followed by the persona prompt.
    messages = [*convert_to_chat_completion(item['prompt'])[:-1], system_message]

    response = client.chat.completions.create(model="gpt-4o", messages=messages)

    generated_text40.append(response.choices[0].message.content)
    reference_text.append(item['completion'])

In [66]:
# Persist the gpt-4o generations. The original saved `generated_text35` here,
# which wrote the GPT-3.5 outputs under the GPT-4o filename.
np.save('gpt40.npy', generated_text40)

In [56]:
# Generate one gpt-3.5-turbo completion per test example, collecting the
# reference completion for each example alongside it.
generated_text35, reference_text = [], []

for item in test_data:
    # Conversation turns minus the last parsed message, followed by the persona prompt.
    messages = [*convert_to_chat_completion(item['prompt'])[:-1], system_message]

    response = client.chat.completions.create(model="gpt-3.5-turbo", messages=messages)

    generated_text35.append(response.choices[0].message.content)
    reference_text.append(item['completion'])

In [65]:
# Persist the gpt-3.5-turbo generations to 'gpt3.5.npy'.
np.save('gpt3.5.npy', generated_text35)

In [71]:
# Load the three metric objects that compute_metrics() reads as globals.
bleu, rouge, bertscore = (
    evaluate.load(metric_name) for metric_name in ("bleu", "rouge", "bertscore")
)

In [75]:
# Start a fresh results list; re-running this cell discards earlier entries.
metrics_list = []
# Score the gpt-3.5-turbo generations against the collected references.
bleu_score, rouge_score, bertscore_result, avg_bertscore_f1 = compute_metrics(generated_text35, reference_text)

# Keep only the headline numbers: BLEU, ROUGE-L and the mean BERTScore F1.
metrics_list.append({
    "ModelName": 'GPT3.5-Turbo',
    "dataset": 'kateacuff',
    "BLEU": bleu_score["bleu"],
    "ROUGE-L": rouge_score["rougeL"],
    "BERTScore_F1": avg_bertscore_f1
})


Some weights of RobertaModel were not initialized from the model checkpoint at roberta-large and are newly initialized: ['pooler.dense.bias', 'pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [91]:
# Score the gpt-4o generations against the collected references.
bleu_score, rouge_score, bertscore_result, avg_bertscore_f1 = compute_metrics(generated_text40, reference_text)

# NOTE: this cell only appends, so each re-run adds another 'GPT4o' entry
# to metrics_list.
metrics_list.append({
    "ModelName": 'GPT4o',
    "dataset": 'kateacuff',
    "BLEU": bleu_score["bleu"],
    "ROUGE-L": rouge_score["rougeL"],
    "BERTScore_F1": avg_bertscore_f1
})


In [95]:
import pandas as pd

# Build the results table from every collected entry instead of hard-coded
# positions 0 and 2, which only existed because the GPT-4o metrics cell had
# been run twice (a fresh Run All would raise IndexError). If a metrics cell
# was re-run, keep only the latest entry per (model, dataset).
(
    pd.DataFrame(metrics_list)
    .drop_duplicates(subset=["ModelName", "dataset"], keep="last")
    .reset_index(drop=True)
)

Unnamed: 0,ModelName,dataset,BLEU,ROUGE-L,BERTScore_F1
0,GPT3.5-Turbo,kateacuff,0.007075,0.113478,0.837643
1,GPT4o,kateacuff,0.0,0.101245,0.834214


In [80]:
messages[

[{'role': 'user',
  'content': "jonnoalcaro: I understand, thank you for that. And my last question has to do with, I know that there have been some challenges this year in hiring TAs and in some cases, teachers. What's your strategy for the next upcoming school year?\nunknownspeaker: We'll partner right with as, as many as I mentioned, as many departments as we can write to boost and to get out the word and use our community connections, as much as possible which we've developed over time. So we've done national stuff we work very closely with. Folks are supports down in human resources. And so, through professional organizations, we publish and post on the walls at universities, not just UVA, but in the area, we have connections as far up as Philadelphia for placements for internships to try and foster students to come in internship here and stay. Uh, so we have a lot of things going on. We take this very serious. Um, but as far as, so that's, that's sort of the projected, um, which 

In [90]:
# Print the content of the last message in `messages` as plain text.
print(messages[-1]['content'])

You are completing a transcript of a public Albemarle County School Board meeting.

Your task is to continue the dialogue in the voice of **Dr. Kate Acuff**, a long-serving and thoughtful School Board member who represents the Jack Jouett Magisterial District. She brings a deep background in law, public health, and education policy, and she is a strong advocate for mental health services, equitable education, and teacher support.

Dr. Acuff's tone is professional, reflective, respectful, and policy-focused. She often expresses appreciation for thoughtful processes, values student input, and is attentive to the long-term impacts of Board decisions.

Please complete the next line in the dialogue, starting with `kateacuff:` and continuing naturally based on the conversation so far.

**In-context example**:
Previous messages:
speaker 1: Thank you all for coming here today. I believe we have a presentations to kick things off unless anyone has other more important matters.
speaker 2: No I t