In [1]:
from pydantic import BaseModel
from openai import OpenAI
from keys import open_key
import os
from datetime import datetime
import pandas as pd

In [2]:
dataset_folder = "labeled"
datasets = ["06-13-2013audio_08DIR12-2218_labeled.csv", "09-07-2017 audio_6 APCNV-2016-565_labeled.csv", "09-12-2017audio_6 ZA-2017-210-CU-1A_labeled.csv", "5_DIR_2022_5446_labeled.csv", "6_ZA_2022_6776_labeled.csv", "07_CPC_2022_6189_labeled.csv"]

labeled_df = pd.DataFrame()

for data_file in datasets:
  file_path = dataset_folder + "/" + data_file
  data_file_df = pd.read_csv(file_path)
  labeled_df = pd.concat([labeled_df, data_file_df], ignore_index=True)

labeled_df

Unnamed: 0,speaker,text,label
0,2,"Thank you James, me, congratulations. Thank y...",
1,8,you,
2,2,That was a bad joke. We should go out to lunc...,
3,8,you We should go out. you,
4,2,"We've been waiting all day to hear from you, ...",
...,...,...,...
1082,36,Commissioner Lyshay? Yes. Commissioner Zamora?,
1083,38,you,
1084,3,Yes.,
1085,36,Commissioner Vice President Cho? Yes. And the...,


In [3]:
# Replace NaN values in 'label' column with 2
labeled_df['label'] = labeled_df['label'].fillna(2)
# Drop the speaker column
labeled_df = labeled_df.drop('speaker', axis=1)
# Drop the index column
# labeled_df = labeled_df.reset_index(drop=True)



#do test train split - make sure the test set is 20/20/40
#-1 is oppose, 1 is support, NaN is neither
#want 0 oppose, 1 support, 2 neither

# Convert NaN to 2 (neither), -1 to 0 (oppose), 1 stays as 1 (support)
# Convert NaN to 2 (neither), -1 to 0 (oppose), 1 stays as 1 (support)
labeled_df['label'] = labeled_df['label'].map({-1: 0, 1: 1, 2:2})

# create test and trainh set
test_df = pd.concat([
    labeled_df[labeled_df['label'] == 0].sample(n=8, random_state=42),
    labeled_df[labeled_df['label'] == 1].sample(n=8, random_state=42), 
    labeled_df[labeled_df['label'] == 2].sample(n=30, random_state=42)
])

# train_df = labeled_df.drop(test_df.index)
# Drop test_df indices from labeled_df
temp_df = labeled_df.drop(test_df.index)
# Limit to only 100 samples with label 2, keep all samples with labels 0 and 1
label_2_samples = temp_df[temp_df['label'] == 2].sample(n=100, random_state=42)
other_samples = temp_df[temp_df['label'] != 2]
train_df = pd.concat([other_samples, label_2_samples])

In [4]:
train_df

Unnamed: 0,text,label
9,So staff recommends denying the appeal and th...,1
10,"after a new commissioners, Nicole Sanchez wit...",1
14,Okay. So here is a summary of the appeal poi...,1
47,"So the appeal points, we responded since the ...",1
116,"Hello, everyone. My name is Lucerito Martinez...",1
...,...,...
594,"Anybody else? Okay, great. Alright, so let's...",2
96,you,2
711,"This Commissioner Margill is and yeah, I thin...",2
172,This Christine Sapnara from Planning Departme...,2


In [5]:
instructions = """
Prompt:
You are an expert assistant specializing in analyzing public comments from housing meetings about project proposals. Your task is to determine whether a given comment expresses support, opposition, or a neutral/unclear stance regarding the proposed project.

Classification Instructions:
	•	Support (Output: 1) → The comment clearly expresses approval, endorsement, or a positive sentiment toward the project.
	•	Example: “This development will bring much-needed housing to our community.”, “I am here on behalf of the project.”
	•	Oppose (Output: 0) → The comment clearly expresses disapproval, criticism, or a negative sentiment toward the project.
	•	Example: “This project will increase traffic and ruin the character of our neighborhood.” 
	•	Neutral/Unclear (Output: 2) → The comment is ambiguous, lacks a clear stance, discusses unrelated topics, or is procedural in nature.
	•	Example: “Have the environmental studies been completed?”

Additional Guidelines:
	•	Explicit vs. Implicit Sentiment: If a comment does not explicitly state support or opposition but implies a stance, classify accordingly.
	•	“This project will improve local businesses” → Likely support (1)
	•	“This project will increase noise pollution” → Likely oppose (0)
	•	Multiple Sentiments: If a comment contains both supportive and opposing elements, classify based on the dominant sentiment. If it is truly balanced, classify as neutral (2).
	•	“I like the new housing, but I’m worried about parking.” → Likely support (1)
	•	“This will add jobs, but it also displaces residents.” → Likely neutral (2)
	•	Unrelated or Procedural Comments: If a comment discusses general meeting logistics, requests for clarification, or unrelated concerns, classify as neutral (2).
	•	“Will there be another public hearing?” → Neutral (2)
	•	“Can we get a traffic impact report?” → Neutral (2)

Output Format:

For each comment, return a single integer:
	•	1 → Support
	•	0 → Oppose
	•	2 → Neutral/Unclear
"""

In [6]:
train_truelabels_ls = train_df['label'].tolist()
len(train_truelabels_ls)

150

In [7]:
train_text_ls = train_df['text'].tolist()
len(train_text_ls)

150

In [8]:
test_truelabels_ls = test_df['label'].tolist()
len(test_truelabels_ls)

46

In [9]:
test_text_ls = test_df['text'].tolist()
len(test_text_ls)

46

In [10]:



client = OpenAI(api_key=open_key)
class Model(BaseModel):
    predicted_label: int

In [11]:
# client = OpenAI(api_key=open_key)


# response = client.chat.completions.create(
#     model="gpt-4o-mini",  # Base model to fine-tune
# )




# Prepare data for fine-tuning
# We need to format the data as per OpenAI's fine-tuning requirements
# Each example should have a system message, user message, and assistant response

# Define label mapping for clarity
label_to_text = {
    0: "Oppose",
    1: "Support",
    2: "Neutral/Unclear"
}

# Create fine-tuning data for training set
train_finetune_data = []

for text, label in zip(train_text_ls, train_truelabels_ls):
    # Skip entries with NaN labels
    if pd.isna(label):
        continue
        
    # Convert label to integer if it's not already
    label = int(label)
    
    # Create a training example in the required format
    example = {
        "messages": [
            {"role": "system", "content": instructions},
            {"role": "user", "content": text},
            {"role": "assistant", "content": str(label)}
        ]
    }
    train_finetune_data.append(example)

# Create test set data in the same format
test_finetune_data = []

for text, label in zip(test_text_ls, test_truelabels_ls):
    # Skip entries with NaN labels
    if pd.isna(label):
        continue
        
    # Convert label to integer if it's not already
    label = int(label)
    
    # Create a test example in the required format
    example = {
        "messages": [
            {"role": "system", "content": instructions},
            {"role": "user", "content": text},
            {"role": "assistant", "content": str(label)}
        ]
    }
    test_finetune_data.append(example)

# Save the training data to a JSONL file
import json
with open("train_finetune_data.jsonl", "w") as f:
    for example in train_finetune_data:
        f.write(json.dumps(example) + "\n")

# Save the test data to a JSONL file
with open("test_finetune_data.jsonl", "w") as f:
    for example in test_finetune_data:
        f.write(json.dumps(example) + "\n")

print(f"Created {len(train_finetune_data)} training examples for fine-tuning")
print(f"Created {len(test_finetune_data)} test examples")
print("First training example:")
print(json.dumps(train_finetune_data[0], indent=2))
print("First test example:")
print(json.dumps(test_finetune_data[0], indent=2) if test_finetune_data else "No test examples available")


Created 150 training examples for fine-tuning
Created 46 test examples
First training example:
{
  "messages": [
    {
      "role": "system",
      "content": "\nPrompt:\nYou are an expert assistant specializing in analyzing public comments from housing meetings about project proposals. Your task is to determine whether a given comment expresses support, opposition, or a neutral/unclear stance regarding the proposed project.\n\nClassification Instructions:\n\t\u2022\tSupport (Output: 1) \u2192 The comment clearly expresses approval, endorsement, or a positive sentiment toward the project.\n\t\u2022\tExample: \u201cThis development will bring much-needed housing to our community.\u201d, \u201cI am here on behalf of the project.\u201d\n\t\u2022\tOppose (Output: 0) \u2192 The comment clearly expresses disapproval, criticism, or a negative sentiment toward the project.\n\t\u2022\tExample: \u201cThis project will increase traffic and ruin the character of our neighborhood.\u201d \n\t\u2022

In [12]:
client = OpenAI(api_key=open_key)


train_output_file_path = "train_finetune_data.jsonl"
validation_output_file_path = "test_finetune_data.jsonl"

train_file = client.files.create(
  file=open(train_output_file_path, "rb"),
  purpose="fine-tune"
)

valid_file = client.files.create(
  file=open(validation_output_file_path, "rb"),
  purpose="fine-tune"
)

print(f"Training file Info: {train_file}")
print(f"Validation file Info: {valid_file}")

Training file Info: FileObject(id='file-XnKkGAxrHQ9qC2inDNsqKx', bytes=486616, created_at=1741708916, filename='train_finetune_data.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None, expires_at=None)
Validation file Info: FileObject(id='file-E26pc1541FxeS85mxkzwb4', bytes=128421, created_at=1741708916, filename='test_finetune_data.jsonl', object='file', purpose='fine-tune', status='processed', status_details=None, expires_at=None)


In [13]:
model = client.fine_tuning.jobs.create(
  training_file=train_file.id, 
  validation_file=valid_file.id,
  model="gpt-4o-mini-2024-07-18", 
  hyperparameters={
    "n_epochs": 1,
	"batch_size": 3,
	"learning_rate_multiplier": 0.2
  }
)
job_id = model.id
status = model.status

print(f'Fine-tuning model with jobID: {job_id}.')
print(f"Training Response: {model}")
print(f"Training Status: {status}")

Fine-tuning model with jobID: ftjob-xxsXVXOOQwssARjWzHnx2OVH.
Training Response: FineTuningJob(id='ftjob-xxsXVXOOQwssARjWzHnx2OVH', created_at=1741708921, error=Error(code=None, message=None, param=None), fine_tuned_model=None, finished_at=None, hyperparameters=Hyperparameters(batch_size=3, learning_rate_multiplier=0.2, n_epochs=1), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-J2DCfq0j1nXInCHinBoPqK5v', result_files=[], seed=1984918551, status='validating_files', trained_tokens=None, training_file='file-XnKkGAxrHQ9qC2inDNsqKx', validation_file='file-E26pc1541FxeS85mxkzwb4', estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size=3, learning_rate_multiplier=0.2, n_epochs=1)), type='supervised'), user_provided_suffix=None, metadata=None)
Training Status: validating_files


In [15]:
client.fine_tuning.jobs.retrieve(job_id)

FineTuningJob(id='ftjob-xxsXVXOOQwssARjWzHnx2OVH', created_at=1741708921, error=Error(code=None, message=None, param=None), fine_tuned_model='ft:gpt-4o-mini-2024-07-18:personal::B9wTa0K9', finished_at=1741709432, hyperparameters=Hyperparameters(batch_size=3, learning_rate_multiplier=0.2, n_epochs=1), model='gpt-4o-mini-2024-07-18', object='fine_tuning.job', organization_id='org-J2DCfq0j1nXInCHinBoPqK5v', result_files=['file-5pRAkXvvvboYeRcmpyddYF'], seed=1984918551, status='succeeded', trained_tokens=95280, training_file='file-XnKkGAxrHQ9qC2inDNsqKx', validation_file='file-E26pc1541FxeS85mxkzwb4', estimated_finish=None, integrations=[], method=Method(dpo=None, supervised=MethodSupervised(hyperparameters=MethodSupervisedHyperparameters(batch_size=3, learning_rate_multiplier=0.2, n_epochs=1)), type='supervised'), user_provided_suffix=None, metadata=None)

In [16]:
result = client.fine_tuning.jobs.list()

# Retrieve the fine tuned model
fine_tuned_model = result.data[0].fine_tuned_model
print(fine_tuned_model)

ft:gpt-4o-mini-2024-07-18:personal::B9wTa0K9


In [17]:
class Model(BaseModel):
    predicted_label: int


predlabels_ls = []
for comment in test_text_ls:
  completion = client.beta.chat.completions.parse(
    model=fine_tuned_model,  # Use the fine-tuned model instead of base model
    messages=[
        {"role": "system", "content": instructions},
        {"role": "user", "content": comment},
    ],
    response_format=Model,
)
  print(f'getting prediction for: {comment}')
  pred = completion.choices[0].message.parsed
  
  print(f'predicted label: {pred.predicted_label}')
  # predlabels_ls.append(pred.predicted_label)
  predlabels_ls.append(pred.predicted_label)

predlabels_ls

getting prediction for:  Still the Blackstone retired certified Arbor.  And I wanna bring up  the ball.  That's good.  of claiming that all these trees  are going to be added because  their plan to be put in.  Why is your father sick?  So you have plenty of options.  This is not work for a tree that wants to get a hundred feet tall.  Yeah.  you  more of that project materials to  and the arborist report to say that they can move.  Thank you.  45 to 12.  and 34 people are sick of words.  to another place on a project.  It's really...  a fabrication.  because you aren't leaving any space to plant trees around this project.  you're  Jordan and I are narrowing to a point.  the cleansing space.  But otherwise, he requires. Thank you. That is your time.
predicted label: 0
getting prediction for:  thank you, Madam President. My name is  Marco Brionis  I'm a supervisor at the West Los Angeles Police Station, which is located at 1663 Butler Avenue. The previous vice investigator that had testif

[0,
 2,
 0,
 0,
 0,
 1,
 0,
 2,
 1,
 1,
 1,
 1,
 1,
 1,
 2,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2]

In [18]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report
import pandas as pd

accuracy = accuracy_score(test_truelabels_ls, predlabels_ls)
precision = precision_score(test_truelabels_ls, predlabels_ls, average='weighted')
recall = recall_score(test_truelabels_ls, predlabels_ls, average='weighted')

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}") 
print(f"Recall: {recall:.2f}")

report = classification_report(test_truelabels_ls, predlabels_ls, output_dict=True)
report_df = pd.DataFrame(report).transpose()
report_df = report_df.round(2)  # Round to 2 decimal places for better readability

report_df


Accuracy: 0.91
Precision: 0.92
Recall: 0.91


Unnamed: 0,precision,recall,f1-score,support
0,1.0,0.62,0.77,8.0
1,0.88,0.88,0.88,8.0
2,0.91,1.0,0.95,30.0
accuracy,0.91,0.91,0.91,0.91
macro avg,0.93,0.83,0.87,46.0
weighted avg,0.92,0.91,0.91,46.0


In [19]:
len(predlabels_ls)

46

In [20]:
# Find indices where true label is 0 but predicted as 2
zero_as_two = [(i, text_ls[i]) for i in range(len(truelabels_ls)) 
               if truelabels_ls[i] == 0 and predlabels_ls[i] == 2]

# Find indices where true label is 1 but predicted as 2  
one_as_two = [(i, text_ls[i]) for i in range(len(truelabels_ls))
              if truelabels_ls[i] == 1 and predlabels_ls[i] == 2]

print("Cases where true label was 0 but predicted as 2:")
for idx, text in zero_as_two:
    print(f"Index {idx}: {text}")
    
print("\nCases where true label was 1 but predicted as 2:")
for idx, text in one_as_two:
    print(f"Index {idx}: {text}")


NameError: name 'truelabels_ls' is not defined