In [17]:
import os
from datetime import datetime

import pandas as pd
from openai import OpenAI
from pydantic import BaseModel
from sklearn.metrics import accuracy_score, precision_score, recall_score, classification_report

from keys import open_key

In [18]:
# Folder holding the hand-labelled transcripts and the files to load from it.
dataset_folder = "labeled"
datasets = [
    "06-13-2013audio_08DIR12-2218_labeled.csv",
    "09-07-2017 audio_6 APCNV-2016-565_labeled.csv",
    "09-12-2017audio_6 ZA-2017-210-CU-1A_labeled.csv",
]

# Read every file first and concatenate once at the end. Growing a frame with
# pd.concat inside the loop re-copies all previous rows on each iteration, and
# seeding it with an empty pd.DataFrame() adds an all-empty entry to the concat.
labeled_frames = []
for data_file in datasets:
    file_path = os.path.join(dataset_folder, data_file)
    data_file_df = pd.read_csv(file_path)
    labeled_frames.append(data_file_df)

# ignore_index=True renumbers the rows 0..n-1 across all files.
labeled_df = pd.concat(labeled_frames, ignore_index=True)

labeled_df

Unnamed: 0,speaker,text,label
0,2,"Thank you James, me, congratulations. Thank y...",
1,8,you,
2,2,That was a bad joke. We should go out to lunc...,
3,8,you We should go out. you,
4,2,"We've been waiting all day to hear from you, ...",
...,...,...,...
305,4,you,
306,8,you,
307,4,Thank you.,
308,8,"Commissioner Chun Kim, so we're going to wait...",


In [19]:
# Label encoding
#   raw CSV : -1 = oppose, 1 = support, NaN = neither
#   recoded :  0 = oppose, 1 = support, 2   = neither
# The identity entries (0 -> 0, 2 -> 2) make this cell safe to re-run: without
# them a second execution would map the already-recoded 0 ("oppose") to NaN.
label_recode = {-1: 0, 0: 0, 1: 1, 2: 2}
recoded_labels = labeled_df['label'].fillna(2).map(label_recode)

# Series.map yields NaN for any value that is not a key of the mapping, so a
# NaN here means the CSVs contain a label outside the expected set. Fail loudly
# instead of passing NaN labels on to the metrics.
unmapped = recoded_labels.isna()
if unmapped.any():
    unexpected = labeled_df.loc[unmapped, 'label'].unique().tolist()
    raise ValueError(f"Unexpected values in 'label' column: {unexpected}")

# Drop the speaker column (errors='ignore' keeps the cell re-runnable once the
# column is already gone) and store the labels as integers.
labeled_df = (
    labeled_df
    .drop(columns='speaker', errors='ignore')
    .assign(label=recoded_labels.astype('int64'))
)

# TODO: do test/train split - make sure the test set is 20/20/40

# # Create test dataset with specific label counts
# test_df = pd.concat([
#     labeled_df[labeled_df['label'] == 0].sample(n=3, random_state=42),
#     labeled_df[labeled_df['label'] == 1].sample(n=3, random_state=42), 
#     labeled_df[labeled_df['label'] == 2].sample(n=10, random_state=42)
# ])

# # Create train dataset from remaining data
# train_df = labeled_df.drop(test_df.index)

# # Convert to Dataset objects
# test_dataset = Dataset.from_pandas(test_df)
# train_dataset = Dataset.from_pandas(train_df)

# # Combine into a DatasetDict
# split_dataset = DatasetDict({
#     'train': train_dataset,
#     'test': test_dataset
# })

# print("\nTest Dataset:")
# print(split_dataset["test"])


In [20]:
# Bare expression: renders labeled_df as it currently stands in the kernel.
labeled_df

Unnamed: 0,text,label
0,"Thank you James, me, congratulations. Thank y...",2
1,you,2
2,That was a bad joke. We should go out to lunc...,2
3,you We should go out. you,2
4,"We've been waiting all day to hear from you, ...",2
...,...,...
305,you,2
306,you,2
307,Thank you.,2
308,"Commissioner Chun Kim, so we're going to wait...",2


In [21]:
# System prompt for the classifier. It defines the label scheme the rest of the
# notebook relies on: 1 = support, 0 = oppose, 2 = neutral/unclear.
# The text is sent to the model verbatim, so any edit changes model behaviour.
instructions = """
Prompt:
You are an expert assistant specializing in analyzing public comments from housing meetings about project proposals. Your task is to determine whether a given comment expresses support, opposition, or a neutral/unclear stance regarding the proposed project.

Classification Instructions:
	•	Support (Output: 1) → The comment clearly expresses approval, endorsement, or a positive sentiment toward the project.
	•	Example: “This development will bring much-needed housing to our community.”, “I am here on behalf of the project.”
	•	Oppose (Output: 0) → The comment clearly expresses disapproval, criticism, or a negative sentiment toward the project.
	•	Example: “This project will increase traffic and ruin the character of our neighborhood.” 
	•	Neutral/Unclear (Output: 2) → The comment is ambiguous, lacks a clear stance, discusses unrelated topics, or is procedural in nature.
	•	Example: “Have the environmental studies been completed?”

Additional Guidelines:
	•	Explicit vs. Implicit Sentiment: If a comment does not explicitly state support or opposition but implies a stance, classify accordingly.
	•	“This project will improve local businesses” → Likely support (1)
	•	“This project will increase noise pollution” → Likely oppose (0)
	•	Multiple Sentiments: If a comment contains both supportive and opposing elements, classify based on the dominant sentiment. If it is truly balanced, classify as neutral (2).
	•	“I like the new housing, but I’m worried about parking.” → Likely support (1)
	•	“This will add jobs, but it also displaces residents.” → Likely neutral (2)
	•	Unrelated or Procedural Comments: If a comment discusses general meeting logistics, requests for clarification, or unrelated concerns, classify as neutral (2).
	•	“Will there be another public hearing?” → Neutral (2)
	•	“Can we get a traffic impact report?” → Neutral (2)

Output Format:

For each comment, return a single integer:
	•	1 → Support
	•	0 → Oppose
	•	2 → Neutral/Unclear
"""

In [22]:
# Ground-truth labels pulled out of the frame as a plain Python list, in row order.
label_column = labeled_df['label']
truelabels_ls = label_column.to_list()
type(truelabels_ls)

list

In [23]:
# Comment texts as a plain Python list, taken from the same frame (and so in
# the same row order) as truelabels_ls.
text_ls = labeled_df['text'].tolist()
# Inspect the list that was just built (the cell previously re-checked truelabels_ls).
type(text_ls)

list

In [37]:
# OpenAI client; the API key is read from the local `keys` module imported at
# the top of the notebook, so it is not written into the notebook itself.
client = OpenAI(api_key=open_key)
# Response schema passed as `response_format` to the parse call: the reply is
# parsed into a single integer field.
# NOTE(review): the type is a plain int, so nothing in this schema restricts the
# value to the 0/1/2 codes the prompt asks for - callers should validate it.
class Model(BaseModel):
    predicted_label: int

In [40]:
# Labels the prompt allows: 0 = oppose, 1 = support, 2 = neutral/unclear.
VALID_LABELS = {0, 1, 2}
# Used when no usable prediction comes back, so predictions stay aligned
# one-to-one with text_ls.
FALLBACK_LABEL = 2


def _predict_label(comment):
    """Classify a single comment and return 0 (oppose), 1 (support) or 2 (neutral).

    Parameters
    ----------
    comment : str
        Transcript text of one speaker turn. Anything that is not a non-empty
        string (e.g. the float NaN pandas uses for a missing cell) is not sent
        to the API and is labelled FALLBACK_LABEL.

    Returns
    -------
    int
        A member of VALID_LABELS. FALLBACK_LABEL is returned when the model
        refuses or answers with an integer outside VALID_LABELS.
    """
    # Message content must be a string; skip the request for missing/blank text.
    if not isinstance(comment, str) or not comment.strip():
        return FALLBACK_LABEL

    completion = client.beta.chat.completions.parse(
        model="gpt-4o-mini",
        # Pin the sampling temperature so repeated evaluation runs are as
        # comparable as the API allows.
        temperature=0,
        messages=[
            {"role": "system", "content": instructions},
            {"role": "user", "content": comment},
        ],
        response_format=Model,
    )
    message = completion.choices[0].message

    # `parsed` is None when the model refuses instead of returning the schema.
    if message.parsed is None:
        print(f'no parsed prediction (refusal: {message.refusal}); using {FALLBACK_LABEL}')
        return FALLBACK_LABEL

    label = message.parsed.predicted_label
    # The schema only guarantees an int, not one of the three label codes.
    if label not in VALID_LABELS:
        print(f'unexpected label {label!r}; using {FALLBACK_LABEL}')
        return FALLBACK_LABEL
    return label


predlabels_ls = []
for comment in text_ls:
    # Announce the comment before the request so the log reflects what is in flight.
    print(f'getting prediction for: {comment}')
    predicted = _predict_label(comment)
    print(f'predicted label: {predicted}')
    predlabels_ls.append(predicted)

predlabels_ls

getting prediction for:  Thank you James, me, congratulations. Thank you for the nice project.  commissioners we have one last case. I think Debbie we're going to go to lunch now and  Thank you.
predicted label: 2
getting prediction for:  you
predicted label: 2
getting prediction for:  That was a bad joke. We should go out to lunch. Let's go out to lunch.
predicted label: 2
getting prediction for:  you  We should go out.  you
predicted label: 2
getting prediction for:  We've been waiting all day to hear from you, Debbie.  commissioners can we all hang in there?  up here.
predicted label: 2
getting prediction for:  Good afternoon commissioners Debbie Lawrence, Department of City Planning. This is an appeal of a density bonus compliance review case and these are director determinations and these are but the appeals go to city planning commission.
predicted label: 2
getting prediction for:  And again, I think the report is pretty clear. So if you can do it as succinctly as you can.
predic

[2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 0,
 2,
 2,
 2,
 0,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 0,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 1,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,
 2,


In [41]:
# sklearn.metrics and pandas are imported in the notebook's first cell.

# Guard against stale kernel state: the predictions must have been produced
# from the same rows as the true labels, otherwise the scores are meaningless.
if len(truelabels_ls) != len(predlabels_ls):
    raise ValueError(
        f"Label/prediction length mismatch: {len(truelabels_ls)} true labels vs "
        f"{len(predlabels_ls)} predictions - re-run the prediction cell."
    )

# zero_division=0 scores a class that is never predicted as 0 without emitting
# an UndefinedMetricWarning; the numeric result matches the default behaviour.
accuracy = accuracy_score(truelabels_ls, predlabels_ls)
precision = precision_score(truelabels_ls, predlabels_ls, average='weighted', zero_division=0)
recall = recall_score(truelabels_ls, predlabels_ls, average='weighted', zero_division=0)

# These are weighted averages, so the most frequent class dominates them;
# read the per-class rows of the report below for the minority classes.
print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}") 
print(f"Recall: {recall:.2f}")

# Per-class precision/recall/f1/support as a DataFrame (one row per class plus
# the accuracy and average rows).
report = classification_report(truelabels_ls, predlabels_ls, output_dict=True, zero_division=0)
report_df = pd.DataFrame(report).transpose()
report_df = report_df.round(2)  # Round to 2 decimal places for better readability

report_df


Accuracy: 0.96
Precision: 0.95
Recall: 0.96


Unnamed: 0,precision,recall,f1-score,support
0,0.67,0.4,0.5,5.0
1,0.5,0.22,0.31,9.0
2,0.97,0.99,0.98,296.0
accuracy,0.96,0.96,0.96,0.96
macro avg,0.71,0.54,0.6,310.0
weighted avg,0.95,0.96,0.95,310.0


In [42]:
# Find indices where true label is 0 but predicted as 2
def _predicted_neutral(true_label):
    """Return (row position, comment text) pairs whose true label equals
    `true_label` but whose predicted label is 2."""
    misses = []
    for position, actual in enumerate(truelabels_ls):
        if actual == true_label and predlabels_ls[position] == 2:
            misses.append((position, text_ls[position]))
    return misses


# Opposing (0) and supporting (1) comments that the model labelled as 2.
zero_as_two = _predicted_neutral(0)
one_as_two = _predicted_neutral(1)

# Print both groups, each under its own heading.
for heading, cases in (
    ("Cases where true label was 0 but predicted as 2:", zero_as_two),
    ("\nCases where true label was 1 but predicted as 2:", one_as_two),
):
    print(heading)
    for idx, text in cases:
        print(f"Index {idx}: {text}")


Cases where true label was 0 but predicted as 2:
Index 256:  or since they're next over last year's City Attorney's Office.  You can look at these three conditions and see if there's something in the findings that allow you to create, you know, what we call that nexus, you know, where we've discussed before in this commission. You know, what's the connection between these particular conditions and some effect of the project that, you know, according to the findings, you think needs to be addressed. So just as by way of guidance, if you look at finding number two, the projects location size high operations and other significant features, we'll be compatible with it. We'll not adversely affect refer to the degree adjacent properties surrounding neighborhood or the public health welfare and safety.  I think if you look at these three conditions that are proposed to be added to the project, and you look at the findings as currently drafted.  You may be able to articulate where these condit