In [2]:
!pip install openai



In [36]:
import getpass
import json
import os

from openai import OpenAI

# Never hardcode the API key in the notebook (it ends up in saved outputs and version
# control). Read it from the OPENAI_API_KEY environment variable and fall back to a
# hidden interactive prompt when the variable is unset or empty.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY") or getpass.getpass("OpenAI API key: ")
)




# JSON Schema for the structured answer requested from the model in `label_headline`:
# an object with exactly three required fields and no additional keys.
SCHEMA = {
    "type": "object",
    "properties": {
        # One of the three allowed class names.
        "label": {
            "type": "string",
            "enum": ["related", "unrelated", "uncertain"],
        },
        # Numeric confidence constrained to the closed interval [0, 1].
        "confidence": {
            "type": "number",
            "minimum": 0,
            "maximum": 1,
        },
        # Free-text justification for the chosen label.
        "rationale": {
            "type": "string",
        },
    },
    "required": ["label", "confidence", "rationale"],
    "additionalProperties": False,
}

# Task description sent as the `instructions` argument of every classification request.
# Three lines joined by newlines, with no trailing newline.
INSTRUCTIONS = (
    "Classify Serbian news headlines as related/unrelated to the anti-corruption protests and student blockades.\n"
    "Use only the headline. If unclear, use uncertain.\n"
    "Keep rationale short (<= 15 words)."
)

def label_headline(headline: str, model: str = "gpt-5-mini") -> dict:
    """Classify a single headline with the OpenAI Responses API.

    Args:
        headline: Headline text to classify.
        model: Model name forwarded to the API. Defaults to the model this
            notebook was written against.

    Returns:
        The model's JSON answer parsed into a dict. The request asks for strict
        conformance to ``SCHEMA``, so the dict is expected to hold ``label``,
        ``confidence`` and ``rationale``.

        Blank or non-string input (for example the NaN pandas produces for an
        empty CSV cell) is not sent to the API; it is reported as ``uncertain``
        with zero confidence instead.

    Raises:
        json.JSONDecodeError: If the response text is not valid JSON.
        Errors raised by the OpenAI client (e.g. authentication failures)
        propagate unchanged.
    """
    # Nothing to classify: skip the API call rather than sending an empty or
    # non-text payload.
    if not isinstance(headline, str) or not headline.strip():
        return {"label": "uncertain", "confidence": 0.0, "rationale": "Empty headline."}

    resp = client.responses.create(
        model=model,
        instructions=INSTRUCTIONS,
        input=[{"role": "user", "content": headline}],
        # Request structured output that must validate against SCHEMA.
        text={
            "format": {
                "type": "json_schema",
                "name": "headline_label",
                "strict": True,
                "schema": SCHEMA,
            }
        },
    )
    return json.loads(resp.output_text)



# Smoke test: classify one headline and print the parsed result (performs a live API call).
print(label_headline(headline="pala nadstresnica"))

AuthenticationError: Error code: 401 - {'error': {'message': 'Incorrect API key provided: Secret key. You can find your API key at https://platform.openai.com/account/api-keys.', 'type': 'invalid_request_error', 'param': None, 'code': 'invalid_api_key'}}

In [6]:
import pandas as pd

# Read the semicolon-separated headline file and keep the text column as its own Series.
n1 = pd.read_csv(filepath_or_buffer="/content/random_headlines.csv", sep=";")
headline = n1.loc[:, "headline"]

In [7]:
# Start from an empty DataFrame so `gpt_result` exists before the labelling cell runs.
gpt_result = pd.DataFrame()

In [15]:


# Label every headline (one API call each) and build the result frame in a single step.
# Collecting the records in a list avoids the quadratic cost of concatenating inside a
# loop, and assigning `gpt_result` afresh keeps this cell idempotent: re-running it does
# not append a second copy of every row. Passing `columns=` fixes the column order and
# also yields a correctly shaped (empty) frame when there are no headlines.
records = [label_headline(h) for h in headline]
gpt_result = pd.DataFrame(records, columns=["label", "confidence", "rationale"])



In [16]:
# Persist the model labels next to the notebook; the row index is written as the first column.
gpt_result.to_csv(path_or_buf="gpt_result.csv")

In [17]:
# Display the labelled results (pandas truncates the middle rows of a long frame).
gpt_result

Unnamed: 0,label,confidence,rationale
0,related,0.88,Direct appeal about resolving Serbia's crisis ...
1,uncertain,0.45,"Mentions a billboard and mayor, but no explici..."
2,unrelated,0.9,"Military exercises in Kosovo, unrelated to ant..."
3,uncertain,0.65,No mention of protests or student blockades; c...
4,related,0.92,Discusses LRAD use at a Belgrade protest — dir...
...,...,...,...
573,unrelated,0.92,About passport rankings; no mention of protest...
574,unrelated,0.98,"About UK rejoining Erasmus+ education program,..."
575,unrelated,0.92,"Diplomatic appointment, unrelated to anti-corr..."
576,related,0.93,"Describes an attack occurring at a blockade, d..."


In [23]:
# Normalise the reference labels so they match the lowercase class names used for the
# model output: trim surrounding whitespace as well as lower-casing, because a value
# such as "Related " would otherwise fall outside the label list used by the metrics.
n1["true_label"] = n1["true_label"].str.strip().str.lower()

In [24]:
# Reference labels from the input file and labels produced by the model; the metric
# cells compare these two Series.
true_values = n1.loc[:, "true_label"]
predicted_values = gpt_result.loc[:, "label"]

In [25]:
# Display the reference labels for a quick visual check.
true_values

Unnamed: 0,true_label
0,related
1,uncertain
2,unrelated
3,related
4,related
...,...
573,related
574,related
575,related
576,related


In [26]:
import pandas as pd
from sklearn.metrics import confusion_matrix

# Fixed class order shared by the rows and columns of the confusion matrix.
labels = ["related", "uncertain", "unrelated"]

cm = confusion_matrix(y_true=true_values, y_pred=predicted_values, labels=labels)

# Wrap the raw counts in a labelled frame: rows are reference labels, columns are model labels.
cm_df = pd.DataFrame(
    data=cm,
    index=["true_" + name for name in labels],
    columns=["pred_" + name for name in labels],
)
cm_df


Unnamed: 0,pred_related,pred_uncertain,pred_unrelated
true_related,151,109,212
true_uncertain,0,2,8
true_unrelated,2,2,92


In [27]:
from sklearn.metrics import classification_report

# Per-class precision/recall/F1 in a fixed class order; zero_division=0 reports 0
# instead of warning when a class has no predicted or true samples.
labels = ["related", "uncertain", "unrelated"]
print(
    classification_report(
        y_true=true_values,
        y_pred=predicted_values,
        labels=labels,
        zero_division=0,
    )
)

              precision    recall  f1-score   support

     related       0.99      0.32      0.48       472
   uncertain       0.02      0.20      0.03        10
   unrelated       0.29      0.96      0.45        96

    accuracy                           0.42       578
   macro avg       0.43      0.49      0.32       578
weighted avg       0.86      0.42      0.47       578



0.9320987654320988
