In [1]:
# Imports necessaris
import json
import os
import re
import pandas as pd
import openai

from pathlib import Path
from functools import lru_cache
from dotenv import load_dotenv
from typing import Optional, List, TypedDict
from duckduckgo_search import DDGS
from IPython.display import display, Image

from typing import TypedDict, Literal

from langchain_openai import ChatOpenAI
from langchain.prompts import PromptTemplate

from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_transformers import BeautifulSoupTransformer

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
# Reference results previously obtained with the gpt-4.5 model
data_4_5 = json.loads(
    Path("resultats/resultats_4_5.json").read_text(encoding="utf-8")
)

# Load the news dataset from its CSV file
file_path = '../final_dataset_v2.csv'
df = pd.read_csv(filepath_or_buffer=file_path, low_memory=False)

In [3]:
# Select every article that has at least one "No" in "compliment"
def find_articles_with_no_compliment(data_json):
    """Return the articles whose analysis contains at least one "No" verdict.

    Args:
        data_json: Mapping of article id -> mapping of criterion name -> entry.
            Only entries that are dicts are inspected; their "compliment"
            value is compared after stripping whitespace and lower-casing.

    Returns:
        A dict of article id -> that article's full (unmodified) criteria
        mapping, for every article with at least one matching entry.
    """

    def _has_negative_verdict(criteria):
        # any() stops at the first match, like the early exit of a loop.
        return any(
            isinstance(entry, dict)
            and entry.get("compliment", "").strip().lower() == "no"
            for entry in criteria.values()
        )

    return {
        article_id: criteria
        for article_id, criteria in data_json.items()
        if _has_negative_verdict(criteria)
    }

# Count the verdicts per topic across all articles
def count_criteria_by_topic(data_json):
    """Tally the "compliment" verdicts of every category across all articles.

    Args:
        data_json: Mapping of article id -> mapping of category -> entry.
            Entries that are not dicts are skipped (the same guard that
            find_articles_with_no_compliment applies). A missing or
            non-string "compliment" registers the category but counts nothing.

    Returns:
        A pandas DataFrame indexed by category (index name "Category") with
        the columns "Sí", "No" and "No aplica" holding the counts. Verdicts
        are matched exactly after stripping surrounding whitespace; any other
        value is ignored.
    """
    category_counts = {}

    for article in data_json.values():
        for category, values in article.items():
            # Non-dict entries carry no verdict; skipping them avoids an
            # AttributeError on .get().
            if not isinstance(values, dict):
                continue

            raw_verdict = values.get("compliment", "")
            # A null / non-string verdict (e.g. JSON null) cannot be stripped.
            compliment = raw_verdict.strip() if isinstance(raw_verdict, str) else ""

            counts = category_counts.setdefault(
                category, {"Sí": 0, "No": 0, "No aplica": 0}
            )
            if compliment in counts:
                counts[compliment] += 1

    # Convert the result to a DataFrame
    df_category_counts = pd.DataFrame.from_dict(category_counts, orient='index')
    df_category_counts.index.name = "Category"
    return df_category_counts

def clean_json_output(raw_output):
    """Strip Markdown code-fence markers from a raw model response.

    The input is trimmed first. Then every triple-backtick opener at the start
    of a line (optionally followed by "json", case-insensitive, plus trailing
    whitespace) and every triple-backtick closer at the end of a line (plus
    leading whitespace) is removed.

    Returns:
        The remaining text as a string.
    """
    fence_pattern = re.compile(
        r"^```(?:json)?\s*|\s*```$",
        flags=re.IGNORECASE | re.MULTILINE,
    )
    trimmed = raw_output.strip()
    return fence_pattern.sub("", trimmed)

# Schema of a single criterion verdict; it is the type handed to
# llm.with_structured_output(...) when the pipelines are built.
# NOTE(review): documented with # comments rather than a class docstring, on
# the assumption that the structured-output conversion may forward a docstring
# to the model — confirm before adding one.
class Criterion(TypedDict):
    # Verdict label: exactly one of "Sí", "No" or "No aplica".
    compliment: Literal["Sí", "No", "No aplica"]
    # Free-text comment (plain string).
    comentari: str

In [4]:
# Choose the model to evaluate and load the API key.

# LOAD KEY
# Read .env *before* building any client so that OPENAI_API_KEY is already in
# the environment when ChatOpenAI is constructed.
load_dotenv()
api_key = os.getenv('OPENAI_API_KEY')
if not api_key:
    # Fail early with a clear message instead of a cryptic error from
    # assigning None to os.environ or from the client constructor.
    raise RuntimeError(
        "OPENAI_API_KEY is not set; define it in the environment or in a .env file."
    )
os.environ["OPENAI_API_KEY"] = api_key
openai.api_key = api_key

# CHOOSE THE MODEL
# Per the original notes: gpt-4 and gpt-4.1 accept temperature=0, whereas
# gpt-5 does not, so gpt-5 is run with temperature=1.
# Alternative: llm = ChatOpenAI(model="gpt-4.1", temperature=0)
llm = ChatOpenAI(model="gpt-5", temperature=1)

# List the OpenAI models available to this key
models = openai.models.list()
for model in models:
    print(model.id)

gpt-4-0613
gpt-4
gpt-3.5-turbo
gpt-audio
gpt-5-nano
gpt-audio-2025-08-28
gpt-realtime
gpt-realtime-2025-08-28
davinci-002
babbage-002
gpt-3.5-turbo-instruct
gpt-3.5-turbo-instruct-0914
dall-e-3
dall-e-2
gpt-4-1106-preview
gpt-3.5-turbo-1106
tts-1-hd
tts-1-1106
tts-1-hd-1106
text-embedding-3-small
text-embedding-3-large
gpt-4-0125-preview
gpt-4-turbo-preview
gpt-3.5-turbo-0125
gpt-4-turbo
gpt-4-turbo-2024-04-09
gpt-4o
gpt-4o-2024-05-13
gpt-4o-mini-2024-07-18
gpt-4o-mini
gpt-4o-2024-08-06
chatgpt-4o-latest
o1-mini-2024-09-12
o1-mini
gpt-4o-realtime-preview-2024-10-01
gpt-4o-audio-preview-2024-10-01
gpt-4o-audio-preview
gpt-4o-realtime-preview
omni-moderation-latest
omni-moderation-2024-09-26
gpt-4o-realtime-preview-2024-12-17
gpt-4o-audio-preview-2024-12-17
gpt-4o-mini-realtime-preview-2024-12-17
gpt-4o-mini-audio-preview-2024-12-17
o1-2024-12-17
o1
gpt-4o-mini-realtime-preview
gpt-4o-mini-audio-preview
computer-use-preview
o3-mini
o3-mini-2025-01-31
gpt-4o-2024-11-20
computer-use-previe

In [5]:
# A continuació es recuperen els templates que s'utilitzaran per a la prova-benchmarking
# Els templates es troben a la carpeta "prompts"
# Hi ha un total de 7 templates, un per cada tema a avaluar
# Els temes a avaluar són:
# Pluralisme, diversitat, minories, paritat i equilibri de gènere, compromís, veracitat i rigor, independència, imparcialitat i neutralitat, denominacions, model de llenguatge i adequació
# Aquests templates es poden modificar i són els que seran utilitzats per fer la prova-benchmarking

# Si es volen modificar els templates, cal fer-ho a la carpeta "prompts"

In [6]:
# Read the raw prompt templates (one text file per topic) from the "prompts" folder

prompt_pluralisme = Path("prompts/1_pluralisme.txt").read_text(encoding="utf-8")

prompt_diversitat = Path("prompts/2_diversitat.txt").read_text(encoding="utf-8")

prompt_minories = Path("prompts/3_minories.txt").read_text(encoding="utf-8")

prompt_paritat = Path("prompts/4_paritat.txt").read_text(encoding="utf-8")

prompt_compromis = Path("prompts/5_compromis.txt").read_text(encoding="utf-8")

prompt_veracitat = Path("prompts/6_veracitat.txt").read_text(encoding="utf-8")

prompt_imparcialitat = Path("prompts/7_imparcialitat.txt").read_text(encoding="utf-8")

In [7]:
# For each topic: a PromptTemplate taking the article as "text", piped into the
# LLM constrained to the Criterion structured-output schema.

# NOTE: the name "pluralisme_promt" (sic) is kept as-is so that any other
# reference to it keeps working.
pluralisme_promt = PromptTemplate(template=prompt_pluralisme, input_variables=["text"])
pluralisme_pipeline = pluralisme_promt | llm.with_structured_output(Criterion)

diversitat_prompt = PromptTemplate(template=prompt_diversitat, input_variables=["text"])
diversitat_pipeline = diversitat_prompt | llm.with_structured_output(Criterion)

minories_prompt = PromptTemplate(template=prompt_minories, input_variables=["text"])
minories_pipeline = minories_prompt | llm.with_structured_output(Criterion)

paritat_prompt = PromptTemplate(template=prompt_paritat, input_variables=["text"])
paritat_pipeline = paritat_prompt | llm.with_structured_output(Criterion)

compromis_prompt = PromptTemplate(template=prompt_compromis, input_variables=["text"])
compromis_pipeline = compromis_prompt | llm.with_structured_output(Criterion)

veracitat_prompt = PromptTemplate(template=prompt_veracitat, input_variables=["text"])
veracitat_pipeline = veracitat_prompt | llm.with_structured_output(Criterion)

imparcialitat_prompt = PromptTemplate(template=prompt_imparcialitat, input_variables=["text"])
imparcialitat_pipeline = imparcialitat_prompt | llm.with_structured_output(Criterion)



def pluralisme_analysis(article_text: str):
    """Run the pluralisme pipeline on an article.

    Args:
        article_text: Article body, passed to the prompt as "text".

    Returns:
        Whatever pluralisme_pipeline.invoke produces for that input.
    """
    return pluralisme_pipeline.invoke({"text": article_text})

def diversitat_analysis(article_text: str):
    """Run the diversitat pipeline on an article.

    Args:
        article_text: Article body, passed to the prompt as "text".

    Returns:
        Whatever diversitat_pipeline.invoke produces for that input.
    """
    return diversitat_pipeline.invoke({"text": article_text})

def minories_analysis(article_text: str):
    """Run the minories pipeline on an article.

    Args:
        article_text: Article body, passed to the prompt as "text".

    Returns:
        Whatever minories_pipeline.invoke produces for that input.
    """
    return minories_pipeline.invoke({"text": article_text})

def paritat_analysis(article_text: str):
    """Run the paritat (gender parity and balance) pipeline on an article.

    Args:
        article_text: Article body, passed to the prompt as "text".

    Returns:
        Whatever paritat_pipeline.invoke produces for that input.
    """
    return paritat_pipeline.invoke({"text": article_text})

def compromis_analysis(article_text: str):
    """Run the compromis pipeline on an article.

    Args:
        article_text: Article body, passed to the prompt as "text".

    Returns:
        Whatever compromis_pipeline.invoke produces for that input.
    """
    return compromis_pipeline.invoke({"text": article_text})

def veracitat_analysis(article_text: str):
    """Run the veracitat (veracity and rigor) pipeline on an article.

    Args:
        article_text: Article body, passed to the prompt as "text".

    Returns:
        Whatever veracitat_pipeline.invoke produces for that input.
    """
    return veracitat_pipeline.invoke({"text": article_text})

def imparcialitat_analysis(article_text: str):
    """Run the imparcialitat (independence, impartiality, neutrality) pipeline.

    Args:
        article_text: Article body, passed to the prompt as "text".

    Returns:
        Whatever imparcialitat_pipeline.invoke produces for that input.
    """
    return imparcialitat_pipeline.invoke({"text": article_text})

In [23]:
# BENCHMARK WITH THE SELECTED MODEL

# Analysis functions applied to every article
analysis_functions = [
    pluralisme_analysis,
    diversitat_analysis,
    minories_analysis,
    paritat_analysis,
    compromis_analysis,
    veracitat_analysis,
    imparcialitat_analysis
]

# Reuse the article ids present in the reference results (JSON keys are
# strings, the dataset "id" column is compared as int).
ids = [int(article_key) for article_key in data_4_5]

df_benchmark = df[df['id'].isin(ids)].reset_index(drop=True).copy()

# article_id -> merged results produced by the model under evaluation
all_results = {}
total_articles = len(df_benchmark)

# --- Loop over each selected row ---
for j, (_, row) in enumerate(df_benchmark.iterrows(), start=1):
    if j % 10 == 0 or j == 1:
        print(f"Processing article {j} of {total_articles}...")

    article_id = str(row["id"])
    text = row["cos"]
    merged_result = {}

    for func in analysis_functions:
        try:
            result = func(text)

            # Pydantic instance -> plain dict (model_dump is the Pydantic v2
            # name; .dict() is kept as the fallback for older versions).
            if hasattr(result, 'model_dump'):
                result = result.model_dump()
            elif hasattr(result, 'dict'):
                result = result.dict()

            if isinstance(result, dict) and "compliment" in result:
                # Flat Criterion output ({"compliment", "comentari"}): nest it
                # under a per-criterion key, otherwise each analysis would
                # overwrite the previous one and only the last would survive.
                # NOTE(review): the key is derived from the function name;
                # confirm it matches the category keys of the reference data.
                criterion_name = func.__name__
                if criterion_name.endswith("_analysis"):
                    criterion_name = criterion_name[: -len("_analysis")]
                merged_result[criterion_name] = result
            else:
                # Output already keyed by category: merge it unchanged.
                merged_result.update(result)

        except Exception as e:
            # Best effort: record the failure and continue with the next analysis.
            print(f"❌ Error in {func.__name__} for article {article_id}: {e}")
            merged_result[func.__name__] = {"error": str(e)}

    # Store in the global dictionary
    all_results[article_id] = merged_result

Processing article 1 of 134...
Processing article 10 of 134...
Processing article 20 of 134...
Processing article 30 of 134...
Processing article 40 of 134...
Processing article 50 of 134...
Processing article 60 of 134...
Processing article 70 of 134...
Processing article 80 of 134...
Processing article 90 of 134...
Processing article 100 of 134...
Processing article 110 of 134...
Processing article 120 of 134...
Processing article 130 of 134...


In [24]:
# CHOOSE THE NAME OF THE FILE WHERE THE RESULTS ARE SAVED
nom = "resultats_gpt5_2"

# Serialize before opening the file: opening with "w" truncates it, so a
# serialization error raised midway would otherwise leave an empty or partial
# results file behind.
serialized_results = json.dumps(all_results, ensure_ascii=False, indent=2)
with open(f"resultats/{nom}.json", "w", encoding="utf-8") as f:
    f.write(serialized_results)