In [2]:
# Llibreries necessàries
import os
import openai
import dotenv
from openai import OpenAI
import sys
import pandas as pd
import json
from dotenv import load_dotenv
import re

# Read the API key from the environment (load_dotenv() is called first)
load_dotenv()
openai_api_key = os.getenv('OPENAI_API_KEY')
openai.api_key = openai_api_key

# Ids of the models returned by the API for this key
models = openai.models.list()
model_list = [entry.id for entry in models.data]
print(model_list)

# Load the news articles
df = pd.read_csv('euaeleccions_news_dataset.csv')

df.columns


['gpt-4o-audio-preview-2024-10-01', 'gpt-4o-mini-audio-preview', 'gpt-4o-realtime-preview', 'gpt-4o-mini-audio-preview-2024-12-17', 'gpt-4o-mini-realtime-preview', 'dall-e-2', 'gpt-4o-mini-2024-07-18', 'o1-preview-2024-09-12', 'gpt-4o-mini', 'gpt-4-1106-preview', 'gpt-3.5-turbo', 'gpt-3.5-turbo-0125', 'gpt-3.5-turbo-instruct', 'gpt-4-0613', 'babbage-002', 'o1-mini', 'o1-mini-2024-09-12', 'whisper-1', 'dall-e-3', 'gpt-4o-2024-08-06', 'gpt-4o', 'gpt-3.5-turbo-16k', 'gpt-4o-realtime-preview-2024-10-01', 'omni-moderation-latest', 'omni-moderation-2024-09-26', 'tts-1-hd-1106', 'o1-preview', 'gpt-4', 'chatgpt-4o-latest', 'tts-1-hd', 'davinci-002', 'text-embedding-ada-002', 'gpt-4-turbo-2024-04-09', 'gpt-4-turbo', 'tts-1', 'tts-1-1106', 'gpt-3.5-turbo-instruct-0914', 'gpt-4-turbo-preview', 'gpt-4o-mini-realtime-preview-2024-12-17', 'gpt-4o-audio-preview', 'gpt-4o-2024-05-13', 'text-embedding-3-small', 'gpt-4o-2024-11-20', 'gpt-4o-audio-preview-2024-12-17', 'gpt-4o-realtime-preview-2024-12-17'

Index(['data', 'entradeta', 'permatitle', 'avantitol', 'tematiques', 'tags',
       'id', 'titol', 'imatges', 'titol_portada', 'comarques', 'cos',
       'redactor'],
      dtype='object')

In [3]:
# Prompt template used to classify the political leaning of each news item.
# `{article}` marks where the article text is inserted; the model is asked to
# reply with a JSON object holding a "reason" paragraph (in Catalan) and an
# integer "lean" from -5 (Democrat) to 5 (Republican).
template_to = '''
The following is a news article about US 2024 Elections in Catalan language. Read it and perform the task that follows. Respond with a JSON object of key-value pairs.


####################


{article}


####################


Task: Determine the political leaning of this article within the U.S. political context. Is it supporting the Democrat party or the Republican party? Supporting a party can mean supporting its viewpoints, politicians, or policies. Provide reasoning for your answer.


1. Instruction: Give a short paragraph summarizing in what ways the article supports the Democrat party or the Republican party.
Key: "reason"
Value: A paragraph of text in catalan language.


2. Instruction: Give a number from -5 to 5, with -5 indicating strong support for Democrats and 5 indicating strong support for Republicans. A value of 0 indicates that the article has no clear political leaning towards either side.
Key: "lean"
Value: An integer number from -5 to 5.


Do not return anything except the JSON object of key-value pairs as output.
'''

In [4]:
# Module-level OpenAI client used by generate_api_response_model_4o.
# NOTE(review): built with no arguments — presumably it picks up
# OPENAI_API_KEY from the environment; confirm against the openai library docs.
client = OpenAI()


def generate_api_response_model_4o(prompt):
    """Send `prompt` to the "gpt-4o" chat model and return the reply text.

    The request carries a developer message instructing the model to answer
    in Catalan, followed by `prompt` as the user message.

    Args:
        prompt: Text sent as the user message.

    Returns:
        The `content` of the message in the first returned choice.
    """
    conversation = [
        {"role": "developer", "content": "Contestaràs tot en català."},
        {"role": "user", "content": prompt},
    ]
    completion = client.chat.completions.create(model="gpt-4o", messages=conversation)
    first_choice = completion.choices[0]
    return first_choice.message.content

In [5]:
def generate_prompt(article, template):
    """Build the prompt for one article.

    Args:
        article: Mapping-like record; its 'cos' entry is used as the article
            text and is converted with str() first.
        template: Prompt text containing the literal placeholder "{article}".

    Returns:
        `template` with every "{article}" occurrence replaced by the text.
    """
    body_text = str(article['cos'])
    return template.replace("{article}", body_text)

In [6]:
# Number of rows in df, shown in the per-row progress messages
length = df.shape[0]

In [7]:
def repair_json(json_string):
    """Extract and parse a JSON payload embedded in surrounding text.

    The text is searched for the span from the first "{" to the last "}"
    (or from the first "[" to the last "]"), so wrappers such as Markdown
    code fences around the payload are ignored.

    Args:
        json_string: Raw response text that should contain a JSON object
            or array.

    Returns:
        The parsed JSON value (dict or list).

    Raises:
        ValueError: If the input is not a string, if no brace/bracket
            delimited span is present, or if the extracted span is not
            valid JSON. In the last case the underlying
            json.JSONDecodeError is attached as the cause.
    """
    # A missing response (e.g. None) would otherwise escape as a TypeError
    # from re.search, which callers catching ValueError would not handle.
    if not isinstance(json_string, str):
        raise ValueError("No JSON object found in response")

    # Greedy match with DOTALL: first opening delimiter to last closing one,
    # across line breaks.
    json_match = re.search(r"(\{.*\}|\[.*\])", json_string, re.DOTALL)
    if json_match is None:
        raise ValueError("No JSON object found in response")

    json_content = json_match.group(0)
    try:
        parsed_json = json.loads(json_content)
    except json.JSONDecodeError as e:
        # A candidate span was found but does not parse: report that
        # precisely and keep the decoder error for debugging.
        raise ValueError(f"Extracted text is not valid JSON: {e}") from e

    print("Valid JSON after extraction:", parsed_json)
    return parsed_json


In [8]:
# Classify every article and collect the parsed model answers keyed by the
# article's 'id'.
results = {}

for row_number, (_, row) in enumerate(df.iterrows()):
    print(f"Processing row {row_number} of {length}")
    prompt = generate_prompt(row, template_to)
    response = generate_api_response_model_4o(prompt)

    try:
        # First attempt: the response is a bare JSON document.
        response_json = json.loads(response)
    except json.JSONDecodeError:
        try:
            # Fallback: extract the JSON payload from surrounding text.
            response_json = repair_json(response)
        except ValueError:
            print("FATAL ERROR: Could not extract JSON from response")
            results[row['id']] = {"error": "Invalid JSON response"}
            # Stop at the first unrecoverable response; the remaining rows
            # are not processed.
            break

    results[row['id']] = response_json

# Convert the results dictionary to a JSON string
final_json = json.dumps(results, indent=4)
print(final_json)

Processing row 0 of 134
Valid JSON after extraction: {'reason': "L'article mostra un cert suport al Partit Demòcrata en destacar que tot i les crítiques i un mal debat, Joe Biden s'ha mantingut ferm en la seva decisió de continuar amb la seva candidatura. Es fa ressò del suport que ha rebut de figures destacades com Barack Obama i Hillary Clinton, que subratllen la seva experiència i compromís amb la gent corrent. També es menciona que l'actuació de Biden en el debat va ser afectada per la grip, minimitzant així els impactes negatius percebuts del seu rendiment.", 'lean': -3}
Processing row 1 of 134
Valid JSON after extraction: {'reason': "L'article té una inclinació favorable cap al Partit Republicà, ja que destaca la mala actuació de Joe Biden en el debat i la sensació d'inseguretat sobre la seva capacitat per liderar el país, alhora que presenta Donald Trump com a més segur i en forma. A més, l'article menciona les expressions negatives dels comentaristes de la CNN sobre Biden, incl

In [10]:
# Write the serialized results to disk
with open('results_to_politic_eleccions.json', mode='w') as json_file:
    print(final_json, end='', file=json_file)

print("JSON file has been saved as 'results_to_politic_eleccions.json'.")

JSON file has been saved as 'results_to_politic_eleccions.json'.


In [11]:
# Load the classification results saved earlier in this notebook
# ('results_to_politic_eleccions.json'). Each column is one article id and
# the rows are the "reason" and "lean" fields.
df = pd.read_json('results_to_politic_eleccions.json')

print("DataFrame loaded from JSON:")
df

DataFrame loaded from JSON:


Unnamed: 0,3300883,3300754,3300710,3300944,3301672,3301920,3303320,3303411,3303412,3303381,...,3321078,3321513,3321719,3321704,3321970,3322269,3323202,3323391,3323520,3323682
reason,L'article mostra un cert suport al Partit Demò...,L'article té una inclinació favorable cap al P...,L'article destaca elements negatius tant de Jo...,L'article sembla tenir un enfocament més críti...,L'article destaca els dubtes interns i les crí...,L'article sembra donar suport al Partit Demòcr...,L'article no mostra suport clar cap al Partit ...,"L'article manté un to neutre i objectiu, limit...",L'article sembla actuar de manera neutral i eq...,L'article proporciona una visió històrica sobr...,...,L'article presenta una aparent neutralitat en ...,L'article sembla inclinar-se cap al Partit Rep...,L'article presenta els resultats electorals al...,L'article presenta una visió crítica cap a Don...,L'article destaca diverses crítiques i preocup...,"L'article destaca la figura de Chris Wright, d...",L'article mostra un suport decidit al Partit R...,L'article descriu favorablement la figura de S...,L'article sembla tenir una inclinació a favor ...,L'article sembla tenir una inclinació favorabl...
lean,-3,3,0,-2,1,-2,0,0,0,0,...,0,4,4,-4,-3,3,4,3,-3,3


In [16]:
# Average the "lean" score over all articles.
# The loaded frame has one column per article, so it is transposed to one row
# per article first. The guard keeps this cell re-runnable: transposing a
# second time would flip the frame back and make df['lean'] fail.
if 'lean' not in df.columns:
    df = df.T
df['lean'] = df['lean'].astype(int)
average_lean = df['lean'].mean()
print(f"Average lean: {average_lean}")

Average lean: -0.9925373134328358


In [17]:
# Average lean of the tail of the table: every article from row position 100
# to the end. This is an open-ended positional slice, so the number of
# articles depends on the table size (it is reported in the message).
df_middle = df.iloc[100:]
average_tone_middle = df_middle['lean'].mean()
print(f"Average lean of the {len(df_middle)} articles from position 100 onward: {average_tone_middle}")

Average lean of the middle 200 articles: 0.029411764705882353
