In [7]:
# Llibreries necessàries
import os
import openai
import dotenv
from openai import OpenAI
import sys
import pandas as pd
import json
from dotenv import load_dotenv
import re

# Load the API key from the OPENAI_API_KEY environment variable.
# load_dotenv() runs first; os.getenv returns None when the variable is unset.
load_dotenv()
openai_api_key = os.getenv('OPENAI_API_KEY')
openai.api_key = openai_api_key

# List the ids of the models returned by the API and print them
models = openai.models.list()
model_list = [model.id for model in models.data]
print(model_list)

# Load the news articles
df = pd.read_csv('dana_news_dataset.csv')

# Bare last expression: the notebook displays the dataset's column index
df.columns


['gpt-4o-audio-preview-2024-10-01', 'gpt-4o-mini-audio-preview', 'gpt-4o-realtime-preview', 'gpt-4o-mini-audio-preview-2024-12-17', 'gpt-4o-mini-realtime-preview', 'dall-e-2', 'gpt-4-1106-preview', 'gpt-3.5-turbo', 'gpt-3.5-turbo-0125', 'gpt-3.5-turbo-instruct', 'babbage-002', 'o1-mini', 'o1-mini-2024-09-12', 'whisper-1', 'gpt-4o-mini-2024-07-18', 'dall-e-3', 'gpt-3.5-turbo-16k', 'gpt-4o-realtime-preview-2024-10-01', 'gpt-4o', 'omni-moderation-latest', 'o1-preview-2024-09-12', 'omni-moderation-2024-09-26', 'tts-1-hd-1106', 'o1-preview', 'gpt-4', 'gpt-4-0613', 'chatgpt-4o-latest', 'tts-1-hd', 'davinci-002', 'text-embedding-ada-002', 'gpt-4-turbo-2024-04-09', 'gpt-4-turbo', 'tts-1', 'tts-1-1106', 'gpt-3.5-turbo-instruct-0914', 'gpt-4o-mini', 'gpt-4-turbo-preview', 'gpt-4o-2024-08-06', 'gpt-4o-mini-realtime-preview-2024-12-17', 'gpt-4o-audio-preview', 'gpt-4o-2024-05-13', 'text-embedding-3-small', 'gpt-4o-2024-11-20', 'text-embedding-3-large', 'gpt-4o-audio-preview-2024-12-17', 'gpt-4o-re

Index(['id', 'entradeta_id', 'cos', 'redactor', 'imatges', 'data', 'entradeta',
       'permatitle', 'avantitol', 'tematiques', 'tags', 'titol',
       'titol_portada', 'comarques'],
      dtype='object')

In [22]:
# We will study how positive or negative the news articles are.
# Prompt template: the "{article}" placeholder is replaced with the article
# body by generate_prompt; the model is asked for a JSON object with the keys
# "reason" (paragraph) and "tone" (integer from -5 to 5).
template_to = '''
The following is a news article. Read it and perform the task that follows. Respond with a JSON object of key-value pairs.


####################


{article}


####################


Task: Determine the overall tone of the article. Is it negative, positive, or neutral?


1. Instruction: Provide a short paragraph summarizing in what ways the article has a negative or positive tone. 
   Key: "reason"
   Value: A paragraph of text.


2. Instruction: Provide a number from -5 to 5, with -5 indicating a very negative tone and 5 indicating a very positive tone. A value of 0 indicates that the article has a neutral tone.
   Key: "tone"
   Value: An integer number from -5 to 5.


Do not return anything except the JSON object of key-value pairs as output.
'''

In [23]:
# Module-level OpenAI client used by generate_api_response_model_4o; no arguments are passed explicitly here.
client = OpenAI()


def generate_api_response_model_4o(prompt):
    """Send `prompt` to the gpt-4o chat model and return the reply text.

    The request carries a developer message instructing the model to answer
    in Catalan, followed by `prompt` as the user message.

    Args:
        prompt: Full text of the user message.

    Returns:
        The content of the first choice as a string. An empty string is
        returned when the response carries no text content (``content`` is
        None), so callers that pass the result to ``json.loads`` get a
        ``JSONDecodeError`` (which they already handle) instead of an
        unhandled ``TypeError``.
    """
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "developer", "content": "Contestaràs tot en català."},
            {"role": "user", "content": prompt},
        ],
    )
    response_content = completion.choices[0].message.content
    # `content` is nullable in the API response; normalise it to a string.
    return response_content if response_content is not None else ""

In [24]:
def generate_prompt(article, template):
    """Build the prompt for a single article.

    Every "{article}" placeholder in `template` is substituted with the
    string form of `article['cos']`.

    Args:
        article: Mapping-like object exposing a 'cos' entry.
        template: Prompt text containing the "{article}" placeholder.

    Returns:
        The template with the placeholder replaced.
    """
    body_text = str(article['cos'])
    return template.replace("{article}", body_text)

In [26]:
# Number of rows in df; shown in the progress message of the processing loop
length = len(df)

In [25]:
def repair_json(json_string):
    """Extract and parse a JSON object or array embedded in surrounding text.

    The text is searched for an object span (first "{" through the last "}")
    and an array span (first "[" through the last "]"). The spans are tried in
    order of where they start, and the first one that parses is returned.
    Trying both spans, rather than committing to whichever bracket appears
    first, lets input such as 'See [note]: {"tone": 2}' still yield the
    object.

    Args:
        json_string: Text that may contain JSON wrapped in other content.

    Returns:
        The parsed JSON value (dict or list).

    Raises:
        ValueError: If no bracketed span exists, or if no span parses as
            JSON (the underlying JSONDecodeError is attached as the cause).
    """
    candidates = []
    for pattern in (r"\{.*\}", r"\[.*\]"):
        match = re.search(pattern, json_string, re.DOTALL)
        if match:
            candidates.append(match)

    if not candidates:
        raise ValueError("No JSON object found in response")

    # Leftmost span first, matching the precedence of a single combined search.
    candidates.sort(key=lambda m: m.start())

    last_error = None
    for match in candidates:
        try:
            parsed_json = json.loads(match.group(0))
        except json.JSONDecodeError as e:
            last_error = e
            continue
        print("Valid JSON after extraction:", parsed_json)
        return parsed_json

    raise ValueError(
        f"Response contains no parseable JSON: {last_error}"
    ) from last_error


In [28]:
results = {}

# Query the model once per article and store the parsed answer under the
# article id. The id is kept in `article_id` rather than `id`, so the builtin
# `id` is not shadowed in the notebook namespace.
for row_number, (_, row) in enumerate(df.iterrows()):
    print(f"Processing row {row_number} of {length}")
    article_id = row['id']
    prompt = generate_prompt(row, template_to)
    response = generate_api_response_model_4o(prompt)

    try:
        # Expected case: the whole response is a JSON document.
        response_json = json.loads(response)
        results[article_id] = response_json
    except json.JSONDecodeError:
        try:
            # Fallback: extract a JSON fragment from the surrounding text.
            response_json = repair_json(response)
            results[article_id] = response_json
        except ValueError:
            # Record the failure for this article and stop processing.
            print("FATAL ERROR: Could not extract JSON from response")
            results[article_id] = {"error": "Invalid JSON response"}
            break

# Convert the results dictionary to a JSON string
final_json = json.dumps(results, indent=4)
print(final_json)

Processing row 0 of 419
Valid JSON after extraction: {'reason': "L'article té un to negatiu ja que descriu els danys importants a les oliveres causats per fred intens, nevades i les seves conseqüències devastadores. Es comparen les pèrdues potencials amb les gelades de 2001, que van resultar en pèrdues milionàries. Les declaracions dels experts i sindicats transmeten preocupació i urgència per l'impacte econòmic i agrícola del temporal, així com la necessitat d'acció per part de les autoritats.", 'tone': -4}
Processing row 1 of 419
Valid JSON after extraction: {'reason': "L'article té un to positiu ja que destaca l'esforç del Departament d'Agricultura per avaluar i mitigar els danys causats pel temporal Filomena a les oliveres. S'està implementant un procés estructurat i coordinat per recollir informació i proporcionar ajuts als productors afectats per restaurar la capacitat productiva de les seves explotacions. A més, es treballa en conjunt amb les organitzacions agràries per adaptar 

In [29]:
# Save the JSON results to a file
output_path = 'results_to_dana.json'
with open(output_path, 'w', encoding='utf-8') as json_file:
    json_file.write(final_json)

# Report the path that was actually written (the message previously named
# 'results.json', which is not the file created above).
print(f"JSON file has been saved as '{output_path}'.")

JSON file has been saved as 'results.json'.


In [31]:
import pandas as pd

# Load the saved results back into a DataFrame.
# NOTE: this rebinds `df`, which earlier held the news dataset read from the CSV.
df = pd.read_json('results_to_dana.json')

print("DataFrame loaded from JSON:")
# Bare last expression so the notebook renders the frame; per the displayed
# output, article ids are the columns and 'reason' / 'tone' are the rows.
df

DataFrame loaded from JSON:


Unnamed: 0,3070785,3076424,3080155,3080247,3099021,3112370,3116030,3116023,3116476,3116887,...,3323908,3324336,3324222,3324568,3324564,3324310,3322793,3323970,3324650,3324593
reason,L'article té un to negatiu ja que descriu els ...,L'article té un to positiu ja que destaca l'es...,L'article té un to predominantment neutral. De...,"L'article té un to principalment neutral, ja q...","L'article té un to majoritàriament neutre, ja ...",The tone of the article is generally positive ...,"L'article té un to clarament negatiu, ja que d...",L'article té un to lleugerament negatiu perquè...,"L'article té un to més aviat neutral, però amb...","L'article té un to majoritàriament negatiu, ja...",...,L'article té un to majoritàriament negatiu ja ...,The article has a negative tone as it focuses ...,"L'article transmet un to positiu global, ja qu...",L'article té un to predominantment positiu mal...,"L'article té un to majoritàriament negatiu, ja...",L'article es presenta amb un to positiu ja que...,L'article té un to clarament negatiu. Es desta...,"L'article presenta un to negatiu, ja que se ce...","L'article té un to majoritàriament negatiu, re...","L'article té un to positiu, ja que descriu un ..."
tone,-4,3,0,0,0,3,-4,-1,1,-4,...,-4,-3,4,2,-4,4,-4,-4,-4,4


In [33]:
# Average the tone over all articles. The loaded frame has one column per
# article with 'reason' / 'tone' as rows, so it is transposed to one row per
# article. The guard keeps the cell idempotent: re-running it must not flip
# the frame back (which would make df['tone'] raise KeyError).
if 'tone' not in df.columns:
    df = df.T
df['tone'] = df['tone'].astype(int)
average_tone = df['tone'].mean()
print(f"Average tone: {average_tone}")

Average tone: -1.090692124105012


In [35]:
# Average tone over the articles at positions 100-199 (positional slice).
# NOTE(review): the label used to say "middle 200 articles", but this slice
# selects 100 rows; confirm which range was intended. The selection itself is
# unchanged; only the label now reports what is actually averaged.
df_middle = df.iloc[100:200]
average_tone_middle = df_middle['tone'].mean()
print(f"Average tone of the {len(df_middle)} articles at positions 100-199: {average_tone_middle}")

Average tone of the middle 200 articles: -0.82


In [36]:
# Average tone over the articles from position 350 to the end (positional slice).
# NOTE(review): the label used to say "last 100 articles", but the slice
# starts at a fixed offset, so the count depends on the frame length; confirm
# whether df.iloc[-100:] was intended. The selection itself is unchanged; only
# the label now reports the real number of rows averaged.
df_last = df.iloc[350:]
average_tone_last = df_last['tone'].mean()
print(f"Average tone of the last {len(df_last)} articles: {average_tone_last}")

Average tone of the last 100 articles: -0.391304347826087
