In [1]:
import pandas as pd
import google.generativeai as genai
import time
import os
import json
from dotenv import load_dotenv


# Read settings from a .env file (if one is present) before looking up the key.
load_dotenv()

# Credential and input location shared by the cells below.
API_KEY = os.environ.get("GEMINI_API_KEY")
CSV_FILE_PATH = 'ai_news_processed.csv'

# Stop immediately when the key is absent or empty.
if not API_KEY:
    raise ValueError("API key not found.")

# Hand the key to the SDK, then create the model handle used for every request.
genai.configure(api_key=API_KEY)
model = genai.GenerativeModel('models/gemini-2.5-flash-lite')

In [2]:
def _coerce_score(raw_score) -> float:
    """Turn a model-supplied score into a float in [-1.0, 1.0]; unusable values become 0.0."""
    try:
        score = float(raw_score)
    except (TypeError, ValueError, OverflowError):
        return 0.0
    if score != score:  # NaN is the only float that is not equal to itself
        return 0.0
    return max(-1.0, min(1.0, score))


def _parse_sentiment_response(raw_text: str) -> list:
    """Extract a list of (label, score) tuples from the model's raw text reply."""
    # Slice from the first '[' to the last ']' so Markdown fences or stray prose
    # around the JSON array do not break parsing.
    start = raw_text.find("[")
    end = raw_text.rfind("]")
    if start == -1 or end < start:
        raise ValueError("no JSON array found in the model response")

    # list of dictionaries, e.g., [{"label": "Positive", "score": 0.8}, ...]
    parsed_json = json.loads(raw_text[start:end + 1])

    results = []
    for item in parsed_json:
        if isinstance(item, dict):
            results.append((item.get("label", "Error"), _coerce_score(item.get("score", 0.0))))
        else:
            # A malformed element only invalidates its own slot, not the whole batch.
            results.append(("Error", 0.0))
    return results


def get_sentiment_data_in_batch(headlines: list) -> list:
    """Score the sentiment of several headlines with a single model request.

    Uses the module-level ``model`` object to send one prompt containing all
    headlines and parses the JSON array it is asked to return.

    Args:
        headlines: Headline texts to analyse. Each entry is converted with
            ``str()`` and has its whitespace collapsed so it occupies exactly
            one numbered line in the prompt.

    Returns:
        A list of ``(label, score)`` tuples, one per headline and in the same
        order. ``score`` is a float clamped to [-1.0, 1.0]. An empty input
        returns ``[]`` without calling the model. If the request fails, the
        reply cannot be parsed, or the number of results does not match the
        number of headlines, every entry is ``("Error", 0.0)``.
    """
    if len(headlines) == 0:
        return []

    fallback = [("Error", 0.0)] * len(headlines)

    # Embedded newlines would split one headline across several prompt lines
    # and throw off the count check below.
    single_line_headlines = [" ".join(str(headline).split()) for headline in headlines]
    numbered_headlines = "\n".join(
        f"{i+1}. {headline}" for i, headline in enumerate(single_line_headlines)
    )

    prompt = f"""
    Analyze the sentiment of each news headline below. For each headline, provide:
    1. A 'label' ('Positive', 'Negative', or 'Neutral').
    2. A 'score', which is a float value between -1.00 (most negative) and 1.00 (most positive), where 0.00 is neutral, all values rounded to exactly two decimal places.

    Provide the output as a single, valid JSON array of objects. Each object must have a "label" key and a "score" key.

    Headlines:
    {numbered_headlines}

    JSON Array of Objects:
    """

    try:
        response = model.generate_content(prompt)
        results = _parse_sentiment_response(response.text)
    except Exception as e:
        # Deliberate best-effort: one failed batch must not abort the whole run.
        print(f"An error occurred during API call or JSON parsing: {e}")
        return fallback

    if len(results) != len(headlines):
        print("Warning: Model returned a different number of sentiments than expected.")
        return fallback
    return results

In [8]:
# Load the dataset.
# Failures raise instead of calling exit(): inside a notebook exit() asks the
# kernel to shut down, which hides the message and discards all state.
try:
    df = pd.read_csv(CSV_FILE_PATH)
except FileNotFoundError:
    print(f"The file '{CSV_FILE_PATH}' was not found.")
    raise

if 'processed_text' not in df.columns:
    raise ValueError("Error: The CSV file must contain a 'processed_text' column.")

# batch processing to reduce number of api calls
all_labels = []
all_scores = []
batch_size = 10
pause_seconds = 5  # delay between requests; presumably for API rate limits — TODO confirm

# Missing text would otherwise reach the model as the literal string "nan".
headlines_to_process = df['processed_text'].fillna('').astype(str).tolist()
total_batches = (len(headlines_to_process) + batch_size - 1) // batch_size

for batch_number, i in enumerate(range(0, len(headlines_to_process), batch_size), start=1):
    batch = headlines_to_process[i:i+batch_size]
    print(f"Processing batch {batch_number}/{total_batches}...")

    # Get a list of (label, score) tuples for the current batch
    results_for_batch = get_sentiment_data_in_batch(batch)

    # Unpack the tuples into their respective lists for adding into csv
    for label, score in results_for_batch:
        all_labels.append(label)
        all_scores.append(score)

    # No need to wait once the final batch has been processed.
    if batch_number < total_batches:
        time.sleep(pause_seconds)

# Surface failed rows instead of letting them blend silently into the results.
failed_count = all_labels.count("Error")
if failed_count:
    print(f"Warning: {failed_count} of {len(all_labels)} headlines could not be scored and are marked 'Error'.")

# add generated sentiment columns to the DataFrame
df['sentiment_score'] = all_scores
df['sentiment'] = all_labels

print("\n--- Sentiment Analysis Results ---")
print(df[['processed_text', 'sentiment', 'sentiment_score']].head())  # Show the new columns
print("------------------------------------")



Processing batch 1/30...
Processing batch 2/30...
Processing batch 3/30...
Processing batch 4/30...
Processing batch 5/30...
Processing batch 6/30...
Processing batch 7/30...
Processing batch 8/30...
Processing batch 9/30...
Processing batch 10/30...
Processing batch 11/30...
Processing batch 12/30...
Processing batch 13/30...
Processing batch 14/30...
Processing batch 15/30...
Processing batch 16/30...
Processing batch 17/30...
Processing batch 18/30...
Processing batch 19/30...
Processing batch 20/30...
Processing batch 21/30...
Processing batch 22/30...
Processing batch 23/30...
Processing batch 24/30...
Processing batch 25/30...
Processing batch 26/30...
Processing batch 27/30...
Processing batch 28/30...
Processing batch 29/30...
Processing batch 30/30...

--- Sentiment Analysis Results ---
                                      processed_text sentiment  \
0  aiedgelitertnightly dev litert mobile embedded...   Neutral   
1  hcltech q review ai led growth drive record wi...  Positiv

In [9]:
# Preview the first five rows of the frame, including the sentiment columns.
df.head(n=5)

Unnamed: 0,source,title,description,url,publishedAt,full_text,processed_text,sentiment_score,sentiment
0,"{'id': None, 'name': 'Pypi.org'}",ai-edge-litert-nightly 2.0.4.dev20251013,LiteRT is for mobile and embedded devices.,https://pypi.org/project/ai-edge-litert-nightl...,2025-10-14 11:43:13,ai-edge-litert-nightly 2.0.4.dev20251013 LiteR...,aiedgelitertnightly dev litert mobile embedded...,0.0,Neutral
1,"{'id': None, 'name': 'Ndtvprofit.com'}",HCLTech Q2 Review: AI Led Growth Drives Record...,HCLTech Ltd. delivered a resilient Q2 FY26 per...,https://www.ndtvprofit.com/research-reports/hc...,2025-10-14 11:43:11,HCLTech Q2 Review: AI Led Growth Drives Record...,hcltech q review ai led growth drive record wi...,0.75,Positive
2,"{'id': None, 'name': 'Independent.ie'}",The Big Tech Show: How to use Chat GPT for tot...,What are the best tips for beginners using AI ...,https://www.independent.ie/podcasts/the-big-te...,2025-10-14 11:43:03,The Big Tech Show: How to use Chat GPT for tot...,big tech show use chat gpt total beginner best...,-0.5,Negative
3,"{'id': None, 'name': 'Realstorygroup.com'}",Time to Reconsider Your CMS Platform?,I’m very excited on your behalf about the next...,https://www.realstorygroup.com/Blog/time-recon...,2025-10-14 11:41:11,Time to Reconsider Your CMS Platform? I’m very...,time reconsider cm platform im excited behalf ...,0.6,Positive
4,"{'id': None, 'name': 'Yahoo Entertainment'}",Google says to invest $15 billion in AI data c...,,https://consent.yahoo.com/v2/collectConsent?se...,2025-10-14 11:40:41,Google says to invest $15 billion in AI data c...,google say invest billion ai data centre capac...,0.55,Positive


In [10]:
# Write the annotated DataFrame to a new CSV file, leaving out the index column.
output_csv_path = 'ai_news_with_sentiment_scores.csv'
df.to_csv(path_or_buf=output_csv_path, index=False)
print(f"\n✅ Successfully saved the results to '{output_csv_path}'")


✅ Successfully saved the results to 'ai_news_with_sentiment_scores.csv'
