In [83]:
# Import some libraries

import requests
import json
import pandas as pd
from datetime import datetime
import re
import nltk

from nltk.sentiment.vader import SentimentIntensityAnalyzer
nltk.download('vader_lexicon')

[nltk_data] Downloading package vader_lexicon to
[nltk_data]     C:\Users\c24085394\AppData\Roaming\nltk_data...
[nltk_data]   Package vader_lexicon is already up-to-date!


True

In [84]:
# API endpoint from the newly deployed service

API_URL = "https://zfgp45ih7i.execute-api.eu-west-1.amazonaws.com/sandbox/api/search"
API_KEY = "STU38746G38B7RB46GBER"


headers = {
    "Content-Type": "application/json",
    "x-api-key": API_KEY
}

query_text = "gaza"
payload = {
    "query_text": query_text,
    "result_size": 100,  # One big request
    "include_highlights": True,
    "include_smart_tags": False
}

try:
    response = requests.post(API_URL, headers=headers, data=json.dumps(payload), timeout=15)
    response.raise_for_status()
    json_response = response.json()
except Exception as e:
    print(f"❌ Request failed: {e}")
    json_response = None

if not json_response or 'results' not in json_response:
    raise ValueError("❌ No results returned from the API.")



In [86]:
df = pd.json_normalize(json_response['results'])
df['published_date'] = pd.to_datetime(df['timestamp'], errors='coerce').dt.date



In [87]:
## Clean data

def clean_text(text):
    """
    - Convert to lowercase
    - Remove URLs
    - Remove punctuation / non-alpha
    - Collapse multiple spaces
    """
    if not isinstance(text, str):
        return ""
    # Remove URLs (very basic)
    text = re.sub(r"http\S+|www\.\S+", "", text)
    # Lowercase
    text = text.lower()
    # Keep only letters and spaces
    text = re.sub(r"[^a-z\s]", " ", text)
    # Collapse multiple spaces
    text = re.sub(r"\s+", " ", text).strip()
    return text


df['clean_summary'] = df['summary'].apply(clean_text)


In [88]:
## Sentiment analysis example

sia = SentimentIntensityAnalyzer()

def get_sentiment_scores(text):
    """
    Returns a dict with these keys:
       - neg: negative sentiment score
       - neu: neutral score
       - pos: positive score
       - compound: normalized, weighted composite (-1 to +1)
    """
    return sia.polarity_scores(text)



df['clean_summary'] = df['summary'].apply(clean_text)
df['sentiment'] = df['clean_summary'].apply(get_sentiment_scores)
df['sent_compound'] = df['sentiment'].apply(lambda d: d['compound'])


daily_best = df.groupby('published_date').apply(lambda g: g.loc[g['sent_compound'].idxmax()]).reset_index(drop=True)
daily_worst = df.groupby('published_date').apply(lambda g: g.loc[g['sent_compound'].idxmin()]).reset_index(drop=True)


  daily_best = df.groupby('published_date').apply(lambda g: g.loc[g['sent_compound'].idxmax()]).reset_index(drop=True)
  daily_worst = df.groupby('published_date').apply(lambda g: g.loc[g['sent_compound'].idxmin()]).reset_index(drop=True)


In [89]:
# Find index of the most positive (max compound) and most negative (min compound) summaries
max_idx = df['sent_compound'].idxmax()
min_idx = df['sent_compound'].idxmin()

# Retrieve the scores
max_score = df.loc[max_idx, 'sent_compound']
min_score = df.loc[min_idx, 'sent_compound']

# Print the full clean summaries along with their sentiment scores
print("Most positive summary (compound = {:.3f}):\n".format(max_score))
print(df.loc[max_idx, 'clean_summary'])


print("\n\nMost negative summary (compound = {:.3f}):\n".format(min_score))
print(df.loc[min_idx, 'clean_summary'])

Most positive summary (compound = 0.875):

palestinian rihan sherab is preparing a different holiday in the southern gaza strip in the midst of ongoing israeli attacks and shelling serab and his family found shelter in a tent in the mewasi area where he produces toys for sheep specially for eid al fitr every toy is a symbol of craftsmanship and love adding to the holiday cheer for children


Most negative summary (compound = -0.986):

the deadly attacks on desperate civilians trying to access the scarce amount of food aid in gaza are unacceptable says high commissioner volker t rk attacks directed against civilians constitute a grave breach of international law a war crime he says israel kills palestinians the gaza ministry of health reported that palestinian people were killed in the early hours of tuesday when israeli forces fired on a group of people moving to an aid distribution site in the southern gaza strip


In [90]:
daily_top = df.groupby('published_date').apply(lambda g: g.loc[g['sent_compound'].idxmax()]).reset_index(drop=True)

  daily_top = df.groupby('published_date').apply(lambda g: g.loc[g['sent_compound'].idxmax()]).reset_index(drop=True)


In [91]:
print("\n📅 Best & Worst News Articles Each Day:\n")

for date in sorted(df['published_date'].dropna().unique()):
    print(f"\n📅 Date: {date}")
    
    best = daily_best[daily_best['published_date'] == date].iloc[0]
    print(f"\n✅ Most Positive Article:")
    print(f"🔹 Title: {best['title']}")
    print(f"📝 Summary: {best['summary'][:300]}{'...' if len(best['summary']) > 300 else ''}")
    print(f"🙂 Sentiment Score: {round(best['sent_compound'], 3)}")
    print(f"🔗 URL: {best['url']}")

    worst = daily_worst[daily_worst['published_date'] == date].iloc[0]
    print(f"\n❌ Most Negative Article:")
    print(f"🔹 Title: {worst['title']}")
    print(f"📝 Summary: {worst['summary'][:300]}{'...' if len(worst['summary']) > 300 else ''}")
    print(f"🙁 Sentiment Score: {round(worst['sent_compound'], 3)}")
    print(f"🔗 URL: {worst['url']}")
    
    print("\n" + "=" * 80 + "\n")


📅 Best & Worst News Articles Each Day:


📅 Date: 2025-06-03

✅ Most Positive Article:
🔹 Title: Gaza rallies in Hamburg: Unusual alliances
📝 Summary: The Left Party, the Green Youth and the Jusos are joining the Council of Islamic Communities in Hamburg (Schura) The situation in Gaza continued to escalate, which is why it was now necessary to “also draw attention to it in the middle of society” Fatih Yildiz, Chairman of Schura Hamburg, said.
🙂 Sentiment Score: 0.402
🔗 URL: https://taz.de/Kundgebungen-in-Hamburg/!6088456/

❌ Most Negative Article:
🔹 Title: Israel Continues Attacks on Gaza
📝 Summary: Israeli military strikes in different parts of the Gaza Strip have killed 35 Palestinians, including children. Seven Palestinians were killed and many others were injured in an Israeli attack on a camp of displaced persons. Six Palestinians have been killed in air strikes in eastern Gaza and the tow...
🙁 Sentiment Score: -0.976
🔗 URL: https://www.haberaktuel.com/israil-in-gazze-ye-yonelik-sa