## Objective of notebook:

- To explore keyword search and retrieval of the relevant articles.
- To make the search faster, we likely need a mechanism that generates embeddings for the keywords or tags.
- Currently, I am using hierarchical clustering to cluster all the articles.

In [1]:
import ast
import json
import os

import numpy as np
import pandas as pd
import requests
import torch
import yaml
from dotenv import load_dotenv
from torch import nn
from tqdm import trange

# Read ../gradio_config.yaml and the API credentials once, before any other cell runs.
# TODO(review): the original author asked where this set-up should normally live
# (i.e. in which function) — still unresolved.
with open("../gradio_config.yaml", "r") as config_file:
    config = yaml.safe_load(config_file)
    
# Pull the Hugging Face key and the embedding endpoint URL from the environment
# (load_dotenv() is called first so values from a .env file are available).
load_dotenv()
hf_key = os.getenv('HUGGINGFACE_API_KEY')
dense_embedder_api = os.getenv("HF_API_URL")

In [2]:
# Explore the current data: these two JSON files are merged into a single list by combine_json.
files = ["../data/test_data/test.json", "../data/test_data/train.json"]
def combine_json(files):
    """Concatenate the JSON content of several files into one list.

    Args:
        files: iterable of paths; each file is opened as UTF-8 and parsed with json.load.

    Returns:
        A single list holding the items of every parsed file, in the order the
        files were given.
    """
    merged = []
    for path in files:
        with open(path, 'r', encoding='utf-8') as handle:
            # Each file's parsed content is appended item by item to the running list
            merged += json.load(handle)
    return merged
# db: every record from the files listed in `files`, concatenated in file order.
db = combine_json(files)

In [4]:
# dim=0 so that two 1-D embedding vectors are compared as whole vectors
cos_sim = nn.CosineSimilarity(dim=0)

def get_cosine_tags(test_article, train_article, limit):
    """Score two articles by the pairwise cosine similarity of their tag embeddings.

    Every embedding of ``test_article`` is compared with every embedding of
    ``train_article``; the similarities are sorted ascending and the first
    ``limit`` of them (i.e. the lowest ones) are summed.

    NOTE(review): the callers in this notebook rank articles by the highest
    score, so summing the *lowest* pairwise similarities may not be what was
    intended — confirm before relying on the ranking.

    Args:
        test_article: dict whose 'phrase_Bert_tags_embeddings' value is the
            string form of a list of embedding vectors.
        train_article: dict with the same key and format.
        limit: number of (lowest) similarities to add up.

    Returns:
        The sum as a Python float (0 when there are no pairs).

    Raises:
        ValueError / SyntaxError: if a stored embedding string is not a plain
            Python literal.
    """
    # SECURITY: the embedding strings come from the JSON data files, so they are
    # parsed as literals only; eval() would execute any expression stored there.
    embeddings_1_tensor = torch.tensor(ast.literal_eval(test_article['phrase_Bert_tags_embeddings']))
    embeddings_2_tensor = torch.tensor(ast.literal_eval(train_article['phrase_Bert_tags_embeddings']))

    # Cosine similarity for every (test embedding, train embedding) pair
    cosine_similarities = [
        cos_sim(emb1, emb2).item()
        for emb1 in embeddings_1_tensor
        for emb2 in embeddings_2_tensor
    ]

    # Ascending sort: the slice keeps the `limit` smallest similarities
    return sum(sorted(cosine_similarities)[:limit])

In [5]:
# st_ids of the articles that form the comparison set.
# NOTE(review): the naming is inverted relative to later use — the positions found
# for `train_ids` are stored in `test_indexes` (and later fill `train_db`), while
# `train_index` is the position of the single query article.
train_ids = ['st_1164990',
'st_1165295',
'st_1164227',
'st_1158236',
'st_1158485',
'st_1159581',
'st_1158779',
'st_1157082',
'st_1160289']
# st_id of the single query article
query_st_id = "st_1159793"

# One pass over db (instead of one scan per id): record every position of each st_id
positions_by_st_id = {}
for position, record in enumerate(db):
    positions_by_st_id.setdefault(record['st_id'], []).append(position)

# Same order as before: follow train_ids, and within one id follow db order
test_indexes = [
    position
    for st_id in train_ids
    for position in positions_by_st_id.get(st_id, [])
]

# Fail with a clear message instead of a NameError further down
if query_st_id not in positions_by_st_id:
    raise KeyError(f"st_id {query_st_id!r} not found in db")
# The last occurrence wins, matching the original scan
train_index = positions_by_st_id[query_st_id][-1]

print(train_index)
print(test_indexes)

1033
[578, 1932, 413, 1842, 1101, 45, 1097, 539, 1646]


In [6]:
# The query article, and the small comparison set picked out by position
test_article = db[train_index]
train_db = [db[index] for index in test_indexes]

In [7]:
# Score every article in db against the test article using get_cosine_tags
def get_similar_by_PBtags(test_article, db):
    """Rank the articles in ``db`` by their get_cosine_tags score against ``test_article``.

    Prints the test article's title and tags, then the title, tags and score of
    the ten best-scoring articles.

    Args:
        test_article: article dict; its 'Title' and 'tags' are printed and it is
            passed to get_cosine_tags.
        db: sequence of article dicts with 'st_id', 'Title' and 'tags'.

    Returns:
        The ten highest-scoring records (fewer if db is smaller), each a dict
        with 'id', 'Title', 'Tags' and 'cosine_score'.
    """
    print("Computing similarities...\n")
    scored = []
    for position in trange(len(db)):
        candidate = db[position]
        scored.append({
            'id': candidate['st_id'],
            'Title': candidate['Title'],
            'Tags': candidate['tags'],
            # 7 = number of pairwise similarities summed by get_cosine_tags
            'cosine_score': get_cosine_tags(test_article, candidate, 7),
        })

    print("Title of test article: " + test_article['Title'])
    print("Tags of test article: " + str(test_article['tags']) + "\n")

    # Highest score first; only the ten best are shown and returned
    top_ten = sorted(scored, key=lambda record: record['cosine_score'], reverse=True)[:10]
    for record in top_ten:
        print(record['Title'])
        print(record['Tags'])
        print(record['cosine_score'])
        print()
    return top_ten

def get_similar_by_MPtags(test_article, db):
    """Rank the articles in ``db`` by tag-embedding similarity to ``test_article``.

    NOTE(review): a later definition in this notebook reuses the name
    ``get_similar_by_MPtags`` with a (test_article, timeline_header, db)
    signature; when the cells run in order that definition replaces this one.

    Args:
        test_article: article dict; its 'Title' and 'tags' are printed and its
            'tags_embeddings' value is used as the query embedding.
        db: sequence of article dicts with 'st_id', 'Title' and 'tags'.

    Returns:
        The ten highest-scoring records (fewer if db is smaller), each a dict
        with 'id', 'Title', 'Tags' and 'cosine_score'.
    """
    print("Computing similarities...\n")
    # get_cosine_tags_mpnet wraps its first argument in torch.tensor, so it needs
    # the embedding itself; passing the whole article dict (as before) cannot work.
    # Assumes test_article stores 'tags_embeddings' in the same string form as the
    # db entries — TODO confirm.
    query_embed = ast.literal_eval(test_article['tags_embeddings'])

    by_tags_records = []
    for i in trange(len(db)):
        by_tags_records.append({
            'id': db[i]['st_id'],
            'Title': db[i]['Title'],
            'Tags': db[i]['tags'],
            'cosine_score': get_cosine_tags_mpnet(query_embed, db[i]),
        })

    print("Title of test article: " + test_article['Title'])
    print("Tags of test article: " + str(test_article['tags']) + "\n")

    # Highest score first; show and return the ten best
    by_tags_records.sort(key=lambda x: x['cosine_score'], reverse=True)
    for tag in by_tags_records[:10]:
        print(tag['Title'])
        print(tag['Tags'])
        print(tag['cosine_score'])
        print()
    return by_tags_records[:10]

def get_cosine_tags_mpnet(timeline_embed, train_article):
    """Cosine similarity between a query embedding and an article's tag embedding.

    Args:
        timeline_embed: the query embedding, in any form torch.tensor accepts.
        train_article: dict whose 'tags_embeddings' value is the string form of
            an embedding.

    Returns:
        The result of cos_sim on the two tensors (a tensor, not a float).

    Raises:
        ValueError / SyntaxError: if the stored embedding string is not a plain
            Python literal.
    """
    query_tensor = torch.tensor(timeline_embed)
    # SECURITY: the string comes from the data files, so parse it as a literal
    # only; eval() would execute any expression stored there.
    article_tensor = torch.tensor(ast.literal_eval(train_article['tags_embeddings']))
    return cos_sim(query_tensor, article_tensor)

def get_similar_by_MPtags(test_article, timeline_header,db):
    """Rank the articles in ``db`` by tag-embedding similarity to a timeline header.

    The header is embedded with dense_embed and compared with each article
    through get_cosine_tags_mpnet.

    Args:
        test_article: article dict; only its 'Title' and 'tags' are printed.
        timeline_header: text of the desired timeline, used as the query.
        db: sequence of article dicts with 'st_id', 'Title' and 'tags'.

    Returns:
        The ten highest-scoring records (fewer if db is smaller), each a dict
        with 'id', 'Title', 'Tags' and 'cosine_score'.
    """
    print("Computing similarities...\n")
    header_embedding = dense_embed(timeline_header)

    scored = []
    for position in trange(len(db)):
        candidate = db[position]
        scored.append({
            'id': candidate['st_id'],
            'Title': candidate['Title'],
            'Tags': candidate['tags'],
            'cosine_score': get_cosine_tags_mpnet(header_embedding, candidate),
        })

    print("Title of test article: " + test_article['Title'])
    print("Tags of test article: " + str(test_article['tags']) + "\n")

    # Highest score first; only the ten best are shown and returned
    top_ten = sorted(scored, key=lambda record: record['cosine_score'], reverse=True)[:10]
    for record in top_ten:
        print(record['Title'])
        print(record['Tags'])
        print(record['cosine_score'])
        print()
    return top_ten


In [93]:
def dense_embed(payload: str) -> list:
    """POST ``payload`` to the dense-embedding endpoint and return the decoded JSON reply.

    The endpoint URL (``dense_embedder_api``) and bearer token (``hf_key``) are
    the module-level values read from the environment.

    Args:
        payload: the text to embed; sent as the JSON body of the request.

    Returns:
        The decoded JSON body. Presumably a list of floats — callers pass it
        straight to torch.tensor; verify against the endpoint.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
    """
    response = requests.post(
        dense_embedder_api,
        headers={"Authorization": f"Bearer {hf_key}"},
        json=payload,
    )
    # Fail here with the HTTP status instead of handing an error payload to
    # torch.tensor further down the line.
    response.raise_for_status()
    return response.json()

def get_cosine_titles(timeline_embed, train_article):
    """Cosine similarity between a query embedding and an article's title embedding.

    Args:
        timeline_embed: the query embedding, in any form torch.tensor accepts.
        train_article: dict whose 'Title_embeddings' value is the string form of
            an embedding.

    Returns:
        The result of cos_sim on the two tensors (a tensor, not a float).

    Raises:
        ValueError / SyntaxError: if the stored embedding string is not a plain
            Python literal.
    """
    query_tensor = torch.tensor(timeline_embed)
    # SECURITY: the string comes from the data files, so parse it as a literal
    # only; eval() would execute any expression stored there.
    title_tensor = torch.tensor(ast.literal_eval(train_article['Title_embeddings']))
    return cos_sim(query_tensor, title_tensor)

def get_similar_by_titles(test_article, timeline_header, db):
    """Rank the articles in ``db`` by title-embedding similarity to a timeline header.

    The header is embedded with dense_embed and compared with each article
    through get_cosine_titles. The titles and scores of the twenty best matches
    are printed.

    Args:
        test_article: article dict; only its 'Title' is printed.
        timeline_header: text of the desired timeline, used as the query.
        db: sequence of article dicts with 'st_id', 'Title', 'Text',
            'Publication_date' and 'article_url'.

    Returns:
        The twenty highest-scoring records (fewer if db is smaller), each a dict
        with 'id', 'Title', 'Text', 'Date', 'Article_URL' and 'cosine_score'.
    """
    print("Computing similarities...\n")
    header_embedding = dense_embed(timeline_header)
    scored = []
    for position in trange(len(db)):
        candidate = db[position]
        scored.append({
            'id': candidate['st_id'],
            'Title': candidate['Title'],
            'Text': candidate['Text'],
            'Date': candidate['Publication_date'],
            'Article_URL': candidate['article_url'],
            'cosine_score': get_cosine_titles(header_embedding, candidate),
        })

    print("Title of test article: " + test_article['Title'])
    print(f"Below are the best articles that are closest to this desired timeline based on the titles: {timeline_header}\n")

    # Highest score first; only the twenty best are shown and returned
    top_twenty = sorted(scored, key=lambda record: record['cosine_score'], reverse=True)[:20]
    for record in top_twenty:
        print(record['Title'])
        print(record['cosine_score'])
        print()
    return top_twenty

def get_cosine_text(timeline_embed, train_article):
    """Cosine similarity between a query embedding and an article's text embedding.

    Args:
        timeline_embed: the query embedding, in any form torch.tensor accepts.
        train_article: dict whose 'embeddings' value is the string form of an
            embedding.

    Returns:
        The result of cos_sim on the two tensors (a tensor, not a float).

    Raises:
        ValueError / SyntaxError: if the stored embedding string is not a plain
            Python literal.
    """
    query_tensor = torch.tensor(timeline_embed)
    # SECURITY: the string comes from the data files, so parse it as a literal
    # only; eval() would execute any expression stored there.
    text_tensor = torch.tensor(ast.literal_eval(train_article['embeddings']))
    return cos_sim(query_tensor, text_tensor)

def get_similar_by_text(test_article, timeline_header, db):
    """Rank the articles in ``db`` by text-embedding similarity to a timeline header.

    The header is embedded with dense_embed and compared with each article
    through get_cosine_text. The titles and scores of the twenty best matches
    are printed.

    Args:
        test_article: article dict; only its 'Title' is printed.
        timeline_header: text of the desired timeline, used as the query.
        db: sequence of article dicts with 'st_id', 'Title', 'Text',
            'Publication_date' and 'article_url'.

    Returns:
        The twenty highest-scoring records (fewer if db is smaller), each a dict
        with 'id', 'Title', 'Text', 'Date', 'Article_URL' and 'cosine_score'.
    """
    print("Computing similarities...\n")
    header_embedding = dense_embed(timeline_header)
    scored = []
    for position in trange(len(db)):
        candidate = db[position]
        scored.append({
            'id': candidate['st_id'],
            'Title': candidate['Title'],
            'Text': candidate['Text'],
            'Date': candidate['Publication_date'],
            'Article_URL': candidate['article_url'],
            'cosine_score': get_cosine_text(header_embedding, candidate),
        })

    print("Title of test article: " + test_article['Title'])
    print(f"Below are the best articles that are closest to this desired timeline based on the texts: \n{timeline_header}\n")

    # Highest score first; only the twenty best are shown and returned
    top_twenty = sorted(scored, key=lambda record: record['cosine_score'], reverse=True)[:20]
    for record in top_twenty:
        print(record['Title'])
        print(record['cosine_score'])
        print()
    return top_twenty


# Desired timeline topic; used as the query text for both rankings below.
timeline_header = "Humanitarian Aid to palestinians"
# Rank the whole db twice against the header: once by article text, once by title.
similar_article_text = get_similar_by_text(test_article, timeline_header, db)
similar_articles_titles = get_similar_by_titles(test_article, timeline_header, db)

Computing similarities...



100%|██████████| 2007/2007 [00:01<00:00, 1032.44it/s]


Title of test article: Japan to provide $88 million in additional humanitarian aid to Palestinians
Below are the best articles that are closest to this desired timeline based on the texts: 
Humanitarian Aid to palestinians

The nightmare of delivering aid during this Israel-Hamas war
tensor(0.7969)

Palestinian Americans fundraise for Gaza, as aid groups receive record donations
tensor(0.7734)

Scaling up Gaza aid effort faces tangle of challenges
tensor(0.7285)

S’pore Red Cross gives $270k worth of relief aid to victims of Hamas-Israel war in Gaza
tensor(0.6987)

Japan to provide $88 million in additional humanitarian aid to Palestinians
tensor(0.6887)

Displaced Gazans live in dust, fear and hunger
tensor(0.6729)

As Israel bombards Gaza, bakeries run out of bread, water runs low
tensor(0.6697)

Gaza breakdown in order halts four aid distribution centres-UNWRA
tensor(0.6644)

Israel to allow some fuel into Gaza after US push
tensor(0.6570)

Israel PM Netanyahu rejects Gaza ceasefire

100%|██████████| 2007/2007 [00:01<00:00, 1121.18it/s]


Title of test article: Japan to provide $88 million in additional humanitarian aid to Palestinians
Below are the best articles that are closest to this desired timeline based on the titles: Humanitarian Aid to palestinians

The nightmare of delivering aid during this Israel-Hamas war
tensor(0.7704)

WHO says it needs urgent access to Gaza to deliver aid, medical supplies
tensor(0.6840)

Scaling up Gaza aid effort faces tangle of challenges
tensor(0.6631)

Singapore supports humanitarian aid, calls for civilian lives to be protected in Gaza
tensor(0.6582)

Gaza hospital for Palestinians’ medical needs, not Hamas operations: Indonesia
tensor(0.6368)

Japan to provide $88 million in additional humanitarian aid to Palestinians
tensor(0.6338)

Palestinian Americans fundraise for Gaza, as aid groups receive record donations
tensor(0.6186)

S’pore Red Cross gives $270k worth of relief aid to victims of Hamas-Israel war in Gaza
tensor(0.6092)

Displaced Gazans live in dust, fear and hunger
ten

In [94]:
def get_titles_str(similar_articles_titles, similar_article_text):
    """Interleave two ranked article lists and drop repeated titles.

    Items are taken alternately — text ranking first, then title ranking — at
    each rank, and an item is kept only the first time its 'Title' is seen. The
    two lists may differ in length; once the shorter one runs out, the rest of
    the longer one is still included.

    Args:
        similar_articles_titles: records ranked by title similarity (dicts with 'Title').
        similar_article_text: records ranked by text similarity (dicts with 'Title').

    Returns:
        A list of the original record dicts (not copies), de-duplicated by title.
    """
    seen_titles = set()
    unique_list = []
    rankings = (similar_article_text, similar_articles_titles)
    # Walk to the end of the longer list so no candidate is dropped and the
    # shorter list is never indexed out of range.
    for rank in range(max(len(ranking) for ranking in rankings)):
        for ranking in rankings:
            if rank >= len(ranking):
                continue
            item = ranking[rank]
            title = item["Title"]
            if title not in seen_titles:
                seen_titles.add(title)
                unique_list.append(item)

    return unique_list
# Merge the two rankings (repeated titles removed), then show the query and the merged candidates.
combined_titles = get_titles_str(similar_articles_titles, similar_article_text)
print(timeline_header)
combined_titles

Humanitarian Aid to palestinians


[{'id': 'st_1157276',
  'Title': 'The nightmare of delivering aid during this Israel-Hamas war',
  'Text': 'The 2.3 million people who live in Gaza are facing economic isolation and experiencing incessant bombardment. Their supplies of essential resources, including food and water, are quickly dwindling.In response, US President Joe Biden has pledged US$100 million (S$136 million) in humanitarian assistance for the citizens of Gaza.Mr Biden’s promise raises fundamental questions regarding the delivery of humanitarian aid in a war zone. Political constraints, ethical quandaries and the need to protect the security of aid workers and local communities always make the effort a logistical nightmare.In this specific predicament, US officials have to choose a strategy to deliver the aid without the perception of benefiting Hamas, a group the US and Israel both classify as a terrorist organisation.LogisticsWhen aiding people in war zones, you can’t just send money, a development strategy call

## Using a re-ranker model to re-rank these results

In [95]:
from langchain_groq import ChatGroq
# Load environment variables
# NOTE(review): this repeats the load_dotenv()/os.getenv set-up done in the first
# cell and reassigns the same two names; it is only needed if that cell was skipped.
load_dotenv()
hf_key = os.getenv('HUGGINGFACE_API_KEY')
dense_embedder_api = os.getenv("HF_API_URL")

In [96]:
from sentence_transformers import CrossEncoder
# Cross-encoder used for re-ranking: the "cross-encoder/ms-marco-TinyBERT-L-2-v2"
# checkpoint, run on CPU with max_length=512.
cross_encoder = CrossEncoder(
    "cross-encoder/ms-marco-TinyBERT-L-2-v2", max_length=512, device="cpu"
)



## Idea: using a re-ranker with the hybrid search
- The clustering could retrieve around 40 articles about a topic. However, not all of them are relevant.
- The re-ranker scores each retrieved article against the required timeline, and a threshold on that score is used to keep only the relevant ones.

In [97]:
# Show the query that the re-ranker will score the candidates against.
timeline_header

'Humanitarian Aid to palestinians'

In [105]:
# Pair the timeline header with each candidate's text; the cross-encoder scores every pair
unranked_docs = [(timeline_header, doc['Text']) for doc in combined_titles]
scores = cross_encoder.predict(unranked_docs).tolist()

# A candidate is kept only when its score is positive; the score is stored on the record itself
for candidate, score in zip(combined_titles, scores):
    if score > 0:
        candidate['reranked_score'] = score
combined_articles = list(filter(lambda candidate: 'reranked_score' in candidate, combined_titles))
combined_articles

[{'id': 'st_1157276',
  'Title': 'The nightmare of delivering aid during this Israel-Hamas war',
  'Text': 'The 2.3 million people who live in Gaza are facing economic isolation and experiencing incessant bombardment. Their supplies of essential resources, including food and water, are quickly dwindling.In response, US President Joe Biden has pledged US$100 million (S$136 million) in humanitarian assistance for the citizens of Gaza.Mr Biden’s promise raises fundamental questions regarding the delivery of humanitarian aid in a war zone. Political constraints, ethical quandaries and the need to protect the security of aid workers and local communities always make the effort a logistical nightmare.In this specific predicament, US officials have to choose a strategy to deliver the aid without the perception of benefiting Hamas, a group the US and Israel both classify as a terrorist organisation.LogisticsWhen aiding people in war zones, you can’t just send money, a development strategy call

In [106]:
# Best re-ranked score first, capped at twelve articles
sorted_articles = sorted(
    combined_articles,
    key=lambda article: article['reranked_score'],
    reverse=True,
)[:12]

In [139]:
# Groq to generate main event for each article
import re
import json
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import PromptTemplate
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate

# Model name and API keys for the LLM providers; the Gemini client is configured here.
# NOTE(review): chat_model and groq_api_key are not referenced by any code visible in
# this notebook — the event extraction below goes through genai (Gemini).
chat_model = "llama3-8b-8192"
load_dotenv()
groq_api_key = os.getenv('GROQ_API_KEY')
GEMINI_KEY = os.environ.get('GEMINI_KEY')
genai.configure(api_key=GEMINI_KEY)

In [153]:
def clean_llm_output(output):
    """Extract and parse the JSON object from an LLM response.

    Only the Markdown code-fence markers (with their optional ``json`` language
    tag) and a bare leading ``json`` label are removed; the word "json" inside
    the JSON values themselves is left intact.

    Args:
        output: response object whose ``parts[0].text`` holds the model's reply.

    Returns:
        The parsed JSON value.

    Raises:
        json.JSONDecodeError: if the remaining text is not valid JSON.
    """
    text = output.parts[0].text
    # Drop ``` fences together with the language tag that directly follows them
    text = re.sub(r"```(?:json)?", "", text)
    # Drop a stray "json" label at the very start of the reply
    text = re.sub(r"\A\s*json\b", "", text)
    return json.loads(text)
    
def groq_event(date, title, text):
    """Ask the LLM for an article's main event and the date it happened.

    Despite the name, the request goes to Gemini ('gemini-1.5-flash-latest')
    through google.generativeai, with all four harm categories set to BLOCK_NONE.
    The parsed reply is printed and returned.

    Args:
        date: publication date of the article, inserted into the prompt.
        title: article title, inserted into the prompt.
        text: article body, inserted into the prompt.

    Returns:
        The value produced by clean_llm_output for the model's reply; the prompt
        asks for the keys 'main_event' and 'event_date'.
    """
    class summarized_event(BaseModel):
        main_event: str = Field(description="Main event of the article")
        event_date: str = Field(description="Date which the main event occured in YYYY-MM-DD")

    # The parser only supplies the format instructions embedded in the prompt
    format_instructions = JsonOutputParser(pydantic_object=summarized_event).get_format_instructions()

    prompt_text = '''
You are a news article editor. Analyse the article deeply, and describe the main event of the article below in one short sentence.
Using this main event and the publication date, identify the date at when this main event occured.
You should use any time references such as "last week," "last month," or specific dates. 
If the article does not specify the exact date, save the date in the YYYY-MM-XX or YYYY-XX-XX format.
Do not provide any explanations for your answer.

Publication Date:
{date}
Article Title:
{title}
Article Text:
{text}

{format_instructions}
Before you return the answer, ensure and double check that you have adhered the answer format instructions strictly.
'''
    final_prompt = PromptTemplate(
        template=prompt_text,
        input_variables=["date", "title", "text"],
        partial_variables={"format_instructions": format_instructions},
    ).format(date=date, title=title, text=text)

    # Every harm category is set to BLOCK_NONE
    unblocked_categories = (
        HarmCategory.HARM_CATEGORY_HATE_SPEECH,
        HarmCategory.HARM_CATEGORY_HARASSMENT,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT,
    )
    safety_settings = {category: HarmBlockThreshold.BLOCK_NONE for category in unblocked_categories}

    llm = genai.GenerativeModel('gemini-1.5-flash-latest')
    response = llm.generate_content(final_prompt, safety_settings=safety_settings)

    cleaned_output = clean_llm_output(response)
    print(cleaned_output)
    return cleaned_output

In [156]:
# One LLM call per article; the extracted event and its date are stored on the article record
for position in trange(len(sorted_articles)):
    current = sorted_articles[position]
    article_text = current['Text']
    article_title = current['Title']
    article_date = current['Date']
    displayed_event = groq_event(article_date, article_title, article_text)
    current['Event'] = displayed_event['main_event']
    current['Event_date'] = displayed_event['event_date']

  8%|▊         | 1/12 [00:01<00:18,  1.68s/it]

{'main_event': 'The World Health Organization (WHO) requested urgent access to Gaza to deliver aid and medical supplies.', 'event_date': '2023-10-17'}


 17%|█▋        | 2/12 [00:02<00:12,  1.21s/it]

{'main_event': 'The heads of several major United Nations bodies made a united call for a humanitarian ceasefire in Gaza.', 'event_date': '2023-11-06'}


 25%|██▌       | 3/12 [00:03<00:09,  1.04s/it]

{'main_event': 'Japan announced an additional $88 million in humanitarian aid for Palestinians.', 'event_date': '2023-11-03'}


 33%|███▎      | 4/12 [00:04<00:08,  1.11s/it]

{'main_event': 'Hamas militants launched an attack on Israel, killing 1,400 people and taking 229 hostages.', 'event_date': '2023-10-07'}


 42%|████▏     | 5/12 [00:06<00:10,  1.45s/it]

{'main_event': 'US President Joe Biden pledged US$100 million in humanitarian assistance for Gaza.', 'event_date': '2023-10'}


 50%|█████     | 6/12 [00:07<00:08,  1.34s/it]

{'main_event': 'Aid began flowing into Gaza through the Rafah crossing from Egypt on October 21st.', 'event_date': '2023-10-21'}


 58%|█████▊    | 7/12 [00:09<00:06,  1.37s/it]

{'main_event': 'Israeli Prime Minister Benjamin Netanyahu rejected a ceasefire in the ongoing conflict with Hamas, stating that it would be equivalent to surrendering to the group.', 'event_date': '2023-10-30'}


 67%|██████▋   | 8/12 [00:10<00:05,  1.26s/it]

{'main_event': 'The Singapore Red Cross announced it will send US$200,000 worth of relief supplies to Gaza to help victims of the Hamas-Israel war.', 'event_date': '2023-10-24'}


 75%|███████▌  | 9/12 [00:12<00:04,  1.50s/it]

{'main_event': 'Singapore supported a UN resolution calling for a humanitarian truce in Gaza.', 'event_date': '2023-10-27'}


 83%|████████▎ | 10/12 [00:13<00:02,  1.41s/it]

{'main_event': 'UN aid deliveries to Gaza were suspended again due to shortages of fuel and a communications shutdown.', 'event_date': '2023-11-17'}


 92%|█████████▏| 11/12 [00:14<00:01,  1.25s/it]

{'main_event': "The UN human rights chief condemned allegations of mass killings and executions by Palestinian armed groups and warned against Israel’s 'complete siege' of Gaza.", 'event_date': '2023-10-10'}


100%|██████████| 12/12 [00:15<00:00,  1.27s/it]

{'main_event': 'Israel agreed to allow 140,000 liters of fuel into Gaza every two days.', 'event_date': '2023-11-17'}





In [157]:
# Inspect the articles now that 'Event' and 'Event_date' have been added.
sorted_articles

[{'id': 'st_1155620',
  'Title': 'WHO says it needs urgent access to Gaza to deliver aid, medical supplies',
  'Text': 'LONDON  -     The World Health Organization said on Tuesday it needs urgent access to Gaza to deliver aid and medical supplies, as the UN agency warned of a humanitarian crisis in the Israeli-occupied Palestinian enclave.Speaking to media in a briefing, Dr Richard Brennan, regional emergency director of the WHO\'s Eastern Mediterranean regional office, said the WHO was meeting with "decision-makers" on Tuesday to open access to Gaza as soon as possible.Dr Richard Peeperkorn, WHO Representative in the occupied Palestinian territories, said 2,800 people have died and 11,000 injured in Gaza since Israeli air strikes started. About half of them were women and children. REUTERS',
  'Date': '2023-10-17',
  'Article_URL': 'https://www.straitstimes.com/asia/who-says-it-needs-urgent-access-to-gaza-to-deliver-aid-medical-supplies',
  'cosine_score': tensor(0.6840),
  'reranked_

In [162]:
# Drop the tensor-valued cosine scores (presumably so the records can be written
# out as JSON). The default makes the cell safe to re-run and safe on records
# that never had, or no longer have, the key.
for article in sorted_articles:
    article.pop("cosine_score", None)
sorted_articles

[{'id': 'st_1155620',
  'Title': 'WHO says it needs urgent access to Gaza to deliver aid, medical supplies',
  'Text': 'LONDON  -     The World Health Organization said on Tuesday it needs urgent access to Gaza to deliver aid and medical supplies, as the UN agency warned of a humanitarian crisis in the Israeli-occupied Palestinian enclave.Speaking to media in a briefing, Dr Richard Brennan, regional emergency director of the WHO\'s Eastern Mediterranean regional office, said the WHO was meeting with "decision-makers" on Tuesday to open access to Gaza as soon as possible.Dr Richard Peeperkorn, WHO Representative in the occupied Palestinian territories, said 2,800 people have died and 11,000 injured in Gaza since Israeli air strikes started. About half of them were women and children. REUTERS',
  'Date': '2023-10-17',
  'Article_URL': 'https://www.straitstimes.com/asia/who-says-it-needs-urgent-access-to-gaza-to-deliver-aid-medical-supplies',
  'reranked_score': 7.182124614715576,
  'Even

In [163]:
# Persist the enriched articles as pretty-printed UTF-8 JSON (non-ASCII characters kept as-is)
with open("../data_upload/hybrid_events.json", "w", encoding="utf-8") as fout:
    fout.write(json.dumps(sorted_articles, indent=4, ensure_ascii=False))

In [9]:
# Reload the saved articles so the following cells can run without redoing the search and LLM calls
with open("../data_upload/hybrid_events.json", "r", encoding="utf-8") as fin:
    sorted_articles = json.loads(fin.read())

In [10]:
# Finding similar events
events = [event['Event'] for event in sorted_articles]    
events

['The World Health Organization (WHO) requested urgent access to Gaza to deliver aid and medical supplies.',
 'The heads of several major United Nations bodies made a united call for a humanitarian ceasefire in Gaza.',
 'Japan announced an additional $88 million in humanitarian aid for Palestinians.',
 'Hamas militants launched an attack on Israel, killing 1,400 people and taking 229 hostages.',
 'US President Joe Biden pledged US$100 million in humanitarian assistance for Gaza.',
 'Aid began flowing into Gaza through the Rafah crossing from Egypt on October 21st.',
 'Israeli Prime Minister Benjamin Netanyahu rejected a ceasefire in the ongoing conflict with Hamas, stating that it would be equivalent to surrendering to the group.',
 'The Singapore Red Cross announced it will send US$200,000 worth of relief supplies to Gaza to help victims of the Hamas-Israel war.',
 'Singapore supported a UN resolution calling for a humanitarian truce in Gaza.',
 'UN aid deliveries to Gaza were suspend

In [None]:
# Code for processing articles with similar events

In [11]:
# Inspect the articles reloaded from hybrid_events.json.
sorted_articles

[{'id': 'st_1155620',
  'Title': 'WHO says it needs urgent access to Gaza to deliver aid, medical supplies',
  'Text': 'LONDON  -     The World Health Organization said on Tuesday it needs urgent access to Gaza to deliver aid and medical supplies, as the UN agency warned of a humanitarian crisis in the Israeli-occupied Palestinian enclave.Speaking to media in a briefing, Dr Richard Brennan, regional emergency director of the WHO\'s Eastern Mediterranean regional office, said the WHO was meeting with "decision-makers" on Tuesday to open access to Gaza as soon as possible.Dr Richard Peeperkorn, WHO Representative in the occupied Palestinian territories, said 2,800 people have died and 11,000 injured in Gaza since Israeli air strikes started. About half of them were women and children. REUTERS',
  'Date': '2023-10-17',
  'Article_URL': 'https://www.straitstimes.com/asia/who-says-it-needs-urgent-access-to-gaza-to-deliver-aid-medical-supplies',
  'reranked_score': 7.182124614715576,
  'Even

In [12]:
from datetime import datetime

def format_timeline_date(date_str):
    """Turn an ISO-like date string into a display string for the timeline.

    Supported inputs and their outputs:
        'YYYY'        -> 'YYYY'
        'YYYY-MM'     -> 'Month YYYY'
        'YYYY-MM-DD'  -> 'DD Month YYYY'

    The event-extraction prompt in this notebook tells the LLM to write unknown
    parts as 'XX' ('YYYY-MM-XX', 'YYYY-XX-XX'); trailing '-XX' placeholders are
    therefore dropped before parsing so those dates are formatted at the
    precision that is actually known.

    Args:
        date_str: the date string to format.

    Returns:
        The formatted date, or None when the input is not a string or matches
        none of the supported layouts.
    """
    if not isinstance(date_str, str):
        return None

    cleaned = date_str.strip()
    # 'YYYY-MM-XX' -> 'YYYY-MM', 'YYYY-XX-XX' -> 'YYYY'
    while cleaned.upper().endswith('-XX'):
        cleaned = cleaned[:-3]

    # (layout to parse, layout to display), tried in this order
    layouts = (
        ('%Y', '%Y'),
        ('%Y-%m-%d', '%d %B %Y'),
        ('%Y-%m', '%B %Y'),
    )
    for parse_fmt, display_fmt in layouts:
        try:
            return datetime.strptime(cleaned, parse_fmt).strftime(display_fmt)
        except ValueError:
            continue
    return None

# One timeline row per article, ordered by the raw event-date string, then
# re-labelled with the human-readable date
sorted_events = [
    {"Event": article['Event'], "Date": article['Event_date'], "Article_URL": article['Article_URL']}
    for article in sorted_articles
]
sorted_events.sort(key=lambda row: row['Date'])
for event in sorted_events:
    event['Date'] = format_timeline_date(event['Date'])

[{'Event': 'US President Joe Biden pledged US$100 million in humanitarian assistance for Gaza.',
  'Date': '2023-10',
  'Article_URL': 'https://www.straitstimes.com/opinion/the-nightmare-of-delivering-aid-during-this-israel-hamas-war'},
 {'Event': 'Hamas militants launched an attack on Israel, killing 1,400 people and taking 229 hostages.',
  'Date': '2023-10-07',
  'Article_URL': 'https://www.straitstimes.com/world/middle-east/palestinian-americans-fundraise-for-gaza-as-aid-groups-receive-record-donations'},
 {'Event': "The UN human rights chief condemned allegations of mass killings and executions by Palestinian armed groups and warned against Israel’s 'complete siege' of Gaza.",
  'Date': '2023-10-10',
  'Article_URL': 'https://www.straitstimes.com/world/middle-east/who-calls-for-humanitarian-corridor-into-gaza-as-war-displaces-over-200000'},
 {'Event': 'The World Health Organization (WHO) requested urgent access to Gaza to deliver aid and medical supplies.',
  'Date': '2023-10-17',

In [14]:
# Show the timeline rows with their formatted dates.
sorted_events

[{'Event': 'US President Joe Biden pledged US$100 million in humanitarian assistance for Gaza.',
  'Date': 'October 2023',
  'Article_URL': 'https://www.straitstimes.com/opinion/the-nightmare-of-delivering-aid-during-this-israel-hamas-war'},
 {'Event': 'Hamas militants launched an attack on Israel, killing 1,400 people and taking 229 hostages.',
  'Date': '07 October 2023',
  'Article_URL': 'https://www.straitstimes.com/world/middle-east/palestinian-americans-fundraise-for-gaza-as-aid-groups-receive-record-donations'},
 {'Event': "The UN human rights chief condemned allegations of mass killings and executions by Palestinian armed groups and warned against Israel’s 'complete siege' of Gaza.",
  'Date': '10 October 2023',
  'Article_URL': 'https://www.straitstimes.com/world/middle-east/who-calls-for-humanitarian-corridor-into-gaza-as-war-displaces-over-200000'},
 {'Event': 'The World Health Organization (WHO) requested urgent access to Gaza to deliver aid and medical supplies.',
  'Date'