## Objective of notebook:

- To explore the keyword search and retrieval for the relevant articles.
- To make the search faster, we will likely need a mechanism that generates embeddings for the keywords or tags.
- Currently, I am using hierarchical clustering to cluster all the articles.

In [1]:
import json
import yaml
import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import torch
import requests
from torch import nn
from tqdm import trange
from pymongo import MongoClient


# Load the YAML configuration once for the whole notebook.
# TODO (author's open question): decide which function/module should own this setup.
with open("../gradio_config.yaml", "r") as config_file:
    config = yaml.safe_load(config_file)

# Load environment variables via python-dotenv, then read the Hugging Face API
# key and the URL that dense_embed() posts to.
load_dotenv()
hf_key = os.getenv('HUGGINGFACE_API_KEY')
dense_embedder_api = os.getenv("HF_API_URL")

# Initialise mongo client.
mongo_client = MongoClient(config["database"]["uri"])

In [2]:
# Explore the current data
# Paths of the JSON files that combine_json() merges into one list.
files = ["../data/test_data/test.json", "../data/test_data/train.json"]
def combine_json(files):
    """Concatenate the JSON arrays stored in ``files`` into a single list.

    Each path is opened as UTF-8 text, parsed with ``json.load`` and its items
    are appended to the result in the order the files are given.
    """
    merged = []
    for path in files:
        with open(path, 'r', encoding='utf-8') as handle:
            merged += json.load(handle)
    return merged
# In-memory list of every record from the files above; later cells index it by position.
db = combine_json(files)

In [3]:
# Cosine similarity along dim 0, i.e. between two 1-D vectors.
cos_sim = nn.CosineSimilarity(dim=0)


def get_cosine_tags(test_article, train_article, limit):
    """Sum the ``limit`` smallest pairwise cosine similarities between tag embeddings.

    Args:
        test_article: mapping with a ``'phrase_Bert_tags_embeddings'`` entry, either
            a stringified Python list of vectors or an already-parsed sequence.
        train_article: mapping with the same entry.
        limit: how many of the smallest similarities to add up.

    Returns:
        float: sum of the ``limit`` lowest cosine similarities over every
        (test tag, train tag) pair.

    NOTE(review): the ascending sort keeps the *least* similar pairs; confirm
    that this, rather than the most similar pairs, is what is intended.
    """
    # literal_eval only accepts Python literals, so a crafted string in the data
    # files cannot execute code the way eval() would.
    import ast

    def _load(raw):
        return ast.literal_eval(raw) if isinstance(raw, str) else raw

    # Convert lists to tensors
    embeddings_1_tensor = torch.tensor(_load(test_article['phrase_Bert_tags_embeddings']))
    embeddings_2_tensor = torch.tensor(_load(train_article['phrase_Bert_tags_embeddings']))

    # Cosine similarity for every pair of tag embeddings
    cosine_similarities = [
        cos_sim(emb1, emb2).item()
        for emb1 in embeddings_1_tensor
        for emb2 in embeddings_2_tensor
    ]

    # Ascending sort, then keep the `limit` smallest values
    return sum(sorted(cosine_similarities)[:limit])

In [4]:
# st_ids of the articles whose positions in `db` are collected into test_indexes.
# NOTE: the train/test names are crossed relative to their later use: the next
# cell takes db[train_index] as `test_article` and db[test_indexes] as `train_db`.
train_ids = ['st_1164990',
'st_1165295',
'st_1164227',
'st_1158236',
'st_1158485',
'st_1159581',
'st_1158779',
'st_1157082',
'st_1160289']
target_st_id = "st_1159793"

# Index `db` by st_id in one pass instead of rescanning it for every id.
positions_by_id = {}
for position, article in enumerate(db):
    positions_by_id.setdefault(article['st_id'], []).append(position)

# Same ordering as before: ids in train_ids order, positions ascending per id.
test_indexes = [
    position
    for st_id in train_ids
    for position in positions_by_id.get(st_id, [])
]

if target_st_id not in positions_by_id:
    # Previously this surfaced as a NameError on `train_index` further down.
    raise KeyError(f"Article {target_st_id} was not found in db")
# Last matching position, which is what the original scan ended up with.
train_index = positions_by_id[target_st_id][-1]

print(train_index)
print(test_indexes)

1033
[578, 1932, 413, 1842, 1101, 45, 1097, 539, 1646]


In [5]:
# The single article looked up via train_index is the one the timeline is saved under.
test_article = db[train_index]
# The articles found via test_indexes, in the same order as the indexes.
train_db = [db[position] for position in test_indexes]

In [7]:
def dense_embed(payload: str) -> list:
    """POST ``payload`` to the dense-embedding endpoint and return the parsed JSON reply.

    Args:
        payload: value sent as the JSON request body (the notebook passes the
            timeline header string).

    Returns:
        The decoded JSON response. Presumably a flat list of floats, since the
        callers hand it straight to ``torch.tensor`` — TODO confirm against the endpoint.

    Raises:
        requests.HTTPError: if the endpoint answers with a 4xx/5xx status.
        requests.Timeout: if no response arrives within the timeout.
    """
    response = requests.post(
        dense_embedder_api,
        headers={"Authorization": f"Bearer {hf_key}"},
        json=payload,
        timeout=60,  # without a timeout a stalled endpoint hangs the kernel forever
    )
    # Fail here with the HTTP status rather than later when an error body is
    # fed into torch.tensor().
    response.raise_for_status()
    return response.json()

def get_cosine_titles(timeline_embed, train_article):
    """Cosine similarity between a timeline embedding and an article's title embedding.

    Args:
        timeline_embed: embedding of the timeline header, convertible by ``torch.tensor``.
        train_article: mapping whose ``'Title_embeddings'`` entry is a stringified
            Python list (or an already-parsed sequence).

    Returns:
        The tensor produced by ``cos_sim``.
    """
    # literal_eval parses list literals without executing arbitrary code from the data file.
    import ast

    stored = train_article['Title_embeddings']
    title_embedding = ast.literal_eval(stored) if isinstance(stored, str) else stored
    similarity = cos_sim(torch.tensor(timeline_embed), torch.tensor(title_embedding))
    return similarity

def get_similar_by_titles(test_article, timeline_header, db, top_k=20):
    """Rank every article in ``db`` by title similarity to ``timeline_header``.

    The header is embedded with ``dense_embed`` and each article is scored with
    ``get_cosine_titles``. The best ``top_k`` matches are printed and returned.

    Args:
        test_article: mapping with a ``'Title'`` key; only used in the printed banner.
        timeline_header: text describing the desired timeline.
        db: indexable collection of article mappings with the keys ``'st_id'``,
            ``'Title'``, ``'Text'``, ``'Publication_date'``, ``'article_url'`` and
            whatever ``get_cosine_titles`` reads.
        top_k: number of best matches to print and return (default 20).

    Returns:
        list[dict]: up to ``top_k`` records with the keys ``'id'``, ``'Title'``,
        ``'Text'``, ``'Date'``, ``'Article_URL'`` and ``'cosine_score'``, best first.
    """
    print("Computing similarities...\n")
    timeline_heading_embed = dense_embed(timeline_header)

    by_tags_records = []
    for i in trange(len(db)):
        article = db[i]
        by_tags_records.append({
            'id': article['st_id'],
            'Title': article['Title'],
            'Text': article['Text'],
            'Date': article['Publication_date'],
            'Article_URL': article['article_url'],
            'cosine_score': get_cosine_titles(timeline_heading_embed, article),
        })

    print("Title of test article: " + test_article['Title'])
    print(f"Below are the best articles that are closest to this desired timeline based on the titles: {timeline_header}\n")

    # Highest similarity first
    by_tags_records.sort(key=lambda x: x['cosine_score'], reverse=True)
    top_records = by_tags_records[:top_k]
    for record in top_records:
        print(record['Title'])
        print(record['cosine_score'])
        print()
    # Returns the top_k most similar articles
    return top_records

def get_cosine_text(timeline_embed, train_article):
    """Cosine similarity between a timeline embedding and an article's ``'embeddings'`` entry.

    Args:
        timeline_embed: embedding of the timeline header, convertible by ``torch.tensor``.
        train_article: mapping whose ``'embeddings'`` entry is a stringified Python
            list (or an already-parsed sequence).

    Returns:
        The tensor produced by ``cos_sim``.
    """
    # literal_eval parses list literals without executing arbitrary code from the data file.
    import ast

    stored = train_article['embeddings']
    text_embedding = ast.literal_eval(stored) if isinstance(stored, str) else stored
    similarity = cos_sim(torch.tensor(timeline_embed), torch.tensor(text_embedding))
    return similarity

def get_similar_by_text(test_article, timeline_header, db, top_k=20):
    """Rank every article in ``db`` by text similarity to ``timeline_header``.

    The header is embedded with ``dense_embed`` and each article is scored with
    ``get_cosine_text``. The best ``top_k`` matches are printed and returned.

    Args:
        test_article: mapping with a ``'Title'`` key; only used in the printed banner.
        timeline_header: text describing the desired timeline.
        db: indexable collection of article mappings with the keys ``'st_id'``,
            ``'Title'``, ``'Text'``, ``'Publication_date'``, ``'article_url'`` and
            whatever ``get_cosine_text`` reads.
        top_k: number of best matches to print and return (default 20).

    Returns:
        list[dict]: up to ``top_k`` records with the keys ``'id'``, ``'Title'``,
        ``'Text'``, ``'Date'``, ``'Article_URL'`` and ``'cosine_score'``, best first.
    """
    print("Computing similarities...\n")
    timeline_heading_embed = dense_embed(timeline_header)

    by_tags_records = []
    for i in trange(len(db)):
        article = db[i]
        by_tags_records.append({
            'id': article['st_id'],
            'Title': article['Title'],
            'Text': article['Text'],
            'Date': article['Publication_date'],
            'Article_URL': article['article_url'],
            'cosine_score': get_cosine_text(timeline_heading_embed, article),
        })

    print("Title of test article: " + test_article['Title'])
    print(f"Below are the best articles that are closest to this desired timeline based on the texts: \n{timeline_header}\n")

    # Highest similarity first
    by_tags_records.sort(key=lambda x: x['cosine_score'], reverse=True)
    top_records = by_tags_records[:top_k]
    for record in top_records:
        print(record['Title'])
        print(record['cosine_score'])
        print()
    # Returns the top_k most similar articles
    return top_records


# Topic the timeline is built around; it is embedded and compared against every article in db.
timeline_header = "The Israel-Hamas Conflict"
# Best matches scored against the article 'embeddings' field, then against 'Title_embeddings'.
similar_article_text = get_similar_by_text(test_article, timeline_header, db)
similar_articles_titles = get_similar_by_titles(test_article, timeline_header, db)

Computing similarities...



100%|██████████| 2007/2007 [00:01<00:00, 1182.12it/s]


Title of test article: Japan to provide $88 million in additional humanitarian aid to Palestinians
Below are the best articles that are closest to this desired timeline based on the texts: 
The Israel-Hamas Conflict

More questions than answers as Israeli PM Netanyahu seeks security control over Gaza
tensor(0.7056)

Israeli army to confront resilient foe in anticipated Gaza invasion
tensor(0.6903)

What links Hamas to the Axis of Resistance and its patron Iran? 
tensor(0.6892)

I negotiated Israel’s hardest hostage deal. Here’s what’s next in Gaza
tensor(0.6693)

Israel-Hamas war and the dangers of a creeping occupation
tensor(0.6672)

While You Were Sleeping: 5 stories you might have missed, Oct 16
tensor(0.6537)

Israeli, US spy chiefs meet Qatari PM to discuss 'building on' Gaza truce: Source 
tensor(0.6512)

The secrets Hamas knew about Israel’s military
tensor(0.6495)

As Israel bombards Gaza, bakeries run out of bread, water runs low
tensor(0.6475)

Israel makes first raids into 

100%|██████████| 2007/2007 [00:01<00:00, 1167.70it/s]

Title of test article: Japan to provide $88 million in additional humanitarian aid to Palestinians
Below are the best articles that are closest to this desired timeline based on the titles: The Israel-Hamas Conflict

Israel-Hamas war and the dangers of a creeping occupation
tensor(0.7952)

The nightmare of delivering aid during this Israel-Hamas war
tensor(0.6971)

World reacts to Israel-Hamas war
tensor(0.6932)

Israeli and Hamas fighters in close combat in Gaza City as civilians flee
tensor(0.6817)

The Hamas tunnel city beneath Gaza – a hidden front line for Israel
tensor(0.6470)

The secrets Hamas knew about Israel’s military
tensor(0.6370)

What links Hamas to the Axis of Resistance and its patron Iran? 
tensor(0.6248)

White House: No sign other actors to join Israel-Hamas conflict
tensor(0.6241)

I negotiated Israel’s hardest hostage deal. Here’s what’s next in Gaza
tensor(0.6162)

Why Hezbollah is a wild card as Israel takes aim at Hamas
tensor(0.6104)

Gaza hospital for Palest




In [8]:
def get_titles_str(similar_articles_titles, similar_article_text):
    """Interleave two ranked article lists and drop repeated titles.

    Items are taken alternately (text match first, then title match) so that
    both rankings contribute from the top. The first occurrence of each
    ``'Title'`` is kept. The lists may have different lengths; once the shorter
    one runs out, the remainder of the longer one is appended.

    Args:
        similar_articles_titles: records ranked by title similarity.
        similar_article_text: records ranked by text similarity.

    Returns:
        list[dict]: the de-duplicated, interleaved records.
    """
    from itertools import zip_longest

    missing = object()  # padding marker for the shorter list
    seen_titles = set()
    unique_list = []
    for pair in zip_longest(similar_article_text, similar_articles_titles, fillvalue=missing):
        for item in pair:
            if item is missing:
                continue
            title = item["Title"]
            if title not in seen_titles:
                seen_titles.add(title)
                unique_list.append(item)

    return unique_list
# Merge the title-based and text-based matches into one list without repeated titles.
combined_titles = get_titles_str(similar_articles_titles, similar_article_text)
print(timeline_header)
combined_titles

The Israel-Hamas Conflict


[{'id': 'st_1160933',
  'Title': 'More questions than answers as Israeli PM Netanyahu seeks security control over Gaza',
  'Text': 'JERUSALEM - Prime Minister Benjamin Netanyahu’s declaration this week that Israel would take control of security in Gaza for an indefinite period has added to uncertainty over the future of the besieged enclave, more than a month into the war.Israeli officials have since tried to make clear that they do not intend to reoccupy Gaza, from which Israeli forces withdrew in 2005. But there has been no clarity on how to ensure security without maintaining a military presence in the territory.Only one thing has been stressed repeatedly – Hamas must be destroyed.The movement launched an attack on southern Israel on Oct 7, which Israel says killed 1,400 people. Hamas also took more than 240 hostages.Israel responded to that attack with retaliatory strikes on Gaza, which is ruled by Hamas. Israel’s bombardments have killed more than 10,500 Palestinians.“They cannot 

## Using a re-ranker model to re-rank these articles

In [9]:
from langchain_groq import ChatGroq
# Load environment variables
# NOTE: this repeats the setup from the first cell; hf_key and dense_embedder_api
# are re-read from the same environment variables.
load_dotenv()
hf_key = os.getenv('HUGGINGFACE_API_KEY')
dense_embedder_api = os.getenv("HF_API_URL")

In [10]:
from sentence_transformers import CrossEncoder
# Cross-encoder used below to score (timeline header, article text) pairs.
# Runs on CPU; max_length=512 is presumably the per-pair token limit — TODO confirm.
cross_encoder = CrossEncoder(
    "cross-encoder/ms-marco-TinyBERT-L-2-v2", max_length=512, device="cpu"
)

  from tqdm.autonotebook import tqdm, trange


## Idea for using a re-ranker with the hybrid search
- The clustering might retrieve around 40 articles about a topic, but not all of them are relevant.
- The re-ranker scores each article against the required timeline header, and a threshold on that score is used to keep only the relevant articles.

In [11]:
timeline_header

'The Israel-Hamas Conflict'

In [12]:
# Pair the timeline header with each candidate's full text for the cross-encoder.
unranked_docs = [(timeline_header, doc['Text']) for doc in combined_titles]
# Get the scores
scores = cross_encoder.predict(unranked_docs).tolist()

# Criteria: keep only articles with a positive score against the timeline header.
# Fresh dicts are built instead of writing into combined_titles, so re-running
# this cell can never leave a stale 'reranked_score' on an article that no
# longer passes the threshold.
combined_articles = [
    {**article, 'reranked_score': score}
    for article, score in zip(combined_titles, scores)
    if score > 0
]
combined_articles

[{'id': 'st_1160933',
  'Title': 'More questions than answers as Israeli PM Netanyahu seeks security control over Gaza',
  'Text': 'JERUSALEM - Prime Minister Benjamin Netanyahu’s declaration this week that Israel would take control of security in Gaza for an indefinite period has added to uncertainty over the future of the besieged enclave, more than a month into the war.Israeli officials have since tried to make clear that they do not intend to reoccupy Gaza, from which Israeli forces withdrew in 2005. But there has been no clarity on how to ensure security without maintaining a military presence in the territory.Only one thing has been stressed repeatedly – Hamas must be destroyed.The movement launched an attack on southern Israel on Oct 7, which Israel says killed 1,400 people. Hamas also took more than 240 hostages.Israel responded to that attack with retaliatory strikes on Gaza, which is ruled by Hamas. Israel’s bombardments have killed more than 10,500 Palestinians.“They cannot 

In [13]:
# Best re-ranked articles first, capped at 12 entries.
sorted_articles = sorted(
    combined_articles,
    key=lambda article: article['reranked_score'],
    reverse=True,
)[:12]

In [14]:
# Groq to generate main event for each article
import re
import json
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import PromptTemplate
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate

# NOTE(review): chat_model and groq_api_key are assigned here but not referenced
# by any cell visible in this notebook; groq_event() below calls Gemini instead.
chat_model = "llama3-8b-8192"
load_dotenv()
groq_api_key = os.getenv('GROQ_API_KEY')
# Read the Gemini key from the environment and hand it to google.generativeai.
GEMINI_KEY = os.environ.get('GEMINI_KEY')
genai.configure(api_key=GEMINI_KEY)

In [15]:
def clean_llm_output(output):
    """Parse the JSON object contained in an LLM response.

    Only the Markdown code fence and its ``json`` language tag are stripped.
    The payload itself is left intact, so the word "json" inside a value
    (e.g. an event description) is no longer deleted.

    Args:
        output: response object exposing ``parts[0].text``.

    Returns:
        The value decoded by ``json.loads``.

    Raises:
        json.JSONDecodeError: if the remaining text is not valid JSON.
    """
    text = output.parts[0].text.strip()
    # Opening fence, with or without the language tag
    text = re.sub(r"^```(?:json)?\s*", "", text, flags=re.IGNORECASE)
    # Closing fence
    text = re.sub(r"\s*```$", "", text)
    # A bare leading "json" tag emitted without fences
    text = re.sub(r"^json\b\s*", "", text, flags=re.IGNORECASE)
    result = json.loads(text)
    return result
    
def groq_event(date, title, text):
    """Ask the LLM for an article's main event and the date it occurred.

    Despite the name, the model called here is Gemini
    ('gemini-1.5-flash-latest' via google.generativeai), not Groq.

    Args:
        date: publication date inserted into the prompt.
        title: article title inserted into the prompt.
        text: article body inserted into the prompt.

    Returns:
        The value returned by ``clean_llm_output`` for the model response; the
        calling cell reads its ``'main_event'`` and ``'event_date'`` entries.
        The result is also printed.
    """
    llm = genai.GenerativeModel('gemini-1.5-flash-latest' )
    
    # Output schema; it is only used to generate the format instructions for the prompt.
    # (Documented with a comment rather than a class docstring so the generated
    # schema text is unaffected.)
    class summarized_event(BaseModel):
        main_event: str = Field(description="Main event of the article")
        event_date: str = Field(description="Date which the main event occured in YYYY-MM-DD")
    
    parser = JsonOutputParser(pydantic_object=summarized_event)

    
    template = '''
You are a news article editor. Analyse the article deeply, and describe the main event of the article below in one short sentence.
Using this main event and the publication date, identify the date at when this main event occured.
You should use any time references such as "last week," "last month," or specific dates. 
If the article does not specify the exact date, save the date in the YYYY-MM-XX or YYYY-XX-XX format.
Do not provide any explanations for your answer.

Publication Date:
{date}
Article Title:
{title}
Article Text:
{text}

{format_instructions}
Before you return the answer, ensure and double check that you have adhered the answer format instructions strictly.
'''
    # format_instructions is filled in up front from the parser; date/title/text per call.
    prompt = PromptTemplate(
        template=template,
        input_variables=["date", "title", "text"],
        partial_variables={"format_instructions": parser.get_format_instructions()},
    )
    
    final_prompt = prompt.format(date=date, title=title, text=text)
    # All four listed harm categories are set to BLOCK_NONE for this request.
    response = llm.generate_content(final_prompt,
                                        safety_settings={
                                            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                                            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                                            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE, 
                                            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE
                                            })
    
    # The JSON is parsed by clean_llm_output rather than by `parser`.
    cleaned_output = clean_llm_output(response)
    print(cleaned_output)
    return cleaned_output

In [16]:
# One LLM call per shortlisted article; the event and its date are stored on the record.
for idx in trange(len(sorted_articles)):
    article = sorted_articles[idx]
    event_info = groq_event(article['Date'], article['Title'], article['Text'])
    article['Event'] = event_info['main_event']
    article['Event_date'] = event_info['event_date']

  8%|▊         | 1/12 [00:01<00:13,  1.19s/it]

{'main_event': 'The White House stated that there are no signs of other actors joining the Israel-Hamas conflict.', 'event_date': '2023-10-13'}


 17%|█▋        | 2/12 [00:02<00:10,  1.06s/it]

{'main_event': "Jordan's foreign minister Ayman Safadi expressed doubts about Israel's ability to wipe out Hamas.", 'event_date': '2023-11-17'}


 25%|██▌       | 3/12 [00:03<00:09,  1.01s/it]

{'main_event': 'President Tharman called for increased aid and protection of civilians in the Gaza conflict during a speech on October 22nd.', 'event_date': '2023-10-22'}


 33%|███▎      | 4/12 [00:04<00:09,  1.17s/it]

{'main_event': 'The US House of Representatives will vote on Tuesday in its latest attempt to elect a speaker.', 'event_date': '2023-10-17'}


 42%|████▏     | 5/12 [00:05<00:07,  1.10s/it]

{'main_event': "The White House suggested 'pauses' in the Israel-Hamas conflict to allow for humanitarian aid and safe evacuations from Gaza.", 'event_date': '2023-11-02'}


 50%|█████     | 6/12 [00:07<00:08,  1.44s/it]

{'main_event': 'The International Criminal Court (ICC) confirmed that its mandate applies to potential crimes committed in the current Israel-Palestine conflict.', 'event_date': '2023-10-10'}


 58%|█████▊    | 7/12 [00:09<00:08,  1.62s/it]

{'main_event': "Israel's Prime Minister Benjamin Netanyahu vowed to 'demolish Hamas' as his troops prepared to move into the Gaza Strip in pursuit of Hamas militants.", 'event_date': '2023-10-15'}


 67%|██████▋   | 8/12 [00:10<00:06,  1.55s/it]

{'main_event': 'Israel froze a Barclays bank account and blocked cryptocurrency accounts linked to Hamas fundraising.', 'event_date': '2023-10-10'}


 75%|███████▌  | 9/12 [00:12<00:04,  1.49s/it]

{'main_event': 'Israeli Prime Minister Benjamin Netanyahu declared that Israel would take control of security in Gaza for an indefinite period.', 'event_date': '2023-11-02'}


 83%|████████▎ | 10/12 [00:14<00:03,  1.57s/it]

{'main_event': 'Hamas launched a surprise attack on Israel, killing hundreds and taking hostages.', 'event_date': '2023-10-07'}


 92%|█████████▏| 11/12 [00:15<00:01,  1.39s/it]

{'main_event': 'Palestinian envoy to China called on Beijing to play a role in ending the Israel-Hamas war', 'event_date': '2023-10-16'}


100%|██████████| 12/12 [00:17<00:00,  1.42s/it]

{'main_event': 'Israel and Hamas agreed to a ceasefire for at least four days to let in aid and allow the release of at least 50 hostages held by Hamas in exchange for some 150 Palestinians jailed in Israel.', 'event_date': '2023-11-22'}





In [18]:
# Drop the torch-tensor score before the records are written out as JSON.
# The default makes this cell safe to re-run, and safe on records reloaded from
# the JSON file, which no longer carry the key.
for article in sorted_articles:
    article.pop("cosine_score", None)
sorted_articles

[{'id': 'st_1154834',
  'Title': 'White House: No sign other actors to join Israel-Hamas conflict',
  'Text': 'WASHINGTON  -     The White House said on Friday it has not seen any indications that other actors were considering joining and widening the ongoing conflict between Israel and Palestinian militant group Hamas."We haven\'t seen any other external actor that has indicated a willingness or readiness to widen and deepen this conflict," White House National Security Council spokesperson John Kirby told reporters.    The comments came hours after Hezbollah deputy chief Naim Qassem said on Friday that the group would not be swayed by calls for it to stay on the sidelines of the ongoing conflict between Israel and Hamas, saying the party was "fully ready" to contribute to the fighting. REUTERS',
  'Date': '2023-10-14',
  'Article_URL': 'https://www.straitstimes.com/asia/white-house-no-sign-other-actors-to-join-israel-hamas-conflict',
  'reranked_score': 6.767659664154053,
  'Event': 

In [19]:
# Persist the annotated articles as UTF-8 JSON, leaving non-ASCII characters unescaped.
with open("../data_upload/hybrid_events.json", "w", encoding="utf-8") as events_file:
    json.dump(sorted_articles, events_file, indent=4, ensure_ascii=False)

In [20]:
# Reload the saved articles so the following cells can run without repeating the LLM calls.
with open("../data_upload/hybrid_events.json", "r", encoding="utf-8") as events_file:
    sorted_articles = json.load(events_file)

In [19]:
# Finding similar events: collect just the event sentences, in article order
events = [article['Event'] for article in sorted_articles]
events

['The White House stated that there are no signs of other actors joining the Israel-Hamas conflict.',
 "Jordan's foreign minister Ayman Safadi expressed doubts about Israel's ability to wipe out Hamas.",
 'President Tharman called for increased aid and protection of civilians in the Gaza conflict during a speech on October 22nd.',
 'The US House of Representatives will vote on Tuesday in its latest attempt to elect a speaker.',
 "The White House suggested 'pauses' in the Israel-Hamas conflict to allow for humanitarian aid and safe evacuations from Gaza.",
 'The International Criminal Court (ICC) confirmed that its mandate applies to potential crimes committed in the current Israel-Palestine conflict.',
 "Israel's Prime Minister Benjamin Netanyahu vowed to 'demolish Hamas' as his troops prepared to move into the Gaza Strip in pursuit of Hamas militants.",
 'Israel froze a Barclays bank account and blocked cryptocurrency accounts linked to Hamas fundraising.',
 'Israeli Prime Minister Be

In [22]:
# Code for processing articles with similar events

In [23]:
sorted_articles

[{'id': 'st_1155620',
  'Title': 'WHO says it needs urgent access to Gaza to deliver aid, medical supplies',
  'Text': 'LONDON  -     The World Health Organization said on Tuesday it needs urgent access to Gaza to deliver aid and medical supplies, as the UN agency warned of a humanitarian crisis in the Israeli-occupied Palestinian enclave.Speaking to media in a briefing, Dr Richard Brennan, regional emergency director of the WHO\'s Eastern Mediterranean regional office, said the WHO was meeting with "decision-makers" on Tuesday to open access to Gaza as soon as possible.Dr Richard Peeperkorn, WHO Representative in the occupied Palestinian territories, said 2,800 people have died and 11,000 injured in Gaza since Israeli air strikes started. About half of them were women and children. REUTERS',
  'Date': '2023-10-17',
  'Article_URL': 'https://www.straitstimes.com/asia/who-says-it-needs-urgent-access-to-gaza-to-deliver-aid-medical-supplies',
  'reranked_score': 7.182124614715576,
  'Even

In [20]:
from datetime import datetime

def format_timeline_date(date_str):
    """Turn an LLM-produced date string into a display string.

    Accepted inputs and their output:
        ``YYYY-MM-DD`` -> ``'07 October 2023'``
        ``YYYY-MM``    -> ``'October 2023'``
        ``YYYY``       -> ``'2023'``

    The event prompt asks the model to write unknown components as ``XX``
    (``YYYY-MM-XX`` / ``YYYY-XX-XX``). Those placeholders are dropped, so
    ``'2023-10-XX'`` is formatted like ``'2023-10'`` instead of returning ``None``.

    Args:
        date_str: the date string to format.

    Returns:
        str | None: the formatted date, or ``None`` when the input is not a
        string or matches none of the supported formats.
    """
    if not isinstance(date_str, str):
        return None

    # Keep the components up to the first all-"X" placeholder.
    known_parts = []
    for part in date_str.strip().split('-'):
        if part and set(part.upper()) == {'X'}:
            break
        known_parts.append(part)
    normalised = '-'.join(known_parts)

    # (input format, display format); a string can match at most one of these.
    format_pairs = (
        ('%Y', '%Y'),
        ('%Y-%m-%d', '%d %B %Y'),
        ('%Y-%m', '%B %Y'),
    )
    for parse_fmt, display_fmt in format_pairs:
        try:
            return datetime.strptime(normalised, parse_fmt).strftime(display_fmt)
        except ValueError:
            continue
    return None


# Build one timeline entry per article, keeping the raw event date for sorting.
sorted_events = [
    {
        "Event": article['Event'],
        "Date": article['Event_date'],
        "Article_URL": article['Article_URL'],
        "Article_title": article['Title'],
    }
    for article in sorted_articles
]
# Order by the raw date string first, then swap in the display form.
sorted_events.sort(key=lambda entry: entry['Date'])
for event in sorted_events:
    event['Date'] = format_timeline_date(event['Date'])

In [21]:
sorted_events

[{'Event': 'Hamas launched a surprise attack on Israel, killing hundreds and taking hostages.',
  'Date': '07 October 2023',
  'Article_URL': 'https://www.straitstimes.com/world/middle-east/world-reacts-to-israel-gaza-war',
  'Article_title': 'World reacts to Israel-Hamas war'},
 {'Event': 'The International Criminal Court (ICC) confirmed that its mandate applies to potential crimes committed in the current Israel-Palestine conflict.',
  'Date': '10 October 2023',
  'Article_URL': 'https://www.straitstimes.com/asia/explainer-what-war-crimes-laws-apply-to-the-israel-palestinian-conflict',
  'Article_title': 'What war crimes laws apply to the Israel-Palestinian conflict?'},
 {'Event': 'Israel froze a Barclays bank account and blocked cryptocurrency accounts linked to Hamas fundraising.',
  'Date': '10 October 2023',
  'Article_URL': 'https://www.straitstimes.com/world/europe/hamas-cash-to-crypto-global-finance-maze-in-israels-sights',
  'Article_title': "Hamas' cash-to-crypto global fina

In [22]:
# only do this temporarily:
# Reshape each event's Article_URL from a plain URL into a one-element list of
# {"url", "title"} dicts.
for event in sorted_events:
    if isinstance(event['Article_URL'], list):
        # Already converted by an earlier run of this cell; wrapping it again
        # would nest the list inside another list.
        continue
    event['Article_URL'] = [{"url": event['Article_URL'], "title": event['Article_title']}]
sorted_events

[{'Event': 'Hamas launched a surprise attack on Israel, killing hundreds and taking hostages.',
  'Date': '07 October 2023',
  'Article_URL': [{'url': 'https://www.straitstimes.com/world/middle-east/world-reacts-to-israel-gaza-war',
    'title': 'World reacts to Israel-Hamas war'}],
  'Article_title': 'World reacts to Israel-Hamas war'},
 {'Event': 'The International Criminal Court (ICC) confirmed that its mandate applies to potential crimes committed in the current Israel-Palestine conflict.',
  'Date': '10 October 2023',
  'Article_URL': [{'url': 'https://www.straitstimes.com/asia/explainer-what-war-crimes-laws-apply-to-the-israel-palestinian-conflict',
    'title': 'What war crimes laws apply to the Israel-Palestinian conflict?'}],
  'Article_title': 'What war crimes laws apply to the Israel-Palestinian conflict?'},
 {'Event': 'Israel froze a Barclays bank account and blocked cryptocurrency accounts linked to Hamas fundraising.',
  'Date': '10 October 2023',
  'Article_URL': [{'url'

In [23]:
import sys

print("Fetching database to store the generated timeline.. \n")
# Pull database.
# Bound to its own name: `db` already holds the article list loaded at the top
# of the notebook, and overwriting it would break any re-run of the earlier cells.
mongo_db = mongo_client[config["database"]["name"]]

# Get collection from database
gen_timeline_documents = mongo_db[config["database"]["hybrid_timeline_collection"]]

test_article_id = test_article['st_id']
test_article_title = test_article['Title']

# Build the display header for the timeline
print("Generating the timeline header...\n")
timeline_display_header = "Timeline of " + timeline_header
# Convert the timeline to JSON
timeline_json = json.dumps(sorted_events)
timeline_return = {"Article_id": test_article_id,
                   "Article_Title": test_article_title,
                   "Timeline_header": timeline_display_header,
                   "Timeline": timeline_json}
timeline_export = timeline_return

# Send the timeline data to MongoDB
try:
    # Insert result into collection
    gen_timeline_documents.insert_one(timeline_export)
    print(f"Timeline with article id {test_article_id} successfully saved to MongoDB")
except Exception as error:
    print(f"Unable to save timeline to database. Check your connection the database...\nERROR: {error}\n")
    sys.exit()


Fetching database to store the generated timeline.. 

Generating the timeline header...

Timeline with article id st_1159793 successfully saved to MongoDB
