## Objective of notebook:

- To explore keyword search and retrieval of the relevant articles.
- To make the search process faster, we likely need some mechanism to generate embeddings for the keywords or tags.
- Currently, I am using hierarchical clustering to cluster all the articles.

In [1]:
import json
import yaml
import os
import pandas as pd
import numpy as np
from dotenv import load_dotenv
import torch
import requests
from torch import nn
from tqdm import trange
from pymongo import MongoClient

# Groq to generate main event for each article
import re
import json
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate
from langchain_core.prompts import PromptTemplate
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate

# Groq chat model id (not referenced by the cells visible in this notebook).
chat_model = "llama3-8b-8192"

# Read API credentials from the environment (populated from a local .env file).
load_dotenv()
groq_api_key = os.getenv('GROQ_API_KEY')
GEMINI_KEY = os.environ.get('GEMINI_KEY')
genai.configure(api_key=GEMINI_KEY)


# Load the shared YAML configuration once (it was previously read twice).
# TODO: decide where config loading should live (e.g. a dedicated loader function).
with open("../gradio_config.yaml", "r") as config_file:
    config = yaml.safe_load(config_file)

# Credentials and endpoint for the hosted dense-embedding model.
hf_key = os.getenv('HUGGINGFACE_API_KEY')
dense_embedder_api = os.getenv("HF_API_URL")

# Initialise mongo client.
mongo_client = MongoClient(config["database"]["uri"])

In [2]:
# Paths of the JSON article files to load; both are merged into one list below.
files = ["../data/test_data/test.json", "../data/test_data/train.json"]
def combine_json(files):
    """Read each JSON file in `files` and concatenate their contents.

    Args:
        files: Iterable of paths to UTF-8 encoded JSON files.

    Returns:
        A single list holding the items of every file, in the order the
        files were given.
    """
    merged = []
    for path in files:
        with open(path, 'r', encoding='utf-8') as handle:
            # Extend in place so the result stays one flat list.
            merged += json.load(handle)
    return merged
# Combined in-memory list built from every file in `files`.
db = combine_json(files)

In [3]:
# Id of the article the timeline is built around.
test_id = "st_1155048"
# Cosine similarity along dim 0; assumes each embedding is a flat 1-D vector — TODO confirm.
cos_sim = nn.CosineSimilarity(dim=0)

# Fail loudly when the id is missing instead of hitting a NameError (fresh kernel)
# or silently reusing a stale `test_index` left over from an earlier run.
matching_indices = [idx for idx, article in enumerate(db) if article['st_id'] == test_id]
if not matching_indices:
    raise ValueError(f"No article with st_id {test_id!r} was found in db")
# The original scan never broke out of its loop, so the last match wins.
test_index = matching_indices[-1]
test_article = db[test_index]
test_article

{'Text': 'BRUSSELS - European Council president Charles Michel said on Saturday that he had convened a video conference summit of European Union leaders on Tuesday to discuss the Hamas attacks on Israelis and Israel’s response.Mr Michel said the bloc stood in “full solidarity” with the people of Israel after the “brutal terrorist attacks” of a week ago.In an invitation letter to EU leaders, Mr Michel said Israel had the right to defend itself in compliance with international law.He said the siege of the Gaza Strip was raising alarm bells in the international community, prompting him to convene a video conference meeting on Tuesday at 5.30pm Central European Time (11.30pm Singapore time).“It is of utmost importance that the European Council, in line with the treaties and our values, sets our common position and establishes a clear unified course of action that reflects the complexity of the unfolding situation,” he wrote.Mr Michel said the EU had to be an advocate of peace and respect f

In [4]:
# Generate the header of the timeline of the desired article
def clean_llm_output(output):
    """Parse the JSON object carried by the first part of an LLM response.

    Only the wrapping Markdown code fence and its optional ``json`` language
    tag are removed. The payload itself is left untouched, so values that
    happen to contain the word "json" are preserved (a blanket
    ``.replace('json', '')`` used to corrupt them).

    Args:
        output: Response object exposing ``parts[0].text``.

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the remaining text is not valid JSON.
    """
    text = output.parts[0].text.strip()
    # Leading fence and/or a bare "json" tag placed in front of the payload.
    text = re.sub(r"^(?:```)?\s*(?:json\b)?\s*", "", text, count=1, flags=re.IGNORECASE)
    # Trailing fence.
    text = re.sub(r"\s*```$", "", text, count=1)
    return json.loads(text)
    
def groq_header(title):
    """Ask the LLM for a generalised timeline header matching an article title.

    Despite the function name, the request is sent to the Gemini model
    'gemini-1.5-flash-latest'.

    Args:
        title: Title of the article the timeline is built around.

    Returns:
        The first value of the JSON object parsed from the model's reply.
    """
    model = genai.GenerativeModel('gemini-1.5-flash-latest' )

    # Output schema used to build the format instructions for the prompt.
    class timeline_headaer(BaseModel):
        timeline_header: str = Field(description="Suitable header of a timeline for this article")

    format_instructions = JsonOutputParser(pydantic_object=timeline_headaer).get_format_instructions()

    template = '''
I would like to create a timeline of events based on the title of an article.
Given a list of article titles below, you are tasked with creating an extremely generalised, suitable name for a timeline for this article that will provide a reader contextual information about a timeline of events regarding the article.
The header should be something that can be generalised to other similar articles.
For instance, if a title is "S’pore Red Cross gives $270k worth of relief aid to victims of Hamas-Israel war in Gaza", the header should be "Relief Aid for Gaza Conflict Victims"

Article Title:
{title}

{format_instructions}
Before you return the answer, ensure and double check that you have adhered the answer format instructions strictly.
'''
    final_prompt = PromptTemplate(
        template=template,
        input_variables=["title"],
        partial_variables={"format_instructions": format_instructions},
    ).format(title=title)

    # Every harm category listed here is set to BLOCK_NONE.
    no_blocking = HarmBlockThreshold.BLOCK_NONE
    safety_settings = {
        HarmCategory.HARM_CATEGORY_HATE_SPEECH: no_blocking,
        HarmCategory.HARM_CATEGORY_HARASSMENT: no_blocking,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: no_blocking,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: no_blocking,
    }
    reply = model.generate_content(final_prompt, safety_settings=safety_settings)

    parsed_reply = clean_llm_output(reply)
    # The header is taken positionally: whatever value comes first in the parsed object.
    return list(parsed_reply.values())[0]

# Generate a timeline header from the test article's title and show it.
title = test_article['Title']
timeline_header = groq_header(title)
print(timeline_header)

EU Response to Israel-Hamas Conflict


In [7]:
def dense_embed(payload: str):
    """Embed `payload` with the hosted dense-embedding endpoint.

    Posts `payload` as the JSON body to `dense_embedder_api`, authenticating
    with `hf_key` as a bearer token.

    Args:
        payload: Text to embed.

    Returns:
        The decoded JSON body of the response. Callers pass it straight to
        ``torch.tensor``; presumably it is a flat list of floats — TODO confirm
        against the endpoint.

    Raises:
        requests.HTTPError: If the endpoint answers with a 4xx/5xx status.
    """
    response = requests.post(
        dense_embedder_api,
        headers={"Authorization": f"Bearer {hf_key}"},
        json=payload,
    )
    # Surface HTTP failures here rather than letting an error body flow into
    # torch.tensor() downstream, where the resulting failure is hard to trace.
    response.raise_for_status()
    return response.json()

def get_cosine_text(timeline_embed, train_article):
    """Cosine similarity between a header embedding and an article's text embedding.

    Args:
        timeline_embed: Embedding of the timeline header (anything
            ``torch.tensor`` accepts).
        train_article: Article record whose 'embeddings' entry holds the
            text embedding serialised as a Python-literal string.

    Returns:
        The result of the module-level `cos_sim` applied to both tensors.
    """
    header_vector = torch.tensor(timeline_embed)
    # NOTE(review): eval() executes whatever the stored string contains; if the
    # data files are not fully trusted, switch to ast.literal_eval or json.loads.
    article_vector = torch.tensor(eval(train_article['embeddings']))
    return cos_sim(header_vector, article_vector)

def get_cosine_titles(timeline_embed, train_article):
    """Cosine similarity between a header embedding and an article's title embedding.

    Args:
        timeline_embed: Embedding of the timeline header (anything
            ``torch.tensor`` accepts).
        train_article: Article record whose 'Title_embeddings' entry holds the
            title embedding serialised as a Python-literal string.

    Returns:
        The result of the module-level `cos_sim` applied to both tensors.
    """
    header_vector = torch.tensor(timeline_embed)
    # NOTE(review): eval() executes whatever the stored string contains; if the
    # data files are not fully trusted, switch to ast.literal_eval or json.loads.
    title_vector = torch.tensor(eval(train_article['Title_embeddings']))
    return cos_sim(header_vector, title_vector)

def _rank_by_similarity(test_article, timeline_header, db, score_fn, intro, top_k):
    """Score every article in `db` against the header and return the best `top_k`.

    Shared implementation behind `get_similar_by_text` and
    `get_similar_by_titles`; the two differ only in `score_fn` and `intro`.

    Args:
        test_article: Article the timeline is built around (only 'Title' is read).
        timeline_header: Header text that is embedded and compared to each article.
        db: Sequence of article records with 'st_id', 'Title', 'Text',
            'Publication_date' and 'article_url' entries.
        score_fn: Callable ``(header_embedding, article) -> score``.
        intro: Line printed before the ranked results.
        top_k: Number of best-scoring records to print and return.

    Returns:
        The `top_k` records with the highest 'cosine_score', best first.
    """
    print("Computing similarities...\n")
    timeline_heading_embed = dense_embed(timeline_header)

    records = []
    for i in trange(len(db)):
        article = db[i]
        records.append({
            'id': article['st_id'],
            'Title': article['Title'],
            'Text': article['Text'],
            'Date': article['Publication_date'],
            'Article_URL': article['article_url'],
            'cosine_score': score_fn(timeline_heading_embed, article),
        })

    print("Title of test article: " + test_article['Title'])
    print(intro)

    # sorted() is stable, so ties keep their original db order.
    top_records = sorted(records, key=lambda record: record['cosine_score'], reverse=True)[:top_k]
    for record in top_records:
        print(record['Title'])
        print(record['cosine_score'])
        print()
    return top_records


def get_similar_by_text(test_article, timeline_header, db, top_k=20):
    """Rank articles by similarity between the header and each article's text embedding.

    Args:
        test_article: Article the timeline is built around.
        timeline_header: Generated header of the desired timeline.
        db: Sequence of article records to rank.
        top_k: How many of the most similar articles to print and return
            (default 20, the value that used to be hard-coded).

    Returns:
        List of the `top_k` most similar article records, best first.
    """
    return _rank_by_similarity(
        test_article,
        timeline_header,
        db,
        get_cosine_text,
        f"Below are the best articles that are closest to this desired timeline based on the texts: \n{timeline_header}\n",
        top_k,
    )


def get_similar_by_titles(test_article, timeline_header, db, top_k=20):
    """Rank articles by similarity between the header and each article's title embedding.

    Args:
        test_article: Article the timeline is built around.
        timeline_header: Generated header of the desired timeline.
        db: Sequence of article records to rank.
        top_k: How many of the most similar articles to print and return
            (default 20, the value that used to be hard-coded).

    Returns:
        List of the `top_k` most similar article records, best first.
    """
    return _rank_by_similarity(
        test_article,
        timeline_header,
        db,
        get_cosine_titles,
        f"Below are the best articles that are closest to this desired timeline based on the titles: {timeline_header}\n",
        top_k,
    )

# Rank every article against the timeline header twice:
# once using the text embeddings and once using the title embeddings.
similar_article_text = get_similar_by_text(test_article, timeline_header, db)
similar_articles_titles = get_similar_by_titles(test_article, timeline_header, db)


Computing similarities...



100%|██████████| 2007/2007 [00:01<00:00, 1159.03it/s]


Title of test article: EU leaders to hold emergency virtual summit on Israel-Hamas conflict on Tuesday  
Below are the best articles that are closest to this desired timeline based on the texts: 
EU Response to Israel-Hamas Conflict

EU leaders to hold emergency virtual summit on Israel-Hamas conflict on Tuesday  
tensor(0.7289)

World reacts to Israel-Hamas war
tensor(0.7239)

Palestinian state best guarantee of Israel security: EU’s Borrell
tensor(0.6943)

EU's Borrell tells Israel: 'One horror doesn’t justify another'
tensor(0.6791)

EU continues talks on humanitarian ceasefire in Israel-Hamas war
tensor(0.6602)

Maritime corridor, floating hospitals for Gaza in focus at Paris conference
tensor(0.6277)

More questions than answers as Israeli PM Netanyahu seeks security control over Gaza
tensor(0.6266)

Arab leaders at Cairo Peace Summit denounce Israel attacks on Gaza as risks to region rise
tensor(0.6223)

Erdogan to Putin: Western 'silence' worsening humanitarian situation in Gaza

100%|██████████| 2007/2007 [00:01<00:00, 1147.79it/s]

Title of test article: EU leaders to hold emergency virtual summit on Israel-Hamas conflict on Tuesday  
Below are the best articles that are closest to this desired timeline based on the titles: EU Response to Israel-Hamas Conflict

EU continues talks on humanitarian ceasefire in Israel-Hamas war
tensor(0.7357)

World reacts to Israel-Hamas war
tensor(0.6752)

EU leaders to hold emergency virtual summit on Israel-Hamas conflict on Tuesday  
tensor(0.6214)

Israel-Hamas war and the dangers of a creeping occupation
tensor(0.6067)

White House: No sign other actors to join Israel-Hamas conflict
tensor(0.6008)

Erdogan to Putin: Western 'silence' worsening humanitarian situation in Gaza
tensor(0.5840)

Palestinian state best guarantee of Israel security: EU’s Borrell
tensor(0.5834)

UN and medical agencies condemn Israel's Gaza ambulance strike
tensor(0.5829)

S’pore ‘deeply regrets’ impact of Israel’s military operations on Gaza hospital: MFA
tensor(0.5649)

Putin says Brics could help r




In [10]:
def combine_titles(similar_articles_titles, similar_article_text):
    """Interleave two ranked article lists and drop duplicate titles.

    Items are taken alternately (text-ranked first, then title-ranked) so the
    best hits of both rankings stay near the front. The lists may differ in
    length: once the shorter one is exhausted, the remainder of the longer one
    is appended in order (previously this raised IndexError or dropped items).

    Args:
        similar_articles_titles: Records ranked by title similarity.
        similar_article_text: Records ranked by text similarity.

    Returns:
        List of records with unique 'Title' values; the first occurrence of
        each title is the one kept.
    """
    seen_titles = set()
    unique_list = []
    longest = max(len(similar_article_text), len(similar_articles_titles))
    for rank in range(longest):
        for ranking in (similar_article_text, similar_articles_titles):
            if rank >= len(ranking):
                continue
            item = ranking[rank]
            title = item["Title"]
            if title not in seen_titles:
                seen_titles.add(title)
                unique_list.append(item)
    return unique_list
# Merge both rankings into one de-duplicated candidate list, then show the
# header next to the candidates for a quick visual check.
combined_titles = combine_titles(similar_articles_titles, similar_article_text)
print(timeline_header)
combined_titles

EU Response to Israel-Hamas Conflict


[{'id': 'st_1155048',
  'Title': 'EU leaders to hold emergency virtual summit on Israel-Hamas conflict on Tuesday  ',
  'Text': 'BRUSSELS - European Council president Charles Michel said on Saturday that he had convened a video conference summit of European Union leaders on Tuesday to discuss the Hamas attacks on Israelis and Israel’s response.Mr Michel said the bloc stood in “full solidarity” with the people of Israel after the “brutal terrorist attacks” of a week ago.In an invitation letter to EU leaders, Mr Michel said Israel had the right to defend itself in compliance with international law.He said the siege of the Gaza Strip was raising alarm bells in the international community, prompting him to convene a video conference meeting on Tuesday at 5.30pm Central European Time (11.30pm Singapore time).“It is of utmost importance that the European Council, in line with the treaties and our values, sets our common position and establishes a clear unified course of action that reflects 

## Using a re-ranker model to re-rank the retrieved articles

In [9]:
from langchain_groq import ChatGroq
# Load environment variables
# NOTE(review): the first code cell already calls load_dotenv() and assigns hf_key and
# dense_embedder_api with these same expressions, so this cell looks redundant.
load_dotenv()
hf_key = os.getenv('HUGGINGFACE_API_KEY')
dense_embedder_api = os.getenv("HF_API_URL")

In [11]:
from sentence_transformers import CrossEncoder
# Cross-encoder used further down to score (timeline header, article text) pairs.
# It is constructed with max_length=512 and pinned to the CPU.
cross_encoder = CrossEncoder(
    "cross-encoder/ms-marco-TinyBERT-L-2-v2", max_length=512, device="cpu"
)



## Idea for using a re-ranker with the hybrid search
- The clustering could retrieve around 40 articles about a topic. However, not all of them are relevant.
- The re-ranker scores each article against the required timeline header, and a threshold on that score is used to keep only the relevant articles.

In [12]:
timeline_header

'EU Response to Israel-Hamas Conflict'

In [13]:
# Pair the timeline header with every candidate's text for the cross-encoder.
unranked_docs = [(timeline_header, doc['Text']) for doc in combined_titles]
# Get the scores
scores = cross_encoder.predict(unranked_docs).tolist()

# Criteria: the score between the timeline header and the article must be positive.
# Each survivor is a shallow copy carrying its score, so `combined_titles` is never
# mutated. Writing the score onto the shared dicts meant that, after a re-run, an
# article scoring <= 0 could still pass the filter on a stale 'reranked_score'.
combined_articles = [
    {**article, 'reranked_score': score}
    for article, score in zip(combined_titles, scores)
    if score > 0
]
combined_articles

[{'id': 'st_1155048',
  'Title': 'EU leaders to hold emergency virtual summit on Israel-Hamas conflict on Tuesday  ',
  'Text': 'BRUSSELS - European Council president Charles Michel said on Saturday that he had convened a video conference summit of European Union leaders on Tuesday to discuss the Hamas attacks on Israelis and Israel’s response.Mr Michel said the bloc stood in “full solidarity” with the people of Israel after the “brutal terrorist attacks” of a week ago.In an invitation letter to EU leaders, Mr Michel said Israel had the right to defend itself in compliance with international law.He said the siege of the Gaza Strip was raising alarm bells in the international community, prompting him to convene a video conference meeting on Tuesday at 5.30pm Central European Time (11.30pm Singapore time).“It is of utmost importance that the European Council, in line with the treaties and our values, sets our common position and establishes a clear unified course of action that reflects 

In [17]:
# Retrieve only the top k articles
top_k = 12
# Highest re-ranked score first; the slice simply returns everything
# when fewer than top_k articles are available.
sorted_articles = sorted(
    combined_articles,
    key=lambda article: article['reranked_score'],
    reverse=True,
)[:top_k]

In [18]:
def groq_event(date, title, text):
    """Ask the LLM for an article's main event and the date it happened.

    Despite the function name, the request is sent to the Gemini model
    'gemini-1.5-flash-latest'.

    Args:
        date: Publication date of the article.
        title: Article title.
        text: Article body.

    Returns:
        The JSON object parsed from the model's reply; the prompt requests
        the keys 'main_event' and 'event_date'. The parsed object is also
        printed.
    """
    model = genai.GenerativeModel('gemini-1.5-flash-latest' )

    # Output schema used to build the format instructions for the prompt.
    class summarized_event(BaseModel):
        main_event: str = Field(description="Main event of the article")
        event_date: str = Field(description="Date which the main event occured in YYYY-MM-DD")

    format_instructions = JsonOutputParser(pydantic_object=summarized_event).get_format_instructions()

    template = '''
You are a news article editor. Analyse the article deeply, and describe the main event of the article below in one short sentence.
Using this main event and the publication date, identify the date at when this main event occured.
You should use any time references such as "last week," "last month," or specific dates. 
If the article does not specify the exact date, save the date in the YYYY-MM-XX or YYYY-XX-XX format.
Do not provide any explanations for your answer.

Publication Date:
{date}
Article Title:
{title}
Article Text:
{text}

{format_instructions}
Before you return the answer, ensure and double check that you have adhered the answer format instructions strictly.
'''
    final_prompt = PromptTemplate(
        template=template,
        input_variables=["date", "title", "text"],
        partial_variables={"format_instructions": format_instructions},
    ).format(date=date, title=title, text=text)

    # Every harm category listed here is set to BLOCK_NONE.
    no_blocking = HarmBlockThreshold.BLOCK_NONE
    safety_settings = {
        HarmCategory.HARM_CATEGORY_HATE_SPEECH: no_blocking,
        HarmCategory.HARM_CATEGORY_HARASSMENT: no_blocking,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: no_blocking,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: no_blocking,
    }
    reply = model.generate_content(final_prompt, safety_settings=safety_settings)

    parsed_reply = clean_llm_output(reply)
    print(parsed_reply)
    return parsed_reply

In [19]:
# Attach the LLM-extracted main event and its date to each top-ranked article.
for i in trange(len(sorted_articles)):
    article = sorted_articles[i]
    displayed_event = groq_event(article['Date'], article['Title'], article['Text'])
    article['Event'] = displayed_event['main_event']
    article['Event_date'] = displayed_event['event_date']
    # Drop the similarity score (presumably because the tensor cannot be written by
    # the JSON export below — confirm). The default keeps a re-run of this cell from
    # raising KeyError once the key has already been removed.
    article.pop("cosine_score", None)


 12%|█▎        | 1/8 [00:00<00:06,  1.04it/s]

{'main_event': "EU foreign policy chief Josep Borrell urged Israel not to be consumed by rage in its response to last month's Hamas attack", 'event_date': '2023-10-07'}


 25%|██▌       | 2/8 [00:01<00:05,  1.03it/s]

{'main_event': 'EU leaders will hold an emergency virtual summit on the Israel-Hamas conflict', 'event_date': '2023-10-17'}


 38%|███▊      | 3/8 [00:02<00:04,  1.03it/s]

{'main_event': 'EU foreign policy chief Josep Borrell said that the establishment of a Palestinian state would be the best way of ensuring Israel’s security.', 'event_date': '2023-11-20'}


 50%|█████     | 4/8 [00:03<00:03,  1.07it/s]

{'main_event': 'EU countries discussed the possibility of a humanitarian ceasefire in the Israel-Hamas war', 'event_date': '2023-10-23'}


 62%|██████▎   | 5/8 [00:05<00:03,  1.06s/it]

{'main_event': 'Palestinian armed group Hamas launched a series of attacks on Israel by land, sea, and air, resulting in hundreds of casualties.', 'event_date': '2023-10-07'}


 75%|███████▌  | 6/8 [00:06<00:02,  1.11s/it]

{'main_event': 'The US House of Representatives will vote on Tuesday in its latest attempt to elect a speaker.', 'event_date': '2023-10-17'}


 88%|████████▊ | 7/8 [00:07<00:01,  1.04s/it]

{'main_event': 'Iran accuses the United States of being militarily involved in the Israeli-Hamas conflict.', 'event_date': '2023-10-16'}


100%|██████████| 8/8 [00:08<00:00,  1.01s/it]

{'main_event': 'The White House stated that it has not seen any indications that other actors were considering joining the conflict between Israel and Hamas.', 'event_date': '2023-10-13'}





In [20]:
sorted_articles

[{'id': 'st_1162690',
  'Title': "EU's Borrell tells Israel: 'One horror doesn’t justify another'",
  'Text': 'MAGEN, Israel  -     EU foreign policy chief Josep Borrell urged Israel on Thursday not to be consumed by rage in its response to last month\'s Hamas attack, declaring that "one horror does not justify another".Borrell made his remarks on a visit to Israel, speaking alongside Israeli Foreign Minister Eli Cohen after the two men had visited Kibbutz Be\'eri, a focal point of the Oct. 7 assault."I understand your rage but let me ask you not to be consumed by rage. I think that\'s what the best friends of Israel can tell you," he said.Speaking at a regional council building a short drive from the kibbutz, Borrell stressed the EU\'s solidarity with Israel and its support for the country\'s right to defend itself in line with international law."But one thing is to defend Israel and another thing\'s to take care of the people in need," he added."And that is why the European Union, to

In [21]:
# Write sorted_articles to disk as indented UTF-8 JSON; ensure_ascii=False keeps
# non-ASCII characters in the article text unescaped.
with open("../data_upload/hybrid_events.json", "w", encoding="utf-8") as fout:
    json.dump(sorted_articles, fout, indent=4, ensure_ascii=False)

In [22]:
# Reload sorted_articles from the same JSON file the previous cell writes,
# replacing the in-memory list.
with open("../data_upload/hybrid_events.json", "r", encoding="utf-8") as fin:
    sorted_articles = json.load(fin)

In [19]:
# [Optional for now] Code for processing articles with similar events
# Collect the extracted event sentence of every article for inspection.
events = [event['Event'] for event in sorted_articles]    
events

['The White House stated that there are no signs of other actors joining the Israel-Hamas conflict.',
 "Jordan's foreign minister Ayman Safadi expressed doubts about Israel's ability to wipe out Hamas.",
 'President Tharman called for increased aid and protection of civilians in the Gaza conflict during a speech on October 22nd.',
 'The US House of Representatives will vote on Tuesday in its latest attempt to elect a speaker.',
 "The White House suggested 'pauses' in the Israel-Hamas conflict to allow for humanitarian aid and safe evacuations from Gaza.",
 'The International Criminal Court (ICC) confirmed that its mandate applies to potential crimes committed in the current Israel-Palestine conflict.',
 "Israel's Prime Minister Benjamin Netanyahu vowed to 'demolish Hamas' as his troops prepared to move into the Gaza Strip in pursuit of Hamas militants.",
 'Israel froze a Barclays bank account and blocked cryptocurrency accounts linked to Hamas fundraising.',
 'Israeli Prime Minister Be

In [23]:
sorted_articles

[{'id': 'st_1155620',
  'Title': 'WHO says it needs urgent access to Gaza to deliver aid, medical supplies',
  'Text': 'LONDON  -     The World Health Organization said on Tuesday it needs urgent access to Gaza to deliver aid and medical supplies, as the UN agency warned of a humanitarian crisis in the Israeli-occupied Palestinian enclave.Speaking to media in a briefing, Dr Richard Brennan, regional emergency director of the WHO\'s Eastern Mediterranean regional office, said the WHO was meeting with "decision-makers" on Tuesday to open access to Gaza as soon as possible.Dr Richard Peeperkorn, WHO Representative in the occupied Palestinian territories, said 2,800 people have died and 11,000 injured in Gaza since Israeli air strikes started. About half of them were women and children. REUTERS',
  'Date': '2023-10-17',
  'Article_URL': 'https://www.straitstimes.com/asia/who-says-it-needs-urgent-access-to-gaza-to-deliver-aid-medical-supplies',
  'reranked_score': 7.182124614715576,
  'Even

In [20]:
from datetime import datetime

def format_timeline_date(date_str):
    """Convert an ISO-like date string into a display string for the timeline.

    Supported inputs and their output:
        'YYYY-MM-DD' -> 'DD Month YYYY'
        'YYYY-MM'    -> 'Month YYYY'
        'YYYY'       -> 'YYYY'

    The event-extraction prompt tells the LLM to use 'YYYY-MM-XX' or
    'YYYY-XX-XX' when the exact date is unknown, so trailing '-XX'
    placeholders are dropped before parsing (they previously yielded None).

    Args:
        date_str: Date string to format.

    Returns:
        The formatted date, or None when `date_str` is not a string or does
        not match any supported format.
    """
    if not isinstance(date_str, str):
        return None

    # '2023-10-XX' -> '2023-10', '2023-XX-XX' -> '2023'
    cleaned = re.sub(r'(?:-[Xx]{2})+$', '', date_str.strip())

    # Input format -> display format, tried in this order.
    display_formats = {
        '%Y': '%Y',
        '%Y-%m-%d': '%d %B %Y',
        '%Y-%m': '%B %Y',
    }
    for parse_fmt, display_fmt in display_formats.items():
        try:
            return datetime.strptime(cleaned, parse_fmt).strftime(display_fmt)
        except ValueError:
            continue
    return None


# Keep only the fields the timeline needs and order the entries by their raw
# event-date string (plain string comparison) before the dates are reformatted.
sorted_events = sorted(
    (
        {
            "Event": article['Event'],
            "Date": article['Event_date'],
            "Article_URL": article['Article_URL'],
            "Article_title": article['Title'],
        }
        for article in sorted_articles
    ),
    key=lambda entry: entry['Date'],
)
# Swap each raw date for its display form.
for event in sorted_events:
    event['Date'] = format_timeline_date(event['Date'])

In [21]:
sorted_events

[{'Event': 'Hamas launched a surprise attack on Israel, killing hundreds and taking hostages.',
  'Date': '07 October 2023',
  'Article_URL': 'https://www.straitstimes.com/world/middle-east/world-reacts-to-israel-gaza-war',
  'Article_title': 'World reacts to Israel-Hamas war'},
 {'Event': 'The International Criminal Court (ICC) confirmed that its mandate applies to potential crimes committed in the current Israel-Palestine conflict.',
  'Date': '10 October 2023',
  'Article_URL': 'https://www.straitstimes.com/asia/explainer-what-war-crimes-laws-apply-to-the-israel-palestinian-conflict',
  'Article_title': 'What war crimes laws apply to the Israel-Palestinian conflict?'},
 {'Event': 'Israel froze a Barclays bank account and blocked cryptocurrency accounts linked to Hamas fundraising.',
  'Date': '10 October 2023',
  'Article_URL': 'https://www.straitstimes.com/world/europe/hamas-cash-to-crypto-global-finance-maze-in-israels-sights',
  'Article_title': "Hamas' cash-to-crypto global fina

In [22]:
# only do this temporarily:
# Turn each event's single article URL into a one-element list of
# {"url", "title"} pairs.
for event in sorted_events:
    # Already converted by an earlier run of this cell: wrapping it again would
    # nest the list inside a new dict, so leave it alone.
    if isinstance(event['Article_URL'], list):
        continue
    event['Article_URL'] = [
        {"url": event['Article_URL'], "title": event['Article_title']}
    ]
sorted_events

[{'Event': 'Hamas launched a surprise attack on Israel, killing hundreds and taking hostages.',
  'Date': '07 October 2023',
  'Article_URL': [{'url': 'https://www.straitstimes.com/world/middle-east/world-reacts-to-israel-gaza-war',
    'title': 'World reacts to Israel-Hamas war'}],
  'Article_title': 'World reacts to Israel-Hamas war'},
 {'Event': 'The International Criminal Court (ICC) confirmed that its mandate applies to potential crimes committed in the current Israel-Palestine conflict.',
  'Date': '10 October 2023',
  'Article_URL': [{'url': 'https://www.straitstimes.com/asia/explainer-what-war-crimes-laws-apply-to-the-israel-palestinian-conflict',
    'title': 'What war crimes laws apply to the Israel-Palestinian conflict?'}],
  'Article_title': 'What war crimes laws apply to the Israel-Palestinian conflict?'},
 {'Event': 'Israel froze a Barclays bank account and blocked cryptocurrency accounts linked to Hamas fundraising.',
  'Date': '10 October 2023',
  'Article_URL': [{'url'

In [23]:
import sys  # no longer used in this cell; kept in case other cells rely on it
print("Fetching database to store the generated timeline.. \n")
# Pull database. It gets its own name so the article list `db` loaded near the top
# of the notebook is not overwritten, which would break re-running the similarity cells.
timeline_db = mongo_client[config["database"]["name"]]

# Get collection from database
gen_timeline_documents = timeline_db[config["database"]["hybrid_timeline_collection"]]

test_article_id = test_article['st_id']
test_article_title = test_article['Title']

# Build the display header for the timeline
print("Generating the timeline header...\n")
timeline_display_header = "Timeline of " + timeline_header
# Convert the timeline to JSON
timeline_json = json.dumps(sorted_events)
timeline_return = {"Article_id": test_article_id,
                   "Article_Title": test_article_title,
                   "Timeline_header": timeline_display_header,
                   "Timeline": timeline_json}
timeline_export = timeline_return

# Send the timeline data to MongoDB
try:
    # Insert result into collection
    gen_timeline_documents.insert_one(timeline_export)
    print(f"Timeline with article id {test_article_id} successfully saved to MongoDB")
except Exception as error:
    print(f"Unable to save timeline to database. Check your connection the database...\nERROR: {error}\n")
    # Re-raise so the failure keeps its traceback and halts execution of this cell
    # (sys.exit() was used here before).
    raise


Fetching database to store the generated timeline.. 

Generating the timeline header...

Timeline with article id st_1159793 successfully saved to MongoDB
