In [1]:
import os
import ast
import sys
import json
import yaml
import re
from json import JSONDecodeError
from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime
from pymongo import MongoClient


import gradio as gr
import pandas as pd
import numpy as np
from dotenv import load_dotenv
from sklearn.metrics import silhouette_score
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import linkage, fcluster

# Import libraries for working with language models and Google Gemini
from langchain_core.prompts import PromptTemplate
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field

# Load environment variables
load_dotenv()
GEMINI_KEY = os.environ.get('GEMINI_KEY')
genai.configure(api_key=GEMINI_KEY)

# Normally where to do this? (in which function?)
with open("../gradio_config.yaml", "r") as config_file:
    config = yaml.safe_load(config_file)

# Initialise mongo client.
mongo_client = MongoClient(config["database"]["uri"])

In [98]:
def clean_llm_score(output):
    text = output.parts[0].text.replace("```", '').replace('json','')
    result = json.loads(text)
    return result

def clean_output(output):
    try:
        updated_timeline = json.loads(output)
        return updated_timeline
    except JSONDecodeError:
        #try 1: Ensuring that the string ends with just the open and close lists brackets
        try:
            new_output = re.search(r'\[[^\]]*\]', output).group(0)
        except AttributeError:
            new_output = re.search(r'\{.*?\}', output, re.DOTALL).group(0)  
        updated_timeline = json.loads(new_output)
        return updated_timeline
    
def extract_json_from_string(string):
    # Use a regular expression to find the content within the first and last square brackets
    match = re.search(r'\[.*\]', string, re.DOTALL)
    
    if match:
        json_content = match.group(0)
        try:
            # Load the extracted content into a JSON object
            json_data = json.loads(json_content)
            return json_data
        except json.JSONDecodeError as e:
            print("Failed to decode JSON:", e)
            return None
    else:
        print("No valid JSON content found.")
        return None

def clean_sort_timeline(timelines, df_retrieve):  
    generated_timeline = []
    for idx, line in timelines.items():
        indiv_timeline = clean_output(line)
        if type(indiv_timeline) == list:
            for el in indiv_timeline:
                generated_timeline.append(el)
        else:
            generated_timeline.append(indiv_timeline)
    unsorted_timeline = []

    for event in generated_timeline:
        article_index = event["Article"] - 1
        event["Article_id"] = df_retrieve.iloc[article_index].id
    for event in generated_timeline:
        del event["Article"]
        unsorted_timeline.append(event)  
        
    timeline = sorted(unsorted_timeline, key=lambda x:x['Date'])
    timeline = [event for event in timeline if event['Date'].lower()!= 'nan']
    for event in timeline:
        date = event['Date']
        if date.endswith('-XX-XX'):
            event['Date'] = date[:4]
        elif date.endswith('-XX'):
            event['Date'] = date[:7]
    return timeline



def format_timeline_date(date_str):
    formats = ['%Y', '%Y-%m-%d', '%Y-%m']
    for fmt in formats:
        try:
            date_obj = datetime.strptime(date_str, fmt)
            if fmt == '%Y':
                return date_obj.strftime('%Y')
            elif fmt == '%Y-%m-%d':
                return date_obj.strftime('%d %B %Y')
            elif fmt == '%Y-%m':
                return date_obj.strftime('%B %Y')
        except ValueError:
            continue
    return None

def split_batches(timeline, max_batch_size=30):
    n = len(timeline)
    if n <= max_batch_size:
        return [timeline]
    
    num_batches = n // max_batch_size
    remainder = n % max_batch_size
    
    if remainder > 0 and remainder < max_batch_size // 2:
        num_batches -= 1
        remainder += max_batch_size

    batches = []
    start = 0
    for i in range(num_batches):
        end = start + max_batch_size
        batches.append(timeline[start:end])
        start = end
    
    if remainder > 0:
        batches.append(timeline[start:start + remainder])
    return batches



def scale_df_embeddings(df_train, df_test):
    print("Processing embedding data and scaling data...\n")
    #Deserializing the embeddings
    body_embeddings_train = np.array(df_train['embeddings'].apply(ast.literal_eval).tolist())
    title_embeddings_train = np.array(df_train['Title_embeddings'].apply(ast.literal_eval).tolist())
    tags_embeddings_train = np.array(df_train['tags_embeddings'].apply(ast.literal_eval).tolist())

    body_embeddings_test = np.array(df_test['embeddings'].apply(ast.literal_eval).tolist())
    title_embeddings_test = np.array(df_test['Title_embeddings'].apply(ast.literal_eval).tolist())
    tags_embeddings_test = np.array(df_test['tags_embeddings'].apply(ast.literal_eval).tolist())
    
    # Combine embeddings
    all_embeddings_train = np.concatenate((body_embeddings_train, title_embeddings_train, tags_embeddings_train), axis=1)
    all_embeddings_test = np.concatenate((body_embeddings_test, title_embeddings_test, tags_embeddings_test), axis=1)

    # Standardize embeddings
    scaler = StandardScaler()
    train_embeddings = scaler.fit_transform(all_embeddings_train)
    test_embeddings = scaler.transform(all_embeddings_test)
    return train_embeddings,  test_embeddings

def get_variance_performance(train_embeddings):
# Experiment for this variance range of 92% to 95%
    print("Finding best Model parameters...\n")
    variance_range = [0.92]
    #variance_range = list(np.arange(0.92, 0.95, 0.01))
    variance_dic = {}

    for variance in variance_range:
        pca = PCA(n_components=variance)
        train_pca_embeddings = pca.fit_transform(train_embeddings)
        
        # Range of max_d values to try, for this dataset we use 65
        max_d_values = np.arange(52, 58)
        
        # List to store silhouette scores
        silhouette_scores_train = []

        # Perform hierarchical clustering
        Z = linkage(train_pca_embeddings, method='ward')

        for max_d in max_d_values:
            clusters_train = fcluster(Z, max_d, criterion='distance')
            
            # Calculate silhouette score only if there are at least 2 unique clusters and fewer than the number of samples
            if 1 < len(set(clusters_train)) < len(train_pca_embeddings):
                score_train = silhouette_score(train_pca_embeddings, clusters_train)
            else:
                score_train = -1  # Assign a score of -1 if less than 2 unique clusters or too many clusters
            
            silhouette_scores_train.append(score_train)

        # Determine the best max_d
        best_max_d_train = max_d_values[np.argmax(silhouette_scores_train)]
        variance_dic[variance] = {
            'max_d_train': best_max_d_train,
            'best_train_silhouette': max(silhouette_scores_train)
        }
    return variance_dic

def get_best_variance(perf_results):
    highest_train_sil = 0
    best_variance_s = []
    for variance, scores in perf_results.items():
        if scores['best_train_silhouette'] > highest_train_sil:
            highest_train_sil = scores['best_train_silhouette']
            best_variance_s = [variance]  
        elif scores['best_train_silhouette'] == highest_train_sil:
            best_variance_s.append(variance)  
    
    final_best_max_d = perf_results[best_variance_s[0]]['max_d_train']
    print(f"Best variance for this dataset is {round(best_variance_s[0], 2)} and the best maximum distance is {final_best_max_d}\n")
    return round(best_variance_s[0], 2), final_best_max_d

def predict_cluster(test_embedding, train_embeddings, clusters):
        distances = np.linalg.norm(train_embeddings - test_embedding, axis=1)
        return clusters[np.argmin(distances)]

def get_cluster_labels(best_variance, best_max_d, train_embeddings, test_embeddings, df_train, df_test):
    # Perform PCA
    print(f"Training new Hierarchical Clustering model with best variance: {best_variance} and max_d: {best_max_d}\n")
    pca = PCA(n_components=best_variance)
    pca_train_embeddings = pca.fit_transform(train_embeddings)
    pca_test_embeddings = pca.transform(test_embeddings)

    Z = linkage(pca_train_embeddings, method='ward', metric='euclidean')
    clusters_train = fcluster(Z, best_max_d, criterion='distance')
    # Predict clusters for test data using the nearest cluster center

    test_clusters = [predict_cluster(te, pca_train_embeddings, clusters_train) for te in pca_test_embeddings]

    df_train['Cluster_labels'] = clusters_train
    df_test['Cluster_labels'] = test_clusters
    df_test.reset_index(drop=True, inplace=True)
    
    # Create a dictionary to store the results
    cluster_dict = {}

    # Populate the dictionary with cluster contents for each test point
    for i, (test_point, test_cluster) in enumerate(zip(df_test.itertuples(), test_clusters)):
        cluster_contents = []
        
        cluster_indices = np.where(clusters_train == test_cluster)[0]
        cluster_df = df_train.iloc[cluster_indices]
        
        cluster_dict = {
            "Test point": {'id': test_point.st_id,
                        "Title": test_point.Title, 
                        "Tags": test_point.tags},
            "Cluster": test_cluster,
            "Cluster contents": cluster_contents
        }
        
        for _, row in cluster_df.iterrows():
            cluster_contents.append({"id": row['st_id'], 
                                    "Title": row['Title'],
                                    "Tags": row['tags'], 
                                    })

    print(f"Cluster Label {test_cluster} is chosen\n")
    input_list = ""
    input_list += f"Test Artice Chosen: (Title: {cluster_dict['Test point']['Title']}\nTags: {cluster_dict['Test point']['Tags']}):\n"
    for _, row in cluster_df.iterrows():
        input_list += f"Article id: {row['st_id']}, Title: {row['Title']}, Tags: {row['tags']}]\n"
    return input_list, df_train, df_test

def generate_clusters(df_train, df_test):
    train_embeddings, test_embeddings = scale_df_embeddings(df_train, df_test)
    variance_perf = get_variance_performance(train_embeddings)
    best_variance, best_max_d = get_best_variance(variance_perf)
    relevant_articles, df_train, df_test = get_cluster_labels(best_variance, best_max_d, train_embeddings, test_embeddings, df_train, df_test)
    return relevant_articles, df_train, df_test



def to_generate_timeline(test_data):
    print("Evaluating necessity of Timeline for this article.\n")
    llm = genai.GenerativeModel('gemini-1.5-flash-latest')
    class Event(BaseModel):
        score: int = Field(description="The need for this article to have a timeline")
        Reason: str = Field(description = "The main reason for your choice why a timeline is needed or why it is not needed")
            
    output_parser = JsonOutputParser(pydantic_object=Event)

        # See the prompt template you created for formatting
    format_instructions = output_parser.get_format_instructions()

    # Define the template
    template = '''
    You are a highly intelligent AI tasked with analyzing articles to determine whether generating a timeline of events leading up to the key event in the article would be beneficial. 
    Consider the following factors to make your decision:
    1. **Significance of the Event**:
       - Does the event have a significant impact on a large number of people, industries, or countries?
       - Are the potential long-term consequences of the event important?

    2. **Controversy or Debate**:
       - Is the event highly controversial or has it sparked significant debate?
       - Has the event garnered significant media attention and public interest?

    3. **Complexity**:
       - Does the event involve multiple factors, stakeholders, or causes that make it complex?
       - Does the event have deep historical roots or is it the culmination of long-term developments?

    4. **Personal Relevance**:
       - Does the event directly affect the reader or their community?
       - Is the event of particular interest to the reader due to economic implications, political affiliations, or social issues?

    5. Educational Purposes:
       - Would a timeline provide valuable learning or research information?

    Here is the information for the article:
    Title:{title}
    Text: {text}

    Based on the factors above, decide whether generating a timeline of events leading up to the key event in this article would be beneficial. 
    Your answer will include the need for this article to have a timeline with a score 1 - 5, 1 means unnecessary, 5 means necessary. It will also include the main reason for your choice.
    {format_instructions}    
    ANSWER:
    '''

    # Create the prompt template
    prompt = PromptTemplate(
        input_variables=["text", "title"],
        partial_variables={"format_instructions": format_instructions},
        template=template,
    )

        # Define the headline
    headline = test_data["Title"]
    body = test_data["Text"]

        # Format the prompt
    final_prompt = prompt.format(title=headline, text=body)

        # Generate content using the generative model
    response = llm.generate_content(
            final_prompt,
            safety_settings={
                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE
            }
        )
    final_response = clean_llm_score(response)
    # If LLM approves
    if final_response['score'] >=3:
        print("Timeline is necessary for this chosen article.\n")
        return True, None
    else:
        print("A timeline for this article is not required. \n")
        for part in final_response['Reason'].replace(". ", ".").split(". "):
            print(f"{part}\n")
        print("Hence I gave this a required timeline score of " + str(final_response['score']))
        output_error = "A timeline for this article is not required. \n" \
                    + "\n" +final_response['Reason'] + "\n"+ "\nHence this timeline received a necessity score of " \
                    + str(final_response['score'])  + "\n"
        return False, output_error

def get_article_dict(input_list, df_train, df_test):
    llm = genai.GenerativeModel("gemini-1.5-flash-latest")

    # Initialize the generative model
    class Event(BaseModel):
        Article_id: list = Field(description="Article ids that are most relevant for the generation of the timeline")
            

    output_parser = JsonOutputParser(pydantic_object=Event)

    # See the prompt template you created for formatting
    format_instructions = output_parser.get_format_instructions()

    template = '''
    Task Description: Given the following test article, and the relevant tags of that article, and the contents of articles similar to it.
    You will only select the articles that are closest in similarity to the test article, \
    for which i will be able to leverage on to build a timeline upon. 
    Return the article ids for the chosen articles. 
    Ensure that the chosen articles are relevant in terms of geographical location, main topic and whether or not they are talking about the same event or topic.
    {text}

    {format_instructions}
    Check and ensure again that the output follows the format instructions above very strictly. 
    '''

    # Create the prompt template
    prompt = PromptTemplate(
        input_variables=["text"],
        partial_variables={"format_instructions": format_instructions},
        template=template,
    )

    final_prompt = prompt.format(text=input_list)
    response = llm.generate_content(
            final_prompt,
            safety_settings={
                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE
            }
        )
    new_output = re.search(r'\[[^\]]*\]', response.parts[0].text).group(0)
    article_keys =  json.loads(new_output)
    if not article_keys:
        print("No useful similar articles found in database for timeline generation.\n")
        sys.exit()
    
    similar_articles_dict = {}
    
    # Iterate over each test article in the filtered df_test
    for index, test_row in df_test.iterrows():
        test_cluster_label = test_row['Cluster_labels']
        
        # Filter df_train for the same cluster label
        df_train_cluster = df_train[df_train['Cluster_labels'] == test_cluster_label]
        
        # Find similar articles in df_train
        similar_indexes = []
        for train_index, train_row in df_train_cluster.iterrows():
            if train_row['st_id'] in article_keys:
                similar_indexes.append(train_index)
        
        # Store the result in the dictionary if there are at least 2 supporting articles
        if len(similar_indexes) >= 2:
            similar_articles_dict = {
                'Title': test_row['Title'],
                'indexes': similar_indexes,
                'Text': test_row['Text'],
            }
    if not similar_articles_dict:
        print("There are insufficient relevant articles to construct a meaningful timeline. ... Exiting execution now\n")
        return "generate_similar_error"
    else:
        # Print results 
        print("-"*80 + "\n")
        print(f"Test Article Title: << {similar_articles_dict['Title']}>>\n")
        print("Selected Supporting Article Titles:")
        for idx in similar_articles_dict['indexes']:
            print(f" - {df_train.loc[idx, 'Title']}")
        print("\n" + "-"*80)
        return similar_articles_dict
        
def generate_and_sort_timeline(similar_articles_dict, df_train, df_test):
    llm = genai.GenerativeModel('gemini-1.5-flash-latest' )
    
    class Event(BaseModel):
        Date: str = Field(description="The date of the event in YYYY-MM-DD format")
        Event: str = Field(description="A detailed description of the important event")
        Article: int = Field(description="The article number from which the event was extracted")

    output_parser = JsonOutputParser(pydantic_object=Event)

    # See the prompt template you created for formatting
    format_instructions = output_parser.get_format_instructions()

    template = '''
    Given an article, containing a publication date, title, and content, your task is to construct a detailed timeline of events leading up to the main event described in the article.
    Begin by thoroughly analyzing the title, content, and publication date of the article to understand the main event in the article. 
    the dates are represented in YYYY-MM-DD format. Identify events, context, and any time references such as "last week," "last month," or specific dates. 
    The article could contain more or one key events. 
    If the article does not provide a publication date or any events leading up to the main event, return NAN in the Date field, and 0 i the Article Field

    Construct the Timeline:
    Chronological Order: Organize the events chronologically, using the publication dates and time references within the articles.
    Detailed Descriptions: Provide detailed descriptions of each event, explaining how it relates to the main event of the first article.
    Contextual Links: Use information from the articles to link events together logically and coherently.
    Handle Ambiguities: If an article uses ambiguous time references, infer the date based on the publication date of the article and provide a clear rationale for your inference.

    Contextual Links:
    External Influences: Mention any external influences (e.g., global conflicts, economic trends, scientific discoveries) that might have indirectly affected the events.
    Internal Issues: Highlight any internal issues or developments (e.g., political changes, organizational restructuring, societal movements) within the entities involved that might have impacted the events.
    Efforts for Improvement: Note any indications of efforts to improve the situation (e.g., policy changes, strategic initiatives, collaborative projects) despite existing challenges.

    Be as thorough and precise as possible, ensuring the timeline accurately reflects the sequence and context of events leading to the main event.

    Article:
    {text}

    {format_instructions}
    Check and ensure again that the output follows the format instructions above very strictly. 
    '''

    prompt = PromptTemplate(
        input_variables=["text"],
        partial_variables={"format_instructions": format_instructions},
        template=template
    )
    
    def generate_individual_timeline(date_text_triples):
        s =  f'Article {date_text_triples[0]}: Publication date: {date_text_triples[1]} Article Text: {date_text_triples[2]}'
        final_prompt = prompt.format(text=s)
        response = llm.generate_content(final_prompt,
                                        safety_settings={
                                            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                                            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                                            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE, 
                                            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE
                                            })
        # Check if Model returns correct format 
        if '[' in response.parts[0].text or '{' in response.parts[0].text:
            result = response.parts[0].text
        else:
            retry_response = llm.generate_content(final_prompt,
                                        safety_settings={
                                            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                                            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                                            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE, 
                                            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE
                                            })
            try:
                result = retry_response.parts[0].text
            except ValueError:
                print("ERROR: There were issues with the generation of the timeline. The timeline could not be generated")
                return
        return result
    
    def process_articles(df_train):
        df_retrieve = df_train.loc[similar_articles_dict['indexes']]
        df_retrieve = pd.concat([df_retrieve, df_test], axis=0).iloc[::-1].reset_index(drop=True)

        # Prepare texts and publication dates
        indiv_numbers = list(range(1,len(df_retrieve)+1))
        indiv_text = df_retrieve['combined'].tolist()
        indiv_dates = df_retrieve['Publication_date'].tolist()
        date_text_triples = list(zip(indiv_numbers, indiv_text, indiv_dates))

        dict_of_timelines = {}
        
        with ThreadPoolExecutor(max_workers=len(date_text_triples)) as executor:
            futures = {executor.submit(generate_individual_timeline, date_text_triple): date_text_triple for date_text_triple in date_text_triples}
            i = 0
            for future in as_completed(futures):
                dict_of_timelines[i] = future.result()
                i += 1
        return dict_of_timelines, df_retrieve
    
    timeline_dic, df_retrieve = process_articles(df_train)
    # df_retrieve = df_train.loc[similar_articles_dict['indexes']]
    # df_retrieve = pd.concat([df_retrieve, df_test], axis=0).iloc[::-1].reset_index(drop=True)
    # return df_retrieve
    print("The first timeline has been generated\n")
    generated_timeline = []
    for idx, line in timeline_dic.items():
        indiv_timeline = clean_output(line)
        if type(indiv_timeline) == list:
            for el in indiv_timeline:
                generated_timeline.append(el)
        else:
            generated_timeline.append(indiv_timeline)
    
    unsorted_timeline = []
    for event in generated_timeline:
        article_index = event["Article"] - 1
        event["Article_id"] = df_retrieve.iloc[article_index].st_id
    for event in generated_timeline:
        del event["Article"]
        unsorted_timeline.append(event)  
        
    timeline = sorted(unsorted_timeline, key=lambda x:x['Date'])
    finished_timeline = [event for event in timeline if event['Date'].lower()!= 'nan']
    for i in range(len(finished_timeline)):
        date = finished_timeline[i]['Date']
        if date.endswith('-XX-XX') or date.endswith('00-00'):
            finished_timeline[i]['Date'] = date[:4]
        elif date.endswith('-XX') or date.endswith('00'):
            finished_timeline[i]['Date'] = date[:7]
    return finished_timeline, df_retrieve

def enhance_timeline(timeline):
    print("\nProceeding to enhance the timeline...\n")
    llm = genai.GenerativeModel(model_name='gemini-1.5-flash-latest')

    class Event(BaseModel):
            Date: str = Field(description="The date of the event in YYYY-MM-DD format")
            Event: str = Field(description="A detailed description of the event")
            Contextual_Annotation: str = Field(description="Contextual anecdotes of the event.")
            Article_id: list = Field(description="The article id(s) from which the event was extracted")

    parser = JsonOutputParser(pydantic_object=Event)

    template = '''
        You are given a timeline of events, your task is to enhance this timeline by improving its clarity and contextual information.
        IF the same event occurs on the exact same date, merge these events to avoid redundancy, and add the article ids to a list. 
        Add contextual annotations by providing brief annotations for major events to give additional context and improve understanding.
        Only retain important information that would be value-add when the general public reads the information.

        Initial Timeline:
        {text}

        {format_instructions}
        Ensure that the format follows the example output format strictly before returning the output.
        '''
    prompt = PromptTemplate(
            input_variables=["text"],
            template=template
        )
            
    def generate_enhanced(batch):
        batch_timeline_text = json.dumps(batch)
        final_prompt = prompt.format(text=batch_timeline_text, format_instructions=parser.get_format_instructions())
        response = llm.generate_content(final_prompt,
            safety_settings={
                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE, 
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE
            }
        )
        data = extract_json_from_string(response.parts[0].text)
        return data

    def process_articles(timeline):
        results = []
        batches = split_batches(timeline, max_batch_size=30)
        num_batches = len(batches)

        with ThreadPoolExecutor(max_workers=num_batches) as executor:
            print("Processing batches simultaneously now...\n")
            futures = {executor.submit(generate_enhanced, batch): batch for batch in batches}
            for future in as_completed(futures):
                indiv_batch = future.result()
                for event in indiv_batch:
                    results.append(event)
        return results

    full_enhanced = process_articles(timeline)
    sorted_timeline = sorted(full_enhanced, key=lambda x:x['Date'])
    print("Finished enhancing the timeline\n")
    return sorted_timeline

def save_enhanced_timeline(enhanced_timeline, df_retreive):
    """
    Save the enhanced timeline to a JSON file.

    Parameters:
    enhanced_timeline (list): The enhanced timeline data.
    output_path (str): The file path where the JSON will be saved.
    """
    
    def edit_timeline(timeline):
        for event in timeline:
            new_date = format_timeline_date(event['Date'])
            event['Date'] = new_date
            # Check if Contextual Annotation empty    
            if not event['Contextual_Annotation']:
                event['Contextual_Annotation'] = "NONE"
        return timeline

    edited_timeline = edit_timeline(enhanced_timeline)
    id_url_pairs = {}
    for event in edited_timeline:
            id_list = event['Article_id']
            for i in range(len(id_list)):
                id = id_list[i]
                id_url = df_retreive[df_retreive['st_id'] == id]['article_url'].values
                id_url_pairs[id] = id_url
                id_list[i] = id_url
            event['Article_URL'] = [array.tolist() for array in id_list if array.size > 0]
            event.pop('Article_id')
    
    json_data = json.dumps(edited_timeline, indent=4, ensure_ascii=False)
    return json_data

def generate_save_timeline(relevant_articles, df_train, df_test):
    similar_articles = get_article_dict(relevant_articles, df_train, df_test)
    if similar_articles == "generate_similar_error":
        return "Error02"
    generated_timeline, df_retrieve = generate_and_sort_timeline(similar_articles, df_train, df_test)
    final_timeline = enhance_timeline(generated_timeline)
    final_timeline = save_enhanced_timeline(final_timeline, df_retrieve)
    return final_timeline



def main_hierarchical(test_article, df_train):
    #check if the test point is worth generating a timeline. 
    to_generate, reason01 = to_generate_timeline(test_article)
    if to_generate:
        df_test = pd.DataFrame([test_article])
        relevant_articles, df_train, df_test = generate_clusters(df_train, df_test)
        final_timeline = generate_save_timeline(relevant_articles, df_train, df_test)
        if final_timeline=="Error02":
            reason02 = "There are insufficient relevant articles to construct a meaningful timeline. "
            return "generate_similar_articles_error", reason02
        return final_timeline, None
    else:
        return "to_generate_error", reason01    

def load_mongodb():
    print("Fetching article data from MongoDB...\n")
    
    # Connect to the MongoDB client
    try:
        db = mongo_client[config["database"]["name"]]
        train_documents = db[config["database"]["train_collection"]].find()
        print("Train data successfully fetched from MongoDB\n")
    except Exception as error: 
        print(f"Unable to fetch train data from MongoDB. Check your connection the database...\nERROR: {error}\n")
        sys.exit()   
    try:
        test_docs = db[config["database"]["test_collection"]].find()
        print("Test data successfully fetched from MongoDB\n")
    except:
        print(f"Unable to fetch test data from MongoDB. Check your connection the database...\nERROR: {error}\n")
        sys.exit()
    return train_documents, test_docs

def gradio_generate_timeline(index):
    print("Starting Timeline Generation\n")
    
    train_database, test_database = load_mongodb()
    
    def count_test_length(test_database):
        count = 0
        for doc in test_database:
            count += 1
        return count

    # Select the test article based on the given index
    test_article = test_database[index-1]
    
    df_train = pd.DataFrame(train_database)
    
    # Validate the index
    if index < 0 or index >= count_test_length(test_database):
        return {"error": "Index out of range"}

    # Run this after gradio workflow tested
    timeline, fail_reason = main_hierarchical(test_article, df_train)
    
    
    # def for_testing():
    #     with open('../public/data_upload/final_timeline.json', 'r') as json_file:
    #         timeline = json.load(json_file)
    #     fail_reason = "NIL"
    #     return timeline, fail_reason
    # timeline, fail_reason = for_testing()
    
    
    # Pull database
    db = mongo_client[config["database"]["name"]]
    
    # Get collection from database
    gen_timeline_documents = db[config["database"]["timelines_collection"]]
    
    test_article_id = test_article['st_id']
    test_article_title = test_article['Title']
    # If timeline should not be generated
    if timeline == "to_generate_error" or timeline == "generate_similar_articles_error":
        
        # Timeline instance to return for error message
        timeline_return = {"Article_id": test_article_id, 
                           "Article_Title": test_article_title, 
                           "error": fail_reason}
        
        # Timeline instance to export to MongoDB
        timeline_export = {"Article_id": test_article_id, 
                           "Article_Title": test_article_title, 
                           "Timeline": "null"}
        try:
            # Insert result into collection
            gen_timeline_documents.insert_one(timeline_export)
            print("Data successfully saved to MongoDB")
        except Exception as error:
            print(f"Unable to save timeline to database. Check your connection the database...\nERROR: {error}\n")
            sys.exit()
            
    else:
        # Convert the timeline to JSON
        timeline_json = json.dumps(timeline)
        timeline_return = {"Article_id": test_article_id, 
                           "Article_Title": test_article_title, 
                           "Timeline": timeline_json}
        timeline_export = timeline_return
        
        # Send the timeline data to MongoDB
        try:
            # Insert result into collection
            gen_timeline_documents.insert_one(timeline_export)
            print("Data successfully saved to MongoDB")
        except Exception as error:
            print(f"Unable to save timeline to database. Check your connection the database...\nERROR: {error}\n")
            sys.exit()
    return timeline_return

def display_timeline(timeline_str):
    print("Displaying timeline on Gradio Interface \n")
    if isinstance(timeline_str, str):
        # Used for testing
        timeline_list = json.loads(timeline_str)
    else:
        timeline_list = timeline_str
    display_list= timeline_list[:len(timeline_list)//2]
    html_content = '''
                    <div style='text-align: center;'><strong>
                    First half of events in the timeline:\n
                    </strong></div>
                    <div style='padding: 10px;'>'''
    for event in display_list:
        html_content += f"<h3>{event['Date']}</h3>"
        html_content += f"<p><strong>Event:</strong> {event['Event']}</p>"
        html_content += f"<p><strong>Contextual Annotation:</strong> {event['Contextual_Annotation']}</p>"
        #Display only the first 60 chars of the first article url
        html_content += "<p><strong>Article IDs:</strong> " + event['Article_URL'][0][0][:60] + "</p>"
        html_content += "<hr>"
    html_content += "</div>"
    return html_content

def display_gradio():
    with gr.Blocks(title="Article Timeline Generator", theme='snehilsanyal/scikit-learn') as gradio_timeline:
        gr.Markdown("""
            <h1 style='text-align: center;'>
            Timeline Generator
            </h1>
            <hr>
            <h3>
            Choose an article index to generate a timeline using the test database.
            </h3>
        """)
        
        with gr.Column():
            with gr.Row():
                with gr.Column():
                    input_test_index = gr.Number(label="Test Article Index. Choose an index from 1-7 (Number of test articles)", value=0)
                    hidden_article_id = gr.Textbox(visible=False)

                    with gr.Row():
                        clear_button = gr.Button("Reset index")
                        generate_button = gr.Button("Generate Timeline")
                    shown_article_title = gr.Textbox(label="Title of chosen article")
                    output_timeline = gr.JSON(label="Generated Timeline in JSON format", visible=False)
                    gr.Markdown('''
                                If Error message is not shown past the 7 second mark, a timeline is necessary for the chosen article. 
                                ''')
                    output_error = gr.Textbox(label="Error Message:")  
                    user_download_button = gr.DownloadButton("Download Generated Timeline")
                
                with gr.Column():
                    show_timeline_button = gr.Button("Show Generated Timeline")
                    output_timeline_HTML = gr.HTML()
        
        clear_button.click(lambda: 0, None, input_test_index)
        
        def handle_generate_timeline(index):
                result = gradio_generate_timeline(index)
                article_id = result['Article_id']
                article_title = result['Article_Title']
                if "error" in result:
                    timeline_error = result["error"]
                    return timeline_error, None, article_id, article_title
                else:
                    
                    timeline = result['Timeline']
                    return "NIL, Press Show Generated Timeline to display generated timeline", timeline, article_id, article_title

        generate_button.click(
                handle_generate_timeline,
                inputs=input_test_index,
                outputs=[output_error, output_timeline, hidden_article_id, shown_article_title]
            )
        show_timeline_button.click(
                display_timeline,
                inputs=output_timeline,
                outputs=output_timeline_HTML
            )
        user_download_button.click(
            inputs=output_timeline
        )
    gradio_timeline.launch(inbrowser=True)        

In [99]:
display_gradio()

Running on local URL:  http://127.0.0.1:7866

To create a public link, set `share=True` in `launch()`.


Starting Timeline Generation

Fetching article data from MongoDB...

Train data successfully fetched from MongoDB

Test data successfully fetched from MongoDB

Data successfully saved to MongoDB
Displaying timeline on Gradio Interface 



In [None]:
print(after_timeline)

In [37]:
retrieval

Unnamed: 0,_id,Text,Title,embeddings,combined,tags,tags_embeddings,Title_embeddings,Publication_date,article_url,st_id,phrase_Bert_tags_embeddings,Cluster_labels
0,6666ac3f6619e3e180cbbadf,BRUSSELS - European Council president Charles ...,EU leaders to hold emergency virtual summit on...,"[-0.015096, 0.010564, 0.004007, 0.049611, -0.0...",Title: EU leaders to hold emergency virtual su...,"[EU, Emergency summit, Israel-Hamas, Gaza Stri...","[0.002247289987280965, 0.0027428099419921637, ...","[0.010236, -0.079974, -0.027459, 0.040262, -0....",2023-10-15,https://www.straitstimes.com/world/middle-east...,st_1155048,"[[-0.014690160751342773, 0.142982617020607, 0....",169
1,6666ac3d6619e3e180cbba1e,GENEVA - The heads of several major Unite...,UN bodies make united call for humanitarian ce...,"[0.023583, 0.036601, 0.038033, 0.012386, -0.04...",Title: UN bodies make united call for humanita...,"[Gaza, Ceasefire, United Nations, Human Rights...","[0.019476890563964844, 0.046852633357048035, 0...","[-0.005602, 0.028372, 0.008979, -0.023546, -0....",2023-11-06,https://www.straitstimes.com/world/middle-east...,st_1160289,"[[-0.24365365505218506, 0.3214958608150482, -1...",169
2,6666ac3d6619e3e180cbb9f7,EU countries are still discussing the idea of ...,EU continues talks on humanitarian ceasefire i...,"[0.034244, 0.034936, 0.002737, 0.039488, -0.05...",Title: EU continues talks on humanitarian ceas...,"[EU, Ceasefire, Israel-Hamas conflict, Gaza, S...","[0.004376180469989777, 0.021570585668087006, -...","[-0.011841, 0.008162, -0.005965, 0.006027, -0....",2023-10-23,https://www.straitstimes.com/asia/eu-continues...,st_1157082,"[[-0.014690160751342773, 0.142982617020607, 0....",169
3,6666ac3d6619e3e180cbb70a,WASHINGTON -The White House on Thursday said ...,White House suggests 'pauses' in Israel-Hamas ...,"[-0.038704, 0.107234, 0.007981, -0.002003, -0....",Title: White House suggests 'pauses' in Israel...,"[Israel-Hamas conflict, Humanitarian aid, Gaza...","[0.0010268694022670388, 0.030386492609977722, ...","[-0.037108, 0.076929, -0.027228, 0.011765, -0....",2023-11-03,https://www.straitstimes.com/world/white-house...,st_1159581,"[[0.15862636268138885, 0.06434283405542374, -1...",169
4,6666ac3d6619e3e180cbb57e,WASHINGTON – Foreign ministers of the Group of...,G-7 foreign ministers support extension of pau...,"[0.021042, 0.064334, 0.021708, -0.012606, -0.0...",Title: G-7 foreign ministers support extension...,"[G-7, Gaza Conflict, Truce, Hostage Release, H...","[0.020464923232793808, 0.028035195544362068, 0...","[0.006454, 0.052647, 0.006514, 3e-05, -0.00378...",2023-11-29,https://www.straitstimes.com/world/middle-east...,st_1165295,"[[-0.2771241366863251, 0.24954593181610107, -0...",169
5,6666ac3d6619e3e180cbb535,KHAN YOUNIS/TEL AVIV - Gazans desperate f...,"Gazans call for truce to be extended, Israelis...","[-0.028345, 0.073631, 0.008542, 0.0023, -0.035...","Title: Gazans call for truce to be extended, I...","[Gaza, Israel, Truce, Hamas, Hostages, Conflict]","[0.017820250242948532, 0.06469585746526718, -0...","[0.0163, 0.076505, -0.018098, -0.004668, -0.04...",2023-11-27,https://www.straitstimes.com/world/middle-east...,st_1164990,"[[-0.24365365505218506, 0.3214958608150482, -1...",169
6,6666ac3d6619e3e180cbb394,TOKYO – Japan will provide US$65 million (S$88...,Japan to provide $88 million in additional hum...,"[0.03002, 0.071634, 0.032291, 0.027776, 0.0074...",Title: Japan to provide $88 million in additio...,"[Japan, Palestine, Humanitarian aid, Gaza conf...","[0.02514977753162384, 0.028258027508854866, -0...","[0.003842, 0.051284, 0.011968, 0.000421, 0.003...",2023-11-04,https://www.straitstimes.com/asia/japan-to-pro...,st_1159793,"[[-0.46366867423057556, 0.14384961128234863, -...",169


In [76]:
def get_timeline():
    timeline = [
        {
            "Date": "07 October 2023",
            "Event": "Hamas launched a surprise attack on southern Israel, resulting in the deaths of 1,200 people, including babies and children, and the capture of over 200 hostages. This marked the start of the conflict.",
            "Contextual_Annotation": "This attack by Hamas triggered Israel's declaration of war on Gaza, marking the beginning of a devastating conflict.",
            "Article_id": [
                "st_1165295",
                "st_1160289",
                "st_1164990",
                "st_1159581"
            ]
        },
        {
            "Date": "08 October 2023",
            "Event": "Hamas launched a series of attacks on Israelis, which the article describes as \"brutal terrorist attacks.\"",
            "Contextual_Annotation": "Continued attacks by Hamas escalated the conflict and intensified the brutality of the situation.",
            "Article_id": [
                "st_1155048"
            ]
        },
        {
            "Date": "15 October 2023",
            "Event": "European Council President Charles Michel announced a video conference summit of EU leaders to discuss the Israel-Hamas conflict, scheduled for Tuesday, October 17th at 5:30 PM CET (11:30 PM Singapore time).",
            "Contextual_Annotation": "The international community began to convene to discuss the conflict and consider possible solutions.",
            "Article_id": [
                "st_1155048"
            ]
        },
        {
            "Date": "17 October 2023",
            "Event": "The EU leaders held the emergency virtual summit, focusing on the situation in Gaza and Israel's response to the Hamas attacks. The summit aimed to establish a unified EU position and course of action regarding the conflict.",
            "Contextual_Annotation": "EU leaders sought to coordinate their response to the conflict and develop a unified approach.",
            "Article_id": [
                "st_1155048"
            ]
        },
        {
            "Date": "23 October 2023",
            "Event": "EU foreign ministers met in Luxembourg to discuss the ongoing war between Israel and Hamas, specifically focusing on the possibility of a humanitarian ceasefire and alternative methods of delivering aid to Palestinians in Gaza. Swedish foreign minister Tobias Billstrom emphasized that the primary focus should be on providing aid, suggesting a UN-proposed humanitarian corridor as a preferred approach. Josep Borrell, EU foreign policy chief, expressed support for a \"humanitarian pause,\" but certain ministers within the bloc expressed reservations about this idea.",
            "Contextual_Annotation": "The EU continued to engage in discussions about the conflict, focusing on humanitarian concerns and exploring potential solutions, including a possible ceasefire.",
            "Article_id": [
                "st_1157082"
            ]
        },
        {
            "Date": "27 October 2023",
            "Event": "The conflict in Gaza intensified, marking one month since the initial Hamas attack on Israel.",
            "Contextual_Annotation": "The conflict continued to escalate with no immediate signs of de-escalation.",
            "Article_id": [
                "st_1160289"
            ]
        },
        {
            "Date": "01 November 2023",
            "Event": "Japan's Foreign Minister Yoko Kamikawa embarked on a tour of Israel and Jordan, expressing Japan's concern over the escalating conflict in Gaza and its commitment to providing humanitarian aid to Palestinians.",
            "Contextual_Annotation": "Japan actively engaged in diplomatic efforts and humanitarian aid provision to address the crisis.",
            "Article_id": [
                "st_1159793",
                "st_1159581"
            ]
        },
        {
            "Date": "01 November 2023",
            "Event": "A crossing to Egypt opened, allowing 79 U.S. citizens to leave the Gaza Strip.",
            "Contextual_Annotation": "A limited evacuation of U.S. citizens from Gaza was facilitated.",
            "Article_id": [
                "st_1159581"
            ]
        },
        {
            "Date": "02 November 2023",
            "Event": "More U.S. citizens were expected to be released from Gaza. 55 additional aid trucks entered Gaza, bringing the total to over 220. President Joe Biden spoke about the need for a pause in the conflict to facilitate the release of hostages held by Hamas. Senator Dick Durbin called for a ceasefire. Senator Patty Murray called for a humanitarian pause to allow aid delivery to civilians. United Nations experts urged a humanitarian ceasefire in Gaza, citing the risk of genocide for Palestinians.",
            "Contextual_Annotation": "International calls for a ceasefire or humanitarian pauses intensified, with concerns about the humanitarian crisis in Gaza growing.",
            "Article_id": [
                "st_1159581"
            ]
        },
        {
            "Date": "03 November 2023",
            "Event": "U.N. aid chief Martin Griffiths called for humanitarian 'pauses' in Israel's bombardment of Gaza to facilitate aid deliveries, which had significantly declined compared to pre-conflict levels. During her visit, Ms. Kamikawa met with both Israeli Foreign Minister Eli Cohen and Palestinian counterpart Riyad al-Maliki, reiterating Japan's support for a two-state solution and emphasizing the importance of peaceful coexistence between Israel and Palestine to prevent future conflicts. The White House proposed temporary and localized pauses in the conflict to facilitate humanitarian aid and the safe evacuation of people from Gaza. The White House reiterated its opposition to a full ceasefire. The U.S. military officers traveled to Israel to share lessons learned from past urban conflicts. The U.S. continued to provide weapons and advice to Israel, urging them to minimize civilian casualties.",
            "Contextual_Annotation": "International efforts to address the humanitarian crisis in Gaza continued, with calls for pauses in fighting and aid delivery. Japan highlighted its support for a two-state solution and peaceful coexistence, while the U.S. continued to support Israel with weapons and advice, emphasizing the need to minimize civilian casualties.",
            "Article_id": [
                "st_1160289",
                "st_1159793",
                "st_1159581"
            ]
        },
        {
            "Date": "04 November 2023",
            "Event": "Japan announced its decision to provide an additional US$65 million in humanitarian aid to Palestinians in response to the crisis in Gaza. This aid aims to address the humanitarian needs of Palestinians affected by the ongoing conflict. Japan also expressed its intention to provide material aid to war-torn Gaza, highlighting the need for immediate support to alleviate the suffering of civilians. Japan's commitment to humanitarian aid for Palestinians and its call for peaceful coexistence comes against the backdrop of a deepening crisis in Gaza, characterized by escalating violence and international pressure for a humanitarian pause in hostilities. While Ms. Kamikawa refrained from directly commenting on the legality of Israel's actions in Gaza, she emphasized the importance of upholding human rights and preventing needless civilian casualties.",
            "Contextual_Annotation": "Japan increased its humanitarian aid commitment to Palestinians in Gaza, emphasizing the need for immediate support and highlighting the importance of human rights and preventing civilian casualties.",
            "Article_id": [
                "st_1159793"
            ]
        },
        {
            "Date": "06 November 2023",
            "Event": "The heads of several major UN bodies issued a joint statement calling for an immediate humanitarian ceasefire in Gaza, citing the dire humanitarian situation and unacceptable conditions for the civilian population. Palestinian envoy to the UN Riyad Mansour urged for a full ceasefire, emphasizing the need to save lives and cease all hostilities. Israel maintained its stance against a ceasefire, emphasizing the release of hostages held by Hamas militants as a precondition for any ceasefire agreement.",
            "Contextual_Annotation": "The United Nations and Palestinian representatives called for a ceasefire, while Israel maintained its position that the release of hostages was a precondition for any ceasefire agreement.",
            "Article_id": [
                "st_1160289"
            ]
        },
        {
            "Date": "09 November 2023",
            "Event": "Japan is scheduled to host the foreign ministers of the G7 nations in Tokyo, providing a platform for discussions on the Gaza crisis and potential solutions. This event is likely to further amplify international attention and pressure on all parties involved to find a peaceful resolution.",
            "Contextual_Annotation": "The upcoming G7 meeting in Japan provided an opportunity for international discussion and potential solutions to the crisis in Gaza.",
            "Article_id": [
                "st_1159793"
            ]
        },
        {
            "Date": "19 November 2023",
            "Event": "Yemen's Iran-backed Houthis seized the Galaxy Leader commercial ship and its crew, raising concerns about the safety of international shipping lanes.",
            "Contextual_Annotation": "This incident in Yemen raised concerns about maritime security and potentially destabilized the region.",
            "Article_id": [
                "st_1165295"
            ]
        },
        {
            "Date": "23 November 2023",
            "Event": "Israel responded to Hamas's attack with a full-scale assault on Gaza, leading to the deaths of 14,800 Palestinians, including 40% children under the age of 18.",
            "Contextual_Annotation": "Israel's counteroffensive resulted in a significant loss of life in Gaza, highlighting the human cost of the conflict.",
            "Article_id": [
                "st_1164990"
            ]
        },
        {
            "Date": "24 November 2023",
            "Event": "A truce agreement was reached between Hamas and Israel, providing Gaza with its first respite after seven weeks of Israeli bombardment. A four-day truce was initiated, aiming to provide a temporary ceasefire and allow for negotiations.",
            "Contextual_Annotation": "The truce brought a temporary end to the fighting, allowing for a period of negotiations and potential for a more permanent solution.",
            "Article_id": [
                "st_1165295",
                "st_1164990"
            ]
        },
        {
            "Date": "26 November 2023",
            "Event": "The first phase of hostage releases began, with 58 hostages being freed from Gaza.",
            "Contextual_Annotation": "The release of hostages marked a significant step towards de-escalation and potential peace talks.",
            "Article_id": [
                "st_1164990"
            ]
        },
        {
            "Date": "27 November 2023",
            "Event": "Negotiators from Egypt, Qatar, and the United States worked to persuade Hamas and Israel to extend the truce, as Gazans appealed for its continuation. The release of additional hostages was planned for the same day.",
            "Contextual_Annotation": "International actors engaged in efforts to extend the truce and facilitate a lasting peace agreement.",
            "Article_id": [
                "st_1164990"
            ]
        },
        {
            "Date": "28 November 2023",
            "Event": "G-7 foreign ministers issued a joint statement supporting the extension of the pause in fighting in Gaza, urging the release of all hostages and emphasizing Israel's right to defend itself while stressing the importance of protecting civilians. Hamas released 12 more hostages, bringing the total number released since the truce began to 81. Israel released 30 Palestinian prisoners in return.",
            "Contextual_Annotation": "The G7 expressed its support for the truce and urged a peaceful resolution, emphasizing the importance of civilian protection. The exchange of hostages continued as part of the ongoing negotiations.",
            "Article_id": [
                "st_1165295"
            ]
        }
    ]
    return timeline
timeline = get_timeline()

In [77]:
id_url_pairs = {}
for event in timeline:
        id_list = event['Article_id']
        for i in range(len(id_list)):
            id = id_list[i]
            id_url = retrieval[retrieval['st_id'] == id]['article_url'].values
            id_url_pairs[id] = id_url
            id_list[i] = id_url
        event['Article_id'] = [array.tolist() for array in id_list if array.size > 0]
        # event.pop('Article_id')
        # event[Article_u]

In [78]:
timeline

[{'Date': '07 October 2023',
  'Event': 'Hamas launched a surprise attack on southern Israel, resulting in the deaths of 1,200 people, including babies and children, and the capture of over 200 hostages. This marked the start of the conflict.',
  'Contextual_Annotation': "This attack by Hamas triggered Israel's declaration of war on Gaza, marking the beginning of a devastating conflict.",
  'Article_id': [['https://www.straitstimes.com/world/middle-east/g7-foreign-ministers-support-extension-of-pause-in-fighting-in-gaza'],
   ['https://www.straitstimes.com/world/middle-east/un-bodies-make-united-call-for-humanitarian-ceasefire-in-gaza'],
   ['https://www.straitstimes.com/world/middle-east/gazans-call-for-truce-to-be-extended-israelis-divided-on-the-issue'],
   ['https://www.straitstimes.com/world/white-house-suggests-pauses-in-israel-hamas-conflict-to-get-people-out']]},
 {'Date': '08 October 2023',
  'Event': 'Hamas launched a series of attacks on Israelis, which the article describes

In [56]:
uniq = []
for event in timeline:
    for id in event['Article_id']:
        if id not in uniq:
            uniq.append(id)
print(uniq)
print(len(uniq))

[array([], dtype=object), array(['https://www.straitstimes.com/world/middle-east/un-bodies-make-united-call-for-humanitarian-ceasefire-in-gaza'],
      dtype=object), array(['https://www.straitstimes.com/world/white-house-suggests-pauses-in-israel-hamas-conflict-to-get-people-out'],
      dtype=object), array([], dtype=object), array(['https://www.straitstimes.com/world/middle-east/g7-foreign-ministers-support-extension-of-pause-in-fighting-in-gaza'],
      dtype=object), array([], dtype=object), array(['https://www.straitstimes.com/world/middle-east/eu-leaders-to-hold-emergency-virtual-summit-on-middle-east-on-tuesday-0'],
      dtype=object), array(['https://www.straitstimes.com/asia/eu-continues-talks-on-humanitarian-ceasefire-in-israel-hamas-war'],
      dtype=object), array([], dtype=object), array([], dtype=object), array([], dtype=object), array([], dtype=object), array([], dtype=object), array([], dtype=object), array([], dtype=object), array([], dtype=object), array([], dtype=

  if id not in uniq:


In [None]:
def get_timeline():
    timeline = [
        {
            "Date": "07 October 2023",
            "Event": "Hamas launched a surprise attack on southern Israel, resulting in the deaths of 1,200 people, including babies and children, and the capture of over 200 hostages. This marked the start of the conflict.",
            "Contextual_Annotation": "This attack by Hamas triggered Israel's declaration of war on Gaza, marking the beginning of a devastating conflict.",
            "Article_id": [
                "st_1165295",
                "st_1160289",
                "st_1164990",
                "st_1159581"
            ]
        },
        {
            "Date": "08 October 2023",
            "Event": "Hamas launched a series of attacks on Israelis, which the article describes as \"brutal terrorist attacks.\"",
            "Contextual_Annotation": "Continued attacks by Hamas escalated the conflict and intensified the brutality of the situation.",
            "Article_id": [
                "st_1155048"
            ]
        },
        {
            "Date": "15 October 2023",
            "Event": "European Council President Charles Michel announced a video conference summit of EU leaders to discuss the Israel-Hamas conflict, scheduled for Tuesday, October 17th at 5:30 PM CET (11:30 PM Singapore time).",
            "Contextual_Annotation": "The international community began to convene to discuss the conflict and consider possible solutions.",
            "Article_id": [
                "st_1155048"
            ]
        },
        {
            "Date": "17 October 2023",
            "Event": "The EU leaders held the emergency virtual summit, focusing on the situation in Gaza and Israel's response to the Hamas attacks. The summit aimed to establish a unified EU position and course of action regarding the conflict.",
            "Contextual_Annotation": "EU leaders sought to coordinate their response to the conflict and develop a unified approach.",
            "Article_id": [
                "st_1155048"
            ]
        },
        {
            "Date": "23 October 2023",
            "Event": "EU foreign ministers met in Luxembourg to discuss the ongoing war between Israel and Hamas, specifically focusing on the possibility of a humanitarian ceasefire and alternative methods of delivering aid to Palestinians in Gaza. Swedish foreign minister Tobias Billstrom emphasized that the primary focus should be on providing aid, suggesting a UN-proposed humanitarian corridor as a preferred approach. Josep Borrell, EU foreign policy chief, expressed support for a \"humanitarian pause,\" but certain ministers within the bloc expressed reservations about this idea.",
            "Contextual_Annotation": "The EU continued to engage in discussions about the conflict, focusing on humanitarian concerns and exploring potential solutions, including a possible ceasefire.",
            "Article_id": [
                "st_1157082"
            ]
        },
        {
            "Date": "27 October 2023",
            "Event": "The conflict in Gaza intensified, marking one month since the initial Hamas attack on Israel.",
            "Contextual_Annotation": "The conflict continued to escalate with no immediate signs of de-escalation.",
            "Article_id": [
                "st_1160289"
            ]
        },
        {
            "Date": "01 November 2023",
            "Event": "Japan's Foreign Minister Yoko Kamikawa embarked on a tour of Israel and Jordan, expressing Japan's concern over the escalating conflict in Gaza and its commitment to providing humanitarian aid to Palestinians.",
            "Contextual_Annotation": "Japan actively engaged in diplomatic efforts and humanitarian aid provision to address the crisis.",
            "Article_id": [
                "st_1159793",
                "st_1159581"
            ]
        },
        {
            "Date": "01 November 2023",
            "Event": "A crossing to Egypt opened, allowing 79 U.S. citizens to leave the Gaza Strip.",
            "Contextual_Annotation": "A limited evacuation of U.S. citizens from Gaza was facilitated.",
            "Article_id": [
                "st_1159581"
            ]
        },
        {
            "Date": "02 November 2023",
            "Event": "More U.S. citizens were expected to be released from Gaza. 55 additional aid trucks entered Gaza, bringing the total to over 220. President Joe Biden spoke about the need for a pause in the conflict to facilitate the release of hostages held by Hamas. Senator Dick Durbin called for a ceasefire. Senator Patty Murray called for a humanitarian pause to allow aid delivery to civilians. United Nations experts urged a humanitarian ceasefire in Gaza, citing the risk of genocide for Palestinians.",
            "Contextual_Annotation": "International calls for a ceasefire or humanitarian pauses intensified, with concerns about the humanitarian crisis in Gaza growing.",
            "Article_id": [
                "st_1159581"
            ]
        },
        {
            "Date": "03 November 2023",
            "Event": "U.N. aid chief Martin Griffiths called for humanitarian 'pauses' in Israel's bombardment of Gaza to facilitate aid deliveries, which had significantly declined compared to pre-conflict levels. During her visit, Ms. Kamikawa met with both Israeli Foreign Minister Eli Cohen and Palestinian counterpart Riyad al-Maliki, reiterating Japan's support for a two-state solution and emphasizing the importance of peaceful coexistence between Israel and Palestine to prevent future conflicts. The White House proposed temporary and localized pauses in the conflict to facilitate humanitarian aid and the safe evacuation of people from Gaza. The White House reiterated its opposition to a full ceasefire. The U.S. military officers traveled to Israel to share lessons learned from past urban conflicts. The U.S. continued to provide weapons and advice to Israel, urging them to minimize civilian casualties.",
            "Contextual_Annotation": "International efforts to address the humanitarian crisis in Gaza continued, with calls for pauses in fighting and aid delivery. Japan highlighted its support for a two-state solution and peaceful coexistence, while the U.S. continued to support Israel with weapons and advice, emphasizing the need to minimize civilian casualties.",
            "Article_id": [
                "st_1160289",
                "st_1159793",
                "st_1159581"
            ]
        },
        {
            "Date": "04 November 2023",
            "Event": "Japan announced its decision to provide an additional US$65 million in humanitarian aid to Palestinians in response to the crisis in Gaza. This aid aims to address the humanitarian needs of Palestinians affected by the ongoing conflict. Japan also expressed its intention to provide material aid to war-torn Gaza, highlighting the need for immediate support to alleviate the suffering of civilians. Japan's commitment to humanitarian aid for Palestinians and its call for peaceful coexistence comes against the backdrop of a deepening crisis in Gaza, characterized by escalating violence and international pressure for a humanitarian pause in hostilities. While Ms. Kamikawa refrained from directly commenting on the legality of Israel's actions in Gaza, she emphasized the importance of upholding human rights and preventing needless civilian casualties.",
            "Contextual_Annotation": "Japan increased its humanitarian aid commitment to Palestinians in Gaza, emphasizing the need for immediate support and highlighting the importance of human rights and preventing civilian casualties.",
            "Article_id": [
                "st_1159793"
            ]
        },
        {
            "Date": "06 November 2023",
            "Event": "The heads of several major UN bodies issued a joint statement calling for an immediate humanitarian ceasefire in Gaza, citing the dire humanitarian situation and unacceptable conditions for the civilian population. Palestinian envoy to the UN Riyad Mansour urged for a full ceasefire, emphasizing the need to save lives and cease all hostilities. Israel maintained its stance against a ceasefire, emphasizing the release of hostages held by Hamas militants as a precondition for any ceasefire agreement.",
            "Contextual_Annotation": "The United Nations and Palestinian representatives called for a ceasefire, while Israel maintained its position that the release of hostages was a precondition for any ceasefire agreement.",
            "Article_id": [
                "st_1160289"
            ]
        },
        {
            "Date": "09 November 2023",
            "Event": "Japan is scheduled to host the foreign ministers of the G7 nations in Tokyo, providing a platform for discussions on the Gaza crisis and potential solutions. This event is likely to further amplify international attention and pressure on all parties involved to find a peaceful resolution.",
            "Contextual_Annotation": "The upcoming G7 meeting in Japan provided an opportunity for international discussion and potential solutions to the crisis in Gaza.",
            "Article_id": [
                "st_1159793"
            ]
        },
        {
            "Date": "19 November 2023",
            "Event": "Yemen's Iran-backed Houthis seized the Galaxy Leader commercial ship and its crew, raising concerns about the safety of international shipping lanes.",
            "Contextual_Annotation": "This incident in Yemen raised concerns about maritime security and potentially destabilized the region.",
            "Article_id": [
                "st_1165295"
            ]
        },
        {
            "Date": "23 November 2023",
            "Event": "Israel responded to Hamas's attack with a full-scale assault on Gaza, leading to the deaths of 14,800 Palestinians, including 40% children under the age of 18.",
            "Contextual_Annotation": "Israel's counteroffensive resulted in a significant loss of life in Gaza, highlighting the human cost of the conflict.",
            "Article_id": [
                "st_1164990"
            ]
        },
        {
            "Date": "24 November 2023",
            "Event": "A truce agreement was reached between Hamas and Israel, providing Gaza with its first respite after seven weeks of Israeli bombardment. A four-day truce was initiated, aiming to provide a temporary ceasefire and allow for negotiations.",
            "Contextual_Annotation": "The truce brought a temporary end to the fighting, allowing for a period of negotiations and potential for a more permanent solution.",
            "Article_id": [
                "st_1165295",
                "st_1164990"
            ]
        },
        {
            "Date": "26 November 2023",
            "Event": "The first phase of hostage releases began, with 58 hostages being freed from Gaza.",
            "Contextual_Annotation": "The release of hostages marked a significant step towards de-escalation and potential peace talks.",
            "Article_id": [
                "st_1164990"
            ]
        },
        {
            "Date": "27 November 2023",
            "Event": "Negotiators from Egypt, Qatar, and the United States worked to persuade Hamas and Israel to extend the truce, as Gazans appealed for its continuation. The release of additional hostages was planned for the same day.",
            "Contextual_Annotation": "International actors engaged in efforts to extend the truce and facilitate a lasting peace agreement.",
            "Article_id": [
                "st_1164990"
            ]
        },
        {
            "Date": "28 November 2023",
            "Event": "G-7 foreign ministers issued a joint statement supporting the extension of the pause in fighting in Gaza, urging the release of all hostages and emphasizing Israel's right to defend itself while stressing the importance of protecting civilians. Hamas released 12 more hostages, bringing the total number released since the truce began to 81. Israel released 30 Palestinian prisoners in return.",
            "Contextual_Annotation": "The G7 expressed its support for the truce and urged a peaceful resolution, emphasizing the importance of civilian protection. The exchange of hostages continued as part of the ongoing negotiations.",
            "Article_id": [
                "st_1165295"
            ]
        }
    ]
    return timeline