## Data Loader

In [8]:
import json
import logging
import pandas as pd
from typing import Optional
from sklearn.preprocessing import StandardScaler


def load_single_json(file_path: str) -> Optional[dict]:
    """
    Read one JSON file and return its parsed content.

    Parameters:
    file_path (str): Path of the JSON file; it is opened as UTF-8 text.

    Returns:
    The parsed JSON content, or None when the file does not exist or does
    not contain valid JSON (an error is logged in both cases).
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            payload = json.load(handle)
            print("Files Loaded")
    except FileNotFoundError:
        logging.error(f"File '{file_path}' not found. Please check the file path.")
        return None
    except json.JSONDecodeError:
        logging.error(f"Error decoding JSON from file '{file_path}'. Please check the file content.")
        return None

    # Only reached when the file was opened and parsed successfully
    logging.info(f"JSON file '{file_path}' loaded successfully.")
    return payload

def combine_4_json(files):
    """
    Load several JSON files and concatenate their contents into one list.

    Despite the name, any number of files is accepted.

    Parameters:
    files (iterable of str): Paths of the JSON files to read. The content of
        each file is added with list.extend(), so a top-level JSON array is
        assumed.

    Returns:
    list: The items of all files, in the order the files were given.
    """
    combined_data = []
    for path in files:
        # Decode explicitly as UTF-8 (as load_single_json does) so the result
        # does not depend on the platform's default text encoding
        with open(path, 'r', encoding='utf-8') as f:
            combined_data.extend(json.load(f))
    print("Input files combined\n")
    return combined_data

def read_load_json_to_df(json_data):
    """
    Build a DataFrame from a list of article records.

    The 'tags_embeddings' and 'Title_embeddings' values of every record are
    replaced, in place, by their JSON string form before the frame is built.

    Parameters:
    json_data (list of dict): Article records; each must contain the two
        embedding keys named above. The dicts themselves are modified.

    Returns:
    pandas.DataFrame: One row per record.
    """
    serialized_keys = ('tags_embeddings', 'Title_embeddings')
    for record in json_data:
        # Embeddings are stored as JSON strings because CSVs do not accept lists as a data type
        for key in serialized_keys:
            record[key] = json.dumps(record[key])
    frame = pd.DataFrame(json_data)
    print("Input data converted and read in\n")
    return frame

def load_df(files):
    """
    Load the article database from JSON files into a cleaned DataFrame.

    Parameters:
    files (iterable of str): Paths handed to combine_4_json.

    Returns:
    pandas.DataFrame: The combined records with every row that contains a
    missing value removed. Row labels of the remaining rows are unchanged.
    """
    records = combine_4_json(files)
    frame = read_load_json_to_df(records)
    # Keep only the rows without any missing value
    return frame.dropna()


## Data Cleaner

In [9]:
import json
import re
from json.decoder import JSONDecodeError

def clean_llm_score(output):
    """
    Parse the JSON answer of a model response, tolerating a Markdown fence.

    Parameters:
    output: Response object whose first part exposes the answer as
        ``output.parts[0].text``.

    Returns:
    The decoded JSON value (callers in this file read 'score' and 'Reason'
    from it).

    Raises:
    json.JSONDecodeError: If the text left after removing the fence is not
        valid JSON.
    """
    text = output.parts[0].text.strip()
    # Remove only the opening fence / "json" language tag and the closing
    # fence. Deleting every "json" substring would also alter words inside
    # the answer itself (e.g. inside the Reason text).
    text = re.sub(r'^(?:```)?\s*(?:json)?', '', text, count=1, flags=re.IGNORECASE)
    text = re.sub(r'```\s*$', '', text, count=1)
    return json.loads(text)

def clean_output(output):
    """
    Decode the JSON payload contained in a model answer.

    The text is first parsed as-is. If that fails (for example because the
    JSON is wrapped in a Markdown fence or surrounded by prose), the first
    decodable JSON array in the text is returned; if there is none, the first
    decodable JSON object is returned.

    Parameters:
    output (str): Raw text returned by the model.

    Returns:
    list or dict: The decoded JSON value.

    Raises:
    JSONDecodeError: If the text contains no decodable JSON array or object.
    """
    try:
        return json.loads(output)
    except JSONDecodeError:
        pass

    # Let the JSON decoder find where a value ends instead of a regex that
    # stops at the first closing bracket: brackets or braces inside event
    # strings or nested structures would otherwise truncate the match.
    decoder = json.JSONDecoder()
    for opener in ('[', '{'):  # arrays are preferred over objects
        start = output.find(opener)
        while start != -1:
            try:
                parsed, _ = decoder.raw_decode(output, start)
                return parsed
            except JSONDecodeError:
                start = output.find(opener, start + 1)
    raise JSONDecodeError("No JSON array or object found in model output", output, 0)

def clean_sort_timeline(timelines, df_retrieve):
    """
    Merge per-article timelines into one list sorted by date.

    Parameters:
    timelines (dict): Maps an arbitrary key to the raw model output for one
        article; every value is decoded with clean_output.
    df_retrieve (pandas.DataFrame): Articles the timelines were generated
        from. An event's 1-based "Article" number selects the row whose
        ``id`` is stored in the event as "Article_id".

    Returns:
    list of dict: Events sorted by their 'Date' string, without the events
    whose date is 'nan' (any letter case) and with the "Article" key removed.
    Dates ending in '-XX-XX' are cut to the year, dates ending in '-XX' to
    year and month.
    """
    events = []
    for raw in timelines.values():
        parsed = clean_output(raw)
        # One answer holds either a list of events or a single event object
        if isinstance(parsed, list):
            events.extend(parsed)
        else:
            events.append(parsed)

    # Resolve every article number first, then drop the helper key
    for event in events:
        event["Article_id"] = df_retrieve.iloc[event["Article"] - 1].id
    for event in events:
        del event["Article"]

    ordered = sorted(events, key=lambda entry: entry['Date'])
    dated = [event for event in ordered if event['Date'].lower() != 'nan']
    for event in dated:
        stamp = event['Date']
        if stamp.endswith('-XX-XX'):
            event['Date'] = stamp[:4]
        elif stamp.endswith('-XX'):
            event['Date'] = stamp[:7]
    return dated


## Hierarchical Clustering

In [10]:
import ast
import numpy as np
from sklearn.metrics import silhouette_score
from sklearn.decomposition import PCA
from scipy.cluster.hierarchy import linkage, fcluster
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from scipy.cluster.hierarchy import linkage, fcluster


def split_df(df):
    """
    Hold out one randomly chosen row of ``df`` as the test article.

    Parameters:
    df (pandas.DataFrame): The full article table.

    Returns:
    tuple: (training frame, test frame). The test frame holds the held-out
    sample (test_size=1) with its index reset; the training frame keeps its
    original row labels.
    """
    train_part, test_part = train_test_split(df, test_size=1)
    test_part = test_part.reset_index(drop=True)
    return train_part, test_part

def scale_df_embeddings(df_train, df_test):
    """
    Build standardized feature matrices from the stored embeddings.

    For each frame the 'embeddings', 'Title_embeddings' and 'tags_embeddings'
    columns are parsed from their string form and joined side by side (in
    that order). A StandardScaler is fitted on the training matrix and then
    applied to both matrices.

    Parameters:
    df_train (pandas.DataFrame): Training articles.
    df_test (pandas.DataFrame): Test article(s).

    Returns:
    tuple: (scaled training matrix, scaled test matrix).
    """
    embedding_columns = ('embeddings', 'Title_embeddings', 'tags_embeddings')

    def stack_columns(frame):
        # Every cell is a stringified list: parse it, then concatenate the
        # per-column arrays along the feature axis
        parsed = [
            np.array(frame[name].apply(ast.literal_eval).tolist())
            for name in embedding_columns
        ]
        return np.concatenate(parsed, axis=1)

    combined_train = stack_columns(df_train)
    combined_test = stack_columns(df_test)

    # The scaler is fitted on the training data only
    scaler = StandardScaler()
    scaled_train = scaler.fit_transform(combined_train)
    scaled_test = scaler.transform(combined_test)
    return scaled_train, scaled_test

def get_variance_performance(train_embeddings):
    """
    Grid-search the PCA variance and the dendrogram cut distance.

    For each candidate explained-variance ratio the embeddings are reduced
    with PCA and clustered with Ward linkage; the linkage is then cut at
    every candidate distance and the cut with the highest silhouette score is
    recorded.

    Parameters:
    train_embeddings (array-like): Scaled training embeddings.

    Returns:
    dict: Maps each tried variance to
    {'max_d_train': best distance, 'best_train_silhouette': its score}.
    """
    print("Finding best Model parameters...\n")
    # Cut distances tried for every variance: 45 up to and including 64
    max_d_values = np.arange(45, 65)
    variance_dic = {}

    # np.arange excludes the stop value, so this tries roughly 0.92 to 0.94
    for variance in list(np.arange(0.92, 0.95, 0.01)):
        reduced = PCA(n_components=variance).fit_transform(train_embeddings)
        sample_count = len(reduced)

        # One linkage per variance; only the cut distance changes below
        Z = linkage(reduced, method='ward')

        scores = []
        for max_d in max_d_values:
            labels = fcluster(Z, max_d, criterion='distance')
            # Score only when there are at least 2 clusters and fewer
            # clusters than samples; otherwise record -1
            if 1 < len(set(labels)) < sample_count:
                scores.append(silhouette_score(reduced, labels))
            else:
                scores.append(-1)

        variance_dic[variance] = {
            'max_d_train': max_d_values[np.argmax(scores)],
            'best_train_silhouette': max(scores),
        }
    return variance_dic

def get_best_variance(perf_results):
    """
    Pick the variance with the highest silhouette score.

    Parameters:
    perf_results (dict): Maps a variance to a dict with the keys
        'best_train_silhouette' and 'max_d_train'.

    Returns:
    tuple: (best variance rounded to 2 decimals, its 'max_d_train' value).
    When several variances share the highest score, the first one in the
    dict's iteration order wins.

    Raises:
    ValueError: If perf_results is empty.
    """
    if not perf_results:
        raise ValueError("perf_results must contain at least one variance entry")

    # max() has no positive starting threshold, so a winner is found even
    # when every score is negative (e.g. the -1 placeholder score)
    best_variance = max(
        perf_results,
        key=lambda variance: perf_results[variance]['best_train_silhouette'],
    )
    final_best_max_d = perf_results[best_variance]['max_d_train']
    print(f"Best variance for this dataset is {round(best_variance, 2)} and the best maximum distance is {final_best_max_d}\n")
    return round(best_variance, 2), final_best_max_d

def predict_cluster(test_embedding, train_embeddings, clusters):
    """
    Return the cluster label of the training point closest to a test point.

    Parameters:
    test_embedding (numpy.ndarray): One embedding vector.
    train_embeddings (numpy.ndarray): Training embeddings, one row per point.
    clusters (sequence): Cluster label of every training row.

    Returns:
    The label of the training row with the smallest Euclidean distance to
    ``test_embedding`` (the first such row on ties).
    """
    offsets = train_embeddings - test_embedding
    nearest_row = np.argmin(np.linalg.norm(offsets, axis=1))
    return clusters[nearest_row]

def get_cluster_labels(best_variance, best_max_d, train_embeddings, test_embeddings, df_train, df_test):
    """
    Cluster the training articles and describe the test article's cluster.

    The embeddings are reduced with PCA, the training set is clustered with
    Ward linkage cut at ``best_max_d``, and each test point receives the
    label of its nearest training point (see predict_cluster).

    Parameters:
    best_variance: Explained-variance ratio passed to PCA.
    best_max_d: Distance at which the linkage is cut.
    train_embeddings, test_embeddings: Scaled embedding matrices.
    df_train, df_test (pandas.DataFrame): Article tables; both get a
        'Cluster_labels' column added in place and df_test has its index
        reset in place. They must provide 'id', 'Title' and 'tags'.

    Returns:
    tuple: (input_list, df_train, df_test) where input_list is a text block
    naming the test article and listing the training articles of its
    cluster. When df_test holds several rows, the last one is described.
    """
    # Perform PCA
    print(f"Training new Hierarchical Clustering model with best variance: {best_variance} and max_d: {best_max_d}\n")
    pca = PCA(n_components=best_variance)
    pca_train_embeddings = pca.fit_transform(train_embeddings)
    pca_test_embeddings = pca.transform(test_embeddings)

    Z = linkage(pca_train_embeddings, method='ward', metric='euclidean')
    clusters_train = fcluster(Z, best_max_d, criterion='distance')

    # Each test point takes the label of its nearest training point
    test_clusters = [predict_cluster(te, pca_train_embeddings, clusters_train) for te in pca_test_embeddings]

    df_train['Cluster_labels'] = clusters_train
    df_test['Cluster_labels'] = test_clusters
    df_test.reset_index(drop=True, inplace=True)

    # Only the last test point ends up in the text, so select it explicitly
    # rather than relying on variables left over from a loop
    test_point = df_test.iloc[-1]
    test_cluster = test_clusters[-1]
    cluster_df = df_train.iloc[np.where(clusters_train == test_cluster)[0]]

    print(f"Cluster Label {test_cluster} is chosen\n")
    # NOTE(review): the wording below (including the "Artice" typo and the
    # stray "]") is kept unchanged because this text is used as LLM input
    lines = [f"Test Artice Chosen: (Title: {test_point['Title']}\nTags: {test_point['tags']}):\n"]
    lines.extend(
        f"Article id: {row['id']}, Title: {row['Title']}, Tags: {row['tags']}]\n"
        for _, row in cluster_df.iterrows()
    )
    input_list = "".join(lines)
    return input_list, df_train, df_test


In [11]:
import os
import sys
import re
import json
import pandas as pd

# Import libraries for working with language models and Google Gemini
from langchain_core.prompts import PromptTemplate
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
from concurrent.futures import ThreadPoolExecutor, as_completed


# Load environment variables (load_dotenv is called without arguments)
from dotenv import load_dotenv
load_dotenv()
# os.environ.get returns None when GEMINI_KEY is not set; no check is made here
GEMINI_KEY = os.environ.get('GEMINI_KEY')
# Hand the key to the google.generativeai client used by the functions below
genai.configure(api_key=GEMINI_KEY)


def to_generate_timeline(test_data):
    """
    Ask the LLM whether the test article deserves a timeline.

    Parameters:
    test_data (pandas.DataFrame): Frame with 'Title' and 'Text' columns; the
        first row is the article that is judged.

    Returns:
    bool: True when the model's score is 3 or higher, otherwise False (the
    model's reason and score are printed in that case).
    """
    llm = genai.GenerativeModel('gemini-1.5-flash-latest')

    class Event(BaseModel):
        score: int = Field(description="The need for this article to have a timeline")
        Reason: str = Field(description = "The main reason for your choice why a timeline is needed or why it is not needed")

    output_parser = JsonOutputParser(pydantic_object=Event)

    # Format instructions derived from the Event schema above
    format_instructions = output_parser.get_format_instructions()

    # Define the template
    template = '''
    You are a highly intelligent AI tasked with analyzing articles to determine whether generating a timeline of events leading up to the key event in the article would be beneficial. 
    Consider the following factors to make your decision:
    1. **Significance of the Event**:
       - Does the event have a significant impact on a large number of people, industries, or countries?
       - Are the potential long-term consequences of the event important?

    2. **Controversy or Debate**:
       - Is the event highly controversial or has it sparked significant debate?
       - Has the event garnered significant media attention and public interest?

    3. **Complexity**:
       - Does the event involve multiple factors, stakeholders, or causes that make it complex?
       - Does the event have deep historical roots or is it the culmination of long-term developments?

    4. **Personal Relevance**:
       - Does the event directly affect the reader or their community?
       - Is the event of particular interest to the reader due to economic implications, political affiliations, or social issues?

    5. Educational Purposes:
       - Would a timeline provide valuable learning or research information?

    Here is the information for the article:
    Title:{title}
    Text: {text}
    

    Based on the factors above, decide whether generating a timeline of events leading up to the key event in this article would be beneficial. 
    Your answer will include the need for this article to have a timeline with a score 1 - 5, 1 means unnecessary, 5 means necessary. It will also include the main reason for your choice.
    {format_instructions}    
    ANSWER:
    '''

    # Create the prompt template
    prompt = PromptTemplate(
        input_variables=["text", "title"],
        partial_variables={"format_instructions": format_instructions},
        template=template,
    )

    # Take the first row by position so the lookup does not depend on the
    # frame carrying an index label 0
    headline = test_data['Title'].iloc[0]
    body = test_data['Text'].iloc[0]

    # Format the prompt
    final_prompt = prompt.format(title=headline, text=body)

    # Generate content using the generative model
    response = llm.generate_content(
        final_prompt,
        safety_settings={
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE
        }
    )
    final_response = clean_llm_score(response)

    # If LLM approves
    if final_response['score'] >= 3:
        print("Timeline is necessary for this chosen article.\n")
        return True

    print("A timeline for this article is not required. \n")
    # Print the reason one sentence per line; splitting after each full stop
    # keeps the punctuation and the spacing inside the sentences intact
    for sentence in re.split(r'(?<=\.)\s+', final_response['Reason'].strip()):
        print(f"{sentence}\n")
    print("Hence I gave this a required timeline score of " + str(final_response['score']))
    return False

def get_article_dict(input_list, df_train, df_test):
    """
    Let the LLM pick the cluster articles that can support a timeline.

    Parameters:
    input_list (str): Text describing the test article and the articles of
        its cluster; inserted into the prompt.
    df_train (pandas.DataFrame): Training articles with 'id', 'Title' and
        'Cluster_labels' columns.
    df_test (pandas.DataFrame): Test article(s) with 'Title', 'Text' and
        'Cluster_labels' columns.

    Returns:
    dict: {'Title', 'indexes', 'Text'} for the test article, where 'indexes'
    holds the df_train row labels of the selected articles.

    The process is terminated with sys.exit() when the model selects no
    article, or when no test article has at least two selected articles in
    its own cluster.
    """
    llm = genai.GenerativeModel("gemini-1.5-flash-latest")

    # Schema of the expected answer
    class Event(BaseModel):
        Article_id: list = Field(description="Article ids that are most relevant for the generation of the timeline")

    output_parser = JsonOutputParser(pydantic_object=Event)

    # Format instructions derived from the schema above
    format_instructions = output_parser.get_format_instructions()

    template = '''
    Task Description: Given the following test article, and the relevant tags of that article, and the contents of articles similar to it.
    You will only select the articles that are closest in similarity to the test article, \
    for which i will be able to leverage on to build a timeline upon. 
    Return the article ids for the chosen articles. 
    Ensure that the chosen articles are relevant in terms of geographical location, main topic and whether or not they are talking about the same event or topic.
    {text}

    {format_instructions}
    Check and ensure again that the output follows the format instructions above very strictly. 
    '''

    # Create the prompt template
    prompt = PromptTemplate(
        input_variables=["text"],
        partial_variables={"format_instructions": format_instructions},
        template=template,
    )

    final_prompt = prompt.format(text=input_list)
    safety_settings = {
        HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE
    }
    response = llm.generate_content(final_prompt, safety_settings=safety_settings)

    # The first bracketed list in the answer is taken as the chosen ids
    id_list_text = re.search(r'\[[^\]]*\]', response.parts[0].text).group(0)
    article_keys = json.loads(id_list_text)
    if not article_keys:
        print("No useful similar articles found in database for timeline generation.\n")
        sys.exit()

    similar_articles_dict = {}

    # Iterate over each test article in df_test
    for _, test_row in df_test.iterrows():
        # Training articles that share the test article's cluster
        same_cluster = df_train[df_train['Cluster_labels'] == test_row['Cluster_labels']]

        # Row labels of the cluster articles the model selected
        similar_indexes = [
            train_index
            for train_index, train_row in same_cluster.iterrows()
            if train_row['id'] in article_keys
        ]

        # Store the result only if there are at least 2 supporting articles
        if len(similar_indexes) >= 2:
            similar_articles_dict = {
                'Title': test_row['Title'],
                'indexes': similar_indexes,
                'Text': test_row['Text']
            }

    print(similar_articles_dict)
    if not similar_articles_dict:
        print("Inadequate articles found to construct a timeline... Exiting execution now\n")
        sys.exit()

    # Print results
    divider = "-"*80
    print(divider + "\n")
    print(f"Test Article Title: << {similar_articles_dict['Title']}>>\n")
    print("Supporting Article Titles:")
    for idx in similar_articles_dict['indexes']:
        print(f" - {df_train.loc[idx, 'Title']}")
    print("\n" + divider)
    return similar_articles_dict
        
def generate_and_sort_timeline(similar_articles_dict, df_train, df_test):
    """
    Generate one timeline per article with the LLM and merge them by date.

    Parameters:
    similar_articles_dict (dict): Must contain 'indexes', the df_train row
        labels of the supporting articles.
    df_train (pandas.DataFrame): Training articles; the supporting rows are
        selected from it.
    df_test (pandas.DataFrame): Test article(s). Both frames must provide
        the 'combined', 'Publication_date' and 'id' columns.

    Returns:
    tuple: (timeline, df_retrieve). ``timeline`` is a list of event dicts
    with 'Date', 'Event' and 'Article_id', sorted by the 'Date' string and
    without events whose date is 'nan'. ``df_retrieve`` is the frame of
    articles used: supporting articles followed by df_test, in reversed row
    order with a fresh index, so article number k is row k-1.
    """
    llm = genai.GenerativeModel('gemini-1.5-flash-latest' )

    class Event(BaseModel):
        Date: str = Field(description="The date of the event in YYYY-MM-DD format")
        Event: str = Field(description="A detailed description of the important event")
        Article: int = Field(description="The article number from which the event was extracted")

    output_parser = JsonOutputParser(pydantic_object=Event)

    # Format instructions derived from the Event schema above
    format_instructions = output_parser.get_format_instructions()

    template = '''
    Given an article, containing a publication date, title, and content, your task is to construct a detailed timeline of events leading up to the main event described in the article.
    Begin by thoroughly analyzing the title, content, and publication date of the article to understand the main event in the article. 
    the dates are represented in YYYY-MM-DD format. Identify events, context, and any time references such as "last week," "last month," or specific dates. 
    The article could contain more or one key events. 
    If the article does not provide a publication date or any events leading up to the main event, return NAN in the Date field, and 0 i the Article Field

    Construct the Timeline:
    Chronological Order: Organize the events chronologically, using the publication dates and time references within the articles.
    Detailed Descriptions: Provide detailed descriptions of each event, explaining how it relates to the main event of the first article.
    Contextual Links: Use information from the articles to link events together logically and coherently.
    Handle Ambiguities: If an article uses ambiguous time references, infer the date based on the publication date of the article and provide a clear rationale for your inference.

    Contextual Links:
    External Influences: Mention any external influences (e.g., global conflicts, economic trends, scientific discoveries) that might have indirectly affected the events.
    Internal Issues: Highlight any internal issues or developments (e.g., political changes, organizational restructuring, societal movements) within the entities involved that might have impacted the events.
    Efforts for Improvement: Note any indications of efforts to improve the situation (e.g., policy changes, strategic initiatives, collaborative projects) despite existing challenges.

    Be as thorough and precise as possible, ensuring the timeline accurately reflects the sequence and context of events leading to the main event.

    Article:
    {text}

    {format_instructions}
    Check and ensure again that the output follows the format instructions above very strictly. 
    '''

    prompt = PromptTemplate(
        input_variables=["text"],
        partial_variables={"format_instructions": format_instructions},
        template=template
    )

    safety_settings = {
        HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE
    }

    def response_text(response):
        # Return the text of the first response part, or None when it cannot
        # be read. NOTE(review): presumably the accessor raises ValueError
        # when the response holds no usable candidate - confirm against the
        # SDK documentation.
        try:
            return response.parts[0].text
        except (ValueError, IndexError):
            return None

    def generate_individual_timeline(article_triple):
        # article_triple is (article number, publication date, article text)
        number, publication_date, article_text = article_triple
        s = f'Article {number}: Publication date: {publication_date} Article Text: {article_text}'
        final_prompt = prompt.format(text=s)

        result = response_text(llm.generate_content(final_prompt, safety_settings=safety_settings))
        # Accept the first answer only if it looks like it contains JSON
        if result is not None and ('[' in result or '{' in result):
            return result

        # Otherwise retry once and take whatever text comes back
        result = response_text(llm.generate_content(final_prompt, safety_settings=safety_settings))
        if result is None:
            print("ERROR: There were issues with the generation of the timeline. The timeline could not be generated")
        return result

    def process_articles(df_train):
        df_retrieve = df_train.loc[similar_articles_dict['indexes']]
        df_retrieve = pd.concat([df_retrieve, df_test], axis=0).iloc[::-1].reset_index(drop=True)

        # Prepare article numbers, publication dates and texts. The tuple
        # order must match the unpacking in generate_individual_timeline.
        indiv_numbers = range(1, len(df_retrieve) + 1)
        indiv_dates = df_retrieve['Publication_date'].tolist()
        indiv_text = df_retrieve['combined'].tolist()
        article_triples = list(zip(indiv_numbers, indiv_dates, indiv_text))

        dict_of_timelines = {}
        with ThreadPoolExecutor(max_workers=len(article_triples)) as executor:
            futures = [executor.submit(generate_individual_timeline, triple) for triple in article_triples]
            # Results are keyed by completion order; each event carries its
            # own article number, so that order does not matter
            for i, future in enumerate(as_completed(futures)):
                dict_of_timelines[i] = future.result()
        return dict_of_timelines, df_retrieve

    timeline_dic, df_retrieve = process_articles(df_train)

    print("The first timeline has been generated\n")
    generated_timeline = []
    for line in timeline_dic.values():
        if line is None:
            # Generation failed for this article (already reported above)
            continue
        indiv_timeline = clean_output(line)
        if isinstance(indiv_timeline, list):
            generated_timeline.extend(indiv_timeline)
        else:
            generated_timeline.append(indiv_timeline)

    # Replace the 1-based article number by the id of the matching row
    for event in generated_timeline:
        event["Article_id"] = df_retrieve.iloc[event["Article"] - 1].id
    for event in generated_timeline:
        del event["Article"]

    timeline = sorted(generated_timeline, key=lambda x: x['Date'])
    finished_timeline = [event for event in timeline if event['Date'].lower() != 'nan']

    # Shorten dates whose day (and month) are placeholders
    for event in finished_timeline:
        date = event['Date']
        if date.endswith(('-XX-XX', '00-00')):
            event['Date'] = date[:4]
        elif date.endswith(('-XX', '00')):
            event['Date'] = date[:7]
    return finished_timeline, df_retrieve


## Timeline enhancer

In [12]:
from pydantic import BaseModel, Field
import os
from dotenv import load_dotenv
from concurrent.futures import ThreadPoolExecutor, as_completed

# Import libraries for working with language models and Google Gemini
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import JsonOutputParser
from langchain_core.pydantic_v1 import BaseModel, Field
import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold

# Load environment variables (same configuration as in the earlier cell)
load_dotenv()
# os.environ.get returns None when GEMINI_KEY is not set; no check is made here
GEMINI_KEY = os.environ.get('GEMINI_KEY')
# Hand the key to the google.generativeai client used by enhance_timeline
genai.configure(api_key=GEMINI_KEY)

def extract_json_from_string(string):
    """
    Decode the JSON array embedded in a piece of text.

    Parameters:
    string (str): Text that should contain a JSON array.

    Returns:
    The decoded content spanning the first '[' to the last ']' of the text,
    or None (after printing a message) when there is no such span or it is
    not valid JSON.
    """
    # Greedy match across lines: from the first '[' to the last ']'
    found = re.search(r'\[.*\]', string, re.DOTALL)
    if found is None:
        print("No valid JSON content found.")
        return None

    try:
        return json.loads(found.group(0))
    except json.JSONDecodeError as e:
        print("Failed to decode JSON:", e)
        return None

def split_batches(timeline, max_batch_size=30):
    """
    Cut a timeline into consecutive batches.

    Parameters:
    timeline (list): Events to split.
    max_batch_size (int): Size of every full batch.

    Returns:
    list of list: The timeline itself as the only batch when it has at most
    ``max_batch_size`` items. Otherwise full batches followed by one final
    batch with the rest; a rest smaller than half a batch is merged into the
    last full batch, so that final batch can exceed ``max_batch_size``.
    """
    total = len(timeline)
    if total <= max_batch_size:
        return [timeline]

    full_batches, leftover = divmod(total, max_batch_size)
    if 0 < leftover < max_batch_size // 2:
        # Too small to stand alone: fold it into the last full batch
        full_batches -= 1
        leftover += max_batch_size

    boundary = full_batches * max_batch_size
    chunks = [
        timeline[lo:lo + max_batch_size]
        for lo in range(0, boundary, max_batch_size)
    ]
    if leftover > 0:
        chunks.append(timeline[boundary:boundary + leftover])
    return chunks

def enhance_timeline(timeline):
    """
    Let the LLM merge duplicate events and add contextual annotations.

    The timeline is split with split_batches, every batch is sent to the
    model in its own thread, and the answers are combined.

    Parameters:
    timeline (list of dict): Events to enhance; must be JSON-serializable.

    Returns:
    list of dict: The enhanced events of all batches, sorted by their 'Date'
    string. Batches whose answer could not be parsed are left out (a warning
    is printed for each).
    """
    print("\nProceeding to enhance the timeline...\n")
    llm = genai.GenerativeModel(model_name='gemini-1.5-flash-latest')

    class Event(BaseModel):
        Date: str = Field(description="The date of the event in YYYY-MM-DD format")
        Event: str = Field(description="A detailed description of the event")
        Contextual_Annotation: str = Field(description="Contextual anecdotes of the event.")
        Article_id: list = Field(description="The article id(s) from which the event was extracted")

    parser = JsonOutputParser(pydantic_object=Event)

    template = '''
        You are given a timeline of events, your task is to enhance this timeline by improving its clarity and contextual information.
        IF the same event occurs on the exact same date, merge these events to avoid redundancy, and add the article ids to a list. 
        Add contextual annotations by providing brief annotations for major events to give additional context and improve understanding.
        Only retain important information that would be value-add when the general public reads the information.

        Initial Timeline:
        {text}

        {format_instructions}
        Ensure that the format follows the example output format strictly before returning the output.
        '''
    prompt = PromptTemplate(
            input_variables=["text"],
            template=template
        )

    def generate_enhanced(batch):
        # Returns the decoded list of events, or None when the answer holds
        # no parsable JSON array (see extract_json_from_string)
        batch_timeline_text = json.dumps(batch)
        final_prompt = prompt.format(text=batch_timeline_text, format_instructions=parser.get_format_instructions())
        response = llm.generate_content(final_prompt,
            safety_settings={
                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE
            }
        )
        return extract_json_from_string(response.parts[0].text)

    def process_articles(timeline):
        results = []
        batches = split_batches(timeline, max_batch_size=30)

        # split_batches always returns at least one batch, so max_workers >= 1
        with ThreadPoolExecutor(max_workers=len(batches)) as executor:
            print("Processing batches simultaneously now...\n")
            futures = [executor.submit(generate_enhanced, batch) for batch in batches]
            for future in as_completed(futures):
                indiv_batch = future.result()
                if indiv_batch is None:
                    # The answer was not parsable; skip this batch instead of
                    # failing on the iteration below
                    print("WARNING: One batch could not be parsed and was left out of the enhanced timeline.\n")
                    continue
                results.extend(indiv_batch)
        return results

    full_enhanced = process_articles(timeline)
    # Batches finish in arbitrary order, so sort the combined events
    sorted_timeline = sorted(full_enhanced, key=lambda x: x['Date'])
    print("Finished enhancing the timeline\n")
    return sorted_timeline

def save_enhanced_timeline(enhanced_timeline, output_path: str):
    """
    Write the enhanced timeline to a JSON file, ordered by date.

    Parameters:
    enhanced_timeline (list): Event dicts; each must have a 'Date' key. The
        list passed in is not reordered.
    output_path (str): The file path where the JSON will be saved (written
        as UTF-8, indented by 4 spaces, non-ASCII characters kept as-is).
    """
    ordered_events = list(enhanced_timeline)
    ordered_events.sort(key=lambda entry: entry['Date'])

    # Serialize first, so nothing is written if the data cannot be encoded
    serialized = json.dumps(ordered_events, indent=4, ensure_ascii=False)
    with open(output_path, 'w', encoding='utf-8') as out_file:
        out_file.write(serialized)
    print(f"Enhanced timeline saved to '{output_path}'")

In [13]:
def main():
    """
    Run the whole pipeline: load the data, cluster it, build and enhance a timeline.

    Returns:
    tuple: (final_timeline, retrieval) when to_generate_timeline accepts the
        test split, otherwise (None, None).
    """
    db_files = [
        '../data_upload/final_db1.json',
        '../data_upload/final_db2.json',
        '../data_upload/final_db3.json',
        '../data_upload/final_db4.json',
    ]
    full_df = load_df(db_files)
    train_df, test_df = split_df(full_df)

    # Stop early when the test point is not worth generating a timeline from.
    if not to_generate_timeline(test_df):
        return None, None

    # Scale the embeddings and pick the clustering parameters.
    train_emb, test_emb = scale_df_embeddings(train_df, test_df)
    performance = get_variance_performance(train_emb)
    variance, max_d = get_best_variance(performance)
    cluster_input, train_df, test_df = get_cluster_labels(
        variance, max_d, train_emb, test_emb, train_df, test_df
    )

    # Generate the timeline from the articles returned by get_article_dict.
    articles = get_article_dict(cluster_input, train_df, test_df)
    raw_timeline, retrieval = generate_and_sort_timeline(articles, train_df, test_df)

    # Enhance the timeline and save it to disk.
    enhanced = enhance_timeline(raw_timeline)
    save_enhanced_timeline(enhanced, '../data_upload/Enhanced_timeline.json')
    return enhanced, retrieval

In [19]:
# Module-level run of the pipeline steps; final_timeline is read by later cells.
files = [
    '../data_upload/final_db1.json',
    '../data_upload/final_db2.json',
    '../data_upload/final_db3.json',
    '../data_upload/final_db4.json',
]
df = load_df(files)
df_train, df_test = split_df(df)

# Check first whether the test point is worth generating a timeline from.
if to_generate_timeline(df_test):
    # Scale the embeddings and choose the clustering parameters.
    train_embeddings, test_embeddings = scale_df_embeddings(df_train, df_test)
    variance_perf = get_variance_performance(train_embeddings)
    best_variance, best_max_d = get_best_variance(variance_perf)
    input_list, df_train, df_test = get_cluster_labels(
        best_variance, best_max_d, train_embeddings, test_embeddings, df_train, df_test
    )

    # Generate the timeline.
    similar_articles_dict = get_article_dict(input_list, df_train, df_test)
    generated_timeline, retrieval = generate_and_sort_timeline(
        similar_articles_dict, df_train, df_test
    )

    # Enhance the timeline.
    final_timeline = enhance_timeline(generated_timeline)


Input files combined

Input data converted and read in

Timeline is necessary for this chosen article.

Finding best Model parameters...

Best variance for this dataset is 0.92 and the best maximum distance is 55

Training new Hierarchical Clustering model with best variance: 0.92 and max_d: 55

Cluster Label 518 is chosen

{'Title': 'Ukraine suspects Russia poisoned spy chief’s wife', 'indexes': [770, 1370], 'Text': 'KYIV - Ukraine said on Nov 28 it believed Russia had poisoned the wife of its military intelligence chief, in an apparent assassination attempt targeting the heart of Kyiv’s leadership.Ms Marianna Budanova, who is an adviser to Kyiv Mayor Vitali Klitschko, was hospitalised after a prolonged deterioration in her health, the Babel news outlet reported earlier.Ukrainian investigators’ “main hypothesis” is that Russia was involved in the poisoning attempt, Ukrainian military intelligence spokesman Andriy Yusov later told AFP.  “The target was the wife,” he added, because “it 

In [20]:
# Display the enhanced timeline assigned in the previous cell.
final_timeline

[{'Date': '2018',
  'Event': 'A former Russian military intelligence officer, convicted of high treason by Moscow, was poisoned along with his daughter in Britain. Paul Whelan, a U.S. Marine Corps veteran, was arrested in Russia.',
  'Contextual_Annotation': "This poisoning event in Britain highlighted past accusations of poisoning attacks against Russian dissidents and served as a precedent for the current suspicions regarding Marianna Budanova's poisoning.",
  'Article_id': ['h4g9g9zxoy5klp34', 'c3ydcl28nk5o24to']},
 {'Date': '2020',
  'Event': 'Paul Whelan was convicted of spying charges in Russia and sentenced to 16 years in a penal colony in Mordovia.',
  'Contextual_Annotation': "This event marked a significant escalation in the strained relations between the United States and Russia, with Whelan's conviction seen as politically motivated.",
  'Article_id': ['c3ydcl28nk5o24to']},
 {'Date': '2022-02',
  'Event': 'Russia launched its full-scale assault on Ukraine.',
  'Contextual_A

In [22]:
# Serialize the enhanced timeline as indented JSON; ensure_ascii=False leaves
# non-ASCII characters unescaped.
json_data = json.dumps(final_timeline, indent=4, ensure_ascii=False)
# Write the JSON string to ../public/data_upload/Timeline.json as UTF-8.
with open("../public/data_upload/Timeline.json", "w", encoding='utf-8') as fout:
    fout.write(json_data)

In [None]:
generated_timeline = [{'Date': '2013-01-01',
  'Event': 'During the Obama administration, the US provided a substantial amount of aid to the Palestinian territories, disbursing US$1 billion in the 2013 fiscal year.',
  'Article_id': 'yssfrux1ldigt3pb'},
 {'Date': '2020-01-01',
  'Event': 'The Trump administration significantly reduced US aid to the Palestinian territories, lowering it to US$8 million for the year.',
  'Article_id': 'yssfrux1ldigt3pb'},
 {'Date': '2023-01-20',
  'Event': 'President Biden assumed office and reinstated aid to the Palestinian territories, bringing the total yearly US assistance to approximately US$150 million.',
  'Article_id': 'yssfrux1ldigt3pb'},
 {'Date': '2023-10-07',
  'Event': 'Israel launched a military operation against the Gaza Strip in response to a Hamas attack that killed 1,400 people. This attack marked the beginning of the current conflict in Gaza, leading to increased humanitarian needs and a breakdown of civil order within the Palestinian enclave.',
  'Article_id': '3iwa09nlsu0xxnrk'},
 {'Date': '2023-10-07',
  'Event': 'Hamas launched a military campaign against Israel, resulting in the deaths of 1,200 people and the taking of 240 hostages.',
  'Article_id': 'et24x553bmr7np38'},
 {'Date': '2023-10-07',
  'Event': "Israel imposed a strict blockade on all goods entering Hamas-controlled Gaza in response to Hamas's military campaign.",
  'Article_id': 'et24x553bmr7np38'},
 {'Date': '2023-10-07',
  'Event': 'Israel imposed a siege on the Gaza Strip after Hamas gunmen attacked Israeli towns and villages in the south, including the stoppage of water supplies to the Gazan population.',
  'Article_id': 'r116ovpazmfs1xn5'},
 {'Date': '2023-10-07',
  'Event': "Palestinian militant group Hamas launched a devastating assault on Israel, killing over 1,300 Israelis and taking scores of hostages, marking the worst breach of Israel's defenses since its creation in 1948.",
  'Article_id': '809rlzgnb15kdkuo'},
 {'Date': '2023-10-07',
  'Event': "Israel responded to the Hamas assault by imposing a 'total blockade' on Gaza, halting food supplies and cutting electricity.",
  'Article_id': '809rlzgnb15kdkuo'},
 {'Date': '2023-10-07',
  'Event': "Hamas militants launch a cross-border raid into Israel, killing approximately 1,200 Israelis, mostly civilians, in the deadliest day in Israel's history.",
  'Article_id': 'ivsjqtgvfm48daf7'},
 {'Date': '2023-10-07',
  'Event': "Hamas militants launched an attack on Israel, triggering Israel's retaliatory bombardment and 'total siege' of the Gaza Strip.",
  'Article_id': 'c10ogpwyyopbp20g'},
 {'Date': '2023-10-07',
  'Event': 'Hamas militants launched an attack on southern Israel, resulting in the deaths of 1,400 people and the capture of hundreds as hostages. This attack triggered a retaliation by Israel, leading to the imposition of a siege on Gaza.',
  'Article_id': 'tubm0bbus5itkpgm'},
 {'Date': '2023-10-07',
  'Event': 'Israel imposed a siege on Gaza in retaliation for the Hamas attack.',
  'Article_id': 'tubm0bbus5itkpgm'},
 {'Date': '2023-10-07',
  'Event': 'Hamas militants launched a surprise attack on Israel, resulting in the deadliest attack on Jewish civilians since the Holocaust.',
  'Article_id': 'a52mvx23ow3p3r1b'},
 {'Date': '2023-10-07',
  'Event': 'Hamas militants took hostages during their surprise attack on Israel.',
  'Article_id': 'a52mvx23ow3p3r1b'},
 {'Date': '2023-10-12',
  'Event': 'Israel responded to the Hamas attack by imposing a total siege on Gaza, home to 2.3 million people, and launching a massive bombing campaign.',
  'Article_id': 'a52mvx23ow3p3r1b'},
 {'Date': '2023-10-12',
  'Event': 'The ICRC reported that Gaza still has fuel for generators, including those in hospitals, but it could run out in a few hours due to the siege.',
  'Article_id': 'a52mvx23ow3p3r1b'},
 {'Date': '2023-10-12',
  'Event': 'The ICRC stated that its aid and medical supplies within Gaza are stranded due to the lack of safe passage.',
  'Article_id': 'a52mvx23ow3p3r1b'},
 {'Date': '2023-10-12',
  'Event': "The ICRC is in talks with all parties, including Egypt, to open a humanitarian corridor, with the Rafah crossing between Gaza and Egypt's Sinai peninsula being the main option.",
  'Article_id': 'a52mvx23ow3p3r1b'},
 {'Date': '2023-10-12',
  'Event': "Most of Gaza's population has no power and water due to the Israeli strikes.",
  'Article_id': 'a52mvx23ow3p3r1b'},
 {'Date': '2023-10-12',
  'Event': 'The ICRC is preparing for a possible ground invasion of Gaza by pre-positioning staff and stocks in strategic hubs close to the enclave.',
  'Article_id': 'a52mvx23ow3p3r1b'},
 {'Date': '2023-10-12',
  'Event': 'Israel declared that there will be no humanitarian exceptions to its siege until all hostages seized by Hamas militants are freed.',
  'Article_id': 'a52mvx23ow3p3r1b'},
 {'Date': '2023-10-12',
  'Event': 'The death toll from the Hamas attack on Israel had risen to over 1,300, according to public broadcaster Kan.',
  'Article_id': 'a52mvx23ow3p3r1b'},
 {'Date': '2023-10-12',
  'Event': 'The ICRC confirmed that it is in direct contact with both Hamas and Israeli officials regarding the hostages.',
  'Article_id': 'a52mvx23ow3p3r1b'},
 {'Date': '2023-10-13',
  'Event': 'Israel ordered residents of northern Gaza to evacuate, citing safety concerns and to avoid being caught in the conflict.',
  'Article_id': '809rlzgnb15kdkuo'},
 {'Date': '2023-10-13',
  'Event': 'Israel guaranteed the safety of Palestinians fleeing the north on two main roads until 4 p.m. (1300 GMT).',
  'Article_id': '809rlzgnb15kdkuo'},
 {'Date': '2023-10-13',
  'Event': 'Gaza authorities reported that 70 people were killed and 200 were wounded when Israeli strikes targeted cars and trucks carrying people fleeing the north.',
  'Article_id': '809rlzgnb15kdkuo'},
 {'Date': '2023-10-14',
  'Event': 'As Israeli bombardment intensified, bakeries in Gaza began running out of bread and drinking water became scarce due to power outages.',
  'Article_id': '809rlzgnb15kdkuo'},
 {'Date': '2023-10-14',
  'Event': 'The influx of people fleeing the north to the south of Gaza overwhelmed already strained resources.',
  'Article_id': '809rlzgnb15kdkuo'},
 {'Date': '2023-10-14',
  'Event': 'The United Nations urged Israel to prevent a humanitarian catastrophe in Gaza, highlighting the dire situation of 2.3 million people trapped in the region.',
  'Article_id': '809rlzgnb15kdkuo'},
 {'Date': '2023-10-14',
  'Event': 'Shops in Gaza started running out of basic necessities like eggs, rice, canned food, and milk due to the ongoing blockade.',
  'Article_id': '809rlzgnb15kdkuo'},
 {'Date': '2023-10-14',
  'Event': 'Israel intensified its military operation by launching raids and building up troops and tanks on the border with Gaza.',
  'Article_id': '809rlzgnb15kdkuo'},
 {'Date': '2023-10-14',
  'Event': 'The United Nations Commissioner-General for UNRWA, Philippe Lazzarini, appealed for the lifting of the siege on humanitarian assistance, stressing the urgent need for fuel to provide safe drinking water and prevent dehydration, particularly among children, the elderly, and women.',
  'Article_id': '809rlzgnb15kdkuo'},
 {'Date': '2023-10-14',
  'Event': 'U.S. President Joe Biden stated his commitment to addressing the humanitarian crisis in Gaza, working with Israel, Egypt, Jordan, other Arab states, and the UN. He also pledged to ensure Israel had the necessary resources to respond to the Hamas attack and to bring any U.S. hostages home.',
  'Article_id': '809rlzgnb15kdkuo'},
 {'Date': '2023-10-14',
  'Event': 'Gaza authorities reported 10,000 people injured in the bombardment, with hospitals struggling to cope due to shortages of medical supplies and fuel.',
  'Article_id': '809rlzgnb15kdkuo'},
 {'Date': '2023-10-14',
  'Event': 'The Hamas government media office reported that over 1,695 buildings and high-rise towers were destroyed in Israeli airstrikes, along with 7,000 housing units.',
  'Article_id': '809rlzgnb15kdkuo'},
 {'Date': '2023-10-14',
  'Event': 'Power cuts in Gaza forced residents to rely on solar panels for charging their phones, as they remained cut off from the outside world.',
  'Article_id': '809rlzgnb15kdkuo'},
 {'Date': '2023-10-15',
  'Event': 'Israel announced that it would resume some water supplies to the Gaza Strip as part of an agreement with the United States.',
  'Article_id': 'r116ovpazmfs1xn5'},
 {'Date': '2023-10-16',
  'Event': "Hamas reported that there had been no resumption of water supplies to the Gaza Strip despite Israel's previous pledge.",
  'Article_id': 'r116ovpazmfs1xn5'},
 {'Date': '2023-10-16',
  'Event': 'An Israeli official confirmed that some water was being provided to the southern community of Bnei Sahila, near Khan Younis, but declined to specify the amount.',
  'Article_id': 'r116ovpazmfs1xn5'},
 {'Date': '2023-10-16',
  'Event': 'Israeli Energy and Infrastructure Minister Israel Katz stated that resuming water supplies in southern Gaza was intended to encourage Palestinian civilians to congregate in that area, while Israel targeted Hamas in Gaza City to the north.',
  'Article_id': 'r116ovpazmfs1xn5'},
 {'Date': '2023-10-16',
  'Event': 'Israel ordered residents of northern Gaza, including Gaza City, to evacuate to the south.',
  'Article_id': 'r116ovpazmfs1xn5'},
 {'Date': '2023-10-17',
  'Event': 'The World Health Organization (WHO) expressed its urgent need for access to Gaza to deliver aid and medical supplies, highlighting a looming humanitarian crisis in the Israeli-occupied Palestinian enclave.',
  'Article_id': 'sbaewi0thfkcqyan'},
 {'Date': '2023-10-17',
  'Event': "Dr. Richard Brennan, WHO's regional emergency director for the Eastern Mediterranean, engaged in discussions with decision-makers to expedite access to Gaza.",
  'Article_id': 'sbaewi0thfkcqyan'},
 {'Date': '2023-10-17',
  'Event': 'Dr. Richard Peeperkorn, WHO Representative in the occupied Palestinian territories, reported a tragic toll of 2,800 deaths and 11,000 injuries in Gaza, resulting from Israeli air strikes that commenced earlier.',
  'Article_id': 'sbaewi0thfkcqyan'},
 {'Date': '2023-10-17',
  'Event': 'The WHO statistics indicated that approximately half of the casualties were women and children.',
  'Article_id': 'sbaewi0thfkcqyan'},
 {'Date': '2023-10-21',
  'Event': 'Aid began flowing into Gaza through the Rafah crossing point, with up to 20 trucks crossing daily.',
  'Article_id': 'c10ogpwyyopbp20g'},
 {'Date': '2023-10-22',
  'Event': 'US Special Envoy David Satterfield confirmed that the provision of food, water, and medicine would remain the focus of the aid effort in the immediate future.',
  'Article_id': 'c10ogpwyyopbp20g'},
 {'Date': '2023-10-22',
  'Event': 'Two hostages were released through the Rafah crossing, with Hamas crediting Qatar and Egypt for their mediation.',
  'Article_id': 'c10ogpwyyopbp20g'},
 {'Date': '2023-10-23',
  'Event': 'The Singapore Red Cross (SRC) launched a public fundraising appeal for relief and recovery operations in Gaza. This appeal was scheduled to close on January 31, 2024.',
  'Article_id': 'yzggf4j7y9n14t4k'},
 {'Date': '2023-10-23',
  'Event': 'The SRC previously pledged US$150,000 to relief efforts in Gaza.',
  'Article_id': 'yzggf4j7y9n14t4k'},
 {'Date': '2023-10-23',
  'Event': 'The Singapore Government contributed $300,000 towards relief operations in Gaza through the SRC.',
  'Article_id': 'yzggf4j7y9n14t4k'},
 {'Date': '2023-10-23',
  'Event': 'Two more hostages were released.',
  'Article_id': 'c10ogpwyyopbp20g'},
 {'Date': '2023-10-24',
  'Event': 'In response to the ongoing Israel-Hamas war and the humanitarian crisis in Gaza, President Biden pledged US$100 million in aid for the citizens of Gaza.',
  'Article_id': 'yssfrux1ldigt3pb'},
 {'Date': '2023-10-24',
  'Event': 'The US, in conjunction with Israel and Egypt, agreed to a plan to deliver the humanitarian aid to Gaza via the Rafah crossing on the Egyptian border.',
  'Article_id': 'yssfrux1ldigt3pb'},
 {'Date': '2023-10-24',
  'Event': 'The aid convoy, under the UN flag, will be inspected for weapons before crossing into Gaza. The convoy will be unarmed and will face potential risks of diversion or attack.',
  'Article_id': 'yssfrux1ldigt3pb'},
 {'Date': '2023-10-24',
  'Event': 'An Israeli tank shelled an Egyptian position near the Rafah crossing, injuring several Egyptian border guards. Israel claimed the incident was an accident.',
  'Article_id': 'c10ogpwyyopbp20g'},
 {'Date': '2023-10-27',
  'Event': 'Before the conflict, around 500 trucks per day were crossing into Gaza. However, in the days following the Hamas attack, only an average of 12 trucks per day were entering Gaza.',
  'Article_id': 'tubm0bbus5itkpgm'},
 {'Date': '2023-10-28',
  'Event': 'United Nations Secretary-General Antonio Guterres expressed concern about the limited aid delivery to Gaza, highlighting the significant decrease in the number of trucks entering the enclave.',
  'Article_id': 'tubm0bbus5itkpgm'},
 {'Date': '2023-10-28',
  'Event': 'Israel vowed to eliminate Hamas, the ruling group in Gaza, which they accused of orchestrating the October 7th attack on southern Israel.',
  'Article_id': 'tubm0bbus5itkpgm'},
 {'Date': '2023-10-28',
  'Event': 'Israeli jets continued to drop bombs in Gaza, suggesting a possible ground offensive against Hamas.',
  'Article_id': 'tubm0bbus5itkpgm'},
 {'Date': '2023-10-28',
  'Event': 'Palestinian authorities reported over 7,000 deaths in Gaza.',
  'Article_id': 'tubm0bbus5itkpgm'},
 {'Date': '2023-10-29',
  'Event': 'Thousands of Gaza residents broke into U.N. warehouses to seize flour and other items, leading to the closure of four U.N. aid distribution centers and a storage facility in Gaza. This event signifies the desperate situation in Gaza as food and water shortages become increasingly severe due to the ongoing conflict.',
  'Article_id': '3iwa09nlsu0xxnrk'},
 {'Date': '2023-10-29',
  'Event': 'The Rafah border crossing, a vital logistics hub for aid distribution, became overwhelmed with 8,000 people seeking shelter, further complicating the delivery of humanitarian aid. This event highlights the escalating humanitarian crisis in Gaza as the conflict intensifies and more people seek safety and resources.',
  'Article_id': '3iwa09nlsu0xxnrk'},
 {'Date': '2023-10-29',
  'Event': "Despite international pressure, Israel refused to allow fuel, water supplies, and relief distribution in Gaza from its territory, citing concerns about aid falling into Hamas's hands. This event reflects Israel's ongoing efforts to limit Hamas's access to resources while attempting to contain the conflict's impact on Gaza's civilian population.",
  'Article_id': '3iwa09nlsu0xxnrk'},
 {'Date': '2023-10-29',
  'Event': 'The Palestinian Red Crescent reported that 140 trucks of aid had entered Gaza since Oct. 7, with the largest delivery of 33 trucks arriving on Sunday. This event demonstrates the efforts being made by international organizations to provide humanitarian aid to Gaza, despite the challenges posed by the conflict.',
  'Article_id': '3iwa09nlsu0xxnrk'},
 {'Date': '2023-10-29',
  'Event': "U.S. President Joe Biden and Egypt's Abdel Fattah al-Sisi committed to accelerating assistance to Gaza, leading to the dispatch of several dozen trucks from the Egyptian side of Rafah. This event indicates a growing commitment from international actors to provide support to the struggling people of Gaza, although challenges remain in effectively delivering aid.",
  'Article_id': '3iwa09nlsu0xxnrk'},
 {'Date': '2023-10-29',
  'Event': 'Egypt\'s Foreign Ministry criticized "Israeli obstacles," particularly truck inspection procedures, as impeding the efficient delivery of aid to Gaza through the Rafah crossing.',
  'Article_id': 'tubm0bbus5itkpgm'},
 {'Date': '2023-10-29',
  'Event': 'Egypt highlighted the time-consuming process of inspecting trucks at the Israeli Nitzana crossing before they reach the Rafah crossing, causing significant delays in aid delivery.',
  'Article_id': 'tubm0bbus5itkpgm'},
 {'Date': '2023-10-29',
  'Event': 'The Rafah crossing, under Egyptian control and not bordering Israel, has become the primary route for aid delivery to Gaza since the Israeli siege.',
  'Article_id': 'tubm0bbus5itkpgm'},
 {'Date': '2023-10-30',
  'Event': 'UNWRA officials expressed that the situation in Gaza had deteriorated to a point where increased aid alone could not solve the problems. A humanitarian ceasefire was deemed necessary to address the collapse of public services and the escalating humanitarian crisis. This event highlights the urgency of finding a resolution to the conflict to prevent a further deterioration of the situation in Gaza.',
  'Article_id': '3iwa09nlsu0xxnrk'},
 {'Date': '2023-10-30',
  'Event': 'The SRC announced its decision to send US$200,000 (S$273,000) worth of relief supplies to Gaza.',
  'Article_id': 'yzggf4j7y9n14t4k'},
 {'Date': '2023-10-30',
  'Event': 'The SRC deployed a staff member to Cairo, Egypt, to collaborate with the Egyptian Red Crescent (ERC) in assessing the immediate needs in Gaza and facilitating the delivery of relief supplies.',
  'Article_id': 'yzggf4j7y9n14t4k'},
 {'Date': '2023-10-30',
  'Event': 'The SRC and ERC are working together to coordinate humanitarian aid into Gaza through the Rafah border crossing.',
  'Article_id': 'yzggf4j7y9n14t4k'},
 {'Date': '2023-10-30',
  'Event': 'Reports indicate that food, water, and medical supplies have been entering Gaza through the Egyptian border, but the amount provided is insufficient to meet the substantial needs on the ground.',
  'Article_id': 'yzggf4j7y9n14t4k'},
 {'Date': '2023-10-30',
  'Event': 'The SRC statement mentions that affected communities in Gaza have received limited humanitarian aid and that the Palestine Red Crescent Society (PRCS) has been operating under challenging conditions to provide emergency assistance.',
  'Article_id': 'yzggf4j7y9n14t4k'},
 {'Date': '2023-10-30',
  'Event': 'The SRC highlights severe shortages of essential supplies in Gaza, including fuel, water, food, and medical supplies, which are rapidly depleting.',
  'Article_id': 'yzggf4j7y9n14t4k'},
 {'Date': '2023-10-30',
  'Event': 'The SRC expressed deep concern about the suffering of the people in Gaza, particularly the hardships faced by families, children, women, and the elderly.',
  'Article_id': 'yzggf4j7y9n14t4k'},
 {'Date': '2023-10-30',
  'Event': 'The SRC mentioned that many victims in Gaza have to travel long distances to reach safe areas and lack access to basic necessities like shelter and water.',
  'Article_id': 'yzggf4j7y9n14t4k'},
 {'Date': '2023-10-30',
  'Event': 'The SRC noted that damaged infrastructure and communication disruptions have exacerbated the crisis in Gaza, isolating victims from the outside world.',
  'Article_id': 'yzggf4j7y9n14t4k'},
 {'Date': '2023-10-30',
  'Event': 'The SRC urged all parties involved in the conflict to uphold their responsibilities under international humanitarian law and prioritize the protection of civilians, including hospitals, medical personnel, and humanitarian workers.',
  'Article_id': 'yzggf4j7y9n14t4k'},
 {'Date': '2023-10-30',
  'Event': 'The SRC stressed the importance of enabling unhindered access for critical humanitarian aid to reach all affected communities in Gaza, including displaced individuals.',
  'Article_id': 'yzggf4j7y9n14t4k'},
 {'Date': '2023-11-14',
  'Event': "Hamas releases a video showing an Israeli soldier, who was captured during the October 7th raid, alive. Later, Hamas releases images of what they claim to be the soldier's body after she was killed in an Israeli strike.",
  'Article_id': 'ivsjqtgvfm48daf7'},
 {'Date': '2023-11-15',
  'Event': 'Israel allowed a small amount of fuel into Gaza to keep UNRWA aid delivery trucks moving.',
  'Article_id': 'et24x553bmr7np38'},
 {'Date': '2023-11-16',
  'Event': "US Secretary of State Antony Blinken made calls to members of Israel's war Cabinet, warning of a humanitarian catastrophe in Gaza due to the fuel shortage.",
  'Article_id': 'et24x553bmr7np38'},
 {'Date': '2023-11-16',
  'Event': "No aid lorries arrive in Gaza for a second consecutive day due to fuel shortages. Israeli troops recover the body of another woman hostage, also in a building near Al-Shifa Hospital. Hamas claims that the U.S. allegations of their military use of Al-Shifa Hospital are false and that the Israeli military performance is weak. Hamas' Al-Quds Brigades engage Israeli forces in Jenin, unleashing a 'torrent of fire' and laying ambushes with explosives.",
  'Article_id': 'ivsjqtgvfm48daf7'},
 {'Date': '2023-11-17',
  'Event': "Israel's war Cabinet agreed to allow 140,000 liters of fuel into Gaza every two days, after a request from Washington.",
  'Article_id': 'et24x553bmr7np38'},
 {'Date': '2023-11-17',
  'Event': "United Nations aid deliveries to Gaza are suspended again due to fuel shortages and a communication shutdown, deepening the suffering of thousands of hungry and homeless Palestinians. The UN World Food Programme (WFP) warns of the 'immediate possibility of starvation' due to the lack of food supplies. Israeli troops find a tunnel shaft used by Hamas at Al-Shifa Hospital and a vehicle containing a large number of weapons. The Israeli military says soldiers retrieve the body of a female soldier who had been held captive near Al-Shifa Hospital. Israeli warplanes strike militants in Jenin who had opened fire on Israeli soldiers, killing at least five of them. The World Health Organisation expresses concern about the spread of disease in Gaza, citing a surge in respiratory infections and diarrhoea cases.",
  'Article_id': 'ivsjqtgvfm48daf7'}]