In [23]:
%load_ext autoreload
%autoreload 2

import pandas as pd

# Set display options to show more columns and wider output
pd.set_option('display.max_columns', None)  # Show all columns
pd.set_option('display.max_colwidth', None)  # Don't truncate column contents

from IPython.display import display

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


# Tasks
## Backlog
- Remove \n signs
- Fix cost calculation
## Done
- Pass for each event the last timestemp an update apeared
- Pass the OpenAI client json that is loaded with data the the promot will use
- Add total cost for OpenAI client
- Create organizations list with translation to he and en

# אפיון בסיסי
- להתריע על אירועים לפי גזרה מוגדרת שמעניינת אותנו
- לא כל אירוע שמתקיים בארץ ואפילו בצפון מעניין אותנו
- צריך להגדיר בצורה מדויקת מה מעניין ומה לא

# Read Messeges

In [8]:
import json
import pandas as pd

# Specify the file path
file_path = "telegram_messages.json"  # Replace with the actual file path if different

# Read the JSON file
with open(file_path, "r", encoding="utf-8") as f:
    data = json.load(f)

# Convert the JSON data to a Pandas DataFrame
df = pd.json_normalize(data)

# Display the DataFrame
df.head()

Unnamed: 0,channel,message_id,timestamp,message,media,metadata.sender_id,metadata.message_type
0,From_hebron,270263,2024-10-13 19:32:55,♦️إعلام العدو يتحدث عن حدث أمني صعب.,[],-1001478404564,Message
1,From_hebron,270262,2024-10-13 19:32:20,♦️ يسرائيل للو تسنزورا: إصابة قاعدة تدريب غولا...,[],-1001478404564,Message
2,From_hebron,270261,2024-10-13 19:31:11,محكمة الاحتلال تُمدد الحكم على الأسير سليم الر...,"[{'media_type': 'MessageMediaPhoto', 'media_id...",-1001478404564,Message
3,From_hebron,270260,2024-10-13 19:29:24,♦️المتحدثة باسم الشرطة الصهيونيه : المتظاهرون ...,"[{'media_type': 'MessageMediaDocument', 'media...",-1001478404564,Message
4,From_hebron,270259,2024-10-13 19:29:07,♦️ القسام يعلن تفجير عين نفق في قوة اسرائيلية ...,[],-1001478404564,Message


# Translate Messages

In [73]:
import pandas as pd
from typing import Optional, Dict, List
from pydantic import BaseModel
from openai_client import OpenAIClient
from tqdm import tqdm

class TranslationResponse(BaseModel):
    """Response model for translations."""
    hebrew: str
    english: str

class NERResponse(BaseModel):
    """Response model for Named Entity Recognition (NER)."""
    locations: List[str]
    people: List[str]
    organizations: List[str]

class MessageTranslator:
    """Processor class to handle Telegram message translation."""
    
    def __init__(self, client: OpenAIClient):
        self.client = client

    def translate_message(self, message: str) -> Optional[TranslationResponse]:
        """Translate a Telegram message into both Hebrew and English."""
        system_message = """
            Act as a highly accurate translator. Your task is to take an Arabic message from a Telegram group and translate it into both 
            Hebrew and English.

            Ensure that:
            
            - All emojis and decorative symbols (such as bullets, arrows, or icons) are removed from the original message before translation.
            - The translations are precise and maintain the original meaning, tone, and nuance, so the user fully understands both the context 
              and sentiment.
            - Military, organizational, or specific terminology (e.g., names of groups, military actions) should be translated consistently 
              without alteration or substitution.
            - Prioritize clarity and natural phrasing in both Hebrew and English, avoiding overly literal translations, while maintaining 
              fidelity to the original message.
            - Retain punctuation and formatting (such as parentheses) accurately to ensure readability in both languages.
            - Maintain urgency and emotional tone (e.g., in reports of military strikes, martyrdom) in a way that sounds natural and fluent.
        """
        
        response = self.client.chat(
            system_message=system_message,
            user_message=message,
            response_format=TranslationResponse
        )

        return response

class NERProcessor:
    """Processor class to handle Named Entity Recognition (NER) on Telegram messages."""
    
    def __init__(self, client: OpenAIClient):
        self.client = client

    def extract_entities(self, message: str) -> Optional[NERResponse]:
        """Perform NER to extract locations, people, and organizations from the message."""
        system_message = """
            Act as an entity recognition system. Your task is to extract the following entities from the provided Arabic Telegram message:
            
            - Locations: Geographical places mentioned in the message.
            - People: Names of individuals mentioned.
            - Organizations: Names of groups, companies, or military organizations mentioned.

            Ensure that:
            - You return only the names without extra context or symbols.
            - If no entities are found, return empty lists for each category.
        """
        
        response = self.client.chat(
            system_message=system_message,
            user_message=message,
            response_format=NERResponse
        )

        return response

class MainProcessor:
    """Class to manage the full pipeline: reading messages, translating, extracting entities, and updating DataFrame."""
    
    def __init__(self, translator: MessageTranslator, ner_processor: NERProcessor, df: pd.DataFrame):
        self.translator = translator
        self.ner_processor = ner_processor
        self.df = df.copy()  # Avoid modifying the original df

        # Prepare new columns for translations and entity recognition
        self.df['hebrew_translation'] = None
        self.df['english_translation'] = None
        self.df['locations'] = None
        self.df['people'] = None
        self.df['organizations'] = None

    def run(self) -> pd.DataFrame:
        """Run the full pipeline, translating each message, extracting entities, and returning an updated DataFrame."""
        for index, row in tqdm(self.df.iterrows(), total=len(self.df), desc="Processing messages"):
            message = row['message']
            
            # Perform translation
            translation = self.translator.translate_message(message)
            if translation:
                self.df.at[index, 'hebrew_translation'] = translation.hebrew
                self.df.at[index, 'english_translation'] = translation.english
            else:
                print(f"No translation found for message_id {row['message_id']}.")

            # Perform NER
            ner_data = self.ner_processor.extract_entities(translation.english)
            if ner_data:
                self.df.at[index, 'locations'] = ", ".join(ner_data.locations)
                self.df.at[index, 'people'] = ", ".join(ner_data.people)
                self.df.at[index, 'organizations'] = ", ".join(ner_data.organizations)
            else:
                print(f"No NER data found for message_id {row['message_id']}.")

        # Return the updated DataFrame with translations and NER data
        return self.df

if __name__ == "__main__":
    # Specify the file path
    file_path = "telegram_messages.json"  # Replace with the actual file path if different
    
    # Read the JSON file
    with open(file_path, "r", encoding="utf-8") as f:
        data = json.load(f)
    
    # Convert the JSON data to a Pandas DataFrame
    df = pd.json_normalize(data)

    # Initialize the OpenAI client, translators, and processors
    openai_client = OpenAIClient()
    message_translator = MessageTranslator(openai_client)
    ner_processor = NERProcessor(openai_client)
    main_processor = MainProcessor(message_translator, ner_processor, df)

    # Run the pipeline and get the updated DataFrame
    updated_df = main_processor.run()

    # Print the total costs of OpenAI calls
    openai_client.print_total_costs()

    # Display the updated DataFrame with translations and entity recognition
    display(updated_df[['message', 'hebrew_translation', 'english_translation', 'locations', 'people', 'organizations']])


Using closest match for encoding: 'gpt-4o' for input model 'gpt-4o-2024-08-06'
Resolved model for encoding: 'gpt-4o'
Using pricing model: 'gpt-4o-2024-08-06'


Processing messages: 100%|███████████████████████████████████████| 25/25 [01:15<00:00,  3.03s/it]

Total prompt cost: $0.021985
Total completion cost: $0.020620
Overall total cost: $0.042605





Unnamed: 0,message,hebrew_translation,english_translation,locations,people,organizations
0,♦️إعلام العدو يتحدث عن حدث أمني صعب.,תקשורת האויב מדברת על אירוע ביטחוני קשה.,Enemy media talks about a serious security incident.,,,
1,♦️ يسرائيل للو تسنزورا: إصابة قاعدة تدريب غولاني \nبطائرة\n مسيرة تابعة للحزب في عكا قبل قليل,ישראל לא תצנזור: פגיעה בבסיס אימונים של גולני ברחפן של החזבאללה בעכו לפני זמן קצר.,Israel will not censor: Golani training base hit by a Hezbollah drone in Acre recently.,Acre,,"Israel, Golani, Hezbollah"
2,محكمة الاحتلال تُمدد الحكم على الأسير سليم الرجوب من سكان حي كريسة بمدينة دورا للمرة الثالثة على التوالي لمدة ستة أشهر إضافية.,"בית המשפט של הכיבוש מאריך את פסק הדין על האסיר סלים רג'וב, תושב שכונת קריסה בעיר דורה, בפעם השלישית ברציפות למשך שישה חודשים נוספים.","The occupation court extends the sentence of detainee Salim Rajoub, a resident of the Karisah neighborhood in the city of Dura, for the third consecutive time for an additional six months.","Karisah, Dura",Salim Rajoub,
3,♦️المتحدثة باسم الشرطة الصهيونيه : المتظاهرون ضد الحكومة عرضوا حياة ضباط الشرطة للخطر أمس عندما تعمدوا إشعال حريق كبير على بعد بوصات منهم.,דוברת המשטרה הציונית: המפגינים נגד הממשלה סיכנו את חיי קציני המשטרה אתמול כאשר בכוונה תחילה הציתו שריפה גדולה במרחק של אינצ'ים מהם.,The spokesperson for the Zionist police: The demonstrators against the government endangered the lives of police officers yesterday when they deliberately ignited a large fire just inches away from them.,,,Zionist police
4,♦️ القسام يعلن تفجير عين نفق في قوة اسرائيلية راجلة شمال مدينة بيت لاهيا شمال القطاع,הקסאם מכריז על פיצוץ פתח מנהרה בכוח ישראלי רגלי בצפון העיר בית להיה בצפון הרצועה,Al-Qassam announces the detonation of a tunnel entrance against an Israeli foot patrol north of Beit Lahia in the northern part of the Gaza Strip,"Beit Lahia, Gaza Strip",,Al-Qassam
5,🛑اعتراض الطائرة بدون طيار في سماء كريات قبل قليل,יירוט כלי טיס בלתי מאויש מעל קריית לפני זמן קצר,Interception of an unmanned aerial vehicle over Kiryat a short while ago,Kiryat,,
6,"🚫🟠🟠 🟢 كتائب القسام تستهدف ناقلة جند صهيونية بقذيفة ""تاندوم"" شمال مدينة بيت لاهيا شمال القطاع","גדודי עז א-דין אל-קסאם תוקפים רכב משוריין ישראלי עם רקטה ""טאנדם"" בצפון בית לייה בצפון הרצועה",Al-Qassam Brigades target a Zionist armored vehicle with a 'Tandem' missile north of Beit Lahia in the northern Gaza Strip,"Beit Lahia, northern Gaza Strip",,Al-Qassam Brigades
7,♦️بالفيديو | كلمة سماحة الأمين العام الشهيد الي ( المجاهدين خلال إحدى المناورات العسكرية,"בסרטון | נאום של מזכ""ל הקדוש, השהיד, לאחים הלוחמים במהלך אחד התרגילים הצבאיים","In the video | Speech by the Sacred Secretary-General, the martyr, to the mujahideen during one of the military exercises",,,
8,♦️عاجل| حزب الله يشن هجوماً مركباً بالطائرات المسيّرة والصواريخ على شمال فلسطين المحتلة.,דחוף| חזבאללה מבצע התקפה משולבת באמצעות רחפנים וטילים על צפון פלסטין הכבושה.,Urgent| Hezbollah launches a combined attack with drones and missiles on northern occupied Palestine.,northern occupied Palestine,,Hezbollah
9,♦️التقارير الأولية دون رقابة: \nإصابة مستوطن نتيجة سقوط شظايا في منطقة كريات شمونة.,דיווחים ראשוניים ללא צנזורה: נפגע מתנחל כתוצאה מנפילת רסיסים באזור קריית שמונה.,Initial uncensored reports: A settler was injured due to falling shrapnel in the Kiryat Shmona area.,Kiryat Shmona,,


# Event Management

In [93]:
updated_df.columns

Index(['channel', 'message_id', 'timestamp', 'message', 'media',
       'metadata.sender_id', 'metadata.message_type', 'hebrew_translation',
       'english_translation', 'locations', 'people', 'organizations'],
      dtype='object')

In [95]:
import pandas as pd
from datetime import datetime, timedelta
from pydantic import BaseModel
from typing import List, Optional
from openai_client import OpenAIClient

class EventUpdateResponse(BaseModel):
    """Response model for event updates."""
    event_type: str
    event_name: str

class TelegramMessageProcessor:
    """Processor class to handle Telegram message classification and updates."""
    
    def __init__(self, client: OpenAIClient):
        self.client = client
        self.active_events = []  # List to track currently active events

    def classify_message(self, message: pd.Series, current_events: List[dict]) -> Optional[EventUpdateResponse]:
        """Classify a Telegram message into an event type and generate an event name."""
        
        # Create a formatted string to pass the current event names and last update timestamps
        event_info = "\n".join([f"Event: {event['name']} (Last update: {event['last_update']})" for event in current_events])
        
        system_message = f"""
You are an event classifier. Based on the following ongoing events and their last update dates, classify the new message into one of 
these events or suggest a new event name.

Ongoing events:
{event_info}

Important Note: If the event mentioned in the message seems to refer to something that happened after a long time or a separate instance
of a similar event, classify it as a new event.

Message:
        """
        user_message = f"{message['english_translation']}, which sent in: {message['timestamp']}"
        
        # print("------------------------------------------------------------------------------------")
        # print(system_message)
        # print(user_message)

        response = self.client.chat(
            system_message=system_message,
            user_message=user_message,
            response_format=EventUpdateResponse
        )

        return response
    
    def update_event(self, classification: EventUpdateResponse, message: str, timestamp: datetime) -> str:
        """Update an existing event with new information, or create a new event."""
        event_name = classification.event_name or f"{classification.event_type} Event {len(self.active_events) + 1}"
        event_exists = False

        for event in self.active_events:
            # Check if the message belongs to this event by name
            if event['name'] == event_name:
                # Update existing event
                event['messages'].append(message)
                event['timestamps'].append(timestamp)
                event['last_update'] = timestamp
                event_exists = True
                print(f"Message added to existing event: {event_name}")
                break

        if not event_exists:
            # Create a new event if not found
            new_event = {
                'type': classification.event_type,
                'name': event_name,
                'messages': [message],
                'timestamps': [timestamp],
                'first_report': timestamp,
                'last_update': timestamp
            }
            self.active_events.append(new_event)
            print(f"New event created: {event_name}")

        print(f"Message: {message['english_translation']}, Event Type: {classification.event_type}\n")
        return event_name
    
    def process_new_message(self, message: pd.Series) -> str:
        """Process a new incoming Telegram message."""
        current_event_names = [{'name': event['name'], 'last_update': event['last_update']} for event in self.active_events]
        classification = self.classify_message(message, current_event_names)
        
        if classification:
            event_name = self.update_event(classification, message, timestamp)
            return event_name
        else:
            print("Message could not be classified.")
            return "Unclassified"

    def get_active_events(self):
        """Get the current list of active events."""
        return [{"name": event['name'], "type": event['type'], "last_update": event['last_update']} for event in self.active_events]

# Create a pandas DataFrame to store messages, timestamps, and event names
df = updated_df.copy()

# Simulating the processing of the messages
client = OpenAIClient()  # Initialize OpenAI client
processor = TelegramMessageProcessor(client)

# Process each fake message and store the event name in the DataFrame
for index, row in df.iterrows():
    event_name = processor.process_new_message(row)
    df.at[index, "event_name"] = event_name

# Display the final DataFrame with event names, messages, and timestamps
display(df[['event_name', 'english_translation', 'timestamp']].sort_values(by=['event_name', 'timestamp'], ascending=[True, True]))

# Count event messages
print(df['event_name'].value_counts())

# Print the total costs of OpenAI calls
openai_client.print_total_costs()

Using closest match for encoding: 'gpt-4o' for input model 'gpt-4o-2024-08-06'
Resolved model for encoding: 'gpt-4o'
Using pricing model: 'gpt-4o-2024-08-06'
New event created: 2024 Security Incident Discussion by Media
Message: Enemy media talks about a serious security incident., Event Type: new

New event created: 2024 Hezbollah Drone Strike in Acre
Message: Israel will not censor: Golani training base hit by a Hezbollah drone in Acre recently., Event Type: New Event

New event created: 2024 Extension of Salim Rajoub's Detention in Dura
Message: The occupation court extends the sentence of detainee Salim Rajoub, a resident of the Karisah neighborhood in the city of Dura, for the third consecutive time for an additional six months., Event Type: New Event

New event created: 2024 Protest and Fire Incident Against Government in Zionist Region
Message: The spokesperson for the Zionist police: The demonstrators against the government endangered the lives of police officers yesterday when

Unnamed: 0,event_name,message,timestamp
6,2024 Al-Qassam Brigade Missile Attack on Armored Vehicle in Beit Lahia,"🚫🟠🟠 🟢 كتائب القسام تستهدف ناقلة جند صهيونية بقذيفة ""تاندوم"" شمال مدينة بيت لاهيا شمال القطاع",2024-10-13 19:19:43
4,2024 Al-Qassam Tunnel Detonation Incident in Beit Lahia,♦️ القسام يعلن تفجير عين نفق في قوة اسرائيلية راجلة شمال مدينة بيت لاهيا شمال القطاع,2024-10-13 19:29:07
16,2024 Arrival of American THAAD System in Israel,♦️البنتاغون في إعلان رسمي: نظام الدفاع الجوي بعيد المدى ثاد التابع للجيش الأمريكي سيصل إلى إسرائيل.\n\nومن المتوقع أن يصل معها نحو 100 جندي.,2024-10-13 18:55:04
24,2024 Clashes in Tuqu' with Occupation Forces,اندلاع مواجهات بين الشبان وقوات الاحتلال في بلدة تقوع قضاء بيت لحم.,2024-10-13 18:36:50
20,2024 Drone Attack and Siren Sounding in Nahariya,♦️هجوم بلمسيرات في نهاريا و صفارات الانذار لا تزال تدوي.\n@FROM_HEBRON,2024-10-13 18:49:12
18,2024 Explosions Heard in Occupied Acre,♦️#عاجل..\n\nسماع دوي انفجارات في عكا المحتلة ..,2024-10-13 18:52:47
2,2024 Extension of Salim Rajoub's Detention in Dura,محكمة الاحتلال تُمدد الحكم على الأسير سليم الرجوب من سكان حي كريسة بمدينة دورا للمرة الثالثة على التوالي لمدة ستة أشهر إضافية.,2024-10-13 19:31:11
17,2024 Hezbollah Combined Drone and Missile Attack on Northern Occupied Palestine,♦️مسيرة اخترقت الناقورة ونهاريا و عكا والا ع مشارف حيفا,2024-10-13 18:53:02
15,2024 Hezbollah Combined Drone and Missile Attack on Northern Occupied Palestine,♦️يبدو أن الليله سوف تكون جحيم على الاحتلال \nحزب الله يضرب الكيان بكل قوه الان,2024-10-13 18:56:55
12,2024 Hezbollah Combined Drone and Missile Attack on Northern Occupied Palestine,♦️عاجل: مصادر عبرية: دوي عشرات الانفجارات تسمع في عدة مناطق شمال فلسطين المحتلة، بعد رشقة صاروخية ومسيرات من #لبنان.,2024-10-13 19:10:38


event_name
2024 Hezbollah Combined Drone and Missile Attack on Northern Occupied Palestine    4
2024 Security Alert in Upper and Western Galilee                                   2
2024 Kiryat Shmona Shrapnel Injury Incident                                        2
2024 Security Incident Discussion by Media                                         1
2024 Large Rocket Barrage from Lebanon on Northern Occupied Palestine              1
2024 Unidentified Aerial Activity Over Galilee                                     1
2024 Drone Attack and Siren Sounding in Nahariya                                   1
2024 Saraya al-Quds Missile Attack on Merkava Tank in Jabalia                      1
2024 Explosions Heard in Occupied Acre                                             1
2024 Arrival of American THAAD System in Israel                                    1
2024 Sirens Sounding Incident in Zarit                                             1
2024 Martyrdom of Singer Hamza Abu Qinas 'Abu Bilal'  

In [112]:
import pandas as pd
from datetime import datetime
from pydantic import BaseModel
from typing import List, Dict, Optional
from enum import Enum
from openai_client import OpenAIClient

# Predefined event types (fixed)
class EventType(Enum):
    MILITARY_ATTACKS_STRIKES = "Military Attacks and Strikes"
    SECURITY_INCIDENTS = "Security Incidents"
    ROCKET_INTERCEPTIONS_BARRAGES = "Rocket or Drone Interceptions or Rocket Barrages or Sirens"
    CIVILIAN_CASUALTIES_CLASHES = "Civilian Casualties and Injuries and Clashes"
    DEATH_ANNOUNCEMENTS = "Death Announcements"
    POLITICAL_GOVERNMENT_UPDATES = "Political and Government Updates"
    MILITARY_MOVEMENTS_ANNOUNCEMENTS = "Military Movements or Announcements"

# Response model for event classification
class EventUpdateResponse(BaseModel):
    event_type: EventType   # Predefined event type
    event_number: Optional[int]  # Event number (if it exists)
    event_name: Optional[str]    # Optional event name for new events

# New model that includes message ID and its classification
class MessageClassification(BaseModel):
    message_id: str
    classification: EventUpdateResponse

class ClassificationResponse(BaseModel):
    classifications: List[MessageClassification]

class TelegramMessageProcessor:
    def __init__(self, client: OpenAIClient):
        self.client = client
        self.active_events = []  # List of active events
        self.event_counter = 1   # Counter for new events
    
    def classify_messages_batch(self, messages: List[pd.Series]) -> List[MessageClassification]:
        """Classify a batch of messages and associate each message with its classification."""
        
        # Prepare the event info to send to the classifier
        current_event_info = "\n".join([f"Event: {event['name']} (Last update: {event['last_update']})" 
                                        for event in self.active_events])
                
        # Corrected code for handling a pandas DataFrame
        formatted_messages = "\n".join([f"Message {idx+1} (ID: {message['message_id']}): {message['english_translation']}, sent at {message['timestamp']}" 
                                        for idx, (_, message) in enumerate(messages.iterrows())])
        system_message = f"""
You are an event classifier. Based on the ongoing events, classify each message into one of the existing events or suggest a new event.
The event type must match one of the predefined types (Security, Weather, Traffic), but you can assign a new event number if it's a new instance.

Ongoing events:
{current_event_info}

Messages to classify:
"""
        print(system_message)
        print(formatted_messages)

        # Call OpenAI API with the batch of messages and retrieve the classification
        classifications = self.client.chat(
            system_message=system_message,
            user_message=formatted_messages,
            response_format=ClassificationResponse,
            max_tokens=1000
        )

        print(classifications)
        
        # Tie each message ID to its classification and return a list of message-classification pairs
        message_classifications = []
        for message, classification in zip(messages, classifications):
            message_classifications.append(MessageClassification(
                message_id=message['id'],  # Use the message's unique ID
                classification=classification
            ))

        return message_classifications
    
    def update_event(self, classification: EventUpdateResponse, message: pd.Series) -> str:
        """Update existing event or create a new event instance."""
        
        # If the event number exists, it's an existing event
        if classification.event_number:
            event_name = f"{classification.event_type.value} Event {classification.event_number}"
        else:
            # Otherwise, create a new event instance
            event_name = f"{classification.event_type.value} Event {self.event_counter}"
            self.event_counter += 1  # Increment the counter for the next new event
        
        # Check if the event already exists, and update or create it
        event_exists = False
        for event in self.active_events:
            if event['name'] == event_name:
                event['messages'].append(message['english_translation'])
                event['timestamps'].append(message['timestamp'])
                event['last_update'] = message['timestamp']
                event_exists = True
                break

        if not event_exists:
            # Add a new event if it doesn't exist
            new_event = {
                'type': classification.event_type,
                'name': event_name,
                'messages': [message['english_translation']],
                'timestamps': [message['timestamp']],
                'first_report': message['timestamp'],
                'last_update': message['timestamp']
            }
            self.active_events.append(new_event)

        return event_name

    def process_new_messages_batch(self, messages: List[pd.Series]) -> List[Dict[str, str]]:
        """Process a batch of messages and return a list of message IDs with their event names."""
        
        # Classify the batch of messages and get their classifications
        message_classifications = self.classify_messages_batch(messages)
        
        # Process each message and update the events
        message_event_map = []
        for message_class in message_classifications:
            message_id = message_class.message_id
            classification = message_class.classification
            
            # Find the actual message in the batch by its ID
            message = next(msg for msg in messages if msg['id'] == message_id)
            
            # Update or create a new event and store the event name
            event_name = self.update_event(classification, message)
            
            # Map the message ID to the corresponding event name
            message_event_map.append({
                'message_id': message_id,
                'event_name': event_name
            })

        return message_event_map

# Assuming df contains columns like: 'message_id', 'english_translation', 'timestamp', etc.
df = updated_df.copy()

# Simulating the processing of the messages
client = OpenAIClient()  # Initialize OpenAI client
processor = TelegramMessageProcessor(client)

# Prepare a list to store processed results (message ID, event name, event type)
processed_data = []

# Process each batch of messages and store results
batch_size = 10  # Or however many messages you want to process in a batch
for i in range(0, len(df), batch_size):
    batch = df.iloc[i:i + batch_size]  # Get a batch of messages
    batch_results = processor.process_new_messages_batch(batch)  # Process the batch
    
    # Collect results for this batch
    for idx, result in enumerate(batch_results, start=1):  # Start index from 1 for message ID
        event_name = result['event_name']
        
        # Get the message from the batch using index
        message_row = batch.iloc[idx - 1]
        
        # Extract the classification event type
        event_type = processor.dynamic_event_names.get(event_name)  # Get event type from dynamic event names
        
        # Append results to the processed_data list with all relevant metadata
        processed_data.append({
            'batch_message_id': idx,  # Position of the message within the batch (1 to 10)
            'message_id': message_row['message_id'],  # Original message ID from the data
            'event_name': event_name,
            'event_type': event_type,
            'english_translation': message_row['english_translation'],
            'timestamp': message_row['timestamp']
        })

# Convert processed_data to a DataFrame for easier sorting and display
processed_df = pd.DataFrame(processed_data)

# Sort the DataFrame by event name and timestamp
sorted_df = processed_df.sort_values(by=['event_name', 'timestamp'], ascending=[True, True])

# Display the final DataFrame with all metadata including event names, types, and timestamps
display(sorted_df[['batch_message_id', 'message_id', 'event_name', 'event_type', 'english_translation', 'timestamp']])

# Count event occurrences
print(sorted_df['event_name'].value_counts())

# Print the total costs of OpenAI calls
openai_client.print_total_costs()

Using closest match for encoding: 'gpt-4o' for input model 'gpt-4o-2024-08-06'
Resolved model for encoding: 'gpt-4o'
Using pricing model: 'gpt-4o-2024-08-06'

You are an event classifier. Based on the ongoing events, classify each message into one of the existing events or suggest a new event.
The event type must match one of the predefined types (Security, Weather, Traffic), but you can assign a new event number if it's a new instance.

Ongoing events:


Messages to classify:

Message 1 (ID: 270263): Enemy media talks about a serious security incident., sent at 2024-10-13 19:32:55
Message 2 (ID: 270262): Israel will not censor: Golani training base hit by a Hezbollah drone in Acre recently., sent at 2024-10-13 19:32:20
Message 3 (ID: 270261): The occupation court extends the sentence of detainee Salim Rajoub, a resident of the Karisah neighborhood in the city of Dura, for the third consecutive time for an additional six months., sent at 2024-10-13 19:31:11
Message 4 (ID: 270260): The 

TypeError: 'NoneType' object is not iterable

# Creating Flow Timeline

In [40]:
from pydantic import BaseModel
from typing import List, Optional
from datetime import datetime

class EventUpdateResponse(BaseModel):
    """Response model for event updates."""
    event_type: str
    summary: str

class TelegramMessageProcessor:
    """Processor class to handle Telegram message classification and updates."""
    
    def __init__(self, client: OpenAIClient):
        self.client = client
        self.active_events = []  # List to track currently active events
    
    def classify_message(self, message: str) -> Optional[EventUpdateResponse]:
        """Classify a Telegram message into an event type and generate a summary."""
        system_message = """
            You are an event classifier. Classify the following message into one of the event types: Protest, Fire, Accident, or Unknown.

            After classification, summarize the message briefly. The summary should be clear and concise, capturing the key information of the event.

            Event Types:
            - Protest
            - Fire
            - Accident
            - Unknown
            
            Message:
        """
        
        response = self.client.chat(
            system_message=system_message,
            user_message=message,
            response_format=EventUpdateResponse
        )

        if response and response.choices and hasattr(response.choices[0].message, 'parsed'):
            return response.choices[0].message.parsed  # Parsed classification and summary
        return None
    
    def update_event(self, event_type: str, message: str):
        """Update an existing event with new information, or create a new event."""
        event_exists = False
        for event in self.active_events:
            if event['type'] == event_type:
                # Update existing event
                event['messages'].append(message)
                event['last_update'] = datetime.now()
                event['summary'] = self.summarize_event(event['messages'])
                event_exists = True
                break
        
        if not event_exists:
            # Create a new event
            new_event = {
                'type': event_type,
                'messages': [message],
                'first_report': datetime.now(),
                'last_update': datetime.now(),
                'summary': message  # Initial summary based on the first message
            }
            self.active_events.append(new_event)
    
    def summarize_event(self, messages: List[str]) -> str:
        """Summarize the event by combining related messages."""
        system_message = """
            Summarize the following set of messages into a concise and clear event summary:
        """
        message_list = "\n".join(messages)
        response = self.client.chat(
            system_message=system_message,
            user_message=message_list
        )
        
        return response.choices[0].message.content if response else "Summary unavailable."

    def process_new_message(self, message: str):
        """Process a new incoming Telegram message."""
        classification = self.classify_message(message)
        
        if classification:
            self.update_event(classification.event_type, message)
        else:
            print("Message could not be classified.")
    
    def get_active_events(self):
        """Get the current list of active events with summaries."""
        return [{"type": event['type'], "summary": event['summary'], "last_update": event['last_update']} for event in self.active_events]

# Main flow
import pandas as pd
from datetime import datetime, timedelta
import random

# Fake messages for demonstration purposes
fake_messages = [
    "Protest in London against tax reforms.",
    "Fire breaks out near the protest area in London.",
    "More protesters joining the march in Manchester.",
    "Accident on the highway blocks traffic in Birmingham.",
    "Massive protests erupt in downtown Paris.",
    "A large fire reported in an industrial complex.",
    "Protesters clash with police in Madrid.",
    "Earthquake detected near the city of Istanbul.",
    "More people join the protest in Barcelona.",
    "Firefighters are battling a fire in a residential area.",
    "Protest in New York over climate policies.",
    "Explosion heard near a protest in Beirut.",
    "Flooding reported after heavy rains in Venice.",
    "Protests in Hong Kong demanding political reforms.",
    "Accident involving multiple cars reported in Berlin."
]

# Generate random timestamps (simulating message times in the past 2 hours)
base_time = datetime.now()
timestamps = [base_time - timedelta(minutes=random.randint(1, 120)) for _ in range(len(fake_messages))]

# Create a pandas DataFrame to store messages and timestamps
df = pd.DataFrame({
    "message": fake_messages,
    "timestamp": timestamps
})

# Simulating the processing of the messages
client = OpenAIClient(show_prices=False)
processor = TelegramMessageProcessor(client)

# Process each fake message and store the results
for index, row in df.iterrows():
    message = row["message"]
    timestamp = row["timestamp"]
    print(f"Processing message at {timestamp}: {message}")
    
    processor.process_new_message(message)

# Check active events after processing all messages
active_events = processor.get_active_events()
for event in active_events:
    print(f"Event Type: {event['type']}, Last Update: {event['last_update']}, Summary: {event['summary']}")

# Display the DataFrame (with messages and timestamps)
df

Using closest match for encoding: 'gpt-4o' for input model 'gpt-4o-2024-08-06'
Resolved model for encoding: 'gpt-4o'
Using pricing model: 'gpt-4o-2024-08-06'
Processing message at 2024-10-14 00:06:25.862490: Protest in London against tax reforms.
Processing message at 2024-10-13 23:15:25.862490: Fire breaks out near the protest area in London.
Processing message at 2024-10-13 23:27:25.862490: More protesters joining the march in Manchester.
Processing message at 2024-10-13 23:09:25.862490: Accident on the highway blocks traffic in Birmingham.
Processing message at 2024-10-13 22:59:25.862490: Massive protests erupt in downtown Paris.
Processing message at 2024-10-13 23:02:25.862490: A large fire reported in an industrial complex.
Processing message at 2024-10-14 00:13:25.862490: Protesters clash with police in Madrid.
Processing message at 2024-10-13 23:45:25.862490: Earthquake detected near the city of Istanbul.
Processing message at 2024-10-13 23:20:25.862490: More people join the pro

Unnamed: 0,message,timestamp
0,Protest in London against tax reforms.,2024-10-14 00:06:25.862490
1,Fire breaks out near the protest area in London.,2024-10-13 23:15:25.862490
2,More protesters joining the march in Manchester.,2024-10-13 23:27:25.862490
3,Accident on the highway blocks traffic in Birmingham.,2024-10-13 23:09:25.862490
4,Massive protests erupt in downtown Paris.,2024-10-13 22:59:25.862490
5,A large fire reported in an industrial complex.,2024-10-13 23:02:25.862490
6,Protesters clash with police in Madrid.,2024-10-14 00:13:25.862490
7,Earthquake detected near the city of Istanbul.,2024-10-13 23:45:25.862490
8,More people join the protest in Barcelona.,2024-10-13 23:20:25.862490
9,Firefighters are battling a fire in a residential area.,2024-10-13 22:18:25.862490


In [42]:
active_events

[{'type': 'Protest',
  'summary': 'In late 2023, significant protests occurred globally in response to various political and social issues. London and Manchester witnessed demonstrations against tax reforms, while downtown Paris also saw considerable unrest. In Spain, protesters clashed with police in Madrid, with additional protests occurring in Barcelona. New York experienced demonstrations over climate policies. An explosion was reported near a protest in Beirut, and in Hong Kong, people protested demanding political reforms.',
  'last_update': datetime.datetime(2024, 10, 14, 0, 16, 0, 302282)},
 {'type': 'Fire',
  'summary': 'A fire occurred in an industrial complex near a protest area in London, with firefighters actively working to control the blaze.',
  'last_update': datetime.datetime(2024, 10, 14, 0, 15, 52, 132604)},
 {'type': 'Accident',
  'summary': 'There were two separate accidents reported: one blocking highway traffic in Birmingham and another involving multiple cars in