In [36]:
import pandas as pd
import xml.etree.ElementTree as ET
import numpy as np
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Dropout, Bidirectional, Attention
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.callbacks import EarlyStopping
import spacy

# Load spaCy's English model
nlp = spacy.load('en_core_web_sm')

# Function to parse a TML file (TimeML format) and extract events, T-LINKs, and TIMEX3
def parse_tml_with_context(file_path):
    events = []
    t_links = []
    timex3 = []

    tree = ET.parse(file_path)
    root = tree.getroot()

    for event in root.findall('.//EVENT'):
        event_id = event.attrib.get('eid', 'N/A')  # Get event ID, default to 'N/A' if not found
        text_element = event.find('TEXT')
        if text_element is not None:  # Check if TEXT element exists
            event_text = text_element.text.strip()
            events.append((event_id, event_text))
        else:
            print(f"Warning: No TEXT element found for event ID {event_id}.")  # Debug info

    for link in root.findall('.//T_LINK'):
        t_links.append(link.attrib)

    for timex in root.findall('.//TIMEX3'):
        timex3.append(timex.attrib)

    return events, t_links, timex3


# Load the datasets
timebank_events, _, _ = parse_tml_with_context('TimeBank.tml')
timeeval3_events, _, _ = parse_tml_with_context('TimeEval3.tml')

# Sample dataset preparation (using events from TimeBank for demonstration)
sample_data = [event[1] for event in timebank_events]  # Extracting event texts

# Prepare input data for events
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sample_data)

# Define model parameters
max_length = 50  # Define max sequence length
embedding_dim = 128  # Define embedding dimensions
lstm_units = 64  # Define LSTM units

# Build the LSTM model with Attention Mechanism
def build_model(vocab_size, embedding_dim, max_length, lstm_units):
    input_layer = Input(shape=(max_length,))
    embedding_layer = Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=max_length)(input_layer)
    lstm_layer = Bidirectional(LSTM(lstm_units, return_sequences=True))(embedding_layer)
    attention_layer = Attention()([lstm_layer, lstm_layer])  # Attention mechanism
    flatten_layer = Dropout(0.5)(attention_layer)
    output_layer = Dense(1, activation='sigmoid')(flatten_layer)

    model = Model(inputs=input_layer, outputs=output_layer)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    return model

# Build and summarize the model
vocab_size = len(tokenizer.word_index) + 1
attention_model = build_model(vocab_size, embedding_dim, max_length, lstm_units)
attention_model.summary()

# Function to extract events using spaCy's POS tagging (verbs as events)
def extract_events_with_spacy(sentence):
    doc = nlp(sentence)
    events = []

    for token in doc:
        # Detect events with verbs
        if token.pos_ == 'VERB':
            events.append(token.text)

    return events

# Function to prepare event sequences for a given sentence
def prepare_event_sequences(events, tokenizer, maxlen):
    event_sequences = tokenizer.texts_to_sequences(events)  # Convert events to sequences
    padded_sequences = pad_sequences(event_sequences, padding='post', maxlen=maxlen)  # Pad sequences
    return padded_sequences

# Function to order events based on model predictions
def predict_event_order(sentence, model, tokenizer, maxlen):
    events = extract_events_with_spacy(sentence)

    if not events:
        return "No events detected in the sentence."

    event_sequences = prepare_event_sequences(events, tokenizer, maxlen)
    predictions = model.predict(event_sequences)  # Get predictions for each event

    # Get predicted classes (indices of the highest predicted score)
    predicted_classes = np.argmax(predictions, axis=1)

    # Sort the events based on their predicted class scores
    sorted_events = sorted(zip(predicted_classes, events), key=lambda x: x[0])

    # Return the ordered events
    ordered_events = [event for _, event in sorted_events]
    return ordered_events

# Define a complex sample sentence with interrelated events
complex_sample_sentence = (
    "After Alice woke up, she decided to prepare breakfast. "
    "While the eggs were frying, her friend Bob arrived unexpectedly. "
    "They chatted for a while, and then they both went for a jog in the park before it started to rain."
)

# Predict event order
ordered_events_complex = predict_event_order(complex_sample_sentence, attention_model, tokenizer, max_length)
print("Predicted Order of Events:", ordered_events_complex)




[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 345ms/step
Predicted Order of Events: ['woke', 'decided', 'prepare', 'frying', 'arrived', 'chatted', 'went', 'started', 'rain']
