# Temporal Ordering in PyTorch


In [6]:
import pandas as pd
import xml.etree.ElementTree as ET
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset

# Function to parse a TML file (TimeML format) and extract events, T-LINKs, and TIMEX3
def parse_tml_with_context(file_path):
    tree = ET.parse(file_path)
    root = tree.getroot()

    events = []
    tlinks = []
    timex3s = []

    for s in root.iter('TEXT'):
        sentence_text = s.text
        for event in s.iter('EVENT'):
            event_id = event.attrib['eid']
            event_text = event.text
            events.append({'EVENT ID': event_id, 'EVENT Text': event_text, 'Context Sentence': sentence_text})

        for timex in s.iter('TIMEX3'):
            timex_id = timex.attrib['tid']
            timex_text = timex.text
            timex3s.append({'TIMEX3 ID': timex_id, 'TIMEX3 Text': timex_text})

    for tlink in root.iter('TLINK'):
        event_id_1 = tlink.attrib.get('eventInstanceID')
        event_id_2 = tlink.attrib.get('relatedToEventInstance')
        relation = tlink.attrib.get('relType')

        if event_id_1 and event_id_2:
            tlinks.append({'Event ID 1': event_id_1, 'Event ID 2': event_id_2, 'Relation': relation})

    events_df = pd.DataFrame(events)
    timex3_df = pd.DataFrame(timex3s)
    tlinks_df = pd.DataFrame(tlinks)

    return events_df, timex3_df, tlinks_df

# Load datasets
timebank_events_df, timebank_timex3_df, timebank_tlinks_df = parse_tml_with_context('TimeBank.tml')
timeeval3_events_df, timeeval3_timex3_df, timeeval3_tlinks_df = parse_tml_with_context('TimeEval3.tml')

# Combine datasets
combined_events_df = pd.concat([timebank_events_df, timeeval3_events_df], ignore_index=True)
combined_timex3_df = pd.concat([timebank_timex3_df, timeeval3_timex3_df], ignore_index=True)
combined_tlinks_df = pd.concat([timebank_tlinks_df, timeeval3_tlinks_df], ignore_index=True)

# Prepare input data for events
from tensorflow.keras.preprocessing.text import Tokenizer
from keras.preprocessing.sequence import pad_sequences

tokenizer = Tokenizer()
tokenizer.fit_on_texts(combined_events_df['EVENT Text'].tolist())
event_sequences = tokenizer.texts_to_sequences(combined_events_df['EVENT Text'].tolist())
padded_event_sequences = pad_sequences(event_sequences, padding='post', dtype='int32')

# Label encoding for relations
relation_mapping = {relation: idx for idx, relation in enumerate(combined_tlinks_df['Relation'].unique())}
encoded_labels = []
for index, row in combined_events_df.iterrows():
    relation = combined_tlinks_df[
        (combined_tlinks_df['Event ID 1'] == row['EVENT ID']) |
        (combined_tlinks_df['Event ID 2'] == row['EVENT ID'])
    ]['Relation']

    if not relation.empty:
        encoded_labels.append(relation_mapping[relation.values[0]])
    else:
        encoded_labels.append(len(relation_mapping))  # For "no relation"

encoded_labels = np.array(encoded_labels)

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(padded_event_sequences, encoded_labels, test_size=0.2, random_state=42)

# Convert data to PyTorch tensors
X_train = torch.tensor(X_train, dtype=torch.long)
X_val = torch.tensor(X_val, dtype=torch.long)
y_train = torch.tensor(y_train, dtype=torch.long)
y_val = torch.tensor(y_val, dtype=torch.long)

# Create DataLoader for batching
batch_size = 32
train_data = TensorDataset(X_train, y_train)
val_data = TensorDataset(X_val, y_val)
train_loader = DataLoader(train_data, shuffle=True, batch_size=batch_size)
val_loader = DataLoader(val_data, batch_size=batch_size)

# Define the PyTorch LSTM model
class EventOrderingModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, output_dim, dropout=0.5):
        super(EventOrderingModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, num_layers=2, dropout=dropout, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(hidden_dim * 2, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        embedded = self.dropout(self.embedding(x))
        lstm_out, _ = self.lstm(embedded)
        lstm_out = lstm_out[:, -1, :]  # Take the output of the last time step
        output = self.fc(self.dropout(lstm_out))
        return output

# Model parameters
vocab_size = len(tokenizer.word_index) + 1
embedding_dim = 128
hidden_dim = 64
output_dim = len(relation_mapping) + 1  # Include "no relation" class

# Instantiate the model
model = EventOrderingModel(vocab_size, embedding_dim, hidden_dim, output_dim)

# Define optimizer and loss function
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()

# Train the model
epochs = 10
for epoch in range(epochs):
    model.train()
    running_loss = 0
    correct = 0
    total = 0
    for inputs, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    train_accuracy = 100 * correct / total
    print(f'Epoch {epoch + 1}/{epochs}, Loss: {running_loss/len(train_loader)}, Accuracy: {train_accuracy}')

# Evaluation on validation set
model.eval()
val_loss = 0
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in val_loader:
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        val_loss += loss.item()
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

val_accuracy = 100 * correct / total
print(f'Validation Loss: {val_loss/len(val_loader)}, Validation Accuracy: {val_accuracy}')


Epoch 1/10, Loss: 1.8906916379928589, Accuracy: 36.36363636363637
Epoch 2/10, Loss: 1.8768877983093262, Accuracy: 45.45454545454545
Epoch 3/10, Loss: 1.8572030067443848, Accuracy: 61.36363636363637
Epoch 4/10, Loss: 1.833652913570404, Accuracy: 84.0909090909091
Epoch 5/10, Loss: 1.807333528995514, Accuracy: 97.72727272727273
Epoch 6/10, Loss: 1.7889308333396912, Accuracy: 95.45454545454545
Epoch 7/10, Loss: 1.7780386805534363, Accuracy: 95.45454545454545
Epoch 8/10, Loss: 1.746815800666809, Accuracy: 97.72727272727273
Epoch 9/10, Loss: 1.735805630683899, Accuracy: 100.0
Epoch 10/10, Loss: 1.70173841714859, Accuracy: 100.0
Validation Loss: 1.676618218421936, Validation Accuracy: 100.0


In [10]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from keras.preprocessing.sequence import pad_sequences

# Define the LSTM-based PyTorch model
class EventOrderingModel(nn.Module):
    def __init__(self, vocab_size, embedding_dim, lstm_units, num_classes):
        super(EventOrderingModel, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, lstm_units, bidirectional=True, batch_first=True)
        self.fc = nn.Linear(lstm_units * 2, 1)  # Output will be a single number for temporal ordering

    def forward(self, x):
        embedded = self.embedding(x)
        lstm_out, _ = self.lstm(embedded)
        out = self.fc(lstm_out[:, -1, :])  # Take the last output for ordering
        return out

# Define model parameters
vocab_size = len(tokenizer.word_index) + 1  # Vocabulary size
embedding_dim = 128  # Embedding dimension
lstm_units = 64  # LSTM units

# Instantiate the model
model = EventOrderingModel(vocab_size, embedding_dim, lstm_units, 1)

# Load the trained model weights if available
# model.load_state_dict(torch.load('path_to_saved_model.pth'))

# Switch the model to evaluation mode
model.eval()

# Example sentences with non-linear, non-chronological events (compound sentences)
example_sentences = [
    "Alice made breakfast, woke up, attended a meeting, and then went for a run.",
    "John returned home, went to the store, and bought milk.",
    "She found the gift, received the package, and opened it.",
    "Tom went to sleep, cleaned the house, and fed the cat."
]

# Corresponding events (for demonstration)
example_events = [
    ["made breakfast", "woke up", "attended a meeting", "went for a run"],
    ["returned home", "went to the store", "bought milk"],
    ["found the gift", "received the package", "opened it"],
    ["went to sleep", "cleaned the house", "fed the cat"]
]

# Tokenize and pad the example events
for events in example_events:
    example_event_sequences = tokenizer.texts_to_sequences(events)
    padded_event_sequences = pad_sequences(example_event_sequences, padding='post', maxlen=X_train.shape[1])

    # Convert padded sequences to torch tensors
    input_tensor = torch.tensor(padded_event_sequences, dtype=torch.long)

    # Make predictions using the trained model
    with torch.no_grad():
        predictions = model(input_tensor)

    # Predictions represent the temporal order
    event_order = predictions.squeeze().numpy()

    # Zip the event texts with their predicted order
    events_with_order = list(zip(events, event_order))

    # Sort events based on their predicted temporal order
    sorted_events = sorted(events_with_order, key=lambda x: x[1])

    # Display only the events in the predicted correct order
    print("\nSentence:", example_sentences[example_events.index(events)])
    print("Events in predicted temporal order:")
    for event, _ in sorted_events:
        print(f"Event: '{event}'")



Sentence: Alice made breakfast, woke up, attended a meeting, and then went for a run.
Events in predicted temporal order:
Event: 'made breakfast'
Event: 'woke up'
Event: 'attended a meeting'
Event: 'went for a run'

Sentence: John returned home, went to the store, and bought milk.
Events in predicted temporal order:
Event: 'returned home'
Event: 'went to the store'
Event: 'bought milk'

Sentence: She found the gift, received the package, and opened it.
Events in predicted temporal order:
Event: 'found the gift'
Event: 'received the package'
Event: 'opened it'

Sentence: Tom went to sleep, cleaned the house, and fed the cat.
Events in predicted temporal order:
Event: 'went to sleep'
Event: 'cleaned the house'
Event: 'fed the cat'
