**The model is an LSTM-based classifier that predicts whether a given event sentence represents a "Typical Time" (indicative of a time-related event) or "Not Typical Time" (indicating that it does not denote a time-related event).**


 **"Typical Time" sentences refer to specific time frames or regular occurrences, while non-"Typical Time" sentences do not convey specific temporal information.**

In [13]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from transformers import DistilBertTokenizer

# Load the dataset
df = pd.read_csv('mc-taco.tsv', sep='\t')

# Rename the columns appropriately
df.columns = ['islam_part', 'question', 'answer', 'no', 'stationarity']  # Adjust if needed
df = df.rename(columns={'islam_part': 'event_sentence', 'stationarity': 'event_type'})

# Filter to keep only 'Typical Time' and 'Stationarity'
df = df[df['event_type'].isin(['Typical Time', 'Stationarity'])]

# Convert labels to binary (1 for Typical Time, 0 for Stationarity)
df['label'] = df['event_type'].apply(lambda x: 1 if x == 'Typical Time' else 0)

# Select only the relevant columns: event_sentence and label
df = df[['event_sentence', 'label']]

# Split the data into training and testing sets
X = df['event_sentence'].values
y = df['label'].values
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize DistilBERT tokenizer
tokenizer = DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

# Tokenize the inputs
train_encodings = tokenizer(list(X_train), truncation=True, padding=True, max_length=512)
test_encodings = tokenizer(list(X_test), truncation=True, padding=True, max_length=512)

# Create a PyTorch Dataset
class TextDataset(Dataset):
    def __init__(self, encodings, labels):
        self.encodings = encodings
        self.labels = labels

    def __getitem__(self, idx):
        item = {key: torch.tensor(val[idx]) for key, val in self.encodings.items()}
        item['labels'] = torch.tensor(self.labels[idx])
        return item

    def __len__(self):
        return len(self.labels)

# Create Dataset objects
train_dataset = TextDataset(train_encodings, y_train)
test_dataset = TextDataset(test_encodings, y_test)

# Define the LSTM model
class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, output_dim, n_layers, dropout):
        super(LSTMModel, self).__init__()
        self.embedding = nn.Embedding(input_dim, 128)
        self.lstm = nn.LSTM(128, hidden_dim, n_layers, dropout=dropout, batch_first=True)
        self.fc = nn.Linear(hidden_dim, output_dim)
        self.dropout = nn.Dropout(dropout)

    def forward(self, x):
        x = self.embedding(x)
        x, _ = self.lstm(x)
        x = self.dropout(x[:, -1, :])  # Use the last time step
        x = self.fc(x)
        return x

# Hyperparameters
input_dim = len(tokenizer.vocab)  # Size of vocabulary
hidden_dim = 64  # Number of hidden units in LSTM
output_dim = 1  # Binary classification
n_layers = 1  # Number of LSTM layers
dropout = 0.5  # Dropout rate
batch_size = 32  # Batch size
num_epochs = 5  # Number of epochs

# Create DataLoader
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size)

# Initialize the model
model = LSTMModel(input_dim, hidden_dim, output_dim, n_layers, dropout)

# Define loss and optimizer
criterion = nn.BCEWithLogitsLoss()  # Use BCEWithLogitsLoss for binary classification
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# Training loop
for epoch in range(num_epochs):
    model.train()
    for batch in train_loader:
        optimizer.zero_grad()
        outputs = model(batch['input_ids'])
        loss = criterion(outputs.squeeze(), batch['labels'].float())
        loss.backward()
        optimizer.step()
    print(f'Epoch {epoch+1}/{num_epochs}, Loss: {loss.item():.4f}')

# Evaluation
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for batch in test_loader:
        outputs = model(batch['input_ids'])
        predictions = (torch.sigmoid(outputs.squeeze()) > 0.5).int()  # Convert logits to binary predictions
        correct += (predictions == batch['labels']).sum().item()
        total += batch['labels'].size(0)

print(f'Test Accuracy: {correct / total:.4f}')

# Sample test sentences for predictions
test_sentences = [
    "The event took place last week.",
    "He will finish the project by next month.",
    "This was a significant historical moment.",
    "The meeting has been scheduled for tomorrow.",
    "There are plans for a new building in the city.",
    "The law was passed two years ago.",
    "She graduated last year.",
    "The concert will happen in July.",
    "They expect the results to come out soon.",
    "It was a breakthrough moment in science.",
    "The project deadline is approaching quickly.",
    "This method has been proven effective over time.",
    "He has always been a leader in the community.",
    "The report was due yesterday.",
    "They launched the product last summer.",
    "This technology has evolved rapidly.",
    "The findings were published last month.",
    "The contract was renewed last week.",
    "There will be an exhibition next fall.",
    "The movie came out last year.",
    "The survey results will be released next week.",
    "They announced the merger last December.",
    "The team was formed last season.",
    "She received the award last night.",
    "The application is due in two days.",
    "This research has been ongoing for several years.",
    "The festival occurs every summer.",
    "They will review the application by next week.",
    "The bill was introduced in Congress last year.",
    "The service will be available by next quarter.",
    "The changes were implemented two weeks ago.",
    "She will begin her new role next month.",
    "The results were favorable this year.",
    "He is expected to return in three weeks.",
    "The meeting is scheduled for next Wednesday.",
    "The product launch is set for early next year.",
    "They will announce the decision shortly.",
    "The workshop was held last Friday.",
    "The upgrades will be completed by next month.",
    "This system has been reliable for years.",
    "He will continue to work on the project.",
    "The review process will start soon.",
    "The findings were significant at the time.",
    "The policy change takes effect next year.",
    "The negotiations are ongoing."
]

# Tokenize and encode the test sentences
test_encodings = tokenizer(test_sentences, truncation=True, padding=True, max_length=512)

# Convert to PyTorch tensors
test_dataset = TextDataset(test_encodings, [0] * len(test_sentences))  # Placeholder labels

# Make predictions
model.eval()
predictions = []
with torch.no_grad():
    for batch in DataLoader(test_dataset, batch_size=8):
        outputs = model(batch['input_ids'])
        preds = (torch.sigmoid(outputs.squeeze()) > 0.5).int()  # Convert logits to binary predictions
        predictions.extend(preds.numpy())

# Print predictions
for sentence, prediction in zip(test_sentences, predictions):
    print(f'Sentence: "{sentence}" | Prediction: {"Typical Time" if prediction == 1 else "Not Typical Time"}')




Epoch 1/5, Loss: 0.6062
Epoch 2/5, Loss: 0.6175
Epoch 3/5, Loss: 0.6586
Epoch 4/5, Loss: 0.6227
Epoch 5/5, Loss: 0.7222
Test Accuracy: 0.7368
Sentence: "The event took place last week." | Prediction: Typical Time
Sentence: "He will finish the project by next month." | Prediction: Typical Time
Sentence: "This was a significant historical moment." | Prediction: Typical Time
Sentence: "The meeting has been scheduled for tomorrow." | Prediction: Typical Time
Sentence: "There are plans for a new building in the city." | Prediction: Not Typical Time
Sentence: "The law was passed two years ago." | Prediction: Typical Time
Sentence: "She graduated last year." | Prediction: Typical Time
Sentence: "The concert will happen in July." | Prediction: Typical Time
Sentence: "They expect the results to come out soon." | Prediction: Typical Time
Sentence: "It was a breakthrough moment in science." | Prediction: Typical Time
Sentence: "The project deadline is approaching quickly." | Prediction: Typical T