In [3]:
# Clone the repository
!git clone https://github.com/richardsun-voyager/UAFTC.git
%cd UAFTC
!ls

Cloning into 'UAFTC'...
remote: Enumerating objects: 89, done.[K
remote: Counting objects: 100% (89/89), done.[K
remote: Compressing objects: 100% (67/67), done.[K
remote: Total 89 (delta 37), reused 66 (delta 18), pack-reused 0 (from 0)[K
Receiving objects: 100% (89/89), 19.24 MiB | 20.61 MiB/s, done.
Resolving deltas: 100% (37/37), done.
/content/UAFTC/UAFTC
appendix_2020.pdf				      data_processor.py
args						      derivatives.pdf
attention_score_binary_classification.ipynb	      dynamic_lstm.py
attn_model.py					      helper.py
attn_neural_classification_interpret_synthetic.ipynb  imgs
data						      README.md


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
import os

# Check GPU: report the PyTorch build and whether a CUDA device is visible,
# so the recorded output documents the hardware the notebook ran on.
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# Create directories (relative to the current working directory).
# os.makedirs(..., exist_ok=True) is the portable equivalent of `mkdir -p`
# and does not depend on a POSIX shell being available.
for output_dir in ("data/custom", "results/sst", "results/custom"):
    os.makedirs(output_dir, exist_ok=True)

PyTorch version: 2.9.0+cu126
CUDA available: True
GPU: Tesla T4


In [6]:
# Seed every random number generator the notebook relies on
def set_seed(seed=42):
    """Seed PyTorch, NumPy and Python's ``random`` module with ``seed``.

    When CUDA is available, the generators of all CUDA devices are seeded too.
    """
    for seeder in (torch.manual_seed, np.random.seed, random.seed):
        seeder(seed)
    if not torch.cuda.is_available():
        return
    torch.cuda.manual_seed_all(seed)

set_seed(42)

# Attention Classifier Model WITH DROPOUT
class AttentionClassifier(nn.Module):
    """Embedding + single-vector attention classifier with dropout.

    For a batch of token-index sequences the model
      1. embeds the tokens and applies dropout to the embeddings,
      2. scores each token as ``embed . V / lambda_scale``,
      3. turns the scores into attention weights with a softmax over the
         sequence dimension (padding positions are excluded),
      4. builds a context vector as the attention-weighted sum of embeddings,
      5. returns the polarity logit ``context . W`` (one scalar per sequence).

    Args:
        vocab_size: number of rows in the embedding table.
        embed_dim: embedding dimensionality (also the size of ``V`` and ``W``).
        lambda_scale: divisor applied to the raw attention scores.
        dropout: dropout probability applied to the embeddings.
        pad_idx: token index treated as padding and masked out of the
            attention softmax. Defaults to 0, the index ``build_vocab``
            assigns to ``'<PAD>'``. Pass ``None`` to disable masking and
            attend over every position.
    """

    def __init__(self, vocab_size, embed_dim, lambda_scale, dropout=0.5, pad_idx=0):
        super().__init__()
        # NOTE: creation order is kept as-is so a fixed seed yields the same
        # initial parameters as before.
        self.embedding = nn.Embedding(vocab_size, embed_dim)
        self.V = nn.Parameter(torch.randn(embed_dim))  # Context vector
        self.W = nn.Parameter(torch.randn(embed_dim))  # Linear layer weight
        self.lambda_scale = lambda_scale
        self.pad_idx = pad_idx

        # Add dropout layer (as in the paper)
        self.dropout = nn.Dropout(dropout)

        # Initialize with uniform distribution [-0.1, 0.1]
        nn.init.uniform_(self.embedding.weight, -0.1, 0.1)
        nn.init.uniform_(self.V, -0.1, 0.1)
        nn.init.uniform_(self.W, -0.1, 0.1)

    def forward(self, x, return_attention=False):
        """Compute the polarity logit for each sequence in ``x``.

        Args:
            x: LongTensor of token indices, shape ``[batch_size, seq_len]``.
            return_attention: when True, also return the attention internals.

        Returns:
            ``output`` of shape ``[batch_size]``; or, when ``return_attention``
            is True, the tuple ``(output, attention_weights, attention_scores,
            token_polarity)`` where the last three have shape
            ``[batch_size, seq_len]``. ``attention_scores`` are the raw
            (unmasked) scaled scores; ``attention_weights`` are zero at
            padding positions.
        """
        embeds = self.embedding(x)  # [batch_size, seq_len, embed_dim]

        # Apply dropout to embeddings (active only in training mode)
        embeds = self.dropout(embeds)

        # Compute attention scores
        attention_scores = torch.matmul(embeds, self.V) / self.lambda_scale

        # Exclude padding from the softmax. Sequences are padded to a fixed
        # length, so without the mask the padding tokens absorb attention mass
        # and leak into the context vector. The most negative finite value is
        # used instead of -inf so that an all-padding row yields uniform
        # weights rather than NaN.
        if self.pad_idx is None:
            masked_scores = attention_scores
        else:
            pad_mask = x == self.pad_idx
            masked_scores = attention_scores.masked_fill(
                pad_mask, torch.finfo(attention_scores.dtype).min
            )

        # Compute attention weights
        attention_weights = torch.softmax(masked_scores, dim=1)

        # Weighted sum
        context = torch.sum(embeds * attention_weights.unsqueeze(-1), dim=1)

        # Compute polarity score
        output = torch.matmul(context, self.W)

        if return_attention:
            # Per-token polarity: each (dropped-out) embedding projected on W
            token_polarity = torch.matmul(embeds, self.W)
            return output, attention_weights, attention_scores, token_polarity

        return output

print("Model defined with dropout (p=0.5)")

Model defined with dropout (p=0.5)


Helper Functions

In [None]:
def build_vocab(data, min_freq=1):
    """Build a token -> index vocabulary from ``(text, label)`` pairs.

    Texts are tokenised with ``str.split()``. Index 0 is reserved for
    ``'<PAD>'`` and index 1 for ``'<UNK>'``; every other token seen at least
    ``min_freq`` times gets the next free index, in order of first appearance.

    Args:
        data: iterable of ``(text, label)`` pairs; labels are ignored.
        min_freq: minimum corpus frequency for a token to be kept.

    Returns:
        dict mapping each token to a unique index in ``range(len(vocab))``.
    """
    word_counts = Counter()
    for text, _ in data:
        word_counts.update(text.split())

    # Use explicit PAD and UNK tokens
    vocab = {'<PAD>': 0, '<UNK>': 1}
    for word, count in word_counts.items():
        # A literal '<PAD>'/'<UNK>' in the corpus must not steal the reserved
        # index. Using len(vocab) keeps indices contiguous, so an embedding
        # table sized by len(vocab) can never be indexed out of range.
        if count >= min_freq and word not in vocab:
            vocab[word] = len(vocab)
    return vocab

def text_to_indices(text, vocab, max_len=50):
    """Encode ``text`` as a list of vocabulary indices.

    The text is split on whitespace and truncated to ``max_len`` tokens.
    Tokens missing from ``vocab`` map to ``vocab['<UNK>']``, and the result is
    right-padded with ``vocab['<PAD>']`` up to ``max_len`` entries.
    """
    tokens = text.split()[:max_len]

    encoded = []
    for token in tokens:
        encoded.append(vocab.get(token, vocab['<UNK>']))

    # Right-pad up to the fixed length
    shortfall = max_len - len(encoded)
    if shortfall > 0:
        encoded.extend([vocab['<PAD>']] * shortfall)
    return encoded

def train_model(model, train_data, dev_data, vocab, num_epochs, device, verbose=False,
                lr=0.01, patience=10, restore_best=True):
    """Train ``model`` one example at a time with Adagrad and early stopping.

    Args:
        model: module called as ``model(x)`` with a ``[1, seq_len]`` LongTensor
            of token indices; expected to return one logit per sequence.
        train_data: sequence of ``(text, label)`` pairs; labels are expected
            to be 0 or 1. The caller's sequence is NOT modified: a private
            copy is shuffled every epoch.
        dev_data: ``(text, label)`` pairs used to measure dev accuracy.
        vocab: token -> index mapping forwarded to ``text_to_indices``.
        num_epochs: maximum number of passes over ``train_data``.
        device: torch device the input tensors are moved to.
        verbose: print a progress line every 10 epochs and on early stopping.
        lr: Adagrad learning rate.
        patience: number of consecutive epochs without a new best dev accuracy
            after which training stops.
        restore_best: when True, the parameters from the epoch with the best
            dev accuracy are loaded back into ``model`` before returning, so
            the model matches the returned accuracy. When False the model is
            left at its last-epoch weights.

    Returns:
        The best dev accuracy observed (0 if dev accuracy never exceeded 0).
    """
    criterion = nn.BCEWithLogitsLoss()
    optimizer = optim.Adagrad(model.parameters(), lr=lr)

    # Shuffle a private copy: random.shuffle works in place and would
    # otherwise silently reorder the caller's dataset across calls.
    train_samples = list(train_data)

    best_dev_acc = 0
    best_state = None
    patience_counter = 0

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0
        correct = 0
        total = 0

        random.shuffle(train_samples)

        for text, label in train_samples:
            indices = text_to_indices(text, vocab)
            x = torch.LongTensor([indices]).to(device)
            y = torch.FloatTensor([label]).to(device)

            optimizer.zero_grad()
            output = model(x)

            # Map {0, 1} labels to {-1, +1}. With an all-ones target,
            # BCE-with-logits on (label * logit) is -log(sigmoid(label * logit)),
            # i.e. the logistic loss on signed labels.
            y_scaled = 2 * y - 1
            loss = criterion(output * y_scaled, torch.ones_like(output))

            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            pred = (torch.sigmoid(output) > 0.5).float()
            correct += (pred == y).sum().item()
            total += 1

        train_acc = correct / total
        dev_acc = evaluate_model(model, dev_data, vocab, device)

        if verbose and (epoch + 1) % 10 == 0:
            print(f"Epoch {epoch+1}/{num_epochs} - Loss: {total_loss/total:.4f}, Train: {train_acc:.4f}, Dev: {dev_acc:.4f}")

        if dev_acc > best_dev_acc:
            best_dev_acc = dev_acc
            patience_counter = 0
            if restore_best:
                # Snapshot by value; state_dict() tensors alias the live
                # parameters and would keep changing with further training.
                best_state = {
                    name: tensor.detach().clone()
                    for name, tensor in model.state_dict().items()
                }
        else:
            patience_counter += 1
            if patience_counter >= patience:
                if verbose:
                    print(f"Early stopping at epoch {epoch+1}")
                break

    # Without this the model would be left up to `patience` epochs past its
    # best checkpoint while the function reports the best accuracy.
    if best_state is not None:
        model.load_state_dict(best_state)

    return best_dev_acc

def evaluate_model(model, data, vocab, device):
    """Return the accuracy of ``model`` on ``data``.

    The model is switched to eval mode and run without gradient tracking on
    one ``(text, label)`` pair at a time. A prediction is 1 when the sigmoid
    of the model output exceeds 0.5, and 0 otherwise.
    """
    model.eval()
    hits, seen = 0, 0

    with torch.no_grad():
        for sentence, gold in data:
            token_ids = torch.LongTensor([text_to_indices(sentence, vocab)]).to(device)
            target = torch.FloatTensor([gold]).to(device)

            logits = model(token_ids)
            predicted = (torch.sigmoid(logits) > 0.5).float()
            hits += (predicted == target).sum().item()
            seen += 1

    return hits / seen

print("Helper functions defined")