In [3]:
# Clone the repository
!git clone https://github.com/richardsun-voyager/UAFTC.git
%cd UAFTC
!ls

Cloning into 'UAFTC'...
remote: Enumerating objects: 89, done.[K
remote: Counting objects: 100% (89/89), done.[K
remote: Compressing objects: 100% (67/67), done.[K
remote: Total 89 (delta 37), reused 66 (delta 18), pack-reused 0 (from 0)[K
Receiving objects: 100% (89/89), 19.24 MiB | 20.61 MiB/s, done.
Resolving deltas: 100% (37/37), done.
/content/UAFTC/UAFTC
appendix_2020.pdf				      data_processor.py
args						      derivatives.pdf
attention_score_binary_classification.ipynb	      dynamic_lstm.py
attn_model.py					      helper.py
attn_neural_classification_interpret_synthetic.ipynb  imgs
data						      README.md


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from collections import Counter
import os

# Check GPU
print(f"PyTorch version: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")

# Create directories
# os.makedirs(..., exist_ok=True) is the pure-Python equivalent of `mkdir -p`:
# it creates missing parents and does not fail when the directory already
# exists, without depending on a POSIX shell or on IPython's `!` escape.
os.makedirs("data/custom", exist_ok=True)
os.makedirs("results/sst", exist_ok=True)
os.makedirs("results/custom", exist_ok=True)

PyTorch version: 2.9.0+cu126
CUDA available: True
GPU: Tesla T4


In [6]:
# Set random seeds for reproducibility
def set_seed(seed=42):
    """Seed the PyTorch, NumPy and Python random number generators.

    Args:
        seed: Value passed unchanged to every generator (default 42).

    Returns:
        None. The global RNG state of each library is modified in place.
    """
    # Same seeding order as before: torch, then numpy, then the stdlib RNG.
    for seed_fn in (torch.manual_seed, np.random.seed, random.seed):
        seed_fn(seed)

    if not torch.cuda.is_available():
        return
    # Seed the generators of all CUDA devices as well.
    torch.cuda.manual_seed_all(seed)

# Seed all RNGs once, before any model parameters are created.
set_seed(seed=42)

# Attention Classifier Model WITH DROPOUT
class AttentionClassifier(nn.Module):
    """Attention-based classifier that maps a token sequence to one scalar score.

    Each token embedding is scored against a context vector ``V`` (scaled by
    ``1 / lambda_scale``), the scores are softmax-normalised over the sequence
    dimension, and the attention-weighted sum of embeddings is projected onto
    ``W`` to give the polarity score.

    Args:
        vocab_size: Number of rows in the embedding table.
        embed_dim: Size of each embedding, and of ``V`` and ``W``.
        lambda_scale: Divisor applied to the raw attention scores. Must be
            non-zero.
        dropout: Dropout probability applied to the embeddings (default 0.5).

    Raises:
        ValueError: If ``lambda_scale`` is zero (it would turn every attention
            score into inf/NaN).
    """

    def __init__(self, vocab_size, embed_dim, lambda_scale, dropout=0.5):
        super().__init__()
        if lambda_scale == 0:
            raise ValueError("lambda_scale must be non-zero")

        self.embedding = nn.Embedding(vocab_size, embed_dim)
        # The randn values are overwritten by the uniform init below; randn is
        # kept so the RNG stream (and thus seeded initial weights) is unchanged.
        self.V = nn.Parameter(torch.randn(embed_dim))  # Context vector
        self.W = nn.Parameter(torch.randn(embed_dim))  # Linear layer weight
        self.lambda_scale = lambda_scale

        # Add dropout layer (as in the paper)
        self.dropout = nn.Dropout(dropout)

        # Initialize with uniform distribution [-0.1, 0.1]
        nn.init.uniform_(self.embedding.weight, -0.1, 0.1)
        nn.init.uniform_(self.V, -0.1, 0.1)
        nn.init.uniform_(self.W, -0.1, 0.1)

    def forward(self, x, return_attention=False, mask=None):
        """Compute the polarity score for each sequence in the batch.

        Args:
            x: Token indices, ``[batch_size, seq_len]``.
            return_attention: When True, also return the attention weights,
                the raw attention scores and the per-token polarity.
            mask: Optional ``[batch_size, seq_len]`` tensor (bool or 0/1) on
                the same device as ``x``; truthy entries mark real tokens.
                Positions where it is falsy (e.g. padding) get zero attention
                weight. Every row must keep at least one real token, otherwise
                the softmax of that row is NaN. ``None`` (default) attends to
                every position, exactly as before this argument existed.

        Returns:
            ``output`` of shape ``[batch_size]``, or the tuple
            ``(output, attention_weights, attention_scores, token_polarity)``
            when ``return_attention`` is True. ``attention_scores`` are the
            raw, unmasked scores.
        """
        # x: [batch_size, seq_len]
        embeds = self.embedding(x)  # [batch_size, seq_len, embed_dim]

        # Apply dropout to embeddings (active only in training mode)
        embeds = self.dropout(embeds)

        # Compute attention scores
        attention_scores = torch.matmul(embeds, self.V) / self.lambda_scale

        # Exclude masked positions from the softmax so they receive exactly
        # zero weight and cannot leak into the context vector.
        softmax_input = attention_scores
        if mask is not None:
            softmax_input = attention_scores.masked_fill(
                ~mask.to(torch.bool), float("-inf")
            )

        # Compute attention weights (normalised over the sequence dimension)
        attention_weights = torch.softmax(softmax_input, dim=1)

        # Weighted sum
        context = torch.sum(embeds * attention_weights.unsqueeze(-1), dim=1)

        # Compute polarity score
        output = torch.matmul(context, self.W)

        if return_attention:
            token_polarity = torch.matmul(embeds, self.W)
            return output, attention_weights, attention_scores, token_polarity

        return output

# Confirmation that this cell ran; no model is instantiated here. The p=0.5 in
# the message mirrors the default `dropout` argument of AttentionClassifier.
print("Model defined with dropout (p=0.5)")

Model defined with dropout (p=0.5)
