In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
cd drive

/content/drive


In [3]:
cd My \Drive

/content/drive/My Drive


In [4]:
cd NLP/

/content/drive/My Drive/NLP


In [5]:
!pip install datasets

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting datasets
  Downloading datasets-2.11.0-py3-none-any.whl (468 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m468.7/468.7 KB[0m [31m33.7 MB/s[0m eta [36m0:00:00[0m
Collecting aiohttp
  Downloading aiohttp-3.8.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.0/1.0 MB[0m [31m28.4 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0.0,>=0.11.0
  Downloading huggingface_hub-0.13.4-py3-none-any.whl (200 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m200.1/200.1 KB[0m [31m23.4 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting responses<0.19
  Downloading responses-0.18.0-py3-none-any.whl (38 kB)
Collecting xxhash
  Downloading xxhash-3.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (212 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

In [None]:
# Load libraries
import pandas as pd
import nltk
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
import numpy as np
from sklearn.metrics import confusion_matrix
from sklearn.metrics import precision_recall_fscore_support
import datasets
from sklearn.metrics import accuracy_score, precision_recall_fscore_support, confusion_matrix
nltk.download('omw-1.4')
nltk.download('punkt')
nltk.download('wordnet')
nltk.download('stopwords')

torch.manual_seed(42)
np.random.seed(42)

# Load the dataset
dataset = datasets.load_dataset('ucberkeley-dlab/measuring-hate-speech', 'binary')
df = dataset['train'].to_pandas()
df.rename(columns={'text': 'tweet', 'hate_speech_score': 'class'}, inplace=True)
df['binary_class'] = df['class'] > 0.5

# Convert to lowercase, remove punctuation, extra spaces, URLs, mentions, and hashtags
df['tweet'] = df['tweet'].str.lower().replace(r'[^\w\s]', '', regex=True).replace(' {2,}', ' ', regex=True).replace('"', '')
df['tweet'] = df['tweet'].replace(r'http\S+|www.\S+|@\w+|#\w+', '', regex=True)

# Tokenization
nltk.download('punkt')
df['tweet'] = df['tweet'].apply(nltk.word_tokenize)

# Lemmatization
nltk.download('wordnet')
lemmatizer = WordNetLemmatizer()
df['tweet'] = df['tweet'].apply(lambda x: [lemmatizer.lemmatize(word) for word in x])

# Removing stop-words
nltk.download('stopwords')
stop_words = set(stopwords.words('english'))
df['tweet'] = df['tweet'].apply(lambda x: ' '.join([word for word in x if word not in stop_words]))

# Create a custom dataset class
class TextDataset(Dataset):
    def __init__(self, texts, labels):
        self.texts = texts
        self.labels = labels

    def __len__(self):
        return len(self.texts)

    def __getitem__(self, idx):
        return self.texts[idx], self.labels[idx]

# Encode the labels
le = LabelEncoder()
y = le.fit_transform(df['binary_class'])

# Splitting the Data using Stratified split
X_train, X_test, y_train, y_test = train_test_split(df['tweet'], y, test_size=0.3, stratify=y, random_state=42)

# Tokenize and pad the input sequences
'''
def tokenize_and_pad(texts, maxlen=100):
    tokenized_texts = [nltk.word_tokenize(text) for text in texts]
    return pad_sequence([torch.tensor([word_to_index[word] for word in text if word in word_to_index][:maxlen]) for text in tokenized_texts], batch_first=True, padding_value=len(word_to_index))
'''
def tokenize_and_pad(texts, maxlen=100):
    tokenized_texts = [nltk.word_tokenize(text) for text in texts]
    return pad_sequence([torch.tensor([word_to_index[word] for word in text if word in word_to_index][-maxlen:]) for text in tokenized_texts], batch_first=True, padding_value=len(word_to_index))

word_to_index = {word: i for i, word in enumerate(set(df['tweet'].str.cat(sep=' ').split()), 1)}
X_train = tokenize_and_pad(X_train)
X_test = tokenize_and_pad(X_test)

# Create PyTorch Datasets and DataLoaders
train_dataset = TextDataset(X_train, y_train)
test_dataset = TextDataset(X_test, y_test)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32)

# Create a PyTorch LSTM model
class LSTMBaseline(nn.Module):
    def __init__(self, vocab_size, embedding_dim, hidden_dim, num_classes):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim)
        self.lstm = nn.LSTM(embedding_dim, hidden_dim, batch_first=True)
        self.fc = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        x = self.embedding(x)
        packed_output, (hidden, cell) = self.lstm(x)
        x = self.fc(hidden[-1])
        return x

# Initialize the model, optimizer, and loss function
#model = LSTMBaseline(len(word_to_index) + 1, 50, 100, len(set(y)))
model = LSTMBaseline(len(word_to_index) + 1, 50, 256, len(set(y)))
optimizer = optim.Adam(model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()
    
# Train the model
epochs = 20
for epoch in range(epochs):
    model.train()
    epoch_loss = 0

    for texts, labels in train_loader:
        optimizer.zero_grad()
        outputs = model(texts)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {epoch_loss / len(train_loader)}")

    # Save Model
    torch.save(model, '/mnt/c/Users/tpras/Documents/UF/2nd Sem/NLP/Project/Weights/HuggingFaceLSTM.pth')

# Test the model and collect predictions and true labels
model.eval()
predictions = []
true_labels = []

with torch.no_grad():
    for texts, labels in test_loader:
        outputs = model(texts)
        _, predicted = torch.max(outputs, 1)
        predictions.extend(predicted.numpy())
        true_labels.extend(labels.numpy())

# Calculate accuracy, precision, recall, F1-score, and confusion matrix
accuracy = np.mean(np.array(predictions) == np.array(true_labels))
precision, recall, f1_score, _ = precision_recall_fscore_support(true_labels, predictions, average='weighted')
conf_mat = confusion_matrix(true_labels, predictions)

print("Accuracy: ", accuracy)
print("Precision: ", precision)
print("Recall: ", recall)
print("F1-score: ", f1_score)
print("Confusion Matrix:\n", conf_mat)

  from .autonotebook import tqdm as notebook_tqdm
[nltk_data] Downloading package omw-1.4 to
[nltk_data]     /home/pthamminedi/nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!
[nltk_data] Downloading package punkt to
[nltk_data]     /home/pthamminedi/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /home/pthamminedi/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /home/pthamminedi/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
Using custom data configuration ucberkeley-dlab--measuring-hate-speech-c32713cabe528196
Reusing dataset parquet (/home/pthamminedi/.cache/huggingface/datasets/ucberkeley-dlab___parquet/ucberkeley-dlab--measuring-hate-speech-c32713cabe528196/0.0.0/2a3b91fbd88a2c90d1dbbb32b460cf621d31bd5b05b934492fdef7d8d6f236ec)
100%|████████████████████████████████████████████████████████████

Epoch 1/20, Loss: 0.6551673836749868
Epoch 2/20, Loss: 0.6547368911577894
Epoch 3/20, Loss: 0.6546391571745709
Epoch 4/20, Loss: 0.6546084683888359


In [None]:
import textattack
import transformers
from textattack.transformations import WordSwapEmbedding
from textattack.search_methods import GreedyWordSwapWIR
from textattack import Attack

# Load model, tokenizer, and model_wrapper
model = transformers.AutoModelForSequenceClassification.from_pretrained("textattack/bert-base-uncased-imdb")
tokenizer = transformers.AutoTokenizer.from_pretrained("textattack/bert-base-uncased-imdb")
model_wrapper = textattack.models.wrappers.HuggingFaceModelWrapper(model, tokenizer)

# Construct our four components for `Attack`
from textattack.constraints.pre_transformation import RepeatModification, StopwordModification
from textattack.constraints.semantics import WordEmbeddingDistance

goal_function = textattack.goal_functions.UntargetedClassification(model_wrapper)
constraints = [
    RepeatModification(),
    StopwordModification(),
    WordEmbeddingDistance(min_cos_sim=0.9)
]
transformation = WordSwapEmbedding(max_candidates=50)
search_method = GreedyWordSwapWIR(wir_method="delete")



def calculate_doe(lime_results):
    feature_scores = [abs(score) for _, score in lime_results]
    std_dev = np.std(feature_scores)
    significant_features = len([score for score in feature_scores if score > std_dev])
    return significant_features

doe = calculate_doe(lime_results)
print("Degree of Explainability (DoE):", doe)

# Adversarial Robustness
def attack(model, tokenizer, dataset, samples=20):
    correct_before_attack = 0
    attacked = 0
    correct_after_attack = 0

    #attack = TextFoolerJin2019.build(model)
    # Construct the actual attack
    attack1 = Attack(goal_function, constraints, transformation, search_method)

    for i in range(samples):
        example = dataset[i]
        #print(example)
        input_text = example['tweet']
        ground_truth_label = example['class']
        result = attack1.attack(input_text, ground_truth_label)
        print(result)
        print("1st")
        print(result.original_result.raw_output[0])
        print("2nd")
        print(result.perturbed_result.raw_output[0])
        
        if result.original_result.raw_output[0] > 0.5:
            correct_before_attack += 1
            
            if result.perturbed_result.raw_output[0] <= 0.5:
                attacked += 1
        elif result.original_result.raw_output[0] <= 0.5:
            correct_after_attack += 1

    return attacked, correct_before_attack, correct_after_attack

tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

def load_custom_dataset(path):
    dataset = load_dataset("csv", data_files=path, split="train")
    return dataset

test_data_path = "/mnt/c/Users/tpras/Documents/UF/2nd Sem/NLP/Project/KaggleData.csv"
test_dataset = load_custom_dataset(test_data_path)
attacked, correct_before_attack, correct_after_attack = attack(wrapped_model, tokenizer, test_dataset)

print("Adversarial attack results:")
print(f"Total samples attacked: {attacked}")

if attacked > 0:
    print(f"Accuracy before attack: {correct_before_attack / attacked}")
    print(f"Accuracy after attack: {correct_after_attack / attacked}")

    adv_rob = correct_after_attack / correct_before_attack
    print("Adversarial Robustness (AdvRob):", adv_rob)

    attack_resilience = 1 - abs(adv_rob - 1)
    print("Attack Resilience (Ar):", attack_resilience)
else:
    print("No successful adversarial attacks.")