In [1]:
import torch
from torch.nn import functional as F

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
from torchtext.data import get_tokenizer

In [4]:
#pytorch tokenizer
tokenizer = get_tokenizer("basic_english")

In [5]:
#load glove embeddings 
from torchtext.vocab import GloVe

global_vectors = GloVe(name='6B', dim=100)

In [6]:
import pandas as pd

train_df = pd.read_csv('data/train.tsv', delimiter='\t', header=None, names=['unique_id', 'label', 'extra_label', 'text'])
train_df

Unnamed: 0,unique_id,label,extra_label,text
0,25462,1,a,There are not any jew signatures on our Declar...
1,22529,1,a,"Hey ,muslim dickhead !!!!!!!! Why don;t you go..."
2,29861,0,a,"Generally true, but there are indeed missionar..."
3,6358,1,a,Why do so many filthy wetback half-breed spic ...
4,7907,0,a,Why are girl so ugly to the girls that are jus...
...,...,...,...,...
34234,6198,0,a,Why are Hindus always an easy target. On my re...
34235,2100,0,a,"You people should be proud to be gay,lesbian,b..."
34236,14780,1,a,i hate jews so much i wish the holocaust actua...
34237,8837,1,a,ayo i even kill handicapped and crippled bitch...


In [7]:
sentences = train_df.text.values
labels = train_df.label.values

In [8]:
tokenized_texts = [tokenizer(sent) for sent in sentences]

In [9]:
unique_tokens = set(token for tokens in tokenized_texts for token in tokens)
vocab_size = len(unique_tokens)
vocab_size

20886

In [10]:
max_words = 25
pad_tokenized_text = [tokens + [""] * (max_words - len(tokens)) if len(tokens) < max_words else tokens[:max_words] for tokens in tokenized_texts]

In [11]:
from torchtext.vocab import vocab
unk_token = "<unk>"
unk_index = 0

glove_vocab = vocab(global_vectors.stoi)
glove_vocab.insert_token("<unk>",unk_index)
#this is necessary otherwise it will throw runtime error if OOV token is queried 
glove_vocab.set_default_index(unk_index)

In [12]:
pad_tokenized_text = [glove_vocab(tokens) for tokens in pad_tokenized_text]

In [13]:
pad_tokenized_text = torch.tensor(pad_tokenized_text)
labels = torch.tensor(labels, dtype=torch.long)

In [14]:
from torch.utils.data import TensorDataset, DataLoader, RandomSampler

batch = 32
embed_len = 100


train_data = TensorDataset(pad_tokenized_text, labels)
train_sampler = RandomSampler(train_data)
train_dataloader = DataLoader(train_data, sampler=train_sampler, batch_size=batch)

In [16]:
class SelfAttention(torch.nn.Module):
    def __init__(self, input_dim):
        super(SelfAttention, self).__init__()
        self.input_dim = input_dim
        
        #column can be anything, more dimension better accuracy
        #row must be same as INPUT DIMENSION column
        #query matrix
        self.Wq = torch.nn.Parameter(torch.randn(input_dim, input_dim)) 
        #key matrix
        self.Wk = torch.nn.Parameter(torch.randn(input_dim, input_dim)) 
        #value matrix
        self.Wv = torch.nn.Parameter(torch.randn(input_dim, input_dim))
        
        #for normalization/probablity distribution
        self.softmax = torch.nn.Softmax(dim=2)
        
    def forward(self, x):
        #x = x.transpose(0, 1) # Assuming x is of shape (sequence_length, batch_size, input_dim)
        #print(x.shape)
        #print(self.Wq.shape)
        queries = torch.matmul(x, self.Wq)
        keys = torch.matmul(x, self.Wk)
        values = torch.matmul(x, self.Wv)
        
        #bmm = matrix-matrix product
        scores = torch.bmm(queries, keys.transpose(1, 2)) / (self.input_dim ** 0.5)
        
        #normalise score 
        attention = self.softmax(scores)
        
        #update attention weight
        weighted = torch.bmm(attention, values)
        
        weighted_transposed = weighted.transpose(0, 1)
        
        return weighted, weighted_transposed 

In [54]:
class customeModel(torch.nn.Module):
    def __init__(self, glove_vectors, input_dim, hidden_size, embedding_dimensions, num_classes):
        super(customeModel, self).__init__()
        
        #variables
        self.glove_vectors = glove_vectors
        self.input_dim = input_dim
        self.embedding_dimensions = embedding_dimensions
        self.hidden_size = hidden_size
        self.num_classes = num_classes
        
        #layer
        self.embedding_layer = torch.nn.Embedding.from_pretrained(self.glove_vectors.vectors, freeze=True, sparse=True)
        
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm_1 = torch.nn.LSTM(self.embedding_dimensions, self.hidden_size, batch_first = True)
        
        self.attention = SelfAttention(self.input_dim)
        
        self.lstm_2 = torch.nn.LSTM(self.hidden_size, self.hidden_size, batch_first = True)
        
        self.output_layer = torch.nn.Linear(self.hidden_size, self.num_classes)
        self.sigmoid = torch.nn.Sigmoid()
        
    def forward(self, batch_input):
        embeds = self.embedding_layer(batch_input)
        print(embeds.shape)
        lstm_out_1, _ = self.lstm_1(embeds)
        print(lstm_out_1.shape)
        attn_out, attn_out_transposed = self.attention(lstm_out_1)
        lstm_out_2, _ = self.lstm_2(attn_out_transposed)
        output = self.output_layer(lstm_out_2) #lstm_out_2[:, -1, :] using last hidden_state for classification
        output_sigmoid = self.sigmoid(output)
        
        #return F.log_softmax(output, dim=1)
        return output_sigmoid, attn_out, attn_out_transposed
        

In [55]:
from torch.optim import Adam

epochs = 25
learning_rate = 1e-3

loss_fn = torch.nn.CrossEntropyLoss()
custom_model = customeModel(glove_vectors = global_vectors, #glove vectors
                            input_dim = 25, #self-attention
                            hidden_size = 25, #lstm cell
                            embedding_dimensions = 100, #glove embedding 
                            num_classes = 1) #output classes are 2 but neurons req out_class - 1

optimizer = Adam(custom_model.parameters(), lr=learning_rate)

In [50]:
for step, batch in enumerate(train_dataloader):
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_labels = batch

In [51]:
b_input_ids.shape

torch.Size([31, 25])

In [56]:
outputs, attn_out, attn_out_trans = custom_model(b_input_ids)

torch.Size([31, 25, 100])
torch.Size([31, 25, 25])


In [24]:
from tqdm import tqdm

loss_function = torch.nn.BCELoss()

for epochs in tqdm(range(10)):
    if epochs > 1:
        print("Loss " + str(loss))
    for step, batch in enumerate(train_dataloader):
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_labels = batch
        
        #clear gradients 
        custom_model.zero_grad()
        
        #get output and transform it according to labels 
        outputs, attn_out, attn_out_trans = custom_model(b_input_ids)
        class_probs = torch.mean(outputs, dim=0)
        
        #reshape labels and convert to float 
        b_labels_2d = b_labels.view(-1, 1)
        b_labels_2d = b_labels_2d.float()
        
        loss = loss_function(class_probs, b_labels_2d)
        #print("Loss " + str(loss))
        #compute gradient 
        loss.backward()
        
        #update parameters 
        optimizer.step()

 20%|████████▊                                   | 2/10 [00:14<00:58,  7.36s/it]

Loss tensor(0.0836, grad_fn=<BinaryCrossEntropyBackward0>)


 30%|█████████████▏                              | 3/10 [00:21<00:51,  7.33s/it]

Loss tensor(0.1173, grad_fn=<BinaryCrossEntropyBackward0>)


 40%|█████████████████▌                          | 4/10 [00:29<00:43,  7.30s/it]

Loss tensor(0.0118, grad_fn=<BinaryCrossEntropyBackward0>)


 50%|██████████████████████                      | 5/10 [00:36<00:36,  7.23s/it]

Loss tensor(0.0186, grad_fn=<BinaryCrossEntropyBackward0>)


 60%|██████████████████████████▍                 | 6/10 [00:43<00:28,  7.19s/it]

Loss tensor(0.0013, grad_fn=<BinaryCrossEntropyBackward0>)


 70%|██████████████████████████████▊             | 7/10 [00:50<00:21,  7.16s/it]

Loss tensor(0.0639, grad_fn=<BinaryCrossEntropyBackward0>)


 80%|███████████████████████████████████▏        | 8/10 [00:57<00:14,  7.14s/it]

Loss tensor(0.0135, grad_fn=<BinaryCrossEntropyBackward0>)


 90%|███████████████████████████████████████▌    | 9/10 [01:04<00:07,  7.13s/it]

Loss tensor(0.0197, grad_fn=<BinaryCrossEntropyBackward0>)


100%|███████████████████████████████████████████| 10/10 [01:11<00:00,  7.19s/it]


In [26]:
max_words = 25

def pad_sentence(sentence):
    if len(sentence) < max_words:
        
        sentence = sentence + [""]*(max_words - len(sentence))
        
    else:
        sentence = sentence[:max_words]
        
    return sentence

In [27]:
test_sentence = 'Fuck this bitch. This sentence should be offensive.'
tokenized_test_sentence = tokenizer(test_sentence)

#pad
pad_tokenized_test_sentence = pad_sentence(tokenized_test_sentence)
padded = pad_tokenized_test_sentence
print(pad_tokenized_test_sentence)

#word - index
pad_tokenized_test_sentence = glove_vocab(pad_tokenized_test_sentence)

#tensor
pad_tokenized_test_sentence = torch.tensor(pad_tokenized_test_sentence)

#shape
pad_tokenized_test_sentence = pad_tokenized_test_sentence.view(1, -1)
pad_tokenized_test_sentence.shape

custom_model.eval()
output, attn_weight, attn_weight_trans = custom_model(pad_tokenized_test_sentence)

print(torch.mean(output, dim=0))

['fuck', 'this', 'bitch', '.', 'this', 'sentence', 'should', 'be', 'offensive', '.', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
tensor([[0.9820]], grad_fn=<MeanBackward1>)


In [28]:
attn_weight.shape

torch.Size([1, 25, 25])

In [29]:
attn_weight_trans.shape

torch.Size([25, 1, 25])

In [30]:
attn_weight = torch.squeeze(attn_weight)
attn_weight.shape

torch.Size([25, 25])

In [40]:
attn_weight.sum(dim=0)

tensor([  81.8591,  -15.5634,  -27.9860,  -69.1191,  -55.4261,   14.0503,
          18.0048,   90.8945,   88.3963,    4.1183,  -16.9858,   13.9386,
         -82.0713,  -11.4519,   26.5646,    4.4011,  -47.4977,  -99.7784,
         -12.6317,   60.4945,  109.4932,   62.3862,   11.9425,   30.0096,
        -149.0325], grad_fn=<SumBackward1>)

In [36]:
test_sentence = 'This nigger tried to kill me'
tokenized_test_sentence = tokenizer(test_sentence)

#pad
pad_tokenized_test_sentence = pad_sentence(tokenized_test_sentence)
padded = pad_tokenized_test_sentence
print(pad_tokenized_test_sentence)

#word - index
pad_tokenized_test_sentence = glove_vocab(pad_tokenized_test_sentence)

#tensor
pad_tokenized_test_sentence = torch.tensor(pad_tokenized_test_sentence)

#shape
pad_tokenized_test_sentence = pad_tokenized_test_sentence.view(1, -1)
pad_tokenized_test_sentence.shape

custom_model.eval()
output, attn_weight, attn_weight_trans = custom_model(pad_tokenized_test_sentence)

print(torch.mean(output, dim=0))

['this', 'nigger', 'tried', 'to', 'kill', 'me', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
tensor([[0.9995]], grad_fn=<MeanBackward1>)


In [45]:
attn_weight = torch.squeeze(attn_weight)
attn_weight.shape

torch.Size([25, 25])

In [39]:
attn_weight.sum(dim=0)

tensor([  81.8591,  -15.5634,  -27.9860,  -69.1191,  -55.4261,   14.0503,
          18.0048,   90.8945,   88.3963,    4.1183,  -16.9858,   13.9386,
         -82.0713,  -11.4519,   26.5646,    4.4011,  -47.4977,  -99.7784,
         -12.6317,   60.4945,  109.4932,   62.3862,   11.9425,   30.0096,
        -149.0325], grad_fn=<SumBackward1>)

In [44]:
import numpy as np
np.argsort(attn_weight.sum(dim=0).detach().numpy())

array([24, 17, 12,  3,  4, 16,  2, 10,  1, 18, 13,  9, 15, 22, 11,  5,  6,
       14, 23, 19, 21,  0,  8,  7, 20])

In [46]:
from torch.nn.functional import softmax

attn_scores_softmax = softmax(attn_weight, dim=-1)

In [48]:
attn_scores_softmax

tensor([[1.2077e-01, 2.6530e-03, 1.4861e-03, 2.9425e-04, 5.0488e-04, 8.0333e-03,
         9.8725e-03, 1.7081e-01, 1.4336e-01, 5.2988e-03, 2.2640e-03, 8.6876e-03,
         1.7485e-04, 2.8332e-03, 1.2584e-02, 5.7206e-03, 6.5695e-04, 8.4294e-05,
         2.8274e-03, 5.1416e-02, 3.6953e-01, 5.7315e-02, 7.6154e-03, 1.5198e-02,
         1.2822e-05],
        [1.1957e-01, 2.6194e-03, 1.4293e-03, 2.8701e-04, 4.7210e-04, 8.0463e-03,
         9.6556e-03, 1.7050e-01, 1.3372e-01, 5.1828e-03, 2.1844e-03, 9.2473e-03,
         1.7254e-04, 2.6864e-03, 1.1988e-02, 5.9125e-03, 5.9815e-04, 7.8831e-05,
         2.8811e-03, 5.2453e-02, 3.8119e-01, 5.6683e-02, 7.5647e-03, 1.4858e-02,
         1.2571e-05],
        [1.1753e-01, 2.4600e-03, 1.4197e-03, 2.8520e-04, 4.8155e-04, 7.8401e-03,
         9.1457e-03, 1.7257e-01, 1.4909e-01, 5.2171e-03, 2.2865e-03, 8.0245e-03,
         1.7402e-04, 2.7122e-03, 1.2892e-02, 5.5837e-03, 6.4062e-04, 8.0870e-05,
         2.8089e-03, 5.2202e-02, 3.6867e-01, 5.5647e-02, 7.3959e-

# From Scratch

In [68]:
class SelfAttention(torch.nn.Module):
    def __init__(self, input_dim):
        super(SelfAttention, self).__init__()
        self.input_dim = input_dim
        
        #column can be anything, more dimension better accuracy
        #row must be same as INPUT DIMENSION column
        #query matrix
        self.Wq = torch.nn.Linear(input_dim, input_dim) 
        #key matrix
        self.Wk = torch.nn.Linear(input_dim, input_dim) 
        #value matrix
        self.Wv = torch.nn.Linear(input_dim, input_dim)
        
        #for normalization/probablity distribution
        self.softmax = torch.nn.Softmax(dim=-1)
        
    def forward(self, x):
        #x = x.transpose(0, 1) # Assuming x is of shape (sequence_length, batch_size, input_dim)
        #print(x.shape)
        #print(self.Wq.shape)
        
        queries = self.Wq(x)
        keys = self.Wk(x)
        values = self.Wv(x)
        
        print("Queries  " + str(queries.shape))
        print("Keys  " + str(keys.shape))
        print("Values  " + str(values.shape))
        
        #bmm = matrix-matrix product
        #scores = torch.bmm(queries, keys.transpose(1, 2)) / (self.input_dim ** 0.5)
        scores = torch.matmul(queries, keys.transpose(-2, -1)) / torch.sqrt(torch.tensor(self.input_dim, dtype=torch.float32))
        
        #normalise score 
        #attention = self.softmax(scores)
        attention_weights = self.softmax(scores)
        print("Scores " + str(attention_weights.shape))
        
        #update attention weight
        weighted = torch.matmul(attention_weights, values)
        print("Weighted " + str(weighted.shape))
        
        weighted_transposed = weighted.transpose(0, 1)
        print("Weighted Transpose " + str(weighted_transposed.shape))
        
        return attention_weights, weighted, weighted_transposed 

In [75]:
class customeModel(torch.nn.Module):
    def __init__(self, glove_vectors, input_dim, hidden_size, embedding_dimensions, num_classes):
        super(customeModel, self).__init__()
        
        #variables
        self.glove_vectors = glove_vectors
        self.input_dim = input_dim
        self.embedding_dimensions = embedding_dimensions
        self.hidden_size = hidden_size
        self.num_classes = num_classes
        
        #layer
        self.embedding_layer = torch.nn.Embedding.from_pretrained(self.glove_vectors.vectors, freeze=True, sparse=True)
        
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm_1 = torch.nn.LSTM(self.embedding_dimensions, self.hidden_size, batch_first = True)
        
        self.attention = SelfAttention(self.input_dim)
        
        self.lstm_2 = torch.nn.LSTM(self.hidden_size, self.hidden_size, batch_first = True)
        
        self.output_layer = torch.nn.Linear(self.hidden_size, self.num_classes)
        self.sigmoid = torch.nn.Sigmoid()
        
    def forward(self, batch_input):
        embeds = self.embedding_layer(batch_input)
        print("Embeds " + str(embeds.shape))
        lstm_out_1, _ = self.lstm_1(embeds)
        print("LSTM 1 + " + str(lstm_out_1.shape))
        attn_weights, weighted_output, weighted_output_transposed = self.attention(lstm_out_1)
        print("Attn wt "  + str(attn_weights.shape))
        print("Weighted Output "  + str(weighted_output.shape))
        print("Weighted Output Transposed "  + str(weighted_output_transposed.shape))
        lstm_out_2, _ = self.lstm_2(weighted_output)
        output = self.output_layer(lstm_out_2) #lstm_out_2[:, -1, :] using last hidden_state for classification
        output_sigmoid = self.sigmoid(output)
        
        #return F.log_softmax(output, dim=1)
        return output_sigmoid, attn_weights, weighted_output
        

In [76]:
from torch.optim import Adam

epochs = 25
learning_rate = 1e-3

loss_fn = torch.nn.CrossEntropyLoss()
custom_model = customeModel(glove_vectors = global_vectors, #glove vectors
                            input_dim = 25, #self-attention
                            hidden_size = 25, #lstm cell
                            embedding_dimensions = 100, #glove embedding 
                            num_classes = 1) #output classes are 2 but neurons req out_class - 1

optimizer = Adam(custom_model.parameters(), lr=learning_rate)

In [78]:
outputs, attn_wt, wt_out = custom_model(b_input_ids)

Embeds torch.Size([31, 25, 100])
LSTM 1 + torch.Size([31, 25, 25])
Queries  torch.Size([31, 25, 25])
Keys  torch.Size([31, 25, 25])
Values  torch.Size([31, 25, 25])
Scores torch.Size([31, 25, 25])
Weighted torch.Size([31, 25, 25])
Weighted Transpose torch.Size([25, 31, 25])
Attn wt torch.Size([31, 25, 25])
Weighted Output torch.Size([31, 25, 25])
Weighted Output Transposed torch.Size([25, 31, 25])


In [80]:
attn_wt.shape

torch.Size([31, 25, 25])

In [82]:
wt_out.shape

torch.Size([31, 25, 25])

In [94]:
torch.mean(outputs, dim=1).shape

torch.Size([32, 1])

# Train Again

In [86]:
class SelfAttention(torch.nn.Module):
    def __init__(self, input_dim):
        super(SelfAttention, self).__init__()
        self.input_dim = input_dim
        
        #column can be anything, more dimension better accuracy
        #row must be same as INPUT DIMENSION column
        #query matrix
        self.Wq = torch.nn.Linear(input_dim, input_dim) 
        #key matrix
        self.Wk = torch.nn.Linear(input_dim, input_dim) 
        #value matrix
        self.Wv = torch.nn.Linear(input_dim, input_dim)
        
        #for normalization/probablity distribution
        self.softmax = torch.nn.Softmax(dim=-1)
        
    def forward(self, x):
        #x = x.transpose(0, 1) # Assuming x is of shape (sequence_length, batch_size, input_dim)
        #print(x.shape)
        #print(self.Wq.shape)
        
        queries = self.Wq(x)
        keys = self.Wk(x)
        values = self.Wv(x)
        
        #bmm = matrix-matrix product
        #scores = torch.bmm(queries, keys.transpose(1, 2)) / (self.input_dim ** 0.5)
        scores = torch.matmul(queries, keys.transpose(-2, -1)) / torch.sqrt(torch.tensor(self.input_dim, dtype=torch.float32))
        
        #normalise score 
        #attention = self.softmax(scores)
        attention_weights = self.softmax(scores)
        #print("Scores " + str(attention_weights.shape))
        
        #update attention weight
        weighted = torch.matmul(attention_weights, values)
        #print("Weighted " + str(weighted.shape))
        
        weighted_transposed = weighted.transpose(0, 1)
        #print("Weighted Transpose " + str(weighted_transposed.shape))
        
        return attention_weights, weighted, weighted_transposed 

In [87]:
class customeModel(torch.nn.Module):
    def __init__(self, glove_vectors, input_dim, hidden_size, embedding_dimensions, num_classes):
        super(customeModel, self).__init__()
        
        #variables
        self.glove_vectors = glove_vectors
        self.input_dim = input_dim
        self.embedding_dimensions = embedding_dimensions
        self.hidden_size = hidden_size
        self.num_classes = num_classes
        
        #layer
        self.embedding_layer = torch.nn.Embedding.from_pretrained(self.glove_vectors.vectors, freeze=True, sparse=True)
        
        # The LSTM takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.lstm_1 = torch.nn.LSTM(self.embedding_dimensions, self.hidden_size, batch_first = True)
        
        self.attention = SelfAttention(self.input_dim)
        
        self.lstm_2 = torch.nn.LSTM(self.hidden_size, self.hidden_size, batch_first = True)
        
        self.output_layer = torch.nn.Linear(self.hidden_size, self.num_classes)
        self.sigmoid = torch.nn.Sigmoid()
        
    def forward(self, batch_input):
        embeds = self.embedding_layer(batch_input)
        #print("Embeds " + str(embeds.shape))
        lstm_out_1, _ = self.lstm_1(embeds)
        #print("LSTM 1 + " + str(lstm_out_1.shape))
        attn_weights, weighted_output, weighted_output_transposed = self.attention(lstm_out_1)
        #print("Attn wt "  + str(attn_weights.shape))
        #print("Weighted Output "  + str(weighted_output.shape))
        #print("Weighted Output Transposed "  + str(weighted_output_transposed.shape))
        lstm_out_2, _ = self.lstm_2(weighted_output)
        output = self.output_layer(lstm_out_2) #lstm_out_2[:, -1, :] using last hidden_state for classification
        output_sigmoid = self.sigmoid(output)
        
        #return F.log_softmax(output, dim=1)
        return output_sigmoid, attn_weights, weighted_output
        

In [88]:
from torch.optim import Adam

epochs = 25
learning_rate = 1e-3

loss_fn = torch.nn.CrossEntropyLoss()
custom_model = customeModel(glove_vectors = global_vectors, #glove vectors
                            input_dim = 25, #self-attention
                            hidden_size = 25, #lstm cell
                            embedding_dimensions = 100, #glove embedding 
                            num_classes = 1) #output classes are 2 but neurons req out_class - 1

optimizer = Adam(custom_model.parameters(), lr=learning_rate)

In [95]:
from tqdm import tqdm

loss_function = torch.nn.BCELoss()

for epochs in tqdm(range(10)):
    if epochs > 1:
        print("Loss " + str(loss))
    for step, batch in enumerate(train_dataloader):
        batch = tuple(t.to(device) for t in batch)
        b_input_ids, b_labels = batch
        
        #clear gradients 
        custom_model.zero_grad()
        
        #get output and transform it according to labels 
        outputs, attn_out, attn_out_trans = custom_model(b_input_ids)
        class_probs = torch.mean(outputs, dim=1)
        
        #reshape labels and convert to float 
        b_labels_2d = b_labels.view(-1, 1)
        b_labels_2d = b_labels_2d.float()
        
        loss = loss_function(class_probs, b_labels_2d)
        #print("Loss " + str(loss))
        #compute gradient 
        loss.backward()
        
        #update parameters 
        optimizer.step()

 20%|████████▊                                   | 2/10 [00:13<00:54,  6.87s/it]

Loss tensor(0.0731, grad_fn=<BinaryCrossEntropyBackward0>)


 30%|█████████████▏                              | 3/10 [00:20<00:47,  6.85s/it]

Loss tensor(0.0217, grad_fn=<BinaryCrossEntropyBackward0>)


 40%|█████████████████▌                          | 4/10 [00:27<00:41,  6.87s/it]

Loss tensor(0.0686, grad_fn=<BinaryCrossEntropyBackward0>)


 50%|██████████████████████                      | 5/10 [00:34<00:34,  6.87s/it]

Loss tensor(0.0095, grad_fn=<BinaryCrossEntropyBackward0>)


 60%|██████████████████████████▍                 | 6/10 [00:41<00:27,  6.87s/it]

Loss tensor(0.0216, grad_fn=<BinaryCrossEntropyBackward0>)


 70%|██████████████████████████████▊             | 7/10 [00:48<00:20,  6.87s/it]

Loss tensor(0.0053, grad_fn=<BinaryCrossEntropyBackward0>)


 80%|███████████████████████████████████▏        | 8/10 [00:55<00:13,  6.92s/it]

Loss tensor(0.0004, grad_fn=<BinaryCrossEntropyBackward0>)


 90%|███████████████████████████████████████▌    | 9/10 [01:01<00:06,  6.90s/it]

Loss tensor(0.0017, grad_fn=<BinaryCrossEntropyBackward0>)


100%|███████████████████████████████████████████| 10/10 [01:08<00:00,  6.88s/it]


In [96]:
# test
test_sentence = 'Fuck this bitch. This sentence should be offensive.'
tokenized_test_sentence = tokenizer(test_sentence)

#pad
pad_tokenized_test_sentence = pad_sentence(tokenized_test_sentence)
padded = pad_tokenized_test_sentence
print(pad_tokenized_test_sentence)

#word - index
pad_tokenized_test_sentence = glove_vocab(pad_tokenized_test_sentence)

#tensor
pad_tokenized_test_sentence = torch.tensor(pad_tokenized_test_sentence)

#shape
pad_tokenized_test_sentence = pad_tokenized_test_sentence.view(1, -1)
pad_tokenized_test_sentence.shape

custom_model.eval()
output, attn_out, attn_out_trans = custom_model(pad_tokenized_test_sentence)

print(torch.mean(output, dim=1))

['fuck', 'this', 'bitch', '.', 'this', 'sentence', 'should', 'be', 'offensive', '.', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
tensor([[0.9957]], grad_fn=<MeanBackward1>)


In [98]:
attn_out.shape

torch.Size([1, 25, 25])

In [99]:
attn_out = attn_out.squeeze()
attn_out.shape

torch.Size([25, 25])

In [102]:
attn_out.sum(axis = 0)

tensor([ 6.6792,  0.6173, 13.7180,  1.5444,  0.1918,  0.2316,  0.0788,  0.0442,
         0.4992,  0.0846,  0.1362,  0.1795,  0.1809,  0.1549,  0.1219,  0.0943,
         0.0752,  0.0629,  0.0552,  0.0499,  0.0461,  0.0428,  0.0399,  0.0370,
         0.0343], grad_fn=<SumBackward1>)

In [103]:
# test
test_sentence = 'This nigger tried to kill me. This sentence should be offensive.'
tokenized_test_sentence = tokenizer(test_sentence)

#pad
pad_tokenized_test_sentence = pad_sentence(tokenized_test_sentence)
padded = pad_tokenized_test_sentence
print(pad_tokenized_test_sentence)

#word - index
pad_tokenized_test_sentence = glove_vocab(pad_tokenized_test_sentence)

#tensor
pad_tokenized_test_sentence = torch.tensor(pad_tokenized_test_sentence)

#shape
pad_tokenized_test_sentence = pad_tokenized_test_sentence.view(1, -1)
pad_tokenized_test_sentence.shape

custom_model.eval()
output, attn_out, attn_out_trans = custom_model(pad_tokenized_test_sentence)

print(torch.mean(output, dim=1))

['this', 'nigger', 'tried', 'to', 'kill', 'me', '.', 'this', 'sentence', 'should', 'be', 'offensive', '.', '', '', '', '', '', '', '', '', '', '', '', '']
tensor([[0.9998]], grad_fn=<MeanBackward1>)


In [104]:
attn_out = attn_out.squeeze()
attn_out.shape

torch.Size([25, 25])

In [105]:
attn_out.sum(axis = 0)

tensor([0.0587, 5.8318, 2.7673, 1.6082, 3.5135, 5.7211, 2.0190, 0.3526, 0.4417,
        0.1163, 0.0391, 0.4026, 0.0880, 0.1672, 0.2640, 0.3274, 0.3336, 0.2919,
        0.2255, 0.1570, 0.1024, 0.0663, 0.0453, 0.0333, 0.0263],
       grad_fn=<SumBackward1>)

In [113]:
# test
test_sentence = 'Bastard tried to kill me'
tokenized_test_sentence = tokenizer(test_sentence)

#pad
pad_tokenized_test_sentence = pad_sentence(tokenized_test_sentence)
padded = pad_tokenized_test_sentence
print(pad_tokenized_test_sentence)

#word - index
pad_tokenized_test_sentence = glove_vocab(pad_tokenized_test_sentence)

#tensor
pad_tokenized_test_sentence = torch.tensor(pad_tokenized_test_sentence)

#shape
pad_tokenized_test_sentence = pad_tokenized_test_sentence.view(1, -1)
pad_tokenized_test_sentence.shape

custom_model.eval()
output, attn_out, attn_out_trans = custom_model(pad_tokenized_test_sentence)

print(torch.mean(output, dim=1))

['bastard', 'tried', 'to', 'kill', 'me', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '', '']
tensor([[0.9999]], grad_fn=<MeanBackward1>)


In [114]:
attn_out = attn_out.squeeze()
attn_out.shape

torch.Size([25, 25])

In [115]:
attn_out.sum(axis = 0)

tensor([0.9454, 0.5490, 0.3616, 1.4496, 2.9873, 2.5174, 2.3567, 2.8675, 3.2261,
        2.8674, 2.0675, 1.2733, 0.6942, 0.3481, 0.1742, 0.0953, 0.0589, 0.0404,
        0.0297, 0.0229, 0.0184, 0.0152, 0.0129, 0.0112, 0.0099],
       grad_fn=<SumBackward1>)

In [126]:
np.argsort(attn_out.sum(axis=0).detach().numpy())

array([24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13,  2,  1, 12,  0, 11,
        3, 10,  6,  5,  9,  7,  4,  8])

In [127]:
padded

['bastard',
 'tried',
 'to',
 'kill',
 'me',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '',
 '']

In [131]:
# test
test_sentence = 'This nigger tried to kill me. This sentence should be offensive.'
tokenized_test_sentence = tokenizer(test_sentence)

#pad
pad_tokenized_test_sentence = pad_sentence(tokenized_test_sentence)
padded = pad_tokenized_test_sentence
print(pad_tokenized_test_sentence)

#word - index
pad_tokenized_test_sentence = glove_vocab(pad_tokenized_test_sentence)

#tensor
pad_tokenized_test_sentence = torch.tensor(pad_tokenized_test_sentence)

#shape
pad_tokenized_test_sentence = pad_tokenized_test_sentence.view(1, -1)
pad_tokenized_test_sentence.shape

custom_model.eval()
output, attn_out, attn_out_trans = custom_model(pad_tokenized_test_sentence)

#print(torch.mean(output, dim=1))

attn_out = attn_out.squeeze()
attn_out = attn_out.sum(axis = 0)
words = np.argsort(attn_out.detach().numpy())[-5:]

print(words)

for word in words:
    print(padded[word])

['this', 'nigger', 'tried', 'to', 'kill', 'me', '.', 'this', 'sentence', 'should', 'be', 'offensive', '.', '', '', '', '', '', '', '', '', '', '', '', '']
[6 2 4 5 1]
.
tried
kill
me
nigger


In [132]:
# test
test_sentence = "I am transgender but I'm also mentally ill but it's still appreciated."
tokenized_test_sentence = tokenizer(test_sentence)

#pad
pad_tokenized_test_sentence = pad_sentence(tokenized_test_sentence)
padded = pad_tokenized_test_sentence
print(pad_tokenized_test_sentence)

#word - index
pad_tokenized_test_sentence = glove_vocab(pad_tokenized_test_sentence)

#tensor
pad_tokenized_test_sentence = torch.tensor(pad_tokenized_test_sentence)

#shape
pad_tokenized_test_sentence = pad_tokenized_test_sentence.view(1, -1)
pad_tokenized_test_sentence.shape

custom_model.eval()
output, attn_out, attn_out_trans = custom_model(pad_tokenized_test_sentence)

#print(torch.mean(output, dim=1))

attn_out = attn_out.squeeze()
attn_out = attn_out.sum(axis = 0)
words = np.argsort(attn_out.detach().numpy())[-5:]

print(words)

for word in words:
    print(padded[word])

['i', 'am', 'transgender', 'but', 'i', "'", 'm', 'also', 'mentally', 'ill', 'but', 'it', "'", 's', 'still', 'appreciated', '.', '', '', '', '', '', '', '', '']
[1 0 2 8 9]
am
i
transgender
mentally
ill


In [137]:
# test
test_sentence = "I am transgender but I'm also mentally ill but it's still appreciated."
tokenized_test_sentence = tokenizer(test_sentence)

#pad
pad_tokenized_test_sentence = pad_sentence(tokenized_test_sentence)
padded = pad_tokenized_test_sentence
print(pad_tokenized_test_sentence)

#word - index
pad_tokenized_test_sentence = glove_vocab(pad_tokenized_test_sentence)

#tensor
pad_tokenized_test_sentence = torch.tensor(pad_tokenized_test_sentence)

#shape
pad_tokenized_test_sentence = pad_tokenized_test_sentence.view(1, -1)
pad_tokenized_test_sentence.shape

custom_model.eval()
output, attn_out, attn_out_trans = custom_model(pad_tokenized_test_sentence)

output = torch.mean(output, dim=1).detach().numpy()
print(output[0][0])

attn_out = attn_out.squeeze()
attn_out = attn_out.sum(axis = 0)
words = np.argsort(attn_out.detach().numpy())[-5:]

print(words)

for word in words:
    print(padded[word])

['i', 'am', 'transgender', 'but', 'i', "'", 'm', 'also', 'mentally', 'ill', 'but', 'it', "'", 's', 'still', 'appreciated', '.', '', '', '', '', '', '', '', '']
0.00047157696
[1 0 2 8 9]
am
i
transgender
mentally
ill


In [142]:
def classify_identify(text):
    tokenized_sentence = tokenizer(text)
    pad_tokenized_sentence = pad_sentence(tokenized_sentence)
    padded = pad_tokenized_sentence
    pad_tokenized_sentence = glove_vocab(pad_tokenized_sentence)
    pad_tokenized_sentence = torch.tensor(pad_tokenized_sentence)
    pad_tokenized_sentence = pad_tokenized_sentence.view(1, -1)
    custom_model.eval()
    output, attn_out, attn_out_trans = custom_model(pad_tokenized_sentence)
    output = torch.mean(output, dim=1).detach().numpy()
    
    label = 'non-offensive'
    if output[0][0] > 0.6:
        label = 'offensive'
        
    attn_out = attn_out.squeeze()
    attn_out = attn_out.sum(axis = 0)
    words = np.argsort(attn_out.detach().numpy())[-5:]
    
    words_list = []
    
    for word in words:
        words_list.append(padded[word])
        
    return label, words_list
    

In [143]:
classify_identify("I am transgender but I'm also mentally ill but it's still appreciated.")

('non-offensive', ['am', 'i', 'transgender', 'mentally', 'ill'])

# Test implementation

In [133]:
test_df = pd.read_csv('data/test.tsv', delimiter='\t', header=None, names=['text'])
test_df

Unnamed: 0,text
0,JEW Get the fuck out of here you jewish son of...
1,This is how I too feel about #LGBTQ #Pride It'...
2,"You're right, I didn't think about that. And w..."
3,I am transgender but I'm also mentally ill but...
4,trump is the only jews true friend. I hope tru...
...,...
8642,Hitler didn't finish it. Can u. If a nigger ur...
8643,"Ah yeah, I'm actually Dominican myself! And mu..."
8644,"""@iamkrause: No need to thank me, killing nigg..."
8645,+Kinda Linda yea I know I just wish people wou...


In [144]:
from tqdm import tqdm

In [145]:
test_df_annotated = pd.DataFrame(columns = ['text', 'label', 'words'])

In [147]:
for text in tqdm(test_df['text']):
    label, words = classify_identify(text)
    row  = pd.DataFrame([[text, 
                         label,
                         words]],
                         columns = test_df_annotated.columns)
    
    test_df_annotated = pd.concat([row, test_df_annotated], ignore_index = True)

100%|██████████████████████████████████████| 8647/8647 [00:10<00:00, 804.93it/s]


In [148]:
test_df_annotated.head()

Unnamed: 0,text,label,words
0,"Hey ,muslim dickhead !!!!!!!! Why don;t you go...",offensive,"[or, !, shove, allah, fuck]"
1,+Kinda Linda yea I know I just wish people wou...,non-offensive,"[+kinda, see, islam, to, digging]"
2,"""@iamkrause: No need to thank me, killing nigg...",offensive,"[pal, killing, dis, is, niggers]"
3,"Ah yeah, I'm actually Dominican myself! And mu...",non-offensive,"[yeah, dominican, and, myself, !]"
4,Hitler didn't finish it. Can u. If a nigger ur...,offensive,"[in, jew, street, the, u]"


In [149]:
test_df_annotated.to_csv("annotated_test_data.csv")