In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%matplotlib inline
!pip install torch>=1.3.1
!pip install torchtext==0.4

Collecting torchtext==0.4
  Downloading torchtext-0.4.0-py3-none-any.whl (53 kB)
[?25l[K     |██████▏                         | 10 kB 21.7 MB/s eta 0:00:01[K     |████████████▍                   | 20 kB 27.1 MB/s eta 0:00:01[K     |██████████████████▌             | 30 kB 11.8 MB/s eta 0:00:01[K     |████████████████████████▊       | 40 kB 9.4 MB/s eta 0:00:01[K     |██████████████████████████████▉ | 51 kB 5.3 MB/s eta 0:00:01[K     |████████████████████████████████| 53 kB 1.3 MB/s 
Installing collected packages: torchtext
  Attempting uninstall: torchtext
    Found existing installation: torchtext 0.10.0
    Uninstalling torchtext-0.10.0:
      Successfully uninstalled torchtext-0.10.0
Successfully installed torchtext-0.4.0


In [3]:
import torch
import torchtext
#from torchtext.legacy.datasets import text_classification
from torchtext.datasets import text_classification

import os
if not os.path.isdir('./.data'):
    os.mkdir('./.data')
train_dataset, test_dataset = text_classification.DATASETS['AG_NEWS'](
    root='./.data', vocab=None)
BATCH_SIZE = 16

ag_news_csv.tar.gz: 100%|██████████| 11.8M/11.8M [00:00<00:00, 107MB/s]
120000lines [00:09, 12515.93lines/s]
120000lines [00:19, 6019.88lines/s]
7600lines [00:01, 6375.64lines/s]


In [4]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [5]:
import torch
import torchtext
from torchtext import data

In [6]:
max_seq_len = 50
TEXT = data.Field(tokenize="spacy", batch_first=True, include_lengths=True, fix_length=max_seq_len)
LABEL = data.LabelField(dtype=torch.float, batch_first=True)

In [7]:
fields = [('label', LABEL), (None, None), ('text',TEXT)]

In [8]:
%ls drive/MyDrive/UNT/AG_news

ls: cannot access 'drive/MyDrive/UNT/AG_news': No such file or directory


In [10]:
training_data=data.TabularDataset(path = 'drive/MyDrive/train.csv',format = 'csv',fields = fields,skip_header = True)

In [11]:
print(vars(training_data.examples[0]))

{'label': '3', 'text': ['Reuters', '-', 'Private', 'investment', 'firm', 'Carlyle', 'Group,\\which', 'has', 'a', 'reputation', 'for', 'making', 'well', '-', 'timed', 'and', 'occasionally\\controversial', 'plays', 'in', 'the', 'defense', 'industry', ',', 'has', 'quietly', 'placed\\its', 'bets', 'on', 'another', 'part', 'of', 'the', 'market', '.']}


In [12]:
train_data, valid_data = training_data.split(split_ratio=0.1)

In [13]:
#initialize glove embeddings
TEXT.build_vocab(train_data,min_freq=3,vectors = "glove.6B.300d")  
LABEL.build_vocab(train_data)

#No. of unique tokens in text
print("Size of TEXT vocabulary:",len(TEXT.vocab))

#No. of unique tokens in label
print("Size of LABEL vocabulary:",len(LABEL.vocab))

#Commonly used words
print(TEXT.vocab.freqs.most_common(10))  

#Word dictionary
# print(TEXT.vocab.stoi)

.vector_cache/glove.6B.zip: 862MB [02:41, 5.35MB/s]                           
100%|█████████▉| 399999/400000 [00:45<00:00, 8789.19it/s]


Size of TEXT vocabulary: 2269
Size of LABEL vocabulary: 4
[('the', 1674), (',', 1365), ('.', 1252), ('-', 1059), ('a', 970), ('of', 906), ('to', 869), ('in', 757), ('and', 616), (' ', 545)]


In [14]:
batch_size = 64

train_iterator, valid_iterator = data.BucketIterator.splits((train_data, valid_data), batch_size=batch_size,
                                                           sort_key=lambda x: len(x.text),
                                                           sort_within_batch=True)

In [15]:
# Create neural network representation
import torch.nn as nn
import torch.nn.functional as F
from torch.autograd import Variable

class CNNTextClassification(nn.Module):
    def __init__(self, vocabulary_size, embedding_size, max_seq_len, out_channels,
                 kernel_heights, dropout, num_class):
        super().__init__()
        self.out_channels = out_channels
        self.kernel_heights = kernel_heights
        self.embedding_size = embedding_size
        self.max_seq_len = max_seq_len
        
        self.embedding = nn.Embedding(vocabulary_size, embedding_size)
        
        self.conv1 = nn.Sequential(nn.Conv1d(in_channels=self.embedding_size, out_channels=self.out_channels,
                               kernel_size=self.kernel_heights[0]),
                                   nn.ReLU(),
                                  nn.MaxPool1d(self.max_seq_len - self.kernel_heights[0]+1))
        
        self.conv2 = nn.Sequential(nn.Conv1d(in_channels=self.embedding_size, out_channels=self.out_channels,
                               kernel_size=self.kernel_heights[1]),
                                   nn.ReLU(),
                                  nn.MaxPool1d(self.max_seq_len - self.kernel_heights[1]+1))
        
        self.conv3 = nn.Sequential(nn.Conv1d(in_channels=self.embedding_size, out_channels=self.out_channels,
                               kernel_size=self.kernel_heights[2]),
                                   nn.ReLU(),
                                  nn.MaxPool1d(self.max_seq_len - self.kernel_heights[2]+1))
        
        self.dropout = nn.Dropout(dropout)
        
        self.fc = nn.Linear(len(self.kernel_heights) * out_channels, num_class)
        
        self.softmax = nn.LogSoftmax(dim=-1)
        
    def forward(self, text, text_lengths):
        emb = self.embedding(text).permute(0, 2, 1)
        
        conv_out1 = self.conv1(emb).squeeze(2)
        conv_out2 = self.conv2(emb).squeeze(2)
        conv_out3 = self.conv3(emb).squeeze(2)
        
        all_out = torch.cat((conv_out1, conv_out2, conv_out3), 1)
        final_feature_map = self.dropout(all_out)
        
        final_out = self.fc(final_feature_map)
        
        return self.softmax(final_out)

In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import functional as F

class RNN(nn.Module):
	def __init__(self, batch_size, output_size, hidden_size, vocab_size, embedding_length, weights):
		super(RNN, self).__init__()

		"""
		Arguments
		---------
		batch_size : Size of the batch which is same as the batch_size of the data returned by the TorchText BucketIterator
		output_size : 2 = (pos, neg)
		hidden_sie : Size of the hidden_state of the LSTM
		vocab_size : Size of the vocabulary containing unique words
		embedding_length : Embeddding dimension of GloVe word embeddings
		weights : Pre-trained GloVe word_embeddings which we will use to create our word_embedding look-up table 
		
		"""

		self.batch_size = batch_size
		self.output_size = output_size
		self.hidden_size = hidden_size
		self.vocab_size = vocab_size
		self.embedding_length = embedding_length
		
		self.word_embeddings = nn.Embedding(vocab_size, embedding_length)
		self.word_embeddings.weight = nn.Parameter(weights, requires_grad=False)
		self.rnn = nn.RNN(embedding_length, hidden_size, num_layers=2, bidirectional=True)
		self.label = nn.Linear(4*hidden_size, output_size)
	
	def forward(self, input_sentences, batch_size=None):
		
		""" 
		Parameters
		----------
		input_sentence: input_sentence of shape = (batch_size, num_sequences)
		batch_size : default = None. Used only for prediction on a single sentence after training (batch_size = 1)
		
		Returns
		-------
		Output of the linear layer containing logits for pos & neg class which receives its input as the final_hidden_state of RNN.
		logits.size() = (batch_size, output_size)
		
		"""

		input = self.word_embeddings(input_sentences)
		input = input.permute(1, 0, 2)
		if batch_size is None:
			h_0 = Variable(torch.zeros(4, self.batch_size, self.hidden_size).cuda()) # 4 = num_layers*num_directions
		else:
			h_0 =  Variable(torch.zeros(4, batch_size, self.hidden_size).cuda())
		output, h_n = self.rnn(input, h_0)
		# h_n.size() = (4, batch_size, hidden_size)
		h_n = h_n.permute(1, 0, 2) # h_n.size() = (batch_size, 4, hidden_size)
		h_n = h_n.contiguous().view(h_n.size()[0], h_n.size()[1]*h_n.size()[2])
		# h_n.size() = (batch_size, 4*hidden_size)
		logits = self.label(h_n) # logits.size() = (batch_size, output_size)
		
		return logits

In [None]:
# Use LSTM for the above scenario
# _*_ coding: utf-8 _*_

import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import functional as F

class LSTMClassifier(nn.Module):
	def __init__(self, batch_size, output_size, hidden_size, vocab_size, embedding_length, weights):
		super(LSTMClassifier, self).__init__()
		
		"""
		Arguments
		---------
		batch_size : Size of the batch which is same as the batch_size of the data returned by the TorchText BucketIterator
		output_size : 2 = (pos, neg)
		hidden_sie : Size of the hidden_state of the LSTM
		vocab_size : Size of the vocabulary containing unique words
		embedding_length : Embeddding dimension of GloVe word embeddings
		weights : Pre-trained GloVe word_embeddings which we will use to create our word_embedding look-up table 
		
		"""
		
		self.batch_size = batch_size
		self.output_size = output_size
		self.hidden_size = hidden_size
		self.vocab_size = vocab_size
		self.embedding_length = embedding_length
		
		self.word_embeddings = nn.Embedding(vocab_size, embedding_length)# Initializing the look-up table.
		self.word_embeddings.weight = nn.Parameter(weights, requires_grad=False) # Assigning the look-up table to the pre-trained GloVe word embedding.
		self.lstm = nn.LSTM(embedding_length, hidden_size)
		self.label = nn.Linear(hidden_size, output_size)
		
	def forward(self, input_sentence, batch_size=None):
	
		""" 
		Parameters
		----------
		input_sentence: input_sentence of shape = (batch_size, num_sequences)
		batch_size : default = None. Used only for prediction on a single sentence after training (batch_size = 1)
		
		Returns
		-------
		Output of the linear layer containing logits for positive & negative class which receives its input as the final_hidden_state of the LSTM
		final_output.shape = (batch_size, output_size)
		
		"""
		
		''' Here we will map all the indexes present in the input sequence to the corresponding word vector using our pre-trained word_embedddins.'''
		input = self.word_embeddings(input_sentence) # embedded input of shape = (batch_size, num_sequences,  embedding_length)
		input = input.permute(1, 0, 2) # input.size() = (num_sequences, batch_size, embedding_length)
		if batch_size is None:
			h_0 = Variable(torch.zeros(1, self.batch_size, self.hidden_size).cuda()) # Initial hidden state of the LSTM
			c_0 = Variable(torch.zeros(1, self.batch_size, self.hidden_size).cuda()) # Initial cell state of the LSTM
		else:
			h_0 = Variable(torch.zeros(1, batch_size, self.hidden_size).cuda())
			c_0 = Variable(torch.zeros(1, batch_size, self.hidden_size).cuda())
		output, (final_hidden_state, final_cell_state) = self.lstm(input, (h_0, c_0))
		final_output = self.label(final_hidden_state[-1]) # final_hidden_state.size() = (1, batch_size, hidden_size) & final_output.size() = (batch_size, output_size)
		
		return final_output

In [None]:
import torch
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import functional as F
import numpy as np

class AttentionModel(torch.nn.Module):
	def __init__(self, batch_size, output_size, hidden_size, vocab_size, embedding_length, weights):
		super(AttentionModel, self).__init__()
		
		"""
		Arguments
		---------
		batch_size : Size of the batch which is same as the batch_size of the data returned by the TorchText BucketIterator
		output_size : 2 = (pos, neg)
		hidden_sie : Size of the hidden_state of the LSTM
		vocab_size : Size of the vocabulary containing unique words
		embedding_length : Embeddding dimension of GloVe word embeddings
		weights : Pre-trained GloVe word_embeddings which we will use to create our word_embedding look-up table 
		
		--------
		
		"""
		
		self.batch_size = batch_size
		self.output_size = output_size
		self.hidden_size = hidden_size
		self.vocab_size = vocab_size
		self.embedding_length = embedding_length
		
		self.word_embeddings = nn.Embedding(vocab_size, embedding_length)
		self.word_embeddings.weights = nn.Parameter(weights, requires_grad=False)
		self.lstm = nn.LSTM(embedding_length, hidden_size)
		self.label = nn.Linear(hidden_size, output_size)
		#self.attn_fc_layer = nn.Linear()
		
	def attention_net(self, lstm_output, final_state):

		""" 
		Now we will incorporate Attention mechanism in our LSTM model. In this new model, we will use attention to compute soft alignment score corresponding
		between each of the hidden_state and the last hidden_state of the LSTM. We will be using torch.bmm for the batch matrix multiplication.
		
		Arguments
		---------
		
		lstm_output : Final output of the LSTM which contains hidden layer outputs for each sequence.
		final_state : Final time-step hidden state (h_n) of the LSTM
		
		---------
		
		Returns : It performs attention mechanism by first computing weights for each of the sequence present in lstm_output and and then finally computing the
				  new hidden state.
				  
		Tensor Size :
					hidden.size() = (batch_size, hidden_size)
					attn_weights.size() = (batch_size, num_seq)
					soft_attn_weights.size() = (batch_size, num_seq)
					new_hidden_state.size() = (batch_size, hidden_size)
					  
		"""
		
		hidden = final_state.squeeze(0)
		attn_weights = torch.bmm(lstm_output, hidden.unsqueeze(2)).squeeze(2)
		soft_attn_weights = F.softmax(attn_weights, 1)
		new_hidden_state = torch.bmm(lstm_output.transpose(1, 2), soft_attn_weights.unsqueeze(2)).squeeze(2)
		
		return new_hidden_state
	
	def forward(self, input_sentences, batch_size=None):
	
		""" 
		Parameters
		----------
		input_sentence: input_sentence of shape = (batch_size, num_sequences)
		batch_size : default = None. Used only for prediction on a single sentence after training (batch_size = 1)
		
		Returns
		-------
		Output of the linear layer containing logits for pos & neg class which receives its input as the new_hidden_state which is basically the output of the Attention network.
		final_output.shape = (batch_size, output_size)
		
		"""
		
		input = self.word_embeddings(input_sentences)
		input = input.permute(1, 0, 2)
		if batch_size is None:
			h_0 = Variable(torch.zeros(1, self.batch_size, self.hidden_size).cuda())
			c_0 = Variable(torch.zeros(1, self.batch_size, self.hidden_size).cuda())
		else:
			h_0 = Variable(torch.zeros(1, batch_size, self.hidden_size).cuda())
			c_0 = Variable(torch.zeros(1, batch_size, self.hidden_size).cuda())
			
		output, (final_hidden_state, final_cell_state) = self.lstm(input, (h_0, c_0)) # final_hidden_state.size() = (1, batch_size, hidden_size) 
		output = output.permute(1, 0, 2) # output.size() = (batch_size, num_seq, hidden_size)
		
		attn_output = self.attention_net(output, final_hidden_state)
		logits = self.label(attn_output)
		
		return logits

In [None]:
import torch.nn as nn
from torch.autograd import Variable
from torch.nn import functional as F

class SelfAttention(nn.Module):
	def __init__(self, batch_size, output_size, hidden_size, vocab_size, embedding_length, weights):
		super(SelfAttention, self).__init__()

		"""
		Arguments
		---------
		batch_size : Size of the batch which is same as the batch_size of the data returned by the TorchText BucketIterator
		output_size : 2 = (pos, neg)
		hidden_sie : Size of the hidden_state of the LSTM
		vocab_size : Size of the vocabulary containing unique words
		embedding_length : Embeddding dimension of GloVe word embeddings
		weights : Pre-trained GloVe word_embeddings which we will use to create our word_embedding look-up table 
		
		--------
		
		"""

		self.batch_size = batch_size
		self.output_size = output_size
		self.hidden_size = hidden_size
		self.vocab_size = vocab_size
		self.embedding_length = embedding_length
		self.weights = weights

		self.word_embeddings = nn.Embedding(vocab_size, embedding_length)
		self.word_embeddings.weights = nn.Parameter(weights, requires_grad=False)
		self.dropout = 0.8
		self.bilstm = nn.LSTM(embedding_length, hidden_size, dropout=self.dropout, bidirectional=True)
		# We will use da = 350, r = 30 & penalization_coeff = 1 as per given in the self-attention original ICLR paper
		self.W_s1 = nn.Linear(2*hidden_size, 350)
		self.W_s2 = nn.Linear(350, 30)
		self.fc_layer = nn.Linear(30*2*hidden_size, 2000)
		self.label = nn.Linear(2000, output_size)

	def attention_net(self, lstm_output):

		"""
		Now we will use self attention mechanism to produce a matrix embedding of the input sentence in which every row represents an
		encoding of the inout sentence but giving an attention to a specific part of the sentence. We will use 30 such embedding of 
		the input sentence and then finally we will concatenate all the 30 sentence embedding vectors and connect it to a fully 
		connected layer of size 2000 which will be connected to the output layer of size 2 returning logits for our two classes i.e., 
		pos & neg.

		Arguments
		---------

		lstm_output = A tensor containing hidden states corresponding to each time step of the LSTM network.
		---------

		Returns : Final Attention weight matrix for all the 30 different sentence embedding in which each of 30 embeddings give
				  attention to different parts of the input sentence.

		Tensor size : lstm_output.size() = (batch_size, num_seq, 2*hidden_size)
					  attn_weight_matrix.size() = (batch_size, 30, num_seq)

		"""
		attn_weight_matrix = self.W_s2(F.tanh(self.W_s1(lstm_output)))
		attn_weight_matrix = attn_weight_matrix.permute(0, 2, 1)
		attn_weight_matrix = F.softmax(attn_weight_matrix, dim=2)

		return attn_weight_matrix

	def forward(self, input_sentences, batch_size=None):

		""" 
		Parameters
		----------
		input_sentence: input_sentence of shape = (batch_size, num_sequences)
		batch_size : default = None. Used only for prediction on a single sentence after training (batch_size = 1)
		
		Returns
		-------
		Output of the linear layer containing logits for pos & neg class.
		
		"""

		input = self.word_embeddings(input_sentences)
		input = input.permute(1, 0, 2)
		if batch_size is None:
			h_0 = Variable(torch.zeros(2, self.batch_size, self.hidden_size).cuda())
			c_0 = Variable(torch.zeros(2, self.batch_size, self.hidden_size).cuda())
		else:
			h_0 = Variable(torch.zeros(2, batch_size, self.hidden_size).cuda())
			c_0 = Variable(torch.zeros(2, batch_size, self.hidden_size).cuda())

		output, (h_n, c_n) = self.bilstm(input, (h_0, c_0))
		output = output.permute(1, 0, 2)
		# output.size() = (batch_size, num_seq, 2*hidden_size)
		# h_n.size() = (1, batch_size, hidden_size)
		# c_n.size() = (1, batch_size, hidden_size)
		attn_weight_matrix = self.attention_net(output)
		# attn_weight_matrix.size() = (batch_size, r, num_seq)
		# output.size() = (batch_size, num_seq, 2*hidden_size)
		hidden_matrix = torch.bmm(attn_weight_matrix, output)
		# hidden_matrix.size() = (batch_size, r, 2*hidden_size)
		# Let's now concatenate the hidden_matrix and connect it to the fully connected layer.
		fc_out = self.fc_layer(hidden_matrix.view(-1, hidden_matrix.size()[1]*hidden_matrix.size()[2]))
		logits = self.label(fc_out)
		# logits.size() = (batch_size, output_size)

		return logits

In [None]:
def training(model, iterator, optimizer, criterion):
    training_loss = 0
    training_accuracy = 0
    
    model.train()
    
    for batch in iterator:
        optimizer.zero_grad()
        
        text, text_lengths = batch.text
        target = batch.label
        target = torch.autograd.Variable(target).long()
        
        output = model(text, text_lengths).squeeze()
        
        loss = criterion(output, target)
        
        training_loss += loss.item()
        
        loss.backward()
        optimizer.step()
        
        num_corrects = (torch.max(output, 1)[1].view(target.size()).data == target.data).float().sum()

        acc = num_corrects/len(batch)
        training_accuracy += acc.item()

    scheduler.step()
    
    return training_loss / len(iterator), training_accuracy / len(iterator)

def testing(model, iterator, optimizer, criterion):
    testing_loss = 0
    testing_accuracy = 0
    model.eval()
    
    for batch in iterator:
        text, text_lengths = batch.text
        target = batch.label
        target = torch.autograd.Variable(target).long()
        
        with torch.no_grad():
            output = model(text, text_lengths).squeeze()
            loss = criterion(output, target)
            
            testing_loss += loss.item()
            num_corrects = (torch.max(output, 1)[1].view(target.size()).data == target.data).float().sum()
            acc = num_corrects/len(batch)
        
            testing_accuracy += acc.item()
            
    return testing_loss / len(iterator), testing_accuracy / len(iterator)

In [None]:
import time

n_epochs = 15
min_val_loss = float("inf")
path='drive/MyDrive/UNT/AG_news/model/saved_weights_cnn.pt'

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.3)
scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9)

for epoch in range(n_epochs):
    start_time = time.time()
    
    train_loss, train_acc = training(model, train_iterator, optimizer, criterion)
    val_loss, val_acc = testing(model, valid_iterator, optimizer, criterion)
    
    secs = int(time.time() - start_time)
    mins = secs / 60
    secs = secs % 60

    print('Epoch: %d' %(epoch + 1), " | time in %d minutes, %d seconds" %(mins, secs))
    print(f'\tLoss: {train_loss:.4f}(train)\t|\tAcc: {train_acc * 100:.2f}%(train)')
    print(f'\tLoss: {val_loss:.4f}(valid)\t|\tAcc: {val_acc * 100:.2f}%(valid)')
    
    if val_loss < min_val_loss:
        min_val_loss = val_loss
        torch.save(model.state_dict(), path)

In [None]:
testing_data=data.TabularDataset(path = 'drive/MyDrive/test.csv',format = 'csv',fields = fields,skip_header = True)
testing_iterator = data.BucketIterator(testing_data, batch_size=batch_size,
                                                           sort_key=lambda x: len(x.text),
                                                           sort_within_batch=True)

In [None]:
def predict(model, iterator):
    testing_accuracy = 0
    model.eval()
    
    for batch in iterator:
        text, text_lengths = batch.text
        # text = TEXT.preprocess(text)
        label = batch.label
        target = torch.autograd.Variable(label).long()
        with torch.no_grad():
            output = model(text, text_lengths).squeeze()
            num_corrects = (torch.max(output, 1)[1].view(target.size()).data == target.data).float().sum()
            acc = num_corrects / len(batch)
            testing_accuracy += acc.item()
    
    return testing_accuracy / len(iterator)

In [16]:
vocabulary_size = len(TEXT.vocab)
n_class = len(LABEL.vocab)
embedding_size = 300
out_channels = 100
kernel_heights = [3, 4, 5]
dropout = 0.4

cnn = CNNTextClassification(vocabulary_size, embedding_size, max_seq_len,
                              out_channels, kernel_heights, dropout, n_class)
rnn = RNN(batch_size=BATCH_SIZE, output_size=output_size, hidden_size=32, vocab_size=vocab_size, embedding_length=embedding_dim, weights=embed_lookup)
lstm = LSTM(batch_size=BATCH_SIZE, output_size=output_size, hidden_size=32, vocab_size=vocab_size, embedding_length=embedding_dim, weights=embed_lookup)
lstm_A = AttentionModel(batch_size=BATCH_SIZE, output_size=2, hidden_size=32, vocab_size=len(train_dataset.get_vocab()), embedding_length=20, weights=embedding_matrix)
self_A = SelfAttention(batch_size=BATCH_SIZE, output_size=output_size, hidden_size=32, vocab_size=vocab_size, embedding_length=embedding_dim, weights=embed_lookup)

In [27]:
pretrained_embeddings = TEXT.vocab.vectors
cnn.embedding.weight.data.copy_(pretrained_embeddings)

tensor([[ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  ...,  0.0000,  0.0000,  0.0000],
        [ 0.0466,  0.2132, -0.0074,  ...,  0.0091, -0.2099,  0.0539],
        ...,
        [-0.4984,  0.4048, -0.2479,  ..., -0.2578, -0.2369,  0.0047],
        [-0.5809,  0.0953, -0.0321,  ...,  0.7431, -0.3895, -0.1347],
        [-0.2117,  0.1462,  0.0605,  ..., -0.4895, -0.0707, -0.0814]])

In [None]:
cnn.load_state_dict(torch.load(path))
rnn.load_state_dict(torch.load(path))
lstm.load_state_dict(torch.load(path))
lstm_A.load_state_dict(torch.load(path))
self_A.load_state_dict(torch.load(path))

In [26]:
test_acc = predict(cnn, testing_iterator)
print(f"Accuracy {test_acc * 100:.2f}")

NameError: ignored

In [None]:
test_acc = predict(rnn, testing_iterator)
print(f"Accuracy {test_acc * 100:.2f}")

In [None]:
test_acc = predict(lstm, testing_iterator)
print(f"Accuracy {test_acc * 100:.2f}")

In [None]:
test_acc = predict(lstm_A, testing_iterator)
print(f"Accuracy {test_acc * 100:.2f}")

In [None]:
test_acc = predict(self_A, testing_iterator)
print(f"Accuracy {test_acc * 100:.2f}")

In [None]:
Is it possible to use transformer for the above scenario i.e., Text Sentiment Analysis / Classification. Answer yes/no. If yes implement using the resource below
Yes, but I am too tired