In [17]:
import numpy as np, re
import torch
import torch.nn as nn
import torch.optim as optim
from torchtext.datasets import TranslationDataset, Multi30k
from torchtext.data import Field, BucketIterator, ReversibleField, TabularDataset, Iterator
import torch.nn.functional as F
from sklearn.metrics import precision_recall_fscore_support as score
import random
import math
import os

In [18]:
TWEET = Field()
LABEL = Field(sequential=False, use_vocab=False)

fields = {'tweet_50': ('t', TWEET), 'label_sa': ('l', LABEL)}

train_data, valid_data, test_data = TabularDataset.splits(
                                        path = 'ArSAS/json',
                                        train = 'train_final.json',
                                        validation = 'dev_final.json',
                                        test = 'test_final.json',
                                        format = 'json',
                                        fields = fields
)

print(vars(train_data[0]))
print(vars(valid_data[0]))
print(vars(test_data[0]))
TWEET.build_vocab(train_data)
LABEL.build_vocab(train_data)
BATCH_SIZE = 2

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(device)
train_iterator, valid_iterator, test_iterator = BucketIterator.splits(
    (train_data, valid_data, test_data), sort=False, batch_size= BATCH_SIZE, device=device)
print(type(train_iterator))

print('Train:')
for batch in train_iterator:
    print(batch)
    break

print('dev:')
for batch in valid_iterator:
    print(batch)
    break



{'t': ['المباراه', 'القادمه', 'HASH', 'x', 'HASH', 'الجوله', 'الاخيره', 'من', 'المجموعه', 'ال', 'NUM', 'تصفيات', 'كاس', 'العالم', 'NUM', 'روسيا', 'ترتيب', 'مصر', 'المركز', 'الاول', 'NUM', 'نقطه', 'تم', 'حسم', 'التاهل', 'للمونديال', 'غدا', 'الساعه', 'NUM', 'NUM', 'ع', 'قناه', 'بين', 'سبورت', 'NUM', 'توقعاتكم', 'ل', 'نتيجه', 'الماتش', 'NUM', 'URL\n'], 'l': 0}
{'t': ['هل', 'هذه', 'هي', 'سياسه', 'خارجيه', 'لدوله', 'تحترم', 'نفسها', 'والاخرين', 'HASH', 'عدوان', 'وحصار', 'ل', 'NUM', 'سنوات', 'HASH', 'قمع', 'حراك', 'شعبها', 'المسالم', 'المطالب', 'بالمساواه', 'والعداله', 'HASH', 'دعموا', 'الارهاب', 'وارسلوا', 'المال', 'والسلاح', 'والانتحاريين', 'HASH', 'حصار', 'ومحاوله', 'فرض', 'الوصايه', 'والان', 'HASH', 'محاوله', 'فرض', 'وصايه', 'علني\n'], 'l': 2}
{'t': ['ومع', 'السيسي', 'و', 'بشار', 'و', 'ايران', 'و', 'بن', 'زايد', 'و', 'والا', 'خليفه', 'و', 'روافض', 'العراق', 'و', 'حفتر', 'و', 'علي', 'صالح', 'كل', 'طواغيت', 'العرب', 'العلاقات', 'عسل', 'علي', 'سمن\n'], 'l': 2}
cpu
<class 'torchtext.data.ite

In [19]:
class Attention(nn.Module):
    def __init__(self, query_dim, key_dim, value_dim):
        super(Attention, self).__init__()
        self.scale = 1. / math.sqrt(query_dim)

    def forward(self, query, keys, values):
        # Query = Hidden
        # hidden dim => (batch size, enc_hid_dim)
        # Query = [BxQ]
        # Keys = [TxBxK]
        # Values = [TxBxV]
        # Outputs = a:[TxB], lin_comb:[BxV]

        # Here we assume q_dim == k_dim (dot product attention)

        query = query.unsqueeze(1) # [BxQ] -> [Bx1xQ]
        keys = keys.transpose(0,1).transpose(1,2) # [TxBxK] -> [BxKxT]
        energy = torch.bmm(query, keys) # [Bx1xQ]x[BxKxT] -> [Bx1xT]
        energy = F.softmax(energy.mul_(self.scale), dim=2) # scale, normalize

        values = values.transpose(0,1) # [TxBxV] -> [BxTxV]
        linear_combination = torch.bmm(energy, values).squeeze(1) #[Bx1xT]x[BxTxV] -> [BxV]
        return energy, linear_combination


In [20]:
class Encoder(nn.Module):
    def __init__(self, attention, input_dim, emb_dim, enc_hid_dim, output_dim, dropout):
        super().__init__()
        self.input_dim = input_dim
        self.emb_dim = emb_dim
        self.enc_hid_dim = enc_hid_dim
        self.dropout = dropout
        self.attention=attention

        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.rnn = nn.GRU(emb_dim, enc_hid_dim, num_layers= 1, bidirectional=True)
        self.linear = nn.Linear(enc_hid_dim*2, output_dim)
        self.dropout = nn.Dropout(dropout)
        
        
    def forward(self, input):
        
        # input dim => (src_sent_len, batch_size)
        print("input shape ", input)
        embed = self.dropout(self.embedding(input))
        

        # embed dim => (src_sent_len, batch_size, emb_dim)
        outputs, hidden = self.rnn(embed)
        print("outputs ", outputs)
        print("....")
        print("hidden ", hidden)
        # outputs dim => (src_sent_len, batch size, enc_hid_dim * num directions)
        # hidden dim => (number of layers * number of directions, batch size, enc_hid_dim)
        hidden = torch.cat((hidden[-2, : , :], hidden[-1, :, :]), dim = 1)
        # hidden[-2, :, :] reduces 3d to 2d tensor since first dimension is now fixed, so dim = 1 is the last dimension
        # hidden dim => (batch size, enc_hid_dim)


        energy, attn_output = self.attention(hidden, outputs, outputs)
        predictions = self.linear(attn_output)
        
        
        
        return predictions

In [21]:
speech_act_labels = ['Assertion', 'Recommendation', 'Expression', 'Question', 'Request', 'Miscellaneous']


INPUT_DIM = len(TWEET.vocab)
OUTPUT_DIM = len(speech_act_labels) - 1  # no miscellaneous class
ENC_EMB_DIM = 256
ENC_HID_DIM = 512
ENC_DROPOUT = 0.5
ATTN_DIM = 500

attn = Attention(ATTN_DIM, ATTN_DIM, ATTN_DIM)
model = Encoder(attn, INPUT_DIM, ENC_EMB_DIM, ENC_HID_DIM, OUTPUT_DIM, ENC_DROPOUT).to(device)


optimizer = optim.Adam(model.parameters())

# pad_idx = TRG.vocab.stoi['<pad>']

criterion = nn.CrossEntropyLoss()



In [22]:
import sys
def train(model, iterator, optimizer, criterion, clip):
    
    model.train()
    
    epoch_loss = 0
    
    for i, batch in enumerate(iterator):
        print("train iterator ", i)
        src = batch.t
        trg = batch.l
#         print("src")
#         print(src)
#         print("target")
#         print(trg)
        optimizer.zero_grad()
        
        output = model(src)
        sys.exit(0)
#         print("output after seq2seq")
#         print(output)
#         print(output.shape)
        #trg = [sent len, batch size]
        #output = [sent len, batch size, output dim]
        
        #reshape to:
        #trg = [(sent len - 1) * batch size]
        #output = [(sent len - 1) * batch size, output dim]
        loss = criterion(output, trg)
        print("loss ", loss)
        loss.backward()
        
        torch.nn.utils.clip_grad_norm_(model.parameters(), clip)
        
        optimizer.step()
        
        epoch_loss += loss.item()
        
        
    return epoch_loss / len(iterator)

In [23]:
def evaluate(model, iterator, criterion, testing):
    
    model.eval()
    epoch_loss = 0
    ground_truth, classification = [], []
    
    with torch.no_grad():
    
        for i, batch in enumerate(iterator):

            src = batch.t
            trg = batch.l
            
            output = model(src) #turn off teacher forcing
            
            loss = criterion(output, trg)

            epoch_loss += loss.item()
            if testing:
                
                for j, ind_output in enumerate(output):
                    max_index = ind_output.max(0)[1]
                    classification.append(max_index.item())
                    ground_truth.append(trg[j].item())
                    
    if testing:
        print("trg ", trg.shape)
        print("classification ", len(classification))
        precision, recall, fscore, support = score(np.array(ground_truth), classification)
        print("Detailed evaluation:")
        print('precision: {}'.format(precision))
        print('recall: {}'.format(recall))
        print('fscore: {}'.format(fscore))
        print('support: {}'.format(support))
 
    return epoch_loss / len(iterator)

In [24]:
N_EPOCHS = 20
CLIP = 0.001
SAVE_DIR = 'models'
MODEL_SAVE_PATH = os.path.join(SAVE_DIR, 'sa_weights_50.pt')

best_valid_loss = float('inf')

if not os.path.isdir(f'{SAVE_DIR}'):
    os.makedirs(f'{SAVE_DIR}')

for epoch in range(N_EPOCHS):
    print("epoch ", epoch)
    train_loss = train(model, train_iterator, optimizer, criterion, CLIP)
    valid_loss = evaluate(model, valid_iterator, criterion, False)
    
    if valid_loss < best_valid_loss:
        best_valid_loss = valid_loss
        torch.save(model.state_dict(), MODEL_SAVE_PATH)
    
    print(f'| Epoch: {epoch+1:03} | Train Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f} | Val. Loss: {valid_loss:.3f} | Val. PPL: {math.exp(valid_loss):7.3f} |')
    print(f'| Epoch: {epoch+1:03} | Train Loss: {train_loss:.3f} | Train PPL: {math.exp(train_loss):7.3f}  |')

epoch  0
train iterator  0
input shape  tensor([[26376,   191],
        [   11,  2072],
        [ 7605,   481],
        [ 4295,    39],
        [   70, 15346],
        [   69,    13],
        [19660, 17791],
        [ 1931, 27191],
        [ 9831,  5142],
        [18853, 19563],
        [    2,     1],
        [   43,     1],
        [ 4339,     1],
        [    6,     1],
        [  696,     1],
        [14833,     1],
        [   11,     1],
        [ 4685,     1],
        [23431,     1],
        [   19,     1],
        [  129,     1],
        [ 1975,     1],
        [   11,     1],
        [  723,     1],
        [  129,     1],
        [ 5241,     1],
        [ 2048,     1],
        [   11,     1],
        [  882,     1],
        [    6,     1],
        [  377,     1],
        [ 3273,     1],
        [  292,     1],
        [ 6660,     1],
        [  163,     1],
        [   13,     1],
        [  182,     1]])
outputs  tensor([[[-0.3701, -0.2110, -0.4401,  ...,  0.3120,  0.0869, -

SystemExit: 0

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [None]:
model.load_state_dict(torch.load(MODEL_SAVE_PATH))

test_loss = evaluate(model, test_iterator, criterion, True)

print(f'| Test Loss: {test_loss:.3f} | Test PPL: {math.exp(test_loss):7.3f} |')