In [None]:
'''Uncomment below for colab'''
# ! git clone https://github.com/sriyash421/Adversarial-Attention.git
# ! cd Adversarial-Attention && ls && git checkout bert
# ! pip install transformers
# import sys
# sys.path.append('./Adversarial-Attention')

In [2]:
import json
import torch
import pickle
import numpy as np
import torch.nn as nn
from scipy import stats
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from generate_embeddings import *
from nn_utils import Attention, Discriminator, EmotionRegression, FeatureExtaction
from transformers import BertTokenizer, BertModel, AdamW, get_linear_schedule_with_warmup

ModuleNotFoundError: No module named 'generate_embeddings'

In [7]:
# torch.manual_seed(0)
batch_size = 64
lr_attn = 1e-4
lr_feature = 8e-5
lr_regressor = 4e-5
lr_discriminator = 4e-5
epochs = 100
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print(device)

cuda:0


In [None]:
class EmoBank(Dataset) :
    def __init__(self, PATH="./Adversarial-Attention/data/emobank.csv", type=None, target_type=['V','A']) :
        self.data = pd.read_csv(PATH).to_dict(orient="records")
        self.tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
        self.sentences = []
        self.targets = []

        for i in trange(len(self.data)) :
            item = self.data[i]
            if(type != item["split"]) :
                continue
            sentence = item["text"]
            # urls = re.findall('http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\(\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+', sentence)
            # for url in urls: sentence = sentence.replace(url,'')
            target = torch.tensor([item[i] for i in target_type])
            self.sentences.append(sentence)
            self.targets.append(target)

        self.encode()
        print("Dataset {} size: {}".format(type, len(self.sentences)))
    
    def __len__(self) :
        return len(self.sentences)
    
    def __getitem__(self, idx) :
        return self.input_ids[idx], self.attention_masks[idx], self.token_type_ids[idx], self.targets[idx], self.source_lengths[idx]

    def encode(self) :
        self.input_ids = []
        self.attention_masks = []
        self.token_type_ids = []
        self.max_len, self.source_lengths = self.max_length()
        for sent in self.sentences :
            encoded_dict = self.tokenizer.encode_plus(sent,  
                                                    max_length=self.max_len, 
                                                    pad_to_max_length="True", 
                                                    return_attention_mask = True,
                                                    return_tensors = 'pt', 
                                                    return_token_type_ids = True,
                                                    return_lengths = True)
            self.input_ids.append(encoded_dict['input_ids'])
            self.attention_masks.append(encoded_dict['attention_mask'])
            self.token_type_ids.append(encoded_dict['token_type_ids'])
        
        self.input_ids = torch.cat(self.input_ids, dim=0)
        self.attention_masks = torch.cat(self.attention_masks, dim=0)
        self.token_type_ids = torch.cat(self.token_type_ids, dim=0)
        self.source_lengths = torch.LongTensor(self.source_lengths)
        print("input ids: {} attention_masks: {} token_type_ids: {} source_lengths: {}".format(
            self.input_ids.shape, self.attention_masks.shape, self.token_type_ids.shape, self.source_lengths.shape))
  
    def max_length(self) :
        max_len = 0
        lengths = []
        for sent in self.sentences:
            input_ids = self.tokenizer.encode(sent)
            max_len = max(max_len, len(input_ids))
            lengths.append(min(512, len(input_ids)))
        max_len = min(512, max_len)
        return max_len, lengths

In [1]:
train_dataloader = DataLoader(EmoBank(type="train"), batch_size=batch_size, shuffle=True)
dev_dataloader = DataLoader(EmoBank(type="dev"), batch_size=batch_size, shuffle=False)
test_dataloader = DataLoader(EmoBank(type="test"), batch_size=batch_size, shuffle=False)

NameError: name 'DataLoader' is not defined

In [4]:
class AAN(nn.Module) :
    def __init__(self, embed_size=768, hidden_size=150) :
        #['V', 'A', 'D', 'S']
        super(AAN, self).__init__()
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.feature_size = self.hidden_size*4

        self.bert = BertModel.from_pretrained("bert-base-cased")
        for name, param in self.bert.named_parameters() :
          if "11" not in name :
              param.requires_grad = False

        self.attention_1 = Attention(self.embed_size)
        self.attention_2 = Attention(self.embed_size)
        self.attention_s = Attention(self.embed_size)

        self.features = FeatureExtaction(self.embed_size, self.hidden_size)
        
        self.regression_1 = EmotionRegression(self.feature_size * 2)
        self.regression_2 = EmotionRegression(self.feature_size * 2)

        self.discriminator = Discriminator(self.feature_size)
    
    def forward(self,input_ids, attn_masks, token_type_ids, source_lengths) :
        sentences = self.bert(input_ids, attn_masks, token_type_ids)[0]

        sentences_1 = self.attention_1(sentences, source_lengths)
        sentences_2 = self.attention_2(sentences, source_lengths)
        sentences_s = self.attention_s(sentences, source_lengths)

        features_1 = self.features(sentences_1, source_lengths)
        features_2 = self.features(sentences_2, source_lengths)
        features_s = self.features(sentences_s, source_lengths)

        value_1 = self.regression_1(torch.cat((features_1, features_s), dim=1))
        value_2 = self.regression_2(torch.cat((features_2, features_s), dim=1))

        p1, p2 = self.discriminator(features_1), self.discriminator(features_2)
        
        return value_1, value_2, p1, p2


In [13]:
class Train() :
    def __init__(self, type=[0,1]) :
        super(Train, self).__init__()
        self.type = type
        self.model = AAN()
        self.model.cuda()
        self.init_weights()
        self.mse = nn.MSELoss()
        self.bert_optim = AdamW(self.model.bert.parameters(), lr=2e-5)
        self.attention_optim = optim.Adam(
            list(self.model.attention_1.parameters())+
            list(self.model.attention_2.parameters())+
            list(self.model.attention_s.parameters()), lr=lr_attn)
        self.attention_optim_adversarial = optim.Adam(
            list(self.model.attention_1.parameters())+
            list(self.model.attention_2.parameters())+
            list(self.model.attention_s.parameters()), lr=lr_attn)
        self.feature_optim = optim.Adam(self.model.features.parameters(), lr=lr_feature)
        self.regressor_optim = optim.RMSprop(
            list(self.model.regression_1.parameters())+
            list(self.model.regression_1.parameters()), lr=lr_regressor)
        self.discriminator_optim = optim.RMSprop(self.model.discriminator.parameters(), lr=lr_discriminator)
        self.training_stats = []
    
    def init_weights(self) :
        for name, param in self.model.named_parameters() :
            if 'bert' in name :
                continue
            temp = np.sqrt(6.0/(sum([i for i in param.shape])+1e-8))
            param.data.uniform_(-temp, temp)
    
    def run_model(self, batch) :
        input_ids = batch[0].to(device)
        attention_masks = batch[1].to(device)
        token_type_ids = batch[2].to(device)
        source_lengths = batch[4].to(device)
        value_1, value_2, p1, p2 = self.model(input_ids, attention_masks, token_type_ids, source_lengths)
        output = torch.cat((value_1, value_2), dim=1)
        return output, p1, p2
    
    def get_r(self, output, target) :
        temp = [ stats.pearsonr(output[:,i].cpu().detach(), target[:,i].cpu().detach())[0] for i in range(2)]
        return temp
    
    def train(self, epochs=epochs) :
        mse = nn.MSELoss()
        total_steps = len(train_dataloader) * epochs
        scheduler = get_linear_schedule_with_warmup(self.bert_optim, 
                                            num_warmup_steps = 0,
                                            num_training_steps = total_steps)
        for epoch_i in range(epochs) :
            print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
            self.model.train()
            train_r = []
            train_loss = []
            for i, batch in enumerate(train_dataloader) :

                self.attention_optim.zero_grad()
                self.feature_optim.zero_grad()
                self.regressor_optim.zero_grad()
                self.bert_optim.zero_grad()

                target = batch[3].to(device)
                output, p1, p2 = self.run_model(batch)
                reg_loss = mse(output, target)
                train_loss.append(reg_loss.item())
                reg_loss.backward()
                torch.nn.utils.clip_grad_norm_(self.model.bert.parameters(), 1.0)

                self.attention_optim.step()
                self.feature_optim.step()
                self.regressor_optim.step()
                self.bert_optim.step()

                self.discriminator_optim.zero_grad()

                output, p1, p2 = self.run_model(batch)
                wloss = (p2-p1).mean()
                wloss.backward()
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                
                self.discriminator_optim.step()

                self.attention_optim_adversarial.zero_grad()
                
                output, p1, p2 = self.run_model(batch)
                adversarial_loss = (p1-p2).mean()
                adversarial_loss.backward()
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                
                self.attention_optim_adversarial.step()

                train_r.append(self.get_r(output, target))
                if i%10 == 0 :
                    print("Batch: {} train Loss: {} train_r: {}".format(i, reg_loss, train_r[-1]))

            val_loss = []
            val_r = []
            for i, batch in enumerate(dev_dataloader) :
                with torch.no_grad() :
                    target = batch[3].to(device)
                    output, p1, p2 = self.run_model(batch)

                    reg_loss = mse(output, target)
                    val_loss.append(reg_loss.item())
                    val_r.append(self.get_r(output, target))
                    if i%10 == 0 :
                        print("Batch: {} val Loss: {} val_r: {}".format(i, reg_loss, val_r[-1]))

            self.training_stats.append({
                'training loss' : sum(train_loss)/len(train_loss),
                'validation loss' : sum(val_loss)/len(val_loss),
                'train r' : torch.tensor(train_r).mean(dim=0),
                'val r' : torch.tensor(val_r).mean(dim=0),
            })
            print(self.training_stats[-1])
            
        def save_model(self) :
            #TODO: Add function to save model
            pass
        
        def plot(self, r_values) :
            #TODO: Plot r values
            pass

In [1]:
train = Train()
train.train(50)

NameError: name 'train' is not defined

In [20]:
model_state_dict = {
        "attention_1":train.model.attention_1.state_dict(),
        "attention_2":train.model.attention_2.state_dict(),
        "attention_s":train.model.attention_s.state_dict(),
        "features":train.model.features.state_dict(),   
        "regression_1":train.model.regression_1.state_dict(),
        "regression_2":train.model.regression_1.state_dict(),
        "discriminator":train.model.discriminator.state_dict()
        }
torch.save(model_state_dict, "VA_model.pt")

In [64]:
val_loss = []
val_r = []
mse = nn.MSELoss()
training_stats = []
for i, batch in enumerate(test_dataloader) :
    with torch.no_grad() :
        target = batch[3].to(device)
        output, p1, p2 = train.run_model(batch)

        reg_loss = mse(output, target)
        val_loss.append(reg_loss.item())
        val_r.append(train.get_r(output, target))
        if i%10 == 0 :
            print("Batch: {} val Loss: {} val_r: {}".format(i, reg_loss, val_r[-1]))

# training_stats.append({
#     'validation loss' : sum(val_loss)/len(val_loss),
#     'val r' : torch.tensor(val_r).mean(dim=0),
# })

Batch: 0 val Loss: 0.07396862655878067 val_r: [0.47933176870423644, 0.3004015398340143]


In [60]:
def plot(value1, value2, label, index=None) :
    sns.set(style='darkgrid')
    sns.set(font_scale=1.5)
    # plt.rcParams["figure.figsize"] = (12,6)
    if index is not None: 
        value1 = [i[index] for i in value1]
        value2 = [i[index] for i in value2]
    plt.plot(value1, 'b-o', label="Training")
    plt.plot(value2, 'g-o', label="Validation")

    plt.title("Training & Validation "+format(label))
    plt.xlabel("Epoch")
    plt.ylabel(label)
    plt.legend()
    # plt.xticks(len(value1))

    plt.show()

In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
df_stats = pd.DataFrame(data=train.training_stats)
plot(df_stats["training loss"].values[10:],df_stats["validation loss"].values[10:],"Loss")
plot(df_stats["train r"],df_stats["val r"],"Valence R Value", index=0)
plot(df_stats["train r"],df_stats["val r"],"Dominance R Value", index=1)

NameError: name 'pd' is not defined