In [1]:
import json
import torch
import pickle
import numpy as np
import torch.nn as nn
from scipy import stats
import torch.optim as optim
from torch.utils.data import TensorDataset, DataLoader
from generate_embeddings import *
from nn_utils import Attention, Discriminator, EmotionRegression, FeatureExtaction

In [7]:
batch_size = 64
lr_attn = 1e-4
lr_feature = 8e-5
lr_regressor = 4e-5
lr_discriminator = 4e-5
epochs = 100
device = "cuda:0" if torch.cuda.is_available() else "cpu"
print(device)

cuda:0


In [3]:
dataset = Dataset()
vocab = dataset.get_vocab().to(device)
train_dataset = dataset.read_data('./data/train.csv') 
dev_dataset = dataset.read_data('./data/dev.csv')
test_dataset = dataset.read_data('./data/test.csv')

train_dataloader = DataLoader(
    train_dataset,
    shuffle=True,
    batch_size=batch_size
    )
val_dataloader = DataLoader(
    dev_dataset,
    shuffle=False,
    batch_size=batch_size
    )
test_dataloader = DataLoader(
    test_dataset,
    shuffle=False,
    batch_size=batch_size
    )

Vocab: torch.Size([17339, 300])
100%|██████████| 8061/8061 [00:28<00:00, 283.45it/s]
  4%|▍         | 43/1000 [00:00<00:02, 428.42it/s]Tokens: torch.Size([8061, 109]) Lengths: torch.Size([8061]) Target: torch.Size([8061, 3])
100%|██████████| 1000/1000 [00:02<00:00, 347.67it/s]
  3%|▎         | 28/1000 [00:00<00:03, 275.36it/s]Tokens: torch.Size([1000, 73]) Lengths: torch.Size([1000]) Target: torch.Size([1000, 3])
100%|██████████| 1000/1000 [00:03<00:00, 257.21it/s]Tokens: torch.Size([1000, 134]) Lengths: torch.Size([1000]) Target: torch.Size([1000, 3])



In [4]:
class AAN(nn.Module) :
    def __init__(self, embed_size=300, hidden_size=150) :
        #['V', 'A', 'D', 'S']
        super(AAN, self).__init__()
        self.embed_size = embed_size
        self.hidden_size = hidden_size
        self.feature_size = self.hidden_size*4

        self.attention_1 = Attention(self.embed_size)
        self.attention_2 = Attention(self.embed_size)
        self.attention_s = Attention(self.embed_size)

        self.features = FeatureExtaction(self.embed_size, self.hidden_size)
        
        self.regression_1 = EmotionRegression(self.feature_size * 2)
        self.regression_2 = EmotionRegression(self.feature_size * 2)

        self.discriminator = Discriminator(self.feature_size)
    
    def forward(self, vocab, sentences, source_lengths) :
        sentences = vocab(sentences)
        sentences = sentences.detach()

        sentences_1 = self.attention_1(sentences, source_lengths)
        sentences_2 = self.attention_2(sentences, source_lengths)
        sentences_s = self.attention_s(sentences, source_lengths)

        features_1 = self.features(sentences_1, source_lengths)
        features_2 = self.features(sentences_2, source_lengths)
        features_s = self.features(sentences_s, source_lengths)

        value_1 = self.regression_1(torch.cat((features_1, features_s), dim=1))
        value_2 = self.regression_2(torch.cat((features_2, features_s), dim=1))

        p1, p2 = self.discriminator(features_1), self.discriminator(features_2)
        
        return value_1, value_2, p1, p2


In [13]:
class Train() :
    def __init__(self, type=[0,1]) :
        super(Train, self).__init__()
        self.type = type
        self.model = AAN().to(device)
        # self.init_weights()
        self.mse = nn.MSELoss()
        self.attention_optim = optim.Adam(
            list(self.model.attention_1.parameters())+
            list(self.model.attention_2.parameters())+
            list(self.model.attention_s.parameters()), lr=lr_attn)
        self.attention_optim_adversarial = optim.Adam(
            list(self.model.attention_1.parameters())+
            list(self.model.attention_2.parameters())+
            list(self.model.attention_s.parameters()), lr=lr_attn)
        self.feature_optim = optim.Adam(self.model.features.parameters(), lr=lr_feature)
        self.regressor_optim = optim.RMSprop(
            list(self.model.regression_1.parameters())+
            list(self.model.regression_1.parameters()), lr=lr_regressor)
        self.discriminator_optim = optim.RMSprop(self.model.discriminator.parameters(), lr=lr_discriminator)
        self.training_stats = []
    
    def init_weights(self) :
        for param in self.model.parameters() :
            temp = np.sqrt(6.0/(sum([i for i in param.shape])+1e-8))
            param.data.uniform_(-temp, temp)
    
    def run_model(self, batch) :
        sentences = batch[0].to(device)
        source_lengths = batch[1].to(device)
        target = batch[2].to(device)
        value_1, value_2, p1, p2 = self.model(vocab, sentences, source_lengths)
        output = torch.cat((value_1, value_2), dim=1)
        target = target[:,self.type].float()
        return output, p1, p2, target
    
    def get_r(self, output, target) :
        temp = [ stats.pearsonr(output[:,i].cpu().detach(), target[:,i].cpu().detach())[0] for i in range(2)]
        return temp
    
    def train(self, epochs=epochs) :
        mse = nn.MSELoss()
        for epoch_i in range(epochs) :
            print('======== Epoch {:} / {:} ========'.format(epoch_i + 1, epochs))
            self.model.train()
            train_r = []
            train_loss = []
            for i, batch in enumerate(train_dataloader) :

                self.attention_optim.zero_grad()
                self.feature_optim.zero_grad()
                self.regressor_optim.zero_grad()

                output, p1, p2, target = self.run_model(batch)
                reg_loss = mse(output, target)
                train_loss.append(reg_loss.item())
                reg_loss.backward()
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)

                self.attention_optim.step()
                self.feature_optim.step()
                self.regressor_optim.step()

                self.discriminator_optim.zero_grad()

                output, p1, p2, target = self.run_model(batch)
                wloss = (p2-p1).mean()
                wloss.backward()
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                
                self.discriminator_optim.step()

                self.attention_optim_adversarial.zero_grad()
                
                output, p1, p2, target = self.run_model(batch)
                adversarial_loss = (p1-p2).mean()
                adversarial_loss.backward()
                # torch.nn.utils.clip_grad_norm_(self.model.parameters(), 1.0)
                
                self.attention_optim_adversarial.step()

                train_r.append(self.get_r(output, target))
                if i%10 == 0 :
                    print("Batch: {} train Loss: {} train_r: {}".format(i, reg_loss, train_r[-1]))

            val_loss = []
            val_r = []
            for i, batch in enumerate(val_dataloader) :
                with torch.no_grad() :
                    output, p1, p2, target = self.run_model(batch)

                    reg_loss = mse(output, target)
                    val_loss.append(reg_loss.item())
                    val_r.append(self.get_r(output, target))
                    if i%10 == 0 :
                        print("Batch: {} val Loss: {} val_r: {}".format(i, reg_loss, val_r[-1]))

            self.training_stats.append({
                'training loss' : sum(train_loss)/len(train_loss),
                'validation loss' : sum(val_loss)/len(val_loss),
                'train r' : torch.tensor(train_r).mean(dim=0).item(),
                'val r' : torch.tensor(val_r).mean(dim=0).item(),
            })
            print(json.dumps(self.training_stats[-1], ident=4))
            
        def save_model(self) :
            #TODO: Add function to save model
            pass
        
        def plot(self, r_values) :
            #TODO: Plot r values
            pass

In [17]:
# train = Train()
train.train(10)

Batch: 0 train Loss: 0.07828138768672943 train_r: [0.5775601206275207, 0.3465344150288597]
Batch: 10 train Loss: 0.06599719077348709 train_r: [0.46358762226571854, 0.19410596117246268]
Batch: 20 train Loss: 0.06102125346660614 train_r: [0.5300792457119532, 0.2608446691863796]
Batch: 30 train Loss: 0.05710688233375549 train_r: [0.38688727529512734, 0.23934299925966485]
Batch: 40 train Loss: 0.06419407576322556 train_r: [0.536086566328229, 0.2156681558311598]
Batch: 50 train Loss: 0.08376166224479675 train_r: [0.42012290905567484, 0.2908518252889128]
Batch: 60 train Loss: 0.09530360251665115 train_r: [0.43838315659571647, 0.23874884288153073]
Batch: 70 train Loss: 0.10118158906698227 train_r: [0.47525394870867854, 0.49035453684012703]
Batch: 80 train Loss: 0.11314847320318222 train_r: [0.43082864943182975, 0.39633179916840566]
Batch: 90 train Loss: 0.0939970463514328 train_r: [0.6229797964651029, 0.47433017897496055]
Batch: 100 train Loss: 0.06778638064861298 train_r: [0.6530572937852828

In [20]:
model_state_dict = {
        "attention_1":train.model.attention_1.state_dict(),
        "attention_2":train.model.attention_2.state_dict(),
        "attention_s":train.model.attention_s.state_dict(),
        "features":train.model.features.state_dict(),   
        "regression_1":train.model.regression_1.state_dict(),
        "regression_2":train.model.regression_1.state_dict(),
        "discriminator":train.model.discriminator.state_dict()
        }
torch.save(model_state_dict, "VA_model.pt")

In [64]:
test_dataloader = DataLoader(
    test_dataset,
    shuffle=False,
    batch_size=10000
    )
val_loss = []
val_r = []
mse = nn.MSELoss()
training_stats = []
for i, batch in enumerate(test_dataloader) :
    with torch.no_grad() :
        output, p1, p2, target = train.run_model(batch)

        reg_loss = mse(output, target)
        val_loss.append(reg_loss.item())
        val_r.append(train.get_r(output, target))
        if i%10 == 0 :
            print("Batch: {} val Loss: {} val_r: {}".format(i, reg_loss, val_r[-1]))

# training_stats.append({
#     'validation loss' : sum(val_loss)/len(val_loss),
#     'val r' : torch.tensor(val_r).mean(dim=0),
# })

Batch: 0 val Loss: 0.07396862655878067 val_r: [0.47933176870423644, 0.3004015398340143]


In [60]:
def plot(value1, value2, label, index=None) :
    sns.set(style='darkgrid')
    sns.set(font_scale=1.5)
    # plt.rcParams["figure.figsize"] = (12,6)
    if index is not None: 
        value1 = [i[index] for i in value1]
        value2 = [i[index] for i in value2]
    plt.plot(value1, 'b-o', label="Training")
    plt.plot(value2, 'g-o', label="Validation")

    plt.title("Training & Validation "+format(label))
    plt.xlabel("Epoch")
    plt.ylabel(label)
    plt.legend()
    # plt.xticks(len(value1))

    plt.show()

In [1]:
import seaborn as sns
import matplotlib.pyplot as plt
df_stats = pd.DataFrame(data=train.training_stats)
plot(df_stats["training loss"].values[10:],df_stats["validation loss"].values[10:],"Loss")
plot(df_stats["train r"],df_stats["val r"],"Valence R Value", index=0)
plot(df_stats["train r"],df_stats["val r"],"Dominance R Value", index=1)

NameError: name 'pd' is not defined