In [2]:
import pickle
import random
import numpy as np
import torchtext.vocab as vocab
from carsonNLP.embedding import Vocabulary
from carsonNLP.string_token_functions import *

import torch
import torch.nn as nn
import torch.autograd as autograd
from torch.autograd import Variable
from torch import optim
import torch.nn.functional as F

# Report the runtime and record once whether a CUDA device can be used;
# `use_cuda` is the notebook-wide switch consulted by later cells.
print('you are using PyTorch version ',torch.__version__)

use_cuda = torch.cuda.is_available()
print('you have at least 1 GPU' if use_cuda else 'no GPUs detected')

%load_ext autoreload
%autoreload 2
%matplotlib inline

you are using PyTorch version  0.4.1
no GPUs detected


In [33]:
# Load the train/validation/test split. The pickle holds a 5-tuple:
# (training_dict, validation_dict, test_dict, all_data, all_categories).
# SECURITY: pickle.load can execute arbitrary code -- only open files you trust.
# Alternative dataset: "trvaltest/emotrvaltest6.p"
with open("trvaltest/trvaltest_10cl_5-100words_v2.p", "rb") as f:
    # `with` guarantees the handle is closed even if unpickling fails.
    training_dict, validation_dict, test_dict, all_data, all_categories = \
        pickle.load(f, encoding="utf-8")
print(len(validation_dict['autism']))
print(validation_dict['happy'][4])

# This hard-coded list deliberately replaces the `all_categories` value that
# came out of the pickle. Its order defines the integer class label of each
# category (labels are obtained via `all_categories.index(...)`), so it must
# match the order the saved model was trained with.
all_categories = ['addiction',
                  'anxiety',
                  'autism',
                  'bipolar',
                  'conversation',
                  'depression',
                  'happy',
                  'jokes',
                  'schizophrenia',
                  'selfharm']

100
group donates 90 defibrillators to indiana state police


In [26]:
# Load the vocabulary mappings and the pre-trained embedding matrix.
# SECURITY: pickle.load can execute arbitrary code -- only open files you trust.
with open("embeddings/dicts_embed_min40_folder4.p", "rb") as embed_file:
    # Context manager closes the file; the original left the handle open.
    index2word, word2index, embedding = pickle.load(embed_file)

# Populate a Vocabulary object with the loaded lookup tables so helper
# functions that take a vocabulary instance can use them.
vocabClass = Vocabulary()
vocabClass.index2word = index2word
vocabClass.word2index = word2index

In [8]:
class Attn(nn.Module):
    """Concat-style attention: scores every encoder output against a query state.

    The query hidden state is concatenated with each encoder output and passed
    through a small MLP (Linear -> PReLU -> Linear) that emits one scalar
    energy per time step. Energies are soft-maxed over the sequence axis.

    Args:
        hidden_size: feature size of both the query state and each encoder
            output (the MLP input is therefore ``2 * hidden_size`` wide).
    """

    def __init__(self, hidden_size):
        super(Attn, self).__init__()

        # Attribute name and layer order are kept so existing state dicts
        # (keys fc1.0.*, fc1.1.*, fc1.2.*) still load.
        self.fc1 = nn.Sequential(
                       nn.Linear(hidden_size*2, hidden_size),
                       nn.PReLU(),
                       nn.Linear(hidden_size, 1)
                    )

    def forward(self, hidden, encoder_outputs):
        """Compute attention weights over the sequence.

        Args:
            hidden: query tensor of shape (batch_size, hidden_size).
            encoder_outputs: tensor of shape (seq_len, batch_size, hidden_size).

        Returns:
            Tensor of shape (batch_size, 1, seq_len) whose last axis sums to 1.
        """
        seq_len = encoder_outputs.size(0)

        # Broadcast the query across time (a view, no copy) so all
        # seq_len * batch_size pairs are scored by a single MLP call instead of
        # one Python-level call per (batch, step) pair. The result lives on the
        # same device as the inputs, so no global CUDA flag is needed.
        query = hidden.unsqueeze(0).expand(seq_len, -1, -1)       # S x B x H
        energies = self.score(query, encoder_outputs)             # S x B x 1
        attn_energies = energies.squeeze(2).transpose(0, 1)       # B x S

        attn_weights = F.softmax(attn_energies, dim=1).unsqueeze(1) # batch_size,1,seq_len

        return attn_weights

    def score(self, hidden, encoder_output):
        """Return the unnormalised energy for query/encoder-output pairs.

        Both arguments must have identical shape ``(..., hidden_size)``; they
        are concatenated along the last axis, so a single pair of 1-D vectors
        works exactly as before and batched inputs are scored in one pass.

        Returns:
            Tensor of shape ``(..., 1)``.
        """
        concat = torch.cat((hidden, encoder_output), dim=-1)
        energy = self.fc1(concat)
        return energy

class RNN(nn.Module):
    """GRU sentence classifier with attention over the GRU outputs.

    Pipeline: embedding lookup -> multi-layer GRU (learned initial hidden
    state) -> attention over all time steps, queried by the top layer's final
    hidden state -> [final state ; context] -> Linear+BatchNorm -> tanh ->
    Linear+BatchNorm -> dropout -> output Linear.

    Args:
        hidden_size: GRU hidden size and width of the fully connected layers.
        embedding: pre-trained embedding matrix of shape (vocab_size, embed_dim),
            given as a numpy array or a torch tensor.
        output_size: number of target classes.
        num_layers: number of stacked GRU layers.
        bidirectional: forwarded to ``nn.GRU``. NOTE(review): with True the GRU
            outputs are ``2 * hidden_size`` wide, which does not match the
            ``hidden_size * 2`` input of ``Attn`` / ``fc_concat`` once the query
            state is concatenated, so only the unidirectional setting is
            shape-consistent with the layers defined here.
        train_embedding: whether the embedding weights receive gradients.
        dropout: dropout probability, used both between GRU layers and before
            the output layer.
    """

    def __init__(self, hidden_size, embedding, output_size, num_layers = 3, bidirectional = False, 
                 train_embedding = True , dropout = 0.0):

        super(RNN, self).__init__()

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.dropout = nn.Dropout(p=dropout) # p – probability of an element to be zeroed. Default: 0.5

        # Accept either a numpy array (original behaviour) or a tensor.
        if not torch.is_tensor(embedding):
            embedding = torch.from_numpy(embedding)
        embedding = embedding.float()

        # No device move here: the previous `embedding.cuda()` discarded its
        # result (Tensor.cuda() is not in-place). Calling `.cuda()` / `.to()`
        # on the module moves the embedding parameter with everything else.
        self.embedding = nn.Embedding(embedding.shape[0], embedding.shape[1])
        self.embedding.weight = nn.Parameter(embedding, requires_grad=train_embedding)
        self.gru = nn.GRU(embedding.shape[1], hidden_size, num_layers, 
                          bidirectional=bidirectional, dropout = dropout)

        num_directions = 2 if bidirectional else 1

        # make the initial hidden state learnable as well 
        # (stored for a batch of one and tiled to the batch size in forward)
        hidden0 = torch.zeros(self.num_layers*num_directions, 1, self.hidden_size)
        self.hidden0 = nn.Parameter(hidden0, requires_grad=True)

        self.num_cells = num_layers*num_directions

        self.fc_concat = nn.Sequential(
                             nn.Linear(hidden_size * 2, hidden_size),
                             nn.BatchNorm1d(num_features=self.hidden_size)
                             ) 

        self.fc1 = nn.Sequential(
                       nn.Linear(self.hidden_size,self.hidden_size),
                       nn.BatchNorm1d(num_features=self.hidden_size),
                       )

        self.out = nn.Linear(hidden_size,output_size)
        # Not used in forward() at the moment; kept because it is a registered
        # parameter and removing it would change the module's state-dict keys.
        self.prelu = nn.PReLU()
        self.attn = Attn(hidden_size)

    def forward(self, input_seqs):
        """Classify a batch of token-index sequences.

        Args:
            input_seqs: LongTensor of shape (seq_len, batch_size).

        Returns:
            Tuple ``(output, fc1, attn_weights)``:
            output -- class scores, (batch_size, output_size);
            fc1 -- pre-dropout representation, (batch_size, hidden_size);
            attn_weights -- attention distribution, (batch_size, 1, seq_len).
        """
        batch_size = input_seqs.size(1)
        hidden = self.hidden0.repeat(1, batch_size, 1)
        # Kept on the module so the embedded input can be inspected afterwards.
        self.embedded = self.embedding(input_seqs)
        encoder_outputs, last_seq_hidden = self.gru(self.embedded, hidden)
        # Final hidden state of the last entry in the (layers*directions) axis.
        last_hidden = last_seq_hidden[-1] 

        attn_weights = self.attn(last_hidden, encoder_outputs) #  batch_size,1,seq_len

        # output of GRU (seq_len, batch_size, hidden_size) -> (batch_size, seq_len, hidden_size)
        encoder_outputs_bsh = encoder_outputs.transpose(0, 1)

        # bmm does operation (b,1,s).bmm(b,s,h) = (b,1,h)
        context = torch.bmm(attn_weights,encoder_outputs_bsh)

        # Attentional vector using the RNN hidden state and context vector concatenated together 
        context = context.squeeze(1)       # B x 1 x H -> B x H
        concat_input = torch.cat((last_hidden, context), 1) # both should be batch_size x hidden_size

        # torch.tanh replaces the deprecated F.tanh (same result, no warning).
        concat_output = torch.tanh(self.fc_concat(concat_input)) # B x hidden_size

        fc1 = self.fc1(concat_output)

        output = self.out(self.dropout(fc1))

        return output, fc1 , attn_weights

In [10]:
# Number of target classes; passed to the model as its output size.
n_categories = len(all_categories)
print('n_categories =', n_categories)
# Sanity check: pick a category at random (unseeded, so the example changes
# between runs) and show its first validation sentence.
category = random.choice(all_categories)
print('example of category: ',category)
print(validation_dict[category][0])
print(all_categories)
# Spot check of the vocabulary mapping: displays the index stored for 'thats'.
vocabClass.word2index['thats']

n_categories = 10
example of category:  bipolar
take your metal once a day friend of mine linked this on fb for me
['addiction', 'anxiety', 'autism', 'bipolar', 'conversation', 'depression', 'happy', 'jokes', 'schizophrenia', 'selfharm']


19479

In [47]:
# Hyper-parameters of the classifier; they must match the checkpoint that is
# loaded later, because they also form part of the checkpoint file name.
n_hidden = 256
num_layers = 3
bidirectional = False

# Loss for single-label, multi-class classification on raw class scores.
criterion = nn.CrossEntropyLoss()

rnn = RNN(hidden_size=n_hidden,
          embedding=embedding,
          output_size=n_categories,
          num_layers=num_layers,
          bidirectional=bidirectional,
          dropout=0.2)
if use_cuda:
    rnn = rnn.cuda()

# Base name of the saved model: fixed prefix + "<n_hidden>_<num_layers>".
name = 'trvaltest_10cl_5-100w_embed_min40_v5_' + '_'.join(
    str(setting) for setting in (n_hidden, num_layers))

In [40]:
def eval_validation(vocabClass, verbose=True):
    """Evaluate the global ``rnn`` on every sentence in ``validation_dict``.

    Sentences are fed one at a time (batch size 1). Relies on the notebook
    globals ``rnn``, ``criterion``, ``validation_dict``, ``all_categories``
    and ``use_cuda``.

    Side effect: switches ``rnn`` to evaluation mode and leaves it there.

    Args:
        vocabClass: vocabulary object handed to ``indexesFromSentence`` to
            turn a sentence into token indices.
        verbose: when True, print the accuracy of each category.

    Returns:
        ``(avg_loss, accuracy)`` as Python floats, averaged over all
        validation sentences. Both are NaN when there are no sentences.
    """
    rnn.train(False)
    count = 0
    total_loss = 0.0
    total_correct = 0

    # Pure inference: without no_grad every forward pass builds an autograd
    # graph, and summing the loss tensors themselves would keep all of those
    # graphs alive until the function returns.
    with torch.no_grad():
        for category in validation_dict.keys():
            category_count = 0
            category_correct = 0
            category_tensor = torch.LongTensor([all_categories.index(category)])
            if use_cuda:
                category_tensor = category_tensor.cuda()
            for line in validation_dict[category]:
                # Column vector of token ids: (seq_len, batch_size=1).
                line_tensor = torch.LongTensor(indexesFromSentence(vocabClass, line)).view(-1,1)
                if use_cuda:
                    line_tensor = line_tensor.cuda()
                output, vector_rep, attn_wts = rnn(line_tensor)
                # .item() extracts a Python float so no tensor is accumulated.
                total_loss += criterion(output, category_tensor).item()
                category_count += 1
                count += 1
                topv, topi = output.topk(1, 1, True)
                # Plain int index rather than a 0-dim tensor used as list index.
                category_index = topi[0][0].item()
                if category == all_categories[category_index]:
                    category_correct += 1
                    total_correct += 1
            if verbose:
                # An empty category reports NaN instead of dividing by zero.
                if category_count:
                    category_accuracy = float(category_correct) / category_count
                else:
                    category_accuracy = float('nan')
                print("Category {} accuracy = {:.2f}".format(category, category_accuracy))

    if count == 0:
        # Nothing to evaluate: report NaN rather than raising ZeroDivisionError.
        return float('nan'), float('nan')

    avg_loss = total_loss / count
    accuracy = float(total_correct) / count

    return avg_loss, accuracy

def predict_line(line, vocabClass, rnn):
    """Classify a single sentence with the given model.

    The sentence is converted to token indices via ``indexesFromSentence`` and
    shaped as a (seq_len, 1) column, the model is switched to evaluation mode,
    and the predicted category is derived with ``category_from_output``.

    Returns:
        ``(guess, output, attn_wts)``: the first element returned by
        ``category_from_output``, the model's raw output, and the attention
        weights returned by the model.
    """
    token_ids = indexesFromSentence(vocabClass, line)
    line_tensor = Variable(torch.LongTensor(token_ids)).view(-1, 1)
    if use_cuda:
        line_tensor = line_tensor.cuda()

    rnn.train(False)
    # The intermediate representation and the guessed index are not needed here.
    output, _representation, attn_wts = rnn(line_tensor)
    guess, _guess_index = category_from_output(output)
    return guess, output, attn_wts

In [27]:
# Display the checkpoint base name to confirm which saved model will be loaded.
name

'trvaltest_10cl_5-100w_embed_min40_v5_256_3'

In [48]:
# Restore the saved weights for this configuration, then score the model on
# the validation set (per-category accuracies are printed by eval_validation).
checkpoint_path = "modelstate/" + name + "_cpu.pth"
saved_state = torch.load(checkpoint_path)
rnn.load_state_dict(saved_state)

avg_val_loss, val_accuracy = eval_validation(vocabClass)
print('avg_val_loss %.4f ,  val_accuracy %.4f' % (avg_val_loss, val_accuracy))



Category addiction accuracy = 0.70
Category happy accuracy = 0.65
Category selfharm accuracy = 0.58
Category schizophrenia accuracy = 0.59
Category bipolar accuracy = 0.44
Category anxiety accuracy = 0.67
Category depression accuracy = 0.34
Category autism accuracy = 0.42
Category jokes accuracy = 0.84
Category conversation accuracy = 0.75
avg_val_loss 1.9858 ,  val_accuracy 0.5980
