In [0]:
# !pip install pytorch 
# !pip install torchtext

In [2]:
%tensorflow_version 2.x
import tensorflow as tf
from tensorflow.keras.datasets import reuters
import numpy as np
from tensorflow.keras.utils import to_categorical

VOCAB_SIZE = 10000

(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=VOCAB_SIZE)
word_index = reuters.get_word_index()
reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
# Note that our indices were offset by 3
# because 0, 1 and 2 are reserved indices for "padding", "start of sequence", and "unknown".
decoded_newswire = ' '.join([reverse_word_index.get(i - 3, '?') for i in train_data[0]])
print(decoded_newswire)


def vectorize_sequences(sequences, dimension=VOCAB_SIZE):
    results = np.zeros((len(sequences), dimension))
    for i, sequence in enumerate(sequences):
        results[i, sequence] = 1.
    return results

# Our vectorized training+test data
x_train = vectorize_sequences(train_data)
x_test = vectorize_sequences(test_data)

# Our vectorized training+test label 
one_hot_train_labels = to_categorical(train_labels)
one_hot_test_labels = to_categorical(test_labels)

# split original train set into train + valid 
x_val = x_train[:300]
partial_x_train = x_train[300:]
y_val = one_hot_train_labels[:300]
partial_y_train = one_hot_train_labels[300:]

D_in, H, D_out = partial_x_train.shape[1], 128, len(set(train_labels))

print("- size train/dev/test:", len(partial_x_train), len(x_val), len(x_test))


TensorFlow 2.x selected.
? ? ? said as a result of its december acquisition of space co it expects earnings per share in 1987 of 1 15 to 1 30 dlrs per share up from 70 cts in 1986 the company said pretax net should rise to nine to 10 mln dlrs from six mln dlrs in 1986 and rental operation revenues to 19 to 22 mln dlrs from 12 5 mln dlrs it said cash flow per share this year should be 2 50 to three dlrs reuter 3
- size train/dev/test: 8682 300 2246


In [3]:
# -*- coding: utf-8 -*-
import random
import torch
import numpy as np
from torch import nn
import copy 

seed = 100
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)
torch.cuda.manual_seed(seed)
torch.cuda.manual_seed_all(seed)
torch.backends.cudnn.deterministic = True

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(torch.cuda.get_device_name(device=device))
print(device)

Tesla T4
cuda


In [0]:
class ReutersDataset(torch.utils.data.TensorDataset):
    """Reuters dataset."""

    def __init__(self, data, label, device=None):
        """
        Args:
            data (torch.Tensor): data sample using Tensor type.
            label (torch.Tensor): label sample using Tensor type 
        """
        self.data = data.to(device) if device is not None else data
        self.label = label if device is not None else label
        self.size = len(self.data)

    def __len__(self):
        return self.size

    def __getitem__(self, idx):
        return self.data[idx], self.label[idx] 


In [0]:
import torch.nn.functional as F
from torch.autograd import Variable

import torch
import torch.autograd as autograd
import torch.nn as nn
# import torch.functional as F
import torch.optim as optim

from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


class FFN(torch.nn.Module):
    def __init__(self, D_in, H, D_out):
        """
        In the constructor we construct three nn.Linear instances that we will use
        in the forward pass.
        """
        super(FFN, self).__init__()

        self.linear1 = torch.nn.Linear(D_in, H)
        self.activation1 = torch.nn.Tanh()

        self.linear2 = torch.nn.Linear(H,  int(H/2))
        self.activation2 = torch.nn.Tanh()

        self.output_linear = torch.nn.Linear(int(H/2), D_out)
        self.softmax = torch.nn.Softmax(dim=1)
        self.apply(self._init_weights)
    
    @staticmethod
    def _init_weights(m):
        if type(m) == torch.nn.Linear:
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(0.00)
            
    def forward(self, x): 
        h1 = self.activation1(self.linear1(x))
        h2 = self.activation2(self.linear2(h1))
        return self.softmax(self.output_linear(h2))


class RNNText(nn.Module):

  def __init__(self, vocab_size, emb_dim, class_num, hidden_dim=300, bidirectional=True):

    super(RNNText, self).__init__()

    self.emb_dim = emb_dim
    self.hidden_dim = hidden_dim
    self.vocab_size = vocab_size

    self.embedding = nn.Embedding(vocab_size, emb_dim)
    self.lstm = nn.LSTM(emb_dim, hidden_dim, num_layers=1, bidirectional=bidirectional, batch_first=True)

    self.hidden2out = nn.Linear(hidden_dim, class_num)
    self.softmax = nn.LogSoftmax()

    self.dropout_layer = nn.Dropout(p=0.2)


  def init_hidden(self, batch_size):
    return(autograd.Variable(torch.randn(1, batch_size, self.hidden_dim)),
            autograd.Variable(torch.randn(1, batch_size, self.hidden_dim)))


  def forward(self, batch, lengths=None):
    self.hidden = self.init_hidden(batch.size(-1))

    embeds = self.embedding(batch)
    packed_input = embeds #pack_padded_sequence(embeds, lengths)
    outputs, (ht, ct) = self.lstm(packed_input)

    # ht is the last hidden state of the sequences
    # ht = (1 x batch_size x hidden_dim)
    # ht[-1] = (batch_size x hidden_dim)
    output = self.dropout_layer(ht[-1])
    output = self.hidden2out(output)
    output = self.softmax(output)

    return output

class CNNText(nn.Module):
    
    def __init__(self, vocab_size, emb_dim, class_num, channels_out=32, kernel_sizes=(3, 4, 5), dropout=0.05):
        super(CNNText, self).__init__()
        
        V = vocab_size
        D = emb_dim
        C = class_num
        Ci = 1
        Co = channels_out
        Ks = kernel_sizes

        self.embed = nn.Embedding(V, D)
        self.convs1 = nn.ModuleList([nn.Conv2d(Ci, Co, (K, D)) for K in Ks])
        '''
        self.conv13 = nn.Conv2d(Ci, Co, (3, D))
        self.conv14 = nn.Conv2d(Ci, Co, (4, D))
        self.conv15 = nn.Conv2d(Ci, Co, (5, D))
        '''
        self.dropout = nn.Dropout(dropout)
        self.fc1 = nn.Linear(len(Ks)*Co, C)

        self.apply(self._init_weights)
    
    @staticmethod
    def _init_weights(m):
        if type(m) == torch.nn.Linear:
            torch.nn.init.xavier_uniform_(m.weight)
            m.bias.data.fill_(0.00)
        elif type(m) == torch.nn.Embedding:
            torch.nn.init.xavier_uniform_(m.weight)

    def conv_and_pool(self, x, conv):
        x = F.relu(conv(x)).squeeze(3)  # (N, Co, W)
        x = F.max_pool1d(x, x.size(2)).squeeze(2)
        return x

    def forward(self, x):
        x = self.embed(x)  # (N, W, D)
        
        # if self.args.static:
        #     x = Variable(x)

        x = x.unsqueeze(1)  # (N, Ci, W, D)

        x = [F.relu(conv(x)).squeeze(3) for conv in self.convs1]  # [(N, Co, W), ...]*len(Ks)

        x = [F.max_pool1d(i, i.size(2)).squeeze(2) for i in x]  # [(N, Co), ...]*len(Ks)

        x = torch.cat(x, 1)

        '''
        x1 = self.conv_and_pool(x,self.conv13) #(N,Co)
        x2 = self.conv_and_pool(x,self.conv14) #(N,Co)
        x3 = self.conv_and_pool(x,self.conv15) #(N,Co)
        x = torch.cat((x1, x2, x3), 1) # (N,len(Ks)*Co)
        '''
        x = self.dropout(x)  # (N, len(Ks)*Co)
        logit = self.fc1(x)  # (N, C)
        return logit


In [0]:

# evaluate model in training phase or testing phase 
def evaluate(model_nn, data_loader):

  # using no_grad to prevent the conflict in training phase
  # in this setting, there is not any gradient computations 
  with torch.no_grad():
    total_loss = 0
    total_acc = 0
    total_sample = 0
    total_predictions = torch.LongTensor([]).to(device)
    model_nn.eval()

    # compute each minibatch in test/valid set
    for mini_batch in data_loader:
      # infer steps similar to training phase
      inputs, labels = mini_batch
      inputs = inputs.to(device)
      labels = labels.to(device)
      predictions = model_nn(inputs)
      lb_predictions = torch.max(predictions, dim=1)[1]

      # calculate loss and accuracy 
      loss = criterion(predictions, labels)
      acc = (lb_predictions == labels).sum()

      # gather all result for each mini_batch  
      total_predictions = torch.cat((total_predictions, lb_predictions))
      total_sample += len(labels)
      total_loss += loss.item()
      total_acc += acc.item()

    return total_loss/len(data_loader), total_acc / total_sample, total_predictions


def train(model, criterion, optimizer, trainloader, validloader, N_EPOCHS = 11):
    # training  model with some epoch 
    for epoch in range(N_EPOCHS):  
        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            model.train()

            # get the inputs and label 
            inputs, labels = data
            inputs = inputs.to(device)
            labels = labels.to(device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 200 == 199:    # print every 200 mini-batches
                print('[Epoch %d/%d, Iter %5d] train loss: %.3f' %
                      (epoch + 1, N_EPOCHS, i + 1, running_loss / 200))
                running_loss = 0.0

                # 
                valid_loss, valid_acc, _ = evaluate(model, validloader)
                print('[Epoch %d, Iter %5d] valid loss: %.3f - acc: %.3f' %
                      (epoch + 1, i + 1, valid_loss, valid_acc))
    print('Finished Training')
    return model 


In [7]:
trainloader = torch.utils.data.DataLoader(ReutersDataset(torch.Tensor(partial_x_train),
                                                         torch.LongTensor(train_labels[300:]), 
                                                         device=None), 
                                          batch_size=32,
                                          shuffle=True, num_workers=2)

validloader = torch.utils.data.DataLoader(ReutersDataset(torch.Tensor(x_val),
                                                         torch.LongTensor(train_labels[:300]), 
                                                         device=None), 
                                          batch_size=32,
                                          shuffle=False, num_workers=2)

testloader = torch.utils.data.DataLoader(ReutersDataset(torch.Tensor(x_test),
                                                        torch.LongTensor(test_labels), 
                                                        device=None), 
                                          batch_size=32,
                                          shuffle=False, num_workers=2)



# Construct our model by instantiating the class defined above
model = FFN(D_in, H, D_out)
criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# gpu load
model = model.to(device)
print(model)

# training 
model = train(model, criterion, optimizer, trainloader, validloader, N_EPOCHS = 11)

# evaluate
test_loss, test_acc, lb_predictions = evaluate(model, testloader)
print('- test loss: %.3f \n- test acc: %.3f' % (test_loss, test_acc*100))

print("- 30 first predictions + gold labels:")
print(lb_predictions[:30])
print(test_labels[:30])


FFN(
  (linear1): Linear(in_features=10000, out_features=128, bias=True)
  (activation1): Tanh()
  (linear2): Linear(in_features=128, out_features=64, bias=True)
  (activation2): Tanh()
  (output_linear): Linear(in_features=64, out_features=46, bias=True)
  (softmax): Softmax(dim=1)
)
[Epoch 1/11, Iter   200] train loss: 3.327
[Epoch 1, Iter   200] valid loss: 3.201 - acc: 0.667
[Epoch 2/11, Iter   200] train loss: 3.149
[Epoch 2, Iter   200] valid loss: 3.143 - acc: 0.730
[Epoch 3/11, Iter   200] train loss: 3.105
[Epoch 3, Iter   200] valid loss: 3.108 - acc: 0.773
[Epoch 4/11, Iter   200] train loss: 3.033
[Epoch 4, Iter   200] valid loss: 3.058 - acc: 0.813
[Epoch 5/11, Iter   200] train loss: 3.003
[Epoch 5, Iter   200] valid loss: 3.054 - acc: 0.817
[Epoch 6/11, Iter   200] train loss: 2.983
[Epoch 6, Iter   200] valid loss: 3.034 - acc: 0.840
[Epoch 7/11, Iter   200] train loss: 2.970
[Epoch 7, Iter   200] valid loss: 3.033 - acc: 0.847
[Epoch 8/11, Iter   200] train loss: 2.959

In [8]:

# define padding method 
def padding_word(mini_batch,  padding_id = 0, max_length=-1):
  new_data = []
  if max_length==-1: 
    # get max sequence length in data 
    for doc in mini_batch:
      if max_length < len(doc):
        max_length = len(doc)

  for i, doc in enumerate(mini_batch):
    if len(doc) <= max_length:
      new_doc = doc + [padding_id]*(max_length - len(doc))
    else:
      new_doc = doc[:max_length]
    new_data.append(list(new_doc))

  return np.array(new_data) 
 
# padding and cut some words in sentence whose lengh greater MAX_DOC_LENGTH
MAX_DOC_LENGTH = 300
padding_train_data = padding_word(train_data, max_length=MAX_DOC_LENGTH)
padding_test_data = padding_word(test_data, max_length=MAX_DOC_LENGTH)

trainloader = torch.utils.data.DataLoader(ReutersDataset(torch.LongTensor(padding_train_data[300:]),
                                                         torch.LongTensor(train_labels[300:]), 
                                                         device=None), 
                                          batch_size=32,
                                          shuffle=True, num_workers=2)

validloader = torch.utils.data.DataLoader(ReutersDataset(torch.LongTensor(padding_train_data[:300]),
                                                         torch.LongTensor(train_labels[:300]), 
                                                         device=None), 
                                          batch_size=32,
                                          shuffle=False, num_workers=2)

testloader = torch.utils.data.DataLoader(ReutersDataset(torch.LongTensor(padding_test_data),
                                                         torch.LongTensor(test_labels), 
                                                         device=None), 
                                          batch_size=32,
                                          shuffle=False, num_workers=2)

# define model + optimize method 
model = CNNText(vocab_size=VOCAB_SIZE, emb_dim=200, class_num=D_out)
model = model.to(device)
print(model)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
 
# training model 
model = train(model, criterion, optimizer, trainloader, validloader, N_EPOCHS = 8)

# evaluate
test_loss, test_acc, lb_predictions = evaluate(model, testloader)
print('- test loss: %.3f \n- test acc: %.3f' % (test_loss, test_acc*100))

print("- 30 first predictions + gold labels:")
print(lb_predictions[:30])
print(test_labels[:30])

CNNText(
  (embed): Embedding(10000, 200)
  (convs1): ModuleList(
    (0): Conv2d(1, 32, kernel_size=(3, 200), stride=(1, 1))
    (1): Conv2d(1, 32, kernel_size=(4, 200), stride=(1, 1))
    (2): Conv2d(1, 32, kernel_size=(5, 200), stride=(1, 1))
  )
  (dropout): Dropout(p=0.05, inplace=False)
  (fc1): Linear(in_features=96, out_features=46, bias=True)
)
[Epoch 1/8, Iter   200] train loss: 2.007
[Epoch 1, Iter   200] valid loss: 1.329 - acc: 0.703
[Epoch 2/8, Iter   200] train loss: 1.079
[Epoch 2, Iter   200] valid loss: 0.980 - acc: 0.800
[Epoch 3/8, Iter   200] train loss: 0.729
[Epoch 3, Iter   200] valid loss: 0.805 - acc: 0.833
[Epoch 4/8, Iter   200] train loss: 0.489
[Epoch 4, Iter   200] valid loss: 0.753 - acc: 0.827
[Epoch 5/8, Iter   200] train loss: 0.340
[Epoch 5, Iter   200] valid loss: 0.758 - acc: 0.830
[Epoch 6/8, Iter   200] train loss: 0.236
[Epoch 6, Iter   200] valid loss: 0.776 - acc: 0.830
[Epoch 7/8, Iter   200] train loss: 0.185
[Epoch 7, Iter   200] valid loss

In [9]:

# define padding method 
def padding_word(mini_batch,  padding_id = 0, max_length=-1):
  new_data = []
  if max_length==-1: 
    # get max sequence length in data 
    for doc in mini_batch:
      if max_length < len(doc):
        max_length = len(doc)

  for i, doc in enumerate(mini_batch):
    if len(doc) <= max_length:
      new_doc = doc + [padding_id]*(max_length - len(doc))
    else:
      new_doc = doc[:max_length]
    new_data.append(list(new_doc))

  return np.array(new_data) 
 
# padding and cut some words in sentence whose lengh greater MAX_DOC_LENGTH
MAX_DOC_LENGTH = 150
padding_train_data = padding_word(train_data, max_length=MAX_DOC_LENGTH)
padding_test_data = padding_word(test_data, max_length=MAX_DOC_LENGTH)

trainloader = torch.utils.data.DataLoader(ReutersDataset(torch.LongTensor(padding_train_data[300:]),
                                                         torch.LongTensor(train_labels[300:]), 
                                                         device=None), 
                                          batch_size=32,
                                          shuffle=True, num_workers=2)

validloader = torch.utils.data.DataLoader(ReutersDataset(torch.LongTensor(padding_train_data[:300]),
                                                         torch.LongTensor(train_labels[:300]), 
                                                         device=None), 
                                          batch_size=32,
                                          shuffle=False, num_workers=2)

testloader = torch.utils.data.DataLoader(ReutersDataset(torch.LongTensor(padding_test_data),
                                                         torch.LongTensor(test_labels), 
                                                         device=None), 
                                          batch_size=32,
                                          shuffle=False, num_workers=2)

# define model + optimize method  vocab_size, embedding_dim, hidden_dim, output_size
model = RNNText(vocab_size=VOCAB_SIZE, emb_dim=100, hidden_dim=100, class_num=D_out)
model = model.to(device)
print(model)

criterion = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
 
# training model 
model = train(model, criterion, optimizer, trainloader, validloader, N_EPOCHS = 15)

# evaluate
test_loss, test_acc, lb_predictions = evaluate(model, testloader)
print('- test loss: %.3f \n- test acc: %.3f' % (test_loss, test_acc*100))

print("- 30 first predictions + gold labels:")
print(lb_predictions[:30])
print(test_labels[:30])

RNNText(
  (embedding): Embedding(10000, 100)
  (lstm): LSTM(100, 100, batch_first=True, bidirectional=True)
  (hidden2out): Linear(in_features=100, out_features=46, bias=True)
  (softmax): LogSoftmax()
  (dropout_layer): Dropout(p=0.2, inplace=False)
)




[Epoch 1/15, Iter   200] train loss: 2.038
[Epoch 1, Iter   200] valid loss: 1.659 - acc: 0.577
[Epoch 2/15, Iter   200] train loss: 1.585
[Epoch 2, Iter   200] valid loss: 1.507 - acc: 0.610
[Epoch 3/15, Iter   200] train loss: 1.368
[Epoch 3, Iter   200] valid loss: 1.326 - acc: 0.680
[Epoch 4/15, Iter   200] train loss: 1.104
[Epoch 4, Iter   200] valid loss: 1.135 - acc: 0.747
[Epoch 5/15, Iter   200] train loss: 0.909
[Epoch 5, Iter   200] valid loss: 1.057 - acc: 0.780
[Epoch 6/15, Iter   200] train loss: 0.765
[Epoch 6, Iter   200] valid loss: 1.077 - acc: 0.767
[Epoch 7/15, Iter   200] train loss: 0.656
[Epoch 7, Iter   200] valid loss: 0.971 - acc: 0.767
[Epoch 8/15, Iter   200] train loss: 0.562
[Epoch 8, Iter   200] valid loss: 1.009 - acc: 0.790
[Epoch 9/15, Iter   200] train loss: 0.474
[Epoch 9, Iter   200] valid loss: 1.010 - acc: 0.797
[Epoch 10/15, Iter   200] train loss: 0.403
[Epoch 10, Iter   200] valid loss: 0.987 - acc: 0.790
[Epoch 11/15, Iter   200] train loss: 