In [1]:
import torch
from torch import nn
import torch.nn.functional as F
from torch.optim import Adam, SGD
from torch.utils.data import DataLoader, Dataset
from torch.optim.lr_scheduler import ReduceLROnPlateau, StepLR
from torch.nn.utils import clip_grad_norm_
import pandas as pd
import numpy as np
from sklearn.metrics import classification_report, f1_score, accuracy_score
from tqdm import tqdm_notebook as tqdm
import torchnet as tnt
#from keras.preprocessing.sequence import pad_sequences
import warnings
warnings.filterwarnings('ignore')
from utils import *
import matplotlib.pyplot as plt
%matplotlib inline 
from deep_models import *

Using CNTK backend


using gpu


In [2]:
def train_model(model, data_iter, loss_fun, opt, max_grad_norm=1.0):
    """Run one training pass over ``data_iter`` and return the mean batch loss.

    Args:
        model: module called as ``model(headlines, bodies)``; it must return a
            3-tuple whose first element is fed to ``loss_fun``.
        data_iter: iterable yielding ``(headlines, bodies, labels)`` numpy arrays.
        loss_fun: callable ``loss_fun(out, labels)`` returning a scalar tensor.
        opt: optimizer updating ``model``'s parameters.
        max_grad_norm: gradient-norm clipping threshold (default ``1.0``, the
            value that used to be hard-coded).

    Returns:
        The first element of ``AverageValueMeter.value()`` (the running mean of
        the per-batch losses).
    """
    model.train()
    # Follow the model's own device instead of hard-coding .cuda(), so the
    # function also works on CPU or on a non-default GPU.
    device = next(model.parameters()).device
    meter = tnt.meter.AverageValueMeter()
    meter.reset()
    for headlines, bodies, labels in tqdm(data_iter):
        opt.zero_grad()
        headlines = torch.from_numpy(headlines).to(device).long()
        bodies = torch.from_numpy(bodies).to(device).long()
        labels = torch.from_numpy(labels).to(device).long()
        out, _, _ = model(headlines, bodies)
        # Predictions used to be arg-maxed and copied to the host every step but
        # were never read; that forced a device sync per batch for nothing.
        loss = loss_fun(out, labels)
        loss.backward()
        clip_grad_norm_(model.parameters(), max_grad_norm)
        opt.step()
        meter.add(loss.item())
    return meter.value()[0]

def val_model(model, data_iter, loss_fun):
    """Compute the mean batch loss over ``data_iter`` without updating weights.

    The model is switched to eval mode for the pass, gradients are disabled,
    and the model is put back into train mode before returning.

    Args:
        model: module called as ``model(headlines, bodies)``; it must return a
            3-tuple whose first element is fed to ``loss_fun``.
        data_iter: iterable yielding ``(headlines, bodies, labels)`` numpy arrays.
        loss_fun: callable ``loss_fun(out, labels)`` returning a scalar tensor.

    Returns:
        The first element of ``AverageValueMeter.value()`` (the running mean of
        the per-batch losses).
    """
    model.eval()
    # Use whatever device the model lives on rather than assuming CUDA.
    device = next(model.parameters()).device
    meter = tnt.meter.AverageValueMeter()
    meter.reset()
    with torch.no_grad():
        for headlines, bodies, labels in tqdm(data_iter):
            headlines = torch.from_numpy(headlines).to(device).long()
            bodies = torch.from_numpy(bodies).to(device).long()
            labels = torch.from_numpy(labels).to(device).long()
            out, _, _ = model(headlines, bodies)
            # Only the loss is reported, so the unused per-batch argmax and
            # host copy of the predictions have been dropped.
            loss = loss_fun(out, labels)
            meter.add(loss.item())

    model.train()
    return meter.value()[0]

def test_model(model, data_iter):
    """Predict over ``data_iter`` and print evaluation metrics.

    Prints accuracy, the sklearn classification report, macro F1 and
    ``get_score(y_true, y_pred) / get_score(y_true, y_true)``. Returns nothing.
    The model is put into eval mode for the pass and back into train mode
    afterwards.

    Args:
        model: module called as ``model(headlines, bodies)``; the first element
            of its 3-tuple output is arg-maxed over ``dim=1`` to get the
            predicted class.
        data_iter: iterable yielding ``(headlines, bodies, labels)`` numpy arrays.
    """
    model.eval()
    # Use whatever device the model lives on rather than assuming CUDA.
    device = next(model.parameters()).device
    y_pred = []
    y_true = []
    with torch.no_grad():
        for headlines, bodies, labels in tqdm(data_iter):
            headlines = torch.from_numpy(headlines).to(device).long()
            bodies = torch.from_numpy(bodies).to(device).long()
            # Labels are only compared on the host; the earlier copy of them to
            # the GPU was never read, so it has been removed.
            y_true.extend(labels)
            out, _, _ = model(headlines, bodies)
            _, index = torch.max(out, dim=1)
            y_pred.extend(index.detach().cpu().numpy())

    model.train()
    print('classification report:')
    print('accuracy: %.3f' % accuracy_score(y_true, y_pred))
    print(classification_report(y_true, y_pred))
    print('macro f1: %.3f' % f1_score(y_true, y_pred, average='macro'))
    # Score relative to the score of a perfect prediction.
    print('score: %.3f' % (get_score(y_true, y_pred) / get_score(y_true, y_true)))
    
def get_proba(model, data_iter):
    """Collect the model's first output for every example in ``data_iter``.

    Args:
        model: module called as ``model(headlines, bodies)`` returning a 3-tuple;
            only the first element is kept.
        data_iter: iterable yielding ``(headlines, bodies, labels)`` numpy
            arrays. The labels are ignored.

    Returns:
        A list with one numpy array per example (the rows of each batch output,
        in iteration order). Values are exactly what the model emits; no extra
        normalisation is applied here.
    """
    model.eval()
    # Use whatever device the model lives on rather than assuming CUDA.
    device = next(model.parameters()).device
    y_pred = []
    with torch.no_grad():
        # The previous version did `y_true.extend(labels)` here without ever
        # defining y_true, so the first batch raised NameError.
        for headlines, bodies, _ in tqdm(data_iter):
            headlines = torch.from_numpy(headlines).to(device).long()
            bodies = torch.from_numpy(bodies).to(device).long()
            out, _, _ = model(headlines, bodies)
            y_pred.extend(out.detach().cpu().numpy())

    model.train()
    return y_pred
    
    
def test_get_batch(data_iter):
    """Debug helper: walk ``data_iter`` and print the shape of every bodies batch."""
    for batch in tqdm(data_iter):
        # Each batch is a (headlines, bodies, labels) triple; only bodies is inspected.
        _, body_batch, _ = batch
        print(body_batch.shape)
    
def my_plot(data):
    """Plot the 'train' and 'val' curves of ``data`` and print the best val epoch.

    Args:
        data: mapping with 'train' and 'val' sequences of per-epoch losses.
    """
    plt.plot(data['train'])
    val_curve = data['val']
    plt.plot(val_curve)
    plt.legend(['train', 'val'])
    plt.show()
    # argmin is 0-based; epochs are reported 1-based.
    best_epoch = np.argmin(val_curve) + 1
    best_loss = min(val_curve)
    print('best epoch num: %s loss: %.3f' % (best_epoch, best_loss))

#test_get_batch(get_batch('./tmp/val_ids.pkl', batch_size=64, max_len_b=100))

In [3]:
# `pickle` is not imported explicitly anywhere above (it presumably leaked in
# through one of the star imports), so import it here to make the dependency
# visible and robust.
import pickle

# Locations of the pretrained embedding matrix and the id-encoded data splits.
pretrained_file_name = './tmp/pretrained.pkl'
train_filename = './tmp/train_ids.pkl'
val_filename = './tmp/val_ids.pkl'
test_filename = './tmp/test_ids.pkl'

# NOTE(security): unpickling executes arbitrary code; only load files you produced yourself.
# The context manager closes the file handle that `pickle.load(open(...))` used to leak.
with open(pretrained_file_name, 'rb') as pretrained_file:
    vecs = pickle.load(pretrained_file)

In [4]:
class DIIN(nn.Module):
    """Interaction-based classifier over a (headline, body) pair of token-id batches.

    Both inputs go through a shared embedding, a ``HighWay`` layer with dropout
    and a shared unidirectional LSTM. The element-wise product of every pair of
    encoder states forms a ``B x H x T1 x T2`` interaction tensor, which a
    convolutional feature extractor (selected by ``model_name``) reduces to a
    vector. Two linear layers then map it to 4 outputs.

    Args:
        pretrained: 2-D numpy array ``(vocab_size, emb_size)`` of embedding weights.
        model_name: one of ``'diin_my'``, ``'diin_densenet'``, ``'diin_inception'``,
            ``'diin_inceptionB'``; selects the feature extractor.
        hid_size: LSTM hidden size, also the channel count of the interaction tensor.
        dropout: dropout probability used after the highway layer and after ``fc1``.

    Raises:
        ValueError: if ``model_name`` is not one of the supported names.
    """
    def __init__(self, pretrained, model_name, hid_size=100, dropout=0.5):
        super(DIIN, self).__init__()
        _, emb_size = pretrained.shape
        self.hid_size = hid_size
        self.dropout = dropout
        self.embedding = nn.Embedding.from_pretrained(torch.from_numpy(pretrained).float())
        # Attribute keeps its historical name `gru_enc` (state_dict keys depend
        # on it) even though the encoder is an LSTM.
        self.gru_enc = nn.LSTM(input_size=emb_size, hidden_size=hid_size,
            batch_first=True, bidirectional=False)

        if model_name == 'diin_my':
            self.features = nn.Sequential(
                nn.Conv2d(hid_size, 64, 1),

                nn.Conv2d(64, 128, 7, 1, 3, bias=False),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.AdaptiveMaxPool2d((32, 32)),

                nn.Conv2d(128, 128, 3, 1, 1, bias=False),
                nn.BatchNorm2d(128),
                nn.ReLU(),
                nn.AdaptiveMaxPool2d((16, 16)),

                nn.Conv2d(128, 256, 3, 1, 1, bias=False),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.AdaptiveMaxPool2d((8, 8)),

                nn.Conv2d(256, 256, 3, 1, 1, bias=False),
                nn.BatchNorm2d(256),
                nn.ReLU(),
                nn.AdaptiveMaxPool2d((4, 4)),

                nn.Conv2d(256, 512, 3, 1, 1, bias=False),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.MaxPool2d(4),
                )

            self.fc1 = nn.Linear(512, 100)

        elif model_name == 'diin_densenet':
            self.features = DNet(hid_size, block_config=(4, 4, 4), drop_rate=0.5, num_init_features=200)
            # 2466 must equal the flattened DNet output size for this
            # configuration; other configurations tried earlier needed 2016 or 3425.
            self.fc1 = nn.Linear(2466, 100)

        elif model_name == 'diin_inception':
            # input: 16 * 16
            self.features = nn.Sequential(
                nn.Conv2d(hid_size, 64, 1),

                InceptionA(64, 32),
                nn.MaxPool2d(2),

                InceptionA(256, 32),
                nn.MaxPool2d(2),

                nn.Conv2d(256, 512, 3, 1, 1, bias=False),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.AvgPool2d(4))

            self.fc1 = nn.Linear(512, 100)
        elif model_name == 'diin_inceptionB':
            # input: 24 * 24
            self.features = nn.Sequential(
                nn.Conv2d(hid_size, 100, 1),
                nn.BatchNorm2d(100),
                nn.ReLU(),

                InceptionA(100, 32),
                nn.MaxPool2d(2),

                InceptionA(256, 64),
                nn.MaxPool2d(2),

                InceptionA(288, 32),
                nn.MaxPool2d(2),

                nn.Conv2d(288, 512, 3, 1, 1, bias=False),
                nn.BatchNorm2d(512),
                nn.ReLU(),
                nn.AvgPool2d(3))

            self.fc1 = nn.Linear(512, 100)
        else:
            # Previously an unknown name built a model without `features`/`fc1`
            # and only failed later, inside forward().
            raise ValueError('unknown model_name: %r' % (model_name,))
        self.last_layer = nn.Linear(100, 4)
        self.dp = nn.Dropout(self.dropout)
        # NOTE(review): the highway layer is applied to embeddings (emb_size
        # features) but is sized with hid_size; this only lines up when
        # emb_size == hid_size. Confirm against HighWay's signature.
        self.hn = HighWay(hid_size)

    def forward(self, x1, x2):
        """Score a batch of (x1, x2) token-id pairs.

        Args:
            x1: integer tensor ``B x T1`` of token ids.
            x2: integer tensor ``B x T2`` of token ids.

        Returns:
            The same ``B x 4`` tensor three times (callers unpack three values).
        """
        x1 = self.embedding(x1)
        x2 = self.embedding(x2)

        x1 = self.dp(self.hn(x1))
        x2 = self.dp(self.hn(x2))

        x1, _ = self.gru_enc(x1)
        x2, _ = self.gru_enc(x2)

        x1 = x1.unsqueeze(2) # B * T1 * 1 * H
        x2 = x2.unsqueeze(1) # B * 1 * T2 * H
        x = x1 * x2 # B * T1 * T2 * H (broadcast pairwise product)
        x = x.permute(0, 3, 1, 2) # B * H * T1 * T2

        x = self.features(x)
        # Flatten everything except the batch axis. A bare squeeze() would also
        # drop the batch axis when B == 1 and break `torch.max(out, dim=1)`.
        x = x.reshape(x.size(0), -1)
        x = F.relu(self.dp(self.fc1(x)))
        # torch.sigmoid replaces the deprecated F.sigmoid (same values).
        # NOTE(review): the outputs are squashed to (0, 1) before being handed
        # to the loss; if the loss applies its own softmax this limits how
        # confident the model can get. Confirm this is intended.
        x = torch.sigmoid(self.last_layer(x))
        return x, x, x


In [5]:
# Seed everything that is seeded in this notebook so runs are repeatable.
torch.manual_seed(42)
torch.backends.cudnn.deterministic = True
np.random.seed(42)

model_name = 'DIIN'
if model_name == 'EmbeddingBag':
    model = EmbeddingBag(vecs).cuda()
elif model_name == 'Esim':
    model = Esim(vecs).cuda()
elif model_name == 'DIIN':
    model = DIIN(vecs, 'diin_my').cuda()
else:
    # Fail here instead of with a NameError on `model` a few lines below.
    raise ValueError('unknown model_name: %r' % (model_name,))

print(model)
opt = Adam(model.parameters(), lr=1e-3, weight_decay=1e-8)
# Alternatives that were tried:
#weight = torch.Tensor([3, 3, 3, 1])
#loss_fun = nn.CrossEntropyLoss(weight)
#scheduler = ReduceLROnPlateau(opt, 'min', factor=0.1, patience=0, verbose=True)
scheduler = StepLR(opt, step_size=4, gamma=0.1)
loss_fun = FocalLoss(gamma=5)
loss_fun.cuda()
epochs = 6
history = {'train':[], 'val': []}
# Best validation loss seen so far; only improvements trigger a checkpoint.
min_loss = float('inf')

for epoch in range(epochs):
    train_loss = train_model(model, get_batch(train_filename, batch_size=32, max_len_b=100, data_aug=False), loss_fun, opt)
    val_loss = val_model(model, get_batch(val_filename, batch_size=32, max_len_b=100), loss_fun)
    if val_loss < min_loss:
        # min_loss was never updated before, so every epoch wrote a checkpoint.
        min_loss = val_loss
        torch.save(model.state_dict(), './models/%s_%s_%.3f' % (model_name, epoch + 1, val_loss))

    history['train'].append(train_loss)
    history['val'].append(val_loss)

    # Step the LR schedule once per epoch, after the optimizer updates.
    # On PyTorch >= 1.1, stepping it before any optimizer step is out of order
    # and triggers a warning.
    # With ReduceLROnPlateau use `scheduler.step(val_loss)` here instead.
    scheduler.step()

my_plot(history)

DIIN(
  (embedding): Embedding(399670, 100)
  (gru_enc): LSTM(100, 100, batch_first=True)
  (features): Sequential(
    (0): Conv2d(100, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): Conv2d(64, 128, kernel_size=(7, 7), stride=(1, 1), padding=(3, 3), bias=False)
    (2): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (3): ReLU()
    (4): AdaptiveMaxPool2d(output_size=(32, 32))
    (5): Conv2d(128, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (6): BatchNorm2d(128, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (7): ReLU()
    (8): AdaptiveMaxPool2d(output_size=(16, 16))
    (9): Conv2d(128, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
    (10): BatchNorm2d(256, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    (11): ReLU()
    (12): AdaptiveMaxPool2d(output_size=(8, 8))
    (13): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
  

Widget Javascript not detected.  It may not be installed properly. Did you enable the widgetsnbextension? If not, then run "jupyter nbextension enable --py --sys-prefix widgetsnbextension"





Widget Javascript not detected.  It may not be installed properly. Did you enable the widgetsnbextension? If not, then run "jupyter nbextension enable --py --sys-prefix widgetsnbextension"





Widget Javascript not detected.  It may not be installed properly. Did you enable the widgetsnbextension? If not, then run "jupyter nbextension enable --py --sys-prefix widgetsnbextension"


KeyboardInterrupt: 

In [6]:
# Show the per-epoch train/val losses appended to `history` by the training loop.
history

{'train': [0.15486759472930867], 'val': [0.15710474238839256]}

In [7]:
# Evaluate the model currently held in `model` on the validation and test splits.
# To score a saved checkpoint instead, first rebuild the matching architecture,
# e.g. `model = Esim(vecs).cuda()`, and then restore its weights with
# `model.load_state_dict(torch.load('./models/Esim_10_0.069'))`.
for split_idx, split_file in enumerate((val_filename, test_filename)):
    if split_idx > 0:
        # Visual separator between the two reports.
        print('*' * 50)
    test_model(model, get_batch(split_file, batch_size=64, max_len_b=100))

Widget Javascript not detected.  It may not be installed properly. Did you enable the widgetsnbextension? If not, then run "jupyter nbextension enable --py --sys-prefix widgetsnbextension"





Exception in thread Thread-12:
Traceback (most recent call last):
  File "C:\Users\yanji\Anaconda3\lib\threading.py", line 916, in _bootstrap_inner
    self.run()
  File "C:\Users\yanji\Anaconda3\lib\site-packages\tqdm\_tqdm.py", line 144, in run
    for instance in self.tqdm_cls._instances:
  File "C:\Users\yanji\Anaconda3\lib\_weakrefset.py", line 60, in __iter__
    for itemref in self.data:
RuntimeError: Set changed size during iteration




classification report:
accuracy: 0.717
             precision    recall  f1-score   support

          0       0.00      0.00      0.00       762
          1       0.00      0.00      0.00       162
          2       0.00      0.00      0.00      1800
          3       0.72      1.00      0.84      6898

avg / total       0.51      0.72      0.60      9622

macro f1: 0.209
score: 0.388
**************************************************


Widget Javascript not detected.  It may not be installed properly. Did you enable the widgetsnbextension? If not, then run "jupyter nbextension enable --py --sys-prefix widgetsnbextension"


KeyboardInterrupt: 