In [0]:
from sklearn.datasets import fetch_20newsgroups
from sklearn.model_selection import train_test_split
import numpy as np
import torch as tt
import torch.utils
from torch.utils.data import DataLoader, TensorDataset

from allennlp.modules.elmo import Elmo, batch_to_ids

options_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json"
weight_file = "https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5"

# Compute two different representation for each token.
# Each representation is a linear weighted combination for the
# 3 layers in ELMo (i.e., charcnn, the outputs of the two BiLSTM))
elmo = Elmo(options_file, weight_file, 2, dropout=0, requires_grad= False)

In [0]:
import torch.nn as nn
import torch.optim as optim
import pandas as pd
from tqdm import tqdm
from tqdm._tqdm_notebook import tqdm_notebook

In [0]:
if torch.cuda.is_available():
    torch.set_gpu_as_default_device()

In [0]:
newsgroups_all = fetch_20newsgroups(subset='all')

In [0]:
X_train, X_test, y_train, y_test = train_test_split(newsgroups_all['data'], newsgroups_all['target'], test_size=0.2, random_state=42)

In [0]:
n_train = len(y_train)
n_test = len(y_test)

In [0]:
N_TRAIN = 1000
N_TEST = 1000

In [57]:
idx1 = np.random.choice(range(n_train), size = N_TRAIN)
idx2 = np.random.choice(range(n_train), size = N_TRAIN)

y_train_real = (np.array([y_train[i] for i in idx1]) == np.array([y_train[i] for i in idx2])).astype(int)
y_train_real = tt.from_numpy(y_train_real).float()

x_train_real1 = np.array([X_train[i][:200] for i in idx1])
x_train_real2 = np.array([X_train[i][:200] for i in idx2])
x_train_real1 = batch_to_ids(x_train_real1)
x_train_real2 = batch_to_ids(x_train_real2)




In [0]:
idx1 = np.random.choice(range(n_test), size = N_TEST)
idx2 = np.random.choice(range(n_test), size = N_TEST)

y_test_real = (np.array([y_test[i] for i in idx1]) == np.array([y_test[i] for i in idx2])).astype(int)
x_test_real1 = np.array([X_test[i][:200] for i in idx1])
x_test_real2 = np.array([X_test[i][:200] for i in idx2])

y_test_real = tt.from_numpy(y_test_real).float()
x_test_real1 = batch_to_ids(x_test_real1)
x_test_real2 = batch_to_ids(x_test_real2)

In [0]:
batch_size = 8
train_loader = DataLoader(TensorDataset(x_train_real1, x_train_real2, y_train_real), batch_size=batch_size)
val_loader = DataLoader(TensorDataset(x_test_real1, x_test_real2, y_test_real), batch_size=batch_size)

In [0]:
def _train_epoch(model, iterator, optimizer, curr_epoch):

    model.train()

    running_loss = 0

    n_batches = len(iterator)

    iterator = tqdm_notebook(iterator, total=n_batches, desc='epoch %d' % (curr_epoch), leave=True)

    for i, batch in enumerate(iterator):
        optimizer.zero_grad()
        print(all(x.requires_grad == False for x in elmo._elmo_lstm.parameters()))
        loss = model(batch)
        print(loss)
        loss.backward()
        optimizer.step()

        curr_loss = loss.data.cpu().detach().item()
        
        
        loss_smoothing = i / (i+1)
        running_loss = loss_smoothing * running_loss + (1 - loss_smoothing) * curr_loss

        iterator.set_postfix(loss='%.5f' % running_loss)

    return running_loss

def _test_epoch(model, iterator):
    model.eval()
    epoch_loss = 0

    n_batches = len(iterator)
    with tt.no_grad():
        for batch in iterator:
            loss = model(batch)
            epoch_loss += loss.data.item()

    return epoch_loss / n_batches


def nn_train(model, train_iterator, valid_iterator, optimizer, n_epochs=100,
          scheduler=None, early_stopping=0):

    prev_loss = 100500
    es_epochs = 0
    best_epoch = None
    history = pd.DataFrame()

    for epoch in range(n_epochs):
        train_loss = _train_epoch(model, train_iterator, optimizer, epoch)
        valid_loss = _test_epoch(model, valid_iterator)

        valid_loss = valid_loss
        print('validation loss %.5f' % valid_loss)

        record = {'epoch': epoch, 'train_loss': train_loss, 'valid_loss': valid_loss}
        history = history.append(record, ignore_index=True)

        if early_stopping > 0:
            if valid_loss > prev_loss:
                es_epochs += 1
            else:
                es_epochs = 0

            if es_epochs >= early_stopping:
                best_epoch = history[history.valid_loss == history.valid_loss.min()].iloc[0]
                print('Early stopping! best epoch: %d val %.5f' % (best_epoch['epoch'], best_epoch['valid_loss']))
                break

            prev_loss = min(prev_loss, valid_loss)

In [0]:
class MyModel(nn.Module):
    
    def __init__(self, elmo, criterion):
        super(MyModel, self).__init__()
        self.elmo = elmo
        self.criterion = criterion
        
        #self.fc = nn.Linear(1024*2, 128)
        self.fc = nn.Linear(1024*2, 32)
        #self.out = nn.Linear(128*3, 1)
        self.out = nn.Linear(32*3, 1)
        
    def branch(self, x):
        x = self.elmo(x)['elmo_representations']
        x = tt.cat(x, dim=-1)
        x = x.mean(dim=1)
        x = self.fc(x)
        return x
        
    def forward(self, batch):
        
        q1, q2, y = batch
        
        q1 = self.branch(q1)
        q2 = self.branch(q2)
        
        # simetric functions
        x = tt.cat([tt.abs(q1-q2), q1*q2, q1+q2], dim=-1)
        
        x = self.out(x).squeeze(1)
        loss = self.criterion(x,y)
        
        return loss



model = MyModel(elmo, nn.BCEWithLogitsLoss())

optimizer = optim.Adam(model.parameters())

nn_train(model, train_loader, val_loader, optimizer, n_epochs=2)

HBox(children=(IntProgress(value=0, description='epoch 0', max=125, style=ProgressStyle(description_width='ini…

True
tensor(0.6113, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
True
tensor(0.0923, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
True
tensor(0.5283, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
True
tensor(0.0083, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
True
tensor(1.3252, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
True
tensor(0.0118, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
True
tensor(0.0236, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
True
tensor(0.0429, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
True
tensor(0.0573, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
True
tensor(1.1338, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
True
tensor(0.1452, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
True
tensor(0.2101, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
True
tensor(0.1601, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
True
tensor(0.3794, grad_fn=<BinaryCrossEntropyWithLogitsBackward>)
True
tensor(0.0582, grad_fn=<BinaryCrossEntropyW

In [0]:
def triplet_loss(anchor_embed, pos_embed, neg_embed):
    return F.cosine_similarity(anchor_embed, neg_embed) - F.cosine_similarity(anchor_embed, pos_embed)
    
    
class Tripletnet(nn.Module):
    def __init__(self):
        super(Tripletnet, self).__init__()
        self.elmo = elmo
        self.fc = nn.Linear(1024*2, 128)
        
    def branch(self, x):
        x = self.elmo(x)['elmo_representations']
        x = tt.cat(x, dim=-1)
        x = x.mean(dim=1)
        x = self.fc(x)
        
        return x

    def forward(self, anchor, pos, neg):
        
        anchor = self.branch(anchor)
        pos = self.branch(pos)
        neg = self.branch(neg)
        
        return triplet_loss(anchor, pos, neg)

In [50]:
model = Tripletnet()
model.forward()

TypeError: ignored

In [0]:
df = pd.read_csv('train.csv')
df = df[df.index<100]

In [0]:
df_train, df_val = train_test_split(df, test_size=0.2, random_state=42, shuffle=True)

xq1_train = batch_to_ids(df_train.question1.values)
xq2_train = batch_to_ids(df_train.question2.values)
y_train = tt.from_numpy(df_train.is_duplicate.values).float()

xq1_val = batch_to_ids(df_val.question1.values)
xq2_val = batch_to_ids(df_val.question2.values)
y_val = tt.from_numpy(df_val.is_duplicate.values).float()

In [0]:
batch_size = 1
train_loader = DataLoader(TensorDataset(xq1_train, xq2_train, y_train), batch_size=batch_size)
val_loader = DataLoader(TensorDataset(xq1_val, xq2_val, y_val), batch_size=batch_size)

In [9]:
df_train

Unnamed: 0,id,qid1,qid2,question1,question2,is_duplicate
55,55,111,112,How difficult is it get into RSI?,Do you apply for programs like RSI when you're...,0
88,88,177,178,Which is the best gaming laptop under 60k INR?,Which is the best gaming laptop under Rs 60000?,1
26,26,53,54,What is web application?,What is the web application framework?,0
42,42,85,86,"Can I make 50,000 a month by day trading?","Can I make 30,000 a month by day trading?",0
69,69,139,140,At what cost does so much privacy as in German...,Are there any people who genuinely enjoy salad...,0
...,...,...,...,...,...,...
60,60,121,122,How do I download content from a kickass torre...,Is Kickass Torrents trustworthy?,0
71,71,143,144,What is a narcissistic personality disorder?,What is narcissistic personality disorder?,1
14,14,29,30,What are the laws to change your status from a...,What are the laws to change your status from a...,0
92,92,185,186,What are some of the best romantic movies in E...,What is the best romantic movie you have ever ...,1


In [10]:
for i in df_train.index:
  print(len(df_train.question1[i]))

33
46
24
41
96
93
19
56
60
33
43
135
43
38
24
86
32
66
64
28
22
41
30
26
49
60
73
79
69
43
79
50
57
27
31
42
19
51
115
66
21
58
41
67
52
47
59
43
59
31
24
49
139
67
171
41
110
72
43
39
30
75
127
38
51
21
37
73
24
54
89
69
126
125
26
70
44
141
53
56
