In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchtext import data
from torchtext import datasets
import pandas as pd
import numpy as np
from sklearn import metrics
from sklearn.model_selection import train_test_split
from tqdm.notebook import tqdm
from catalyst import dl
import wandb
import joblib


DEVICE = 'cuda'
mydir = '/data2/competitions/quora-insincere-questions-classification'
SEED = 1234

tqdm.pandas()
# seed everything
import os
os.environ['PYTHONHASHSEED'] = str(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
torch.cuda.manual_seed(SEED)
torch.backends.cudnn.deterministic = True

  from pandas import Panel
I0302 18:21:48.901045 140452000810816 file_utils.py:41] PyTorch version 1.4.0 available.
I0302 18:21:49.590968 140452000810816 file_utils.py:57] TensorFlow version 2.0.0 available.

The Panel class is removed from pandas. Accessing it from the top-level namespace will also be removed in the next version



In [2]:
def get_param_size(model, trainable=True):
    if trainable:
        psize = np.sum([np.prod(p.size()) for p in model.parameters() if p.requires_grad])
    else:
        psize = np.sum([np.prod(p.size()) for p in model.parameters()])
    return psize

## EMA

In [3]:
# https://discuss.pytorch.org/t/how-to-apply-exponential-moving-average-decay-for-variables/10856
class EMA():
    def __init__(self, model, mu, level='batch', n=1):
        """
        level: 'batch' or 'epoch'
          'batch': Update params every n batches.
          'epoch': Update params every epoch.
        """
        # self.ema_model = copy.deepcopy(model)
        self.mu = mu
        self.level = level
        self.n = n
        self.cnt = self.n
        self.shadow = {}
        for name, param in model.named_parameters():
            if param.requires_grad:
                self.shadow[name] = param.data

    def _update(self, model):
        for name, param in model.named_parameters():
            if param.requires_grad:
                new_average = (1 - self.mu) * param.data + self.mu * self.shadow[name]
                self.shadow[name] = new_average.clone()

    def set_weights(self, ema_model):
        for name, param in ema_model.named_parameters():
            if param.requires_grad:
                param.data = self.shadow[name]

    def on_batch_end(self, model):
        if self.level is 'batch':
            self.cnt -= 1
            if self.cnt == 0:
                self._update(model)
                self.cnt = self.n

    def on_epoch_end(self, model):
        if self.level is 'epoch':
            self._update(model)



## GRU Model

In [4]:
class GRUModel(nn.Module):
    def __init__(self, vocab_size, embed_dim, h_size, n_layers, dropout, padding_idx, 
                 pretrained_embedding=None, fix_embedding=True):
        super(GRUModel, self).__init__()
        self.is_pretrained = pretrained_embedding is not None
        
        if self.is_pretrained:
            self.embed = nn.Embedding.from_pretrained(pretrained_embedding, freeze=fix_embedding)
            self.embed.padding_idx = padding_idx
        else:
            self.embed = nn.Embedding(vocab_size, embed_dim, padding_idx=padding_idx)
            
        self.embed_drop = nn.Dropout(dropout)
            
        self.gru = nn.GRU(embed_dim, h_size, n_layers, 
                          batch_first=True, 
                          bidirectional=True, dropout=dropout)
        
        self.out = nn.Sequential(
            nn.Dropout(dropout),
            nn.Linear(2*n_layers*h_size, h_size),
            nn.BatchNorm1d(h_size),
            nn.PReLU(),
            nn.Linear(h_size, 1),
        )
        self.init_weights()

    def init_weights(self):
        if not self.is_pretrained:
            d = self.embed.weight.size(1)
            nn.init.uniform_(self.embed.weight, -1/np.sqrt(d), 1/np.sqrt(d))

    def forward(self, x):
        x = self.embed(x)
        x = self.embed_drop(x)
        x, _ = self.gru(x)
        x, _ = torch.max(x, 1)
        x = self.out(x).squeeze(1)
        return x

## Data preprocessing

In [5]:
class DataFrameDataset(data.Dataset):
     """Class for using pandas DataFrames as a datasource"""
     def __init__(self, examples, fields, filter_pred=None):
         """
         Create a dataset from a pandas dataframe of examples and Fields
         Arguments:
             examples pd.DataFrame: DataFrame of examples
             fields {str: Field}: The Fields to use in this tuple. The
                 string is a field name, and the Field is the associated field.
             filter_pred (callable or None): use only exanples for which
                 filter_pred(example) is true, or use all examples if None.
                 Default is None
         """
         self.examples = examples.apply(SeriesExample.fromSeries, args=(fields,), axis=1).tolist()
         if filter_pred is not None:
             self.examples = filter(filter_pred, self.examples)
         self.fields = dict(fields)
         # Unpack field tuples
         for n, f in list(self.fields.items()):
             if isinstance(n, tuple):
                 self.fields.update(zip(n, f))
                 del self.fields[n]
                    
class SeriesExample(data.Example):
     """Class to convert a pandas Series to an Example"""

     @classmethod
     def fromSeries(cls, data, fields):
         return cls.fromdict(data.to_dict(), fields)

     @classmethod
     def fromdict(cls, data, fields):
         ex = cls()

         for key, field in fields.items():
             if key not in data:
                 raise ValueError("Specified key {} was not found in "
                 "the input data".format(key))
             if field is not None:
                 setattr(ex, key, field.preprocess(data[key]))
             else:
                 setattr(ex, key, data[key])
         return ex

# Simple wrapper to join torchtext and catalyst API

class IteratorWrapper(torch.utils.data.DataLoader):
    __initialized__ = False

    def __init__(self, iter: iter):
        self.batch_size = iter.batch_size
        self.num_workers = 1
        self.collate_fn = None
        self.pin_memory = False
        self.drop_last = False
        self.timeout = 0
        self.worker_init_fn = None
        self.sampler = iter
        self.batch_sampler = iter
        self.__initialized__ = True

    def __iter__(self):
        return map(lambda batch: {
                    'features': batch.text,
                    'targets': batch.target,
                }, self.batch_sampler.__iter__())

    def __len__(self):
        return len(self.batch_sampler)

In [6]:
import re


puncts = ',.":)(-!?|;\'$&/[]>%=#*+\\•~@£·_{}©^®`<→°€™›♥←×§″′Â█½à…“★”–●â►−¢²¬░¶↑±¿▾═¦║\
―¥▓—‹─▒：¼⊕▼▪†■’▀¨▄♫☆é¯♦¤▲è¸¾Ã⋅‘∞∙）↓、│（»，♪╩╚³・╦╣╔╗▬❤ïØ¹≤‡√'


def clean_text(x, puncts=puncts): #добавляет пробелы вокруг пунктуации
    x = str(x)
    for punct in puncts:
        x = x.replace(punct, f' {punct} ')
    return x


def clean_numbers(x):
    x = re.sub('[0-9]{5,}', '#####', x)
    x = re.sub('[0-9]{4}', '####', x)
    x = re.sub('[0-9]{3}', '###', x)
    x = re.sub('[0-9]{2}', '##', x)
    return x

In [7]:
df_train = pd.read_csv(f'{mydir}/train.csv', index_col=0).reset_index(drop=True)
df_train = df_train.rename(columns={'question_text': 'text'})

df_train['text'] = df_train['text'].progress_apply(str.lower)
df_train['text'] = df_train['text'].progress_apply(clean_text)
df_train['text'] = df_train['text'].progress_apply(clean_numbers)

df_train, df_test = train_test_split(df_train, train_size=0.7, random_state=SEED)

augmented = pd.read_csv(f'{mydir}/augmented_fairseq.csv', index_col=0)
augmented = augmented.rename(columns={'question_text': 'text'})
augmented['text'] = augmented['text'].str[2:-2]

augmented['text'] = augmented['text'].progress_apply(str.lower)
augmented['text'] = augmented['text'].progress_apply(clean_text)
augmented['text'] = augmented['text'].progress_apply(clean_numbers)

df_train = pd.concat([df_train, augmented]).reset_index(drop=True)

HBox(children=(FloatProgress(value=0.0, max=1306122.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1306122.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=1306122.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=914280.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=914280.0), HTML(value='')))




HBox(children=(FloatProgress(value=0.0, max=914280.0), HTML(value='')))




In [8]:
max_len = 50

TEXT = data.Field(
#     include_lengths=True,
                  postprocessing = lambda batch, vocab: [x[:max_len] for x in batch],
                  lower=True,
                  tokenize='spacy', 
                  tokenizer_language='en', 
                  batch_first=True
                 )
LABEL = data.LabelField(dtype=torch.float)

train = DataFrameDataset(df_train, fields={'text': TEXT, 'target': LABEL})
test = DataFrameDataset(df_test, fields={'text': TEXT, 'target': LABEL})

TEXT.build_vocab(train, test, min_freq=5)
LABEL.build_vocab(train)

train, valid = train.split(split_ratio=0.9)
len(train), len(valid)

(1645708, 182857)

In [10]:
vocab_size = len(TEXT.vocab)
vocab_size

61101

In [11]:
h_size = 128
num_epochs = 10 
n_layers = 1
dropout = 0.1
embed_dim = 128
batch_size = 512


train_iter, valid_iter, test_iter = data.BucketIterator.splits((train, valid, test), 
                                                               batch_size=batch_size, 
                                                               sort_key=lambda x: len(x.text),
                                                               sort=True,
                                                               device=DEVICE)

In [12]:
train_iter = IteratorWrapper(train_iter)
valid_iter = IteratorWrapper(valid_iter)
test_iter = IteratorWrapper(test_iter)
loaders = {'train': train_iter, 'valid': valid_iter}


model = GRUModel(vocab_size=vocab_size, 
                 embed_dim=embed_dim, 
                 h_size=h_size, 
                 n_layers=n_layers, 
                 dropout=dropout, 
                 padding_idx=TEXT.vocab.stoi['<pad>'], 
                 pretrained_embedding=None, 
                 fix_embedding=False)


optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, verbose=True, patience=2, factor=0.5)


dropout option adds dropout after all but last recurrent layer, so non-zero dropout expects num_layers greater than 1, but got dropout=0.1 and num_layers=1



In [13]:
logdir = f'{mydir}/log_quora2_dup'
!rm -rf {logdir}

In [14]:
# use SupervisedWandbRunner runner to send statistics to wandb
runner = dl.SupervisedWandbRunner(DEVICE)
runner.train(model, 
             loaders=loaders,
             num_epochs=num_epochs,
             logdir=logdir,
             criterion=nn.BCEWithLogitsLoss(),
             optimizer=optimizer, 
             scheduler=scheduler,  
             callbacks=[
                dl.callbacks.CheckpointCallback(2), # save 2 best models (by epoch) into logdir
                dl.callbacks.EarlyStoppingCallback(3), # stop training, if valid loss does not improve last 3 epochs
             ],
             # send current hyperparam values to wandb
             monitoring_params={
                 'entity': 'denaas', # your wandb username
                 'project': 'text-augmentation', # project name
                 'name': 'quora-noembed-dup', # name of the specific run
                 'group': 'examples',
                 'config': {
                     'model': 'bigru',
                     'optimizer': str(optimizer),
                     'scheduler': 'plateau',
                     'early_stop': 3,
                     'vocab_size': vocab_size,
                     'h_size': h_size,
                     'n_layers': n_layers,
                     'dropout': dropout,
                     'batch_size': batch_size,
                     'embed_dim': embed_dim,
                     'max_len': max_len,
                 },
             },
#              check=True, # set if you want to check pipeline for correctness, without actual training
             verbose=True)



I0302 18:40:30.552883 140452000810816 run_manager.py:924] system metrics and metadata threads started
I0302 18:40:30.554684 140452000810816 run_manager.py:933] checking resume status, waiting at most 10 seconds
I0302 18:40:30.840525 140452000810816 run_manager.py:951] resuming run from id: UnVuOnYxOndxOGF4NW45OnRleHQtYXVnbWVudGF0aW9uOmRlbmFhcw==
I0302 18:40:30.849404 140452000810816 run_manager.py:963] upserting run before process can begin, waiting at most 10 seconds
I0302 18:40:31.206304 140444893705984 run_manager.py:1048] saving patches
I0302 18:40:31.246901 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/config.yaml
I0302 18:40:31.613936 140444893705984 run_manager.py:1052] saving pip packages
I0302 18:40:31.615559 140444893705984 run_manager.py:1054] initializing streaming files api
I0302 18:40:31.616397 140444893705984 run_manager.py:1061] unblocking file change observer, beginning sync with W&B servers
I0302 18:40:31.947334 1404452

1/10 * Epoch (train):  49% 1570/3215 [00:13<00:14, 110.81it/s, loss=0.071]

I0302 18:40:47.314888 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-metadata.json


1/10 * Epoch (train):  88% 2816/3215 [00:26<00:04, 91.29it/s, loss=0.148] 

I0302 18:40:59.331222 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-events.jsonl


1/10 * Epoch (train):  98% 3138/3215 [00:30<00:01, 67.80it/s, loss=0.235]

I0302 18:41:03.343275 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-metadata.json


1/10 * Epoch (train): 100% 3215/3215 [00:31<00:00, 102.78it/s, loss=0.430]
1/10 * Epoch (valid): 100% 358/358 [00:01<00:00, 180.61it/s, loss=0.070]
[2020-03-02 18:41:06,844] 
1/10 * Epoch 1 (train): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=118954.7763 | _timers/batch_time=0.0051 | _timers/data_time=0.0040 | _timers/model_time=0.0011 | loss=0.1188
1/10 * Epoch 1 (valid): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=131461.3101 | _timers/batch_time=0.0046 | _timers/data_time=0.0037 | _timers/model_time=0.0009 | loss=0.1522


I0302 18:41:06.844314 140452000810816 logging.py:153] 
1/10 * Epoch 1 (train): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=118954.7763 | _timers/batch_time=0.0051 | _timers/data_time=0.0040 | _timers/model_time=0.0011 | loss=0.1188
1/10 * Epoch 1 (valid): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=131461.3101 | _timers/batch_time=0.0046 | _timers/data_time=0.0037 | _timers/model_time=0.0009 | loss=0.1522


2/10 * Epoch (train):   0% 0/3215 [00:00<?, ?it/s]

I0302 18:41:07.432280 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-summary.json
I0302 18:41:07.582064 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-history.jsonl


2/10 * Epoch (train):  45% 1456/3215 [00:12<00:15, 111.38it/s, loss=0.046]

I0302 18:41:19.435553 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-metadata.json


2/10 * Epoch (train):  80% 2582/3215 [00:23<00:06, 91.13it/s, loss=0.116] 

I0302 18:41:34.551876 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-events.jsonl


2/10 * Epoch (train):  94% 3036/3215 [00:32<00:02, 81.65it/s, loss=0.196]

I0302 18:41:39.557515 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-metadata.json


2/10 * Epoch (train): 100% 3215/3215 [00:35<00:00, 90.93it/s, loss=0.229]
2/10 * Epoch (valid): 100% 358/358 [00:02<00:00, 176.30it/s, loss=0.080]


I0302 18:41:44.559857 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-summary.json
I0302 18:41:44.561057 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-history.jsonl


[2020-03-02 18:41:46,281] 
2/10 * Epoch 2 (train): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=119979.5417 | _timers/batch_time=0.0063 | _timers/data_time=0.0052 | _timers/model_time=0.0011 | loss=0.0919
2/10 * Epoch 2 (valid): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=124282.3685 | _timers/batch_time=0.0047 | _timers/data_time=0.0037 | _timers/model_time=0.0009 | loss=0.1483


I0302 18:41:46.281005 140452000810816 logging.py:153] 
2/10 * Epoch 2 (train): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=119979.5417 | _timers/batch_time=0.0063 | _timers/data_time=0.0052 | _timers/model_time=0.0011 | loss=0.0919
2/10 * Epoch 2 (valid): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=124282.3685 | _timers/batch_time=0.0047 | _timers/data_time=0.0037 | _timers/model_time=0.0009 | loss=0.1483


3/10 * Epoch (train):  29% 922/3215 [00:08<00:18, 126.09it/s, loss=0.051]

I0302 18:41:54.625303 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-metadata.json


3/10 * Epoch (train):  68% 2198/3215 [00:19<00:08, 113.94it/s, loss=0.047]

I0302 18:42:05.633713 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-events.jsonl


3/10 * Epoch (train):  84% 2688/3215 [00:24<00:05, 91.19it/s, loss=0.093] 

I0302 18:42:10.647188 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-metadata.json


3/10 * Epoch (train): 100% 3215/3215 [00:31<00:00, 103.59it/s, loss=0.094]
3/10 * Epoch (valid): 100% 358/358 [00:01<00:00, 183.67it/s, loss=0.097]


I0302 18:42:19.656490 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-summary.json
I0302 18:42:19.660100 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-history.jsonl


[2020-03-02 18:42:20,789] 
3/10 * Epoch 3 (train): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=119077.5507 | _timers/batch_time=0.0050 | _timers/data_time=0.0039 | _timers/model_time=0.0011 | loss=0.0714
3/10 * Epoch 3 (valid): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=131287.3993 | _timers/batch_time=0.0045 | _timers/data_time=0.0036 | _timers/model_time=0.0009 | loss=0.1749


I0302 18:42:20.789206 140452000810816 logging.py:153] 
3/10 * Epoch 3 (train): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=119077.5507 | _timers/batch_time=0.0050 | _timers/data_time=0.0039 | _timers/model_time=0.0011 | loss=0.0714
3/10 * Epoch 3 (valid): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=131287.3993 | _timers/batch_time=0.0045 | _timers/data_time=0.0036 | _timers/model_time=0.0009 | loss=0.1749


4/10 * Epoch (train):  20% 633/3215 [00:06<00:23, 111.07it/s, loss=0.032]

I0302 18:42:26.930310 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-metadata.json


4/10 * Epoch (train):  57% 1845/3215 [00:16<00:12, 109.80it/s, loss=0.031]

I0302 18:42:36.942469 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-events.jsonl


4/10 * Epoch (train):  77% 2480/3215 [00:22<00:07, 93.90it/s, loss=0.051] 

I0302 18:42:42.945572 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-metadata.json


4/10 * Epoch (train): 100% 3215/3215 [00:30<00:00, 104.12it/s, loss=0.110]
4/10 * Epoch (valid): 100% 358/358 [00:02<00:00, 174.11it/s, loss=0.074]


I0302 18:42:53.969449 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-summary.json
I0302 18:42:53.970352 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-history.jsonl


[2020-03-02 18:42:54,682] 
4/10 * Epoch 4 (train): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=118463.0336 | _timers/batch_time=0.0049 | _timers/data_time=0.0038 | _timers/model_time=0.0011 | loss=0.0555
4/10 * Epoch 4 (valid): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=125565.9315 | _timers/batch_time=0.0048 | _timers/data_time=0.0038 | _timers/model_time=0.0009 | loss=0.1756


I0302 18:42:54.682512 140452000810816 logging.py:153] 
4/10 * Epoch 4 (train): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=118463.0336 | _timers/batch_time=0.0049 | _timers/data_time=0.0038 | _timers/model_time=0.0011 | loss=0.0555
4/10 * Epoch 4 (valid): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=125565.9315 | _timers/batch_time=0.0048 | _timers/data_time=0.0038 | _timers/model_time=0.0009 | loss=0.1756


5/10 * Epoch (train):  13% 403/3215 [00:04<00:22, 125.77it/s, loss=0.037]

I0302 18:42:59.096154 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-metadata.json


5/10 * Epoch (train):  43% 1379/3215 [00:12<00:16, 109.75it/s, loss=0.020]

I0302 18:43:07.103767 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-events.jsonl


5/10 * Epoch (train):  71% 2273/3215 [00:20<00:08, 104.81it/s, loss=0.037]

I0302 18:43:15.113206 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-metadata.json


5/10 * Epoch (train): 100% 3215/3215 [00:31<00:00, 103.30it/s, loss=0.052]
5/10 * Epoch (valid): 100% 358/358 [00:02<00:00, 177.50it/s, loss=0.137]


I0302 18:43:28.123273 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-summary.json
I0302 18:43:28.126941 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-history.jsonl


Early stop at 4 epoch
[2020-03-02 18:43:28,897] 
5/10 * Epoch 5 (train): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=117992.3068 | _timers/batch_time=0.0050 | _timers/data_time=0.0039 | _timers/model_time=0.0011 | loss=0.0458
5/10 * Epoch 5 (valid): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=126070.1315 | _timers/batch_time=0.0047 | _timers/data_time=0.0037 | _timers/model_time=0.0010 | loss=0.1956


I0302 18:43:28.897202 140452000810816 logging.py:153] 
5/10 * Epoch 5 (train): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=117992.3068 | _timers/batch_time=0.0050 | _timers/data_time=0.0039 | _timers/model_time=0.0011 | loss=0.0458
5/10 * Epoch 5 (valid): _base/lr=0.0010 | _base/momentum=0.9000 | _timers/_fps=126070.1315 | _timers/batch_time=0.0047 | _timers/data_time=0.0037 | _timers/model_time=0.0010 | loss=0.1956
I0302 18:43:28.902315 140452000810816 run_manager.py:1068] shutting down system stats and metadata service


Top best models:
/data2/competitions/quora-insincere-questions-classification/log_quora2_dup/checkpoints/train.2.pth	0.1483
/data2/competitions/quora-insincere-questions-classification/log_quora2_dup/checkpoints/train.1.pth	0.1522


I0302 18:43:29.124420 140445267433216 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-events.jsonl
I0302 18:43:29.126821 140445267433216 run_manager.py:677] file/dir created: /tmp/wandb/run-20200302_154029-wq8ax5n9/log.txt
I0302 18:43:29.129561 140445267433216 run_manager.py:677] file/dir created: /tmp/wandb/run-20200302_154029-wq8ax5n9/train_log/events.out.tfevents.1583163633.UNIT-1482.3385.0
I0302 18:43:29.132676 140445267433216 run_manager.py:677] file/dir created: /tmp/wandb/run-20200302_154029-wq8ax5n9/valid_log/events.out.tfevents.1583163664.UNIT-1482.3385.1
I0302 18:43:29.133514 140445267433216 run_manager.py:677] file/dir created: /tmp/wandb/run-20200302_154029-wq8ax5n9/checkpoints
I0302 18:43:29.133874 140445267433216 run_manager.py:677] file/dir created: /tmp/wandb/run-20200302_154029-wq8ax5n9/valid_log
I0302 18:43:29.134430 140445267433216 run_manager.py:677] file/dir created: /tmp/wandb/run-20200302_154029-wq8ax5n9/train_log
I0302 18:43:

In [15]:
dl.utils.unpack_checkpoint(dl.utils.load_checkpoint(f'{logdir}//checkpoints/best_full.pth'), model=model)

In [16]:
import scipy


# find threshold
runner = dl.SupervisedRunner()
y_proba = runner.predict_loader(model, valid_iter)
y_proba = 1 / (1 + np.exp(-y_proba))
y_true = np.concatenate([x['targets'].cpu().numpy() for x in valid_iter])
res = scipy.optimize.minimize(
    lambda t: -metrics.f1_score(y_true, (y_proba >= t).astype(np.int)),
    x0=0.5,
    method='Nelder-Mead',
    tol=1e-3,
)
threshold = res.x[0]


runner = dl.SupervisedRunner()
y_proba = runner.predict_loader(model, test_iter)
y_proba = 1 / (1 + np.exp(-y_proba))
y_true = np.concatenate([x['targets'].cpu().numpy() for x in test_iter])

auc_test = metrics.roc_auc_score(y_true, y_proba)
f1_test = metrics.f1_score(y_true, (y_proba >= threshold).astype(np.int))

print(f1_test, threshold, auc_test)
wandb.log({'scores/f1': f1_test, 'scores/f1_threshold': threshold, 'scores/f1_auc': auc_test})

0.6459476392079434 0.7101562500000005 0.9594608178612632


I0302 18:43:47.385812 140452000810816 run_manager.py:924] system metrics and metadata threads started
I0302 18:43:47.389264 140452000810816 run_manager.py:933] checking resume status, waiting at most 10 seconds
I0302 18:43:47.637253 140452000810816 run_manager.py:951] resuming run from id: UnVuOnYxOndxOGF4NW45OnRleHQtYXVnbWVudGF0aW9uOmRlbmFhcw==
I0302 18:43:47.660762 140452000810816 run_manager.py:963] upserting run before process can begin, waiting at most 10 seconds
I0302 18:43:47.962509 140445244200704 run_manager.py:1048] saving patches
I0302 18:43:48.007071 140445275825920 run_manager.py:688] file/dir modified: /tmp/wandb/run-20200302_154029-wq8ax5n9/wandb-metadata.json
I0302 18:43:48.487831 140445244200704 run_manager.py:1052] saving pip packages
I0302 18:43:48.489408 140445244200704 run_manager.py:1054] initializing streaming files api
I0302 18:43:48.490300 140445244200704 run_manager.py:1061] unblocking file change observer, beginning sync with W&B servers
I0302 18:43:48.491509