In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import sys

sys.path.append('/work/examples')

In [3]:
import torch
from bert_sentiment import *

In [4]:
torch.cuda.empty_cache()

In [5]:
df_train = pd.read_csv(CFG.TRAINING_FILE).fillna("none")

In [6]:
def train_kfold(df_train: pd.DataFrame, n_splits: int = CFG.N_SPLITS):
    oof = np.zeros(len(df_train), dtype=float)
    kf = model_selection.StratifiedKFold(
        n_splits=n_splits, shuffle=True, random_state=42)
    x = np.zeros(len(df_train))
    y = df_train['target'].values
    train_dataset = Dataset(
        text=df_train.text.values, target=df_train.target.values
    )
    for fold, (train_index, valid_index) in enumerate(kf.split(x, y)):
        k_train_dataset = torch.utils.data.Subset(train_dataset, train_index)
        k_train_data_loader = torch.utils.data.DataLoader(
            k_train_dataset, batch_size=CFG.TRAIN_BATCH_SIZE, num_workers=4
        )
        k_valid_dataset = torch.utils.data.Subset(train_dataset, valid_index)
        k_valid_data_loader = torch.utils.data.DataLoader(
            k_valid_dataset, batch_size=CFG.TRAIN_BATCH_SIZE, num_workers=4
        )
        device = torch.device(CFG.DEVICE)
        model = BertBase()
        model.to(device)
        param_optimizer = list(model.named_parameters())
        no_decay = ["bias", "LayerNorm.bias", "LayerNorm.weight"]
        optimizer_parameters = [
            {
                "params": [
                    p for n, p in param_optimizer if not any(nd in n for nd in no_decay)
                ],
                "weight_decay": 0.001,
            },
            {
                "params": [
                    p for n, p in param_optimizer if any(nd in n for nd in no_decay)
                ],
                "weight_decay": 0.0,
            },
        ]

        num_train_steps = int(
            len(df_train) / CFG.TRAIN_BATCH_SIZE * CFG.EPOCHS)
        optimizer = AdamW(optimizer_parameters, lr=3e-5)
        scheduler = get_linear_schedule_with_warmup(
            optimizer, num_warmup_steps=0, num_training_steps=num_train_steps
        )
        best_auc = 0
        for epoch in range(CFG.EPOCHS):
            outputs, targets = eval_fn(
                k_valid_data_loader, model, device)
            auc = metrics.roc_auc_score(y_true=targets, y_score=outputs)
            print(f"Fold = {fold}, Epoch = {epoch}, AUC = {auc}")
            train_fn(k_train_data_loader, model,
                     optimizer, device, scheduler)
            if auc > best_auc:
                torch.save(model.state_dict(),
                           f'/work/models/kb_test/model_{fold}.bin')
                best_auc = auc
                oof[valid_index] = outputs
    return oof

In [7]:
oof = train_kfold(df_train)

  0%|          | 0/30 [00:00<?, ?it/s]

Fold = 0, Epoch = 0, AUC = 0.6053101456007665


  0%|          | 0/59 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Fold = 0, Epoch = 1, AUC = 0.9463759490179571


  0%|          | 0/59 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Fold = 0, Epoch = 2, AUC = 0.9643328929986791


  0%|          | 0/59 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Fold = 1, Epoch = 0, AUC = 0.521774790599097


  0%|          | 0/59 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Fold = 1, Epoch = 1, AUC = 0.9338772192140752


  0%|          | 0/59 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Fold = 1, Epoch = 2, AUC = 0.954918925196336


  0%|          | 0/59 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Fold = 2, Epoch = 0, AUC = 0.5335702657131228


  0%|          | 0/59 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Fold = 2, Epoch = 1, AUC = 0.9609134251991395


  0%|          | 0/59 [00:00<?, ?it/s]

  0%|          | 0/30 [00:00<?, ?it/s]

Fold = 2, Epoch = 2, AUC = 0.9661026803883946


  0%|          | 0/59 [00:00<?, ?it/s]

In [8]:
metrics.roc_auc_score(y_true=df_train['target'].values, y_score=oof)

0.9563973471462459

In [9]:
df_test = pd.read_csv(CFG.TEST_FILE)

In [10]:
test_dataset = Dataset(
    text=df_test.text.values, target=np.zeros(len(df_test))
)
test_data_loader = torch.utils.data.DataLoader(
    test_dataset, batch_size=8, num_workers=4
)

In [11]:
df_test['target'] = np.zeros(len(df_test))

In [12]:
for i in range(CFG.N_SPLITS):
    device = torch.device(CFG.DEVICE)
    model = BertBase()
    model.to(device)
    model.load_state_dict(torch.load(
        f'/work/models/kb_test/model_{i}.bin'))
    preds, _ = eval_fn(test_data_loader, model, device)
    df_test['target'] = df_test['target'] + preds

  0%|          | 0/176 [00:00<?, ?it/s]

  0%|          | 0/176 [00:00<?, ?it/s]

  0%|          | 0/176 [00:00<?, ?it/s]

In [13]:
df_test['target'] = df_test['target'] / CFG.N_SPLITS