In [None]:
# True: score the competition file comments_to_score.csv; False: score the local test_comments.csv.
SUBMISSION = True
# Stride used when picking the reference comments that every comment is compared against
# in compute_scores(); it is multiplied by 4 later when the test set has fewer than 10000 rows.
SKIP = 16

# When True and the test set has fewer than 10000 rows, only a random sample of about 1000 rows is scored.
FAST_SUBMISSION = True

In [None]:
# Installations & Downloads
!pip install -U --no-build-isolation --no-deps /kaggle/input/transformers-master/ -qq
!pip install --no-index --find-links /kaggle/input/hf-datasets/wheels datasets -qq

# Notebook Imports & Setup
from functools import partial
from tqdm.auto import tqdm
from pathlib import Path
import tensorflow as tf
from time import time
import pandas as pd
import numpy as np
import random 
import joblib
import re

import tensorflow_addons as tfa
import tensorflow as tf

import transformers
import tokenizers
import datasets

# Enable Mixed Precision and JIT Compilation
def _enable_mixed_precision():
    """Set the global Keras dtype policy to ``mixed_float16``.

    Uses the stable ``tf.keras.mixed_precision.set_global_policy`` API when the
    installed TensorFlow provides it, and falls back to the older
    ``experimental`` module (``Policy`` + ``set_policy``) otherwise, so the
    notebook keeps working across TensorFlow versions.
    """
    from tensorflow.keras import mixed_precision

    if hasattr(mixed_precision, 'set_global_policy'):
        mixed_precision.set_global_policy('mixed_float16')
        return

    # Older TensorFlow releases only expose the experimental API.
    from tensorflow.keras.mixed_precision import experimental as legacy_mixed_precision
    policy = legacy_mixed_precision.Policy('mixed_float16')
    legacy_mixed_precision.set_policy(policy)

# Set the global mixed_float16 policy up-front; the models are only built in later cells.
_enable_mixed_precision()
# Switch on JIT compilation in the TensorFlow graph optimizer.
tf.config.optimizer.set_jit(True)

In [None]:
BACKBONES_DIR = Path('/kaggle/input/toxic-internet-deep-model-backbones')
WEIGHTS_DIR = Path('/kaggle/input/toxic-internet-ensemble-model-weights')
if SUBMISSION: 
    test = pd.read_csv('../input/jigsaw-toxic-severity-rating/comments_to_score.csv')
    test['comment_text'] = test.text
else: 
    test = pd.read_csv('../input/toxic-dataframes/test_comments.csv')
    test['text'] = test.comment_text

# old = pd.read_csv('../input/toxic-dataframes/old_pseudo_label.csv')
# valid = pd.read_csv('../input/toxic-dataframes/valid.csv')
# #valid = valid[valid.fold==3]
# valid = valid[valid.more_toxic.isin(old.comment_text) & valid.less_toxic.isin(old.comment_text)]
# # # valid = pd.concat([valid, pd.read_csv('../input/toxic-dataframes/manual_pair_labels (1).csv')])
# comments = np.unique(np.concatenate([valid.more_toxic.values, valid.less_toxic.values]))
# test = pd.DataFrame({'comment_text': comments, 'text': comments})
# test['comment_id'] = 0

if len(test) < 10000: 
    SKIP = SKIP * 4
if FAST_SUBMISSION and len(test) < 10000: 
    test = test.sample(1000)

pipeline = joblib.load('/kaggle/input/toxic-dataframes/pipeline_lb864.pkl')
%time test['tfidf_score'] = pipeline.predict(test.text)
test['tfidf_score'] = test.tfidf_score.rank(method='first')
TFIDF_SCORES = test.tfidf_score.values

robertal_tokenizer = transformers.AutoTokenizer.from_pretrained(BACKBONES_DIR/'roberta_large')
with tf.device('/device:GPU:0'): 
    robertal_backbone = transformers.TFAutoModel.from_pretrained(BACKBONES_DIR/'roberta_large')

# Model Type 1: Single Backbone w. Late Interaction Layer

In [None]:
def convert_dataset_to_test_ds(dataset):
    """Turn a tokenized Hugging Face dataset into a ``tf.data.Dataset``.

    The dataset is switched to numpy format in place, then its ``input_ids``
    and ``attention_mask`` columns are cast to int32 and sliced row by row.
    Every element has the form
    ``((input_ids, attention_mask), (input_ids, attention_mask))``: the inputs
    are zipped with themselves to get a two-part element (the second part is
    presumably just a placeholder target, since this notebook only predicts).
    """
    dataset.set_format(type='numpy')
    column_datasets = tuple(
        tf.data.Dataset.from_tensor_slices(dataset[column].astype(np.int32))
        for column in ('input_ids', 'attention_mask')
    )
    inputs_ds = tf.data.Dataset.zip(column_datasets)
    return tf.data.Dataset.zip((inputs_ds, inputs_ds))

def tokenize(examples):
    """Tokenize the ``comment_text`` field of a batch of examples.

    Calls the module-level ``tokenizer`` with ``max_length=MAX_SEQ_LEN``,
    ``padding='max_length'`` and ``truncation=True`` and returns its result
    unchanged. Both ``tokenizer`` and ``MAX_SEQ_LEN`` are read from globals at
    call time, so they must be set before this function is used.
    """
    texts = examples['comment_text']
    encode_options = dict(
        max_length=MAX_SEQ_LEN,
        padding='max_length',
        truncation=True,
    )
    return tokenizer(texts, **encode_options)

def df_to_test_ds(df):
    """Build a batched, prefetching ``tf.data.Dataset`` from a DataFrame.

    The frame is wrapped in a Hugging Face dataset, tokenized in batches with
    ``tokenize``, converted by ``convert_dataset_to_test_ds`` and finally
    batched with the global ``BATCH_SIZE``.
    """
    tokenized = datasets.Dataset.from_pandas(df).map(tokenize, batched=True)
    batched_ds = convert_dataset_to_test_ds(tokenized).batch(BATCH_SIZE)
    return batched_ds.prefetch(tf.data.AUTOTUNE)

def load_tokenizer_and_backbone(backbone_name):
    """Return a ``(tokenizer, backbone)`` pair for ``backbone_name``.

    ``'roberta_large'`` is served from the module-level
    ``robertal_tokenizer`` / ``robertal_backbone`` objects, so every caller
    receives the same backbone instance (weights loaded into it afterwards
    replace whatever was loaded before). Any other name is loaded from
    ``BACKBONES_DIR / backbone_name``, with the model created on GPU 0.
    """
    if backbone_name != 'roberta_large':
        folder = BACKBONES_DIR / backbone_name
        print('Loading tokenizer and backbone from', folder)
        model_path = str(folder)
        loaded_tokenizer = transformers.AutoTokenizer.from_pretrained(model_path)
        with tf.device('/device:GPU:0'):
            loaded_backbone = transformers.TFAutoModel.from_pretrained(model_path)
        return loaded_tokenizer, loaded_backbone
    return robertal_tokenizer, robertal_backbone

def build_hidden_layer(hidden_layer_units):
    """Build the stack of hidden Dense layers placed before a score layer.

    Args:
        hidden_layer_units: iterable of layer widths, or ``None``.

    Returns:
        An identity callable when ``hidden_layer_units`` is ``None``; otherwise
        a ``tf.keras.Sequential`` named ``'hidden_layer'`` holding one
        mish-activated Dense layer per width, in the given order.
    """
    if hidden_layer_units is None:
        return lambda x: x
    dense_stack = [
        tf.keras.layers.Dense(width, activation=tfa.activations.mish)
        for width in hidden_layer_units
    ]
    return tf.keras.Sequential(dense_stack, name='hidden_layer')

def build_model(backbone):
    """Wrap ``backbone`` in a Keras model that outputs one sigmoid score.

    Inputs are two int32 tensors of length ``MAX_SEQ_LEN`` (token ids and
    attention mask). The backbone's ``pooler_output`` goes through the hidden
    stack built from the global ``HIDDEN_LAYERS`` and then a single-unit
    sigmoid Dense layer.

    NOTE(review): ``HIDDEN_LAYERS`` is not assigned anywhere in the visible
    notebook and this function is not called there - confirm it is still needed.
    """
    token_ids = tf.keras.Input((MAX_SEQ_LEN,), dtype=tf.int32)
    mask = tf.keras.Input((MAX_SEQ_LEN,), dtype=tf.int32)

    pooled = backbone(
        input_ids=token_ids,
        attention_mask=mask,
        return_dict=True,
    ).pooler_output
    features = build_hidden_layer(HIDDEN_LAYERS)(pooled)

    score = tf.keras.layers.Dense(1, activation='sigmoid')(features)
    return tf.keras.Model([token_ids, mask], outputs=score)


def build_interaction_model():
    """Build the pairwise model that scores comment A against comment B.

    Takes two float32 embeddings of width 1024 and concatenates them (plus
    their element-wise absolute difference when the global
    ``CONCAT_ABS_DIFF`` is truthy). The result passes through the hidden stack
    built from the global ``INTER_HIDDEN_LAYERS`` and a single-unit sigmoid
    Dense layer.
    """
    embed_dim = 1024
    left = tf.keras.Input((embed_dim,), dtype=tf.float32)
    right = tf.keras.Input((embed_dim,), dtype=tf.float32)

    parts = [left, right]
    if CONCAT_ABS_DIFF:
        parts.append(tf.math.abs(left - right))
    merged = tf.concat(parts, axis=-1)

    hidden = build_hidden_layer(INTER_HIDDEN_LAYERS)(merged)
    score = tf.keras.layers.Dense(1, activation='sigmoid')(hidden)
    return tf.keras.Model([left, right], outputs=score)

def temp_model(backbone):
    """Wrap ``backbone`` in a Keras model that returns its ``pooler_output``.

    The model takes two int32 inputs of length ``MAX_SEQ_LEN`` (token ids and
    attention mask) and outputs the backbone's pooled representation as is.
    """
    model_inputs = [tf.keras.Input((MAX_SEQ_LEN,), dtype=tf.int32) for _ in range(2)]
    token_ids, mask = model_inputs
    pooled = backbone(input_ids=token_ids, attention_mask=mask).pooler_output
    return tf.keras.Model(model_inputs, outputs=pooled)

def get_pooler_output(backbone, ds):
    """Run ``backbone`` over ``ds`` and return its pooled embeddings as float32.

    Args:
        backbone: model accepted by ``temp_model``.
        ds: dataset passed straight to ``Model.predict``.

    Returns:
        A 2-D float32 array with one row per predicted example. Any singleton
        axes after the row axis are flattened away, but the row axis is always
        kept - a plain ``np.squeeze`` would also drop it for a single-row
        input and break the per-row indexing done by the callers.
    """
    pooled = temp_model(backbone).predict(ds, verbose=1).astype(np.float32)
    return pooled.reshape(pooled.shape[0], -1)

def build_test_ds(batch_i, num_compares, B_x):
    """Build the pairwise dataset for one batch of comments.

    Each of the ``INTR_BATCH_SIZE`` rows of the global ``sub_x`` belonging to
    batch ``batch_i`` is repeated ``num_compares`` times (row 0 x num_compares,
    then row 1 x num_compares, ...) and paired position by position with the
    rows of ``B_x``.

    NOTE: the embeddings are read from the module-level ``sub_x``, not from an
    argument, so that global must hold the embeddings being scored.

    Args:
        batch_i: zero-based batch index into ``sub_x``.
        num_compares: how many reference rows each comment is compared with.
        B_x: reference rows, already tiled to line up with the repeated A rows.

    Returns:
        A batched, prefetching dataset whose elements are ``((A, B), A)``.

    Raises:
        IndexError: if the batch reaches past the end of ``sub_x``.
    """
    start_i = batch_i * INTR_BATCH_SIZE
    # Index with an explicit range rather than a slice so that an out-of-range
    # batch raises instead of silently yielding fewer rows.
    batch_rows = sub_x[np.arange(start_i, start_i + INTR_BATCH_SIZE)]
    # One vectorized repeat instead of stacking row copies in Python loops.
    A_x = np.repeat(batch_rows, num_compares, axis=0)

    A_ds = tf.data.Dataset.from_tensor_slices(A_x)
    B_ds = tf.data.Dataset.from_tensor_slices(B_x)
    pairs_ds = tf.data.Dataset.zip((A_ds, B_ds))
    # The second element only fills the "target" slot of the element.
    ds = tf.data.Dataset.zip((pairs_ds, A_ds))
    return ds.batch(4096*4).prefetch(tf.data.AUTOTUNE)

def split_for_each_i(array, num_compares):
    """Cut ``array`` into consecutive chunks of ``num_compares`` items.

    Returns a list of sub-arrays; the last one is shorter when the length of
    ``array`` is not a multiple of ``num_compares``.
    """
    cut_points = np.arange(num_compares, len(array), num_compares)
    chunks = np.split(array, cut_points)
    return chunks

def compute_scores(sub_x, sub):
    """Score every comment by comparing it against a strided set of references.

    The reference rows are ``sub_x[START::SKIP]`` (bounded by ``len(sub)``).
    For every full batch of ``INTR_BATCH_SIZE`` comments the global ``scorer``
    predicts one value per (comment, reference) pair; a comment's raw score is
    the number of predictions above 0.5 plus twice their mean.

    Comments left over after the last full batch are not scored by the model:
    they get ``sum(scores) / len(sub)`` plus a uniform random number.
    NOTE(review): that baseline divides by ``len(sub)`` rather than by the
    number of scored comments - confirm this is intended.

    Side effects:
        * writes the raw ``score`` column into ``sub`` and saves
          ``submission.csv`` (``comment_id`` and raw ``score``);
        * then replaces ``sub['tfidf_score']`` and ``sub['score']`` by their
          ranks (``method='first'``) and displays ``sub`` sorted by score.

    Args:
        sub_x: 2-D array of embeddings, one row per row of ``sub``.
        sub: DataFrame with ``comment_id`` and ``tfidf_score`` columns.

    Returns:
        A fresh 1-D array with the ranked scores. It is a copy, so later calls
        that overwrite ``sub['score']`` (or callers accumulating in place with
        ``+=``) cannot corrupt previously returned results.
    """
    reference_x = sub_x[np.arange(START, len(sub), SKIP)]
    num_compares = len(reference_x)
    # Tile the references once so they line up with the repeated comment rows
    # of a whole batch (np.vstack is the non-deprecated spelling of row_stack).
    B_x = np.vstack([reference_x] * INTR_BATCH_SIZE)

    win_threshold = 0.50
    sub_scores = []
    for batch_i in tqdm(range(len(sub)//INTR_BATCH_SIZE)):
        test_ds = build_test_ds(batch_i, num_compares, B_x)
        scores = scorer.predict(test_ds, verbose=0)
        # Flatten to 1-D without ever collapsing to a 0-d array.
        scores = scores.astype(np.float32).ravel()
        for score_arr in split_for_each_i(scores, num_compares):
            num_wins = (score_arr > win_threshold).sum()
            sub_scores.append(num_wins + 2*score_arr.mean())

    num_additions = len(sub) - len(sub_scores)
    print(f'Adding {num_additions} random values')
    if num_additions > 0:
        # The baseline is loop-invariant: compute it once, not per filler value.
        filler_base = sum(sub_scores) / len(sub)
        sub_scores = sub_scores + [filler_base + random.random() for _ in range(num_additions)]

    sub['score'] = sub_scores
    sub[['comment_id', 'score']].to_csv('submission.csv', index=False)
    sub['tfidf_score'] = sub['tfidf_score'].rank(method='first')
    sub['score'] = sub['score'].rank(method='first')
    display(sub.sort_values(by='score'))
    # Copy: `.values` may be a view of the column that the next call overwrites.
    return sub['score'].to_numpy(copy=True)

In [None]:
# Settings shared by every ensemble member scored in this cell.
WEIGHTS_DIR = Path('../input/toxic-internet-ensemble-model-weights')
BACKBONE = 'roberta_large'
CONCAT_ABS_DIFF = True
BATCH_SIZE = 512

# The two interaction-head layouts used by most members.
WIDE_HEAD = [1024, 256, 64, 16, 4]
NARROW_HEAD = [256, 64, 16, 4]


def _score_ensemble_member(label, backbone_wt, intr_wt, inter_hidden_layers,
                           max_seq_len, starts, intr_batch_size=128):
    """Load one backbone/interaction-head pair and score ``test`` with it.

    The helper functions defined earlier (``tokenize``, ``df_to_test_ds``,
    ``build_interaction_model``, ``build_test_ds``, ``compute_scores``) read
    their settings from module-level names, so this function assigns those
    globals before calling them.

    Args:
        label: name printed (followed by ``': \\n'``) before scoring starts.
        backbone_wt: backbone weight file name inside ``WEIGHTS_DIR``.
        intr_wt: interaction-model weight file name inside ``WEIGHTS_DIR``.
        inter_hidden_layers: hidden layer widths of the interaction model.
        max_seq_len: token length used for tokenization and the backbone input.
        starts: ``START`` offsets to score with; one ``compute_scores`` run each.
        intr_batch_size: number of comments scored per interaction batch.

    Returns:
        The element-wise mean of the ranked scores over all ``starts``
        (for a single start this is just that run's scores).
    """
    global INTER_HIDDEN_LAYERS, MAX_SEQ_LEN, START, INTR_BATCH_SIZE
    global BACKBONE_WT, INTR_WT
    global tokenizer, backbone, sub_x, scorer

    INTER_HIDDEN_LAYERS = list(inter_hidden_layers)
    MAX_SEQ_LEN = max_seq_len
    INTR_BATCH_SIZE = intr_batch_size
    BACKBONE_WT, INTR_WT = backbone_wt, intr_wt

    with tf.device('/device:GPU:0'):
        tokenizer, backbone = load_tokenizer_and_backbone(BACKBONE)
        backbone.load_weights(WEIGHTS_DIR/BACKBONE_WT)
        sub_x = get_pooler_output(backbone, df_to_test_ds(test))
        scorer = build_interaction_model()
        scorer.load_weights(WEIGHTS_DIR/INTR_WT)

    print(f'{label}: \n')
    total = None
    for start in starts:
        START = start
        run_scores = compute_scores(sub_x, test)
        # Accumulate out of place: the returned array may share memory with
        # the DataFrame column that the next run overwrites.
        total = run_scores.copy() if total is None else total + run_scores
    return total / len(starts)


MODEL1_LB863_SCORES = _score_ensemble_member(
    'MODEL1_LB863_SCORES',
    'backbonelb844_loss1402_acc772.h5', 'inter_model_loss1402_acc772_hi1024.h5',
    WIDE_HEAD, max_seq_len=256, starts=[0])

# Model 2 is scored against four different reference offsets and averaged.
MODEL2_LB872_SCORES = _score_ensemble_member(
    'MODEL2_LB872_SCORES',
    'backbonelb839_loss152_acc778.h5', 'inter_model_loss152_acc778.h5',
    NARROW_HEAD, max_seq_len=256, starts=[1, SKIP-1, SKIP-2, SKIP-3])

MODEL3_SCORES = _score_ensemble_member(
    'MODEL3_SCORES',
    'backbonelb844_loss1488_acc776.h5', 'inter_model_loss1488_acc776.h5',
    WIDE_HEAD, max_seq_len=256, starts=[2])

MODEL4_LB852_SCORES = _score_ensemble_member(
    'MODEL4_LB852_SCORES',
    'backbonelb838_loss1544_acc765.h5', 'inter_model_loss1544_acc765.h5',
    NARROW_HEAD, max_seq_len=256, starts=[3])

MODEL5_SCORES = _score_ensemble_member(
    'MODEL5_SCORES',
    'backbonelb8392_loss1503_acc782.h5', 'inter_model_loss1503_acc782.h5',
    NARROW_HEAD, max_seq_len=192, starts=[4])

MODEL6_SCORES = _score_ensemble_member(
    'MODEL6_SCORES',
    'backbonelb844freeze_loss128_acc75.h5', 'inter_model_loss128_acc75.h5',
    WIDE_HEAD, max_seq_len=256, starts=[5])

MODEL7_SCORES = _score_ensemble_member(
    'MODEL7_SCORES',
    'backboneVanillaFreezelb837_loss1406_acc80.h5', 'inter_model_loss1406_acc80.h5',
    WIDE_HEAD, max_seq_len=192, starts=[6])

MODEL8_SCORES = _score_ensemble_member(
    'MODEL8_SCORES',
    'backboneseq256_loss152_acc77.h5', 'inter_model_loss152_acc77.h5',
    WIDE_HEAD, max_seq_len=256, starts=[7])

# Model 9 uses the same offset (7) as model 8.
MODEL9_SCORES = _score_ensemble_member(
    'MODEL9_SCORES',
    'backbone_loss1098_acc798_trloss176.h5', 'inter_model_loss1098_acc798_trloss176.h5',
    NARROW_HEAD, max_seq_len=256, starts=[7])

MODEL11_SCORES = _score_ensemble_member(
    'MODEL11_SCORES',
    'backbone_loss1044_acc86_any10_both100.h5', 'inter_model_loss1044_acc86_any10_both100.h5',
    WIDE_HEAD, max_seq_len=256, starts=[8])

MODEL12_SCORES = _score_ensemble_member(
    'MODEL12_SCORES',
    'backbone_loss1344_acc77.h5', 'inter_model_loss1344_acc77.h5',
    WIDE_HEAD, max_seq_len=256, starts=[9])

# Model 10 compares against 4x denser references (SKIP // 4) in 4x smaller
# batches, and its ranked scores are scaled down by 4 before blending.
# SKIP is restored afterwards so re-running this cell does not keep shrinking it.
_BASE_SKIP = SKIP
try:
    SKIP = _BASE_SKIP // 4
    MODEL10_SCORES_LB862 = _score_ensemble_member(
        'MODEL10_SCORES_LB862',
        'backbone_loss20_acc100_trloss21.h5', 'inter_model_loss20_acc100_trloss21.h5',
        [512, 128, 32, 8], max_seq_len=256, starts=[0], intr_batch_size=128 // 4)
    MODEL10_SCORES_LB862 = MODEL10_SCORES_LB862 / 4
finally:
    SKIP = _BASE_SKIP

In [None]:
# SCORES = [
#     MODEL1_LB863_SCORES, MODEL2_LB872_SCORES, MODEL3_SCORES, MODEL5_SCORES, 
#     MODEL4_LB852_SCORES, MODEL6_SCORES, MODEL7_SCORES, MODEL8_SCORES, MODEL9_SCORES, 
#     MODEL10_SCORES_LB862, MODEL11_SCORES, MODEL12_SCORES, 
# ]
# less_toxic_comments = valid.less_toxic.values
# more_toxic_comments = valid.more_toxic.values
# test_comments = test.comment_text.values

# BEST_ACC = 0
# for i in tqdm(range(300000)): 
#     weights = [random.random()-0.01*random.random() for _ in SCORES]
#     final_score = np.array([score*wt for score, wt in zip(SCORES, weights)]).sum(axis=0)
#     comment_to_score = {test_comment: score for test_comment, score in zip(test_comments, final_score)}
    
#     correct, wrong = 0, 0
#     for less_toxic, more_toxic in zip(less_toxic_comments, more_toxic_comments): 
#         if comment_to_score[more_toxic] > comment_to_score[less_toxic]: 
#             correct += 1
#         else: 
#             wrong += 1
#     acc = correct / (correct+wrong)
#     if acc > BEST_ACC: 
#         BEST_ACC = acc
#         print(f'new best accuracy ({i}): ', BEST_ACC)
#         print('weights: ', weights)
#         print('*'*100)

In [None]:
# Model 6: Good at recognizing low toxicity comments
# Model 9: Very Very bad at low toxicity comments
# Model 10: Horrible predictions but somehow decent score of 862?

# (weight, ranked scores) per ensemble member. The order is kept exactly as in
# the original weighted sum so the floating-point result stays the same.
ENSEMBLE_WEIGHTS = [
    (0.05064488852494361, MODEL1_LB863_SCORES),
    (0.9130546627400447, MODEL2_LB872_SCORES),
    (0.0034512375086494518, MODEL3_SCORES),
    (0.18933604708602006, MODEL5_SCORES),
    (0.4050676922435245, MODEL4_LB852_SCORES),
    (0.04471655339978278, MODEL6_SCORES),
    (0.03217086659636254, MODEL7_SCORES),
    (0.04340479591574183, MODEL8_SCORES),
    (0.132781900698542, MODEL9_SCORES),
    (0.2518645874220956, MODEL10_SCORES_LB862),
    (0.32597758670120247, MODEL11_SCORES),
    (0.4428370339374257, MODEL12_SCORES),
]

blended_scores = sum(weight * scores for weight, scores in ENSEMBLE_WEIGHTS)
assert len(blended_scores) == len(test), 'every comment needs exactly one blended score'

# Use item assignment: attribute assignment (`test.score = ...`) only targets a
# column when that column already exists.
test['score'] = blended_scores
test['score'] = test['score'].rank(method='first')
test[['comment_id', 'score']].to_csv('/kaggle/working/submission.csv', index=False)

display(test.sort_values(by='score'))
display(test.sort_values(by='tfidf_score'))
display(test.sample(frac=1.))

In [None]:
# LB: 873
# test.score = \
# MODEL_1_SCORES + \
# 4*MODEL_2_SCORES + \
# 2*MODEL_3_SCORES + \
# MODEL_5_SCORES/2 + \
# MODEL_6_SCORES/2 + \
# MODEL_7_SCORES + \
# MODEL_8_SCORES_LB844/4 + \
# MODEL_9_SCORES/4

# INTER_HIDDEN_LAYERS = [1024, 256, 64, 16, 4]
# CONCAT_ABS_DIFF = True

# MAX_SEQ_LEN, BATCH_SIZE = 256, 512
# START, INTR_BATCH_SIZE = 4, 256

# BACKBONE_WT = 'backboneseq256_loss15_acc78.h5'
# INTR_WT = 'inter_model_loss15_acc78.h5'

# WEIGHTS_DIR = Path('../input/toxic-internet-ensemble-model-weights')
# BACKBONE = 'roberta_large'

# with tf.device('/device:GPU:0'): 
#     tokenizer, backbone = load_tokenizer_and_backbone(BACKBONE)
#     backbone.load_weights(WEIGHTS_DIR/BACKBONE_WT)
#     sub_x = get_pooler_output(backbone, df_to_test_ds(test))
#     scorer = build_interaction_model()
#     scorer.load_weights(WEIGHTS_DIR/INTR_WT)
    
# MODEL8_SCORES_LB844 = compute_scores(sub_x, test)