In [1]:
import multiprocessing
from multiprocessing import Process, Queue


# from numba import cuda
# def free_gpu():
#     # cuda.select_device(0)
#     # cuda.close()
#     # cuda.select_device(0)
#     device = cuda.get_current_device()
#     device.reset()


# # free_gpu()

## BERT + 1/2 BiLSTM

In [2]:
import pandas as pd
import numpy as np
import tensorflow as tf
import transformers
from tqdm import tqdm
from sklearn.model_selection import train_test_split
import re
import emoji
import gc

In [3]:
## Constants
# Configuration for the BERT + BiLSTM part of the notebook. Only some of these
# names are referenced by the cells visible here (e.g. text_key, batch_size,
# bert_path, max_length, test_path, load_model_path).
target_key = "score"
text_key = "text" # name of the text column; alternative seen elsewhere: "txt"
batch_size = 32
bert_path = "bert-base-uncased" # local alternative: "../input/bertbaseuncased/bert-base-uncased"
train_path = "../input/clean-civil-data-jigsaw-downsampled/clean_civil.csv"
lstm_hidden_dim = 64 # from 768 (bert) to 64; the decrease is steep, so information may be lost
dropout_rate = 0.3
output_dim = 1
max_length = 350 # token length CivilDataGenerator pads/truncates every text to
n_epochs = 1
checkpoint_filepath = "./bert/ckpt-loss={loss:.5f}-epoch={epoch}-batch={batch}"
save_after_batches = 5000
test_size_percent =  0.05
test_path = "./train/comments_to_score.csv" # Kaggle alternative: "../input/jigsaw-toxic-severity-rating/comments_to_score.csv"

## bert-1-bilstm ##
# Read by evaluate() at call time; rebound further down for the bert-2 model.
load_model_path = "./output/bert-1-bilstm/bert/final_model" # Kaggle alternative: "../input/jisaw-bert-1-bilstm-epoch-2-loss01263/final_model"

In [4]:
# Load the comments to score; the prediction cells below read their text from this frame.
comments_to_score_df = pd.read_csv(test_path)
comments_to_score_df.shape

(7537, 2)

In [5]:
class CivilDataGenerator(tf.keras.utils.Sequence): # could optimize more like BucketIterator for padding
    """Keras ``Sequence`` that tokenizes batches of raw texts on the fly.

    Args:
        texts: numpy array of strings (it is indexed with an index array and
            converted with ``.tolist()``).
        scores: numpy array of targets aligned with ``texts``; may be ``None``
            when ``include_targets`` is False.
        tokenizer: object exposing ``batch_encode_plus`` (a BERT tokenizer in
            this notebook).
        batch_size: number of texts per batch.
        shuffle: shuffle the sample order at construction and after every epoch.
        include_targets: when True ``__getitem__`` returns ``(inputs, scores)``,
            otherwise only ``inputs`` (inference).

    Raises:
        ValueError: if ``include_targets`` is True but ``scores`` is None.
    """

    def __init__(self, texts, scores, tokenizer, batch_size=batch_size, shuffle=True, include_targets=True): # texts -> numpy array
        if include_targets and scores is None:
            # Fail here with a clear message instead of a TypeError deep inside
            # __getitem__ when None gets indexed.
            raise ValueError("scores must be provided when include_targets=True")
        self.texts = texts
        self.scores = scores
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.include_targets = include_targets
        # Tokenizer used to encode every batch of texts.
        self.tokenizer = tokenizer
        self.indexes = np.arange(len(self.texts))
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (ceil of samples / batch_size)."""
        # The previous expression parsed as `(n // bs + 1) if n % bs != 0 else 0`
        # and therefore reported 0 batches whenever n was an exact multiple of
        # batch_size. Integer ceil-division handles both cases (and n == 0).
        return (len(self.texts) + self.batch_size - 1) // self.batch_size

    def on_epoch_end(self):
        """Shuffle the sample order in place when ``shuffle`` is enabled."""
        if self.shuffle:
            # A fresh RandomState(42) is created on every call, so the same
            # permutation is applied to the current (already permuted) order.
            np.random.RandomState(42).shuffle(self.indexes)

    def __getitem__(self, idx): # idx -> index batch
        """Return batch ``idx`` as ``[input_ids, attention_masks]`` (plus scores if enabled)."""
        indexes = self.indexes[idx * self.batch_size : (idx + 1) * self.batch_size]
        texts = self.texts[indexes]

        # Each text is encoded on its own (single-sentence input), padded or
        # truncated to the module-level `max_length`.
        encoded = self.tokenizer.batch_encode_plus(
            texts.tolist(),
            add_special_tokens=True, # not really needed in our case.
            max_length=max_length, # bert has 512 max length # providing our own
            return_attention_mask=True, # needed because inputs are padded to max length
            return_token_type_ids=False, # only needed for sentence-pair inputs
            padding='max_length',
            return_tensors="tf",
            truncation=True,
        )

        # Convert batch of encoded features to numpy arrays.
        input_ids = np.array(encoded["input_ids"], dtype="int32")
        attention_masks = np.array(encoded["attention_mask"], dtype="int32")

        # Targets are only returned for training/validation generators.
        if self.include_targets:
            scores = np.array(self.scores[indexes], dtype="float32")
            return [input_ids, attention_masks], scores
        else:
            return [input_ids, attention_masks]
        

In [8]:
# model = tf.keras.models.load_model(load_model_path)

2022-02-04 01:32:19.805167: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:2b:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-02-04 01:32:19.826341: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:2b:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-02-04 01:32:19.826796: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:2b:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-02-04 01:32:19.827629: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow wi

In [6]:
# Tokenizer handed to CivilDataGenerator below; loaded from `bert_path`.
tokenizer = transformers.BertTokenizer.from_pretrained(bert_path, do_lower_case=True)

In [8]:
# Inference-only generator over the comments to score: row order is preserved
# (shuffle=False) and no targets are produced.
test_data = CivilDataGenerator(
    texts=comments_to_score_df[text_key].values,
    scores=None,  # no target while inferring
    tokenizer=tokenizer,
    batch_size=batch_size,
    shuffle=False,
    include_targets=False,  # added for inference
)

In [9]:
# with tf.device('/device:GPU:0'):
#     bert_1_biLstm = model.predict(
#         test_data,
#         use_multiprocessing=True, # can only be used when x, y are generators
#         workers=-1,
#         verbose=1,
#     )

# bert_1_biLstm

def evaluate(test_data, queue):
    """Load the model at ``load_model_path``, predict on ``test_data`` and put the result on ``queue``.

    Used in this notebook as a ``multiprocessing.Process`` target, so the model
    is loaded inside the child process rather than in the notebook kernel.

    Args:
        test_data: input passed straight to ``model.predict``.
        queue: object with a ``put`` method that receives the predictions.
    """
    # NOTE(review): workers=-1 is not a documented value for Keras predict;
    # kept as-is, confirm the intended worker count.
    predict_options = {
        "use_multiprocessing": True,  # can only be used when x, y are generators
        "workers": -1,
        "verbose": 1,
    }
    loaded_model = tf.keras.models.load_model(load_model_path)
    with tf.device('/device:GPU:0'):
        predictions = loaded_model.predict(test_data, **predict_options)
    queue.put(predictions)

# Run the bert-1 prediction in a child process and fetch its result through a queue.
q = Queue()
process_eval = multiprocessing.Process(target=evaluate, args=(test_data, q))
process_eval.start()

# Wait until the child has either produced a result or died. Joining BEFORE
# reading the queue can deadlock: a child that has put a large object on a
# multiprocessing.Queue does not exit until that data has been consumed.
while process_eval.is_alive() and q.empty():
    process_eval.join(timeout=1)

if q.empty():
    # The child exited without putting anything; q.get() would block forever.
    process_eval.join()
    raise RuntimeError(
        f"evaluate() exited with code {process_eval.exitcode} without returning predictions"
    )

bert_1_biLstm = q.get()
process_eval.join()

2022-02-04 01:33:58.048012: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:2b:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-02-04 01:33:58.068184: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:2b:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-02-04 01:33:58.068561: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:2b:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-02-04 01:33:58.069097: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow wi



In [11]:
# Show the predictions received from the evaluate() subprocess.
bert_1_biLstm

array([[0.1559605 ],
       [0.14655367],
       [0.6794217 ],
       ...,
       [0.34467226],
       [1.29173   ],
       [0.29447895]], dtype=float32)

In [10]:
# del model  # kept disabled: in this version the bert-1 model is loaded inside evaluate(), not in this kernel
gc.collect()

4

In [17]:
## bert-2-bilstm ##
# NOTE: this rebinds load_model_path (previously the bert-1 path). evaluate()
# reads that global when called, so re-running it after this cell would load
# the bert-2 model instead.
load_model_path = "./output/bert-2-bilstm/bert/final_model" #"../input/jisaw-bert-2-bilstm-epoch2-loss-011490/final_model"
model = tf.keras.models.load_model(load_model_path)

In [18]:
# Predict with the bert-2 model directly in this kernel (bert-1 was run in a
# subprocess via evaluate()).
with tf.device('/device:GPU:0'):
    bert_2_biLstm = model.predict(
        test_data,
        use_multiprocessing=True, # can only be used when x, y are generators
        workers=-1, # NOTE(review): not a documented value; confirm intended worker count
        verbose=1,
    )

bert_2_biLstm



array([[0.25757378],
       [0.12379802],
       [0.7542055 ],
       ...,
       [0.2717326 ],
       [1.7095122 ],
       [0.35591382]], dtype=float32)

In [19]:
# Drop the references to the TF model and the generator, then force a collection.
del model, test_data
# free_gpu()
gc.collect()

603828

## UnitaryAI

In [15]:
import numpy as np 
import pandas as pd
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from tqdm import tqdm

In [16]:
import torch
import transformers

# Download URLs of the Detoxify checkpoints, keyed by model type; used by
# load_checkpoint() when no local checkpoint path is given.
# NOTE(review): there are no "original-small" / "unbiased-small" entries, yet
# toxic_albert() and unbiased_albert() request those keys — confirm whether
# the URLs are missing here.
MODEL_URLS = {
    "original": "https://github.com/unitaryai/detoxify/releases/download/v0.1-alpha/toxic_original-c1212f89.ckpt",
    "unbiased": "https://github.com/unitaryai/detoxify/releases/download/v0.3-alpha/toxic_debiased-c7548aa0.ckpt",
    "multilingual": "https://github.com/unitaryai/detoxify/releases/download/v0.4-alpha/multilingual_debiased-0b549669.ckpt"
}

# Default `checkpoint` argument of Detoxify.__init__; None makes
# load_checkpoint() fall back to MODEL_URLS.
PRETRAINED_MODEL = None


def get_model_and_tokenizer(
    model_type, model_name, tokenizer_name, num_classes, state_dict, huggingface_config_path=None
):
    """Build a transformers model from ``state_dict`` together with its tokenizer.

    Args:
        model_type: config/tokenizer identifier used when no local config path is given.
        model_name: name of the model class to look up on ``transformers``.
        tokenizer_name: name of the tokenizer class to look up on ``transformers``.
        num_classes: forwarded as ``num_labels``.
        state_dict: weights forwarded to ``from_pretrained``.
        huggingface_config_path: optional local path; when not None it is used
            for config/tokenizer and ``local_files_only`` is enabled.

    Returns:
        Tuple ``(model, tokenizer)``.
    """
    print(model_name)
    offline_only = huggingface_config_path is not None
    config_source = huggingface_config_path or model_type

    model_kwargs = dict(
        pretrained_model_name_or_path=None,
        config=config_source,
        num_labels=num_classes,
        state_dict=state_dict,
        local_files_only=offline_only,
    )
    model = getattr(transformers, model_name).from_pretrained(**model_kwargs)

    tokenizer_cls = getattr(transformers, tokenizer_name)
    tokenizer = tokenizer_cls.from_pretrained(
        config_source,
        local_files_only=offline_only,
        # TODO: may be needed to let it work with Kaggle competition
        model_max_length=512,
    )
    return model, tokenizer


def load_checkpoint(model_type="original", checkpoint=None, device='cuda', huggingface_config_path=None):
    """Load a Detoxify checkpoint and build its model, tokenizer and class names.

    Args:
        model_type: key into ``MODEL_URLS``; only used when ``checkpoint`` is None.
        checkpoint: optional path to a local checkpoint file.
        device: device the checkpoint tensors are mapped to while loading.
        huggingface_config_path: optional local path with the HF config/tokenizer
            files, forwarded to ``get_model_and_tokenizer``.

    Returns:
        Tuple ``(model, tokenizer, class_names)``.

    Raises:
        KeyError: if ``checkpoint`` is None and ``model_type`` is not in ``MODEL_URLS``.
        ValueError: if a local checkpoint lacks the "config" or "state_dict" entry.
    """
    if checkpoint is None:
        checkpoint_path = MODEL_URLS[model_type]
        loaded = torch.hub.load_state_dict_from_url(checkpoint_path, map_location=device)
    else:
        # NOTE: torch.load unpickles the file — only load checkpoints from a
        # trusted source.
        # map_location mirrors the download branch; without it `device` was
        # ignored and a GPU-saved checkpoint could not be opened on CPU.
        loaded = torch.load(checkpoint, map_location=device)
        if "config" not in loaded or "state_dict" not in loaded:
            # Adjacent literals instead of a backslash continuation, which
            # embedded the source indentation in the message.
            raise ValueError(
                "Checkpoint needs to contain the config it was trained "
                "with as well as the state dict"
            )
    class_names = loaded["config"]["dataset"]["args"]["classes"]
    # standardise class names between models
    change_names = {
        "toxic": "toxicity",
        "identity_hate": "identity_attack",
        "severe_toxic": "severe_toxicity",
    }
    class_names = [change_names.get(cl, cl) for cl in class_names]
    model, tokenizer = get_model_and_tokenizer(
        **loaded["config"]["arch"]["args"], state_dict=loaded["state_dict"], huggingface_config_path=huggingface_config_path,
    )

    return model, tokenizer, class_names


def load_model(model_type, checkpoint=None):
    """Return only the model produced by ``load_checkpoint``.

    When ``checkpoint`` is given it alone is forwarded (``model_type`` is not);
    otherwise the model is selected by ``model_type``.
    """
    if checkpoint is not None:
        selector = {"checkpoint": checkpoint}
    else:
        selector = {"model_type": model_type}
    model, _tokenizer, _class_names = load_checkpoint(**selector)
    return model


class Detoxify:
    """Detoxify
    Score a comment, or a list of comments, for toxicity.

    The model is built by ``load_checkpoint`` either from a model type or from
    a checkpoint path. Model types described upstream:
        - original:
            model trained on data from the Jigsaw Toxic Comment
            Classification Challenge
        - unbiased:
            model trained on data from the Jigsaw Unintended Bias in
            Toxicity Classification Challenge
        - multilingual:
            model trained on data from the Jigsaw Multilingual
            Toxic Comment Classification Challenge
        - original-small:
            lightweight version of the original model
        - unbiased-small:
            lightweight version of the unbiased model
    (``MODEL_URLS`` in this notebook only lists the first three.)

    Args:
        model_type(str): model type to be loaded when no checkpoint is given
        checkpoint(str): checkpoint path, defaults to ``PRETRAINED_MODEL``
        device(str or torch.device): device the model is moved to,
                                     defaults to "cuda"
        huggingface_config_path: path to HF config and tokenizer files needed for offline model loading
    """

    def __init__(self, model_type="original", checkpoint=PRETRAINED_MODEL, device="cuda", huggingface_config_path=None):
        super().__init__()
        loaded_model, loaded_tokenizer, loaded_class_names = load_checkpoint(
            model_type=model_type,
            checkpoint=checkpoint,
            device=device,
            huggingface_config_path=huggingface_config_path,
        )
        self.model = loaded_model
        self.tokenizer = loaded_tokenizer
        self.class_names = loaded_class_names
        self.device = device
        self.model.to(self.device)

    @torch.no_grad()
    def predict(self, text):
        """Return ``{class_name: score}`` for ``text``.

        For a single string each value is one score; for any other input each
        value is a list with one score per example.
        """
        self.model.eval()
        encoded = self.tokenizer(
            text, return_tensors="pt", truncation=True, padding=True
        )
        encoded = encoded.to(self.model.device)
        logits = self.model(**encoded)[0]
        scores = torch.sigmoid(logits).cpu().detach().numpy()

        single_input = isinstance(text, str)
        results = {}
        for class_idx, class_name in enumerate(self.class_names):
            if single_input:
                results[class_name] = scores[0][class_idx]
            else:
                # One Python float per example for this class column.
                results[class_name] = scores[:, class_idx].tolist()
        return results


def toxic_bert():
    """Return the model loaded via ``load_model("original")``."""
    return load_model("original")


def toxic_albert():
    """Return the model loaded via ``load_model("original-small")``.

    NOTE(review): ``MODEL_URLS`` as defined in this notebook has no
    "original-small" key, so this looks like it raises KeyError — confirm.
    """
    return load_model("original-small")


def unbiased_toxic_roberta():
    """Return the model loaded via ``load_model("unbiased")``."""
    return load_model("unbiased")


def unbiased_albert():
    """Return the model loaded via ``load_model("unbiased-small")``.

    NOTE(review): ``MODEL_URLS`` as defined in this notebook has no
    "unbiased-small" key, so this looks like it raises KeyError — confirm.
    """
    return load_model("unbiased-small")


def multilingual_toxic_xlm_r():
    """Return the model loaded via ``load_model("multilingual")``."""
    return load_model("multilingual")

In [17]:
## constants 

## load Unitary AI models path ##
# Per model type: local checkpoint file and the matching offline HF
# config/tokenizer directory. The key order fixes the column order of
# `unitAI_preds` further down (original, unbiased, multilingual).
model_info_dict = {
    'original' : {
        "checkpoint":"../input/jigsaw-unitary-ai-detoxify-and-models-ckpt/unitaryAI/toxic_original-c1212f89.ckpt", "huggingface_config_path": "../input/jigsaw-unitary-ai-detoxify-and-models-ckpt/unitaryAI/bert-base-uncased"
    },
    'unbiased' : {
        "checkpoint":"../input/jigsaw-unitary-ai-detoxify-and-models-ckpt/unitaryAI/toxic_debiased-c7548aa0.ckpt", "huggingface_config_path":"../input/jigsaw-unitary-ai-detoxify-and-models-ckpt/unitaryAI/roberta-base"
    },
    "multilingual" : {
        "checkpoint":"../input/jigsaw-unitary-ai-detoxify-and-models-ckpt/unitaryAI/multilingual_debiased-0b549669.ckpt","huggingface_config_path": "../input/jigsaw-unitary-ai-detoxify-and-models-ckpt/unitaryAI/xlm-roberta-base"
    }
}

test_data_path = "../input/jigsaw-toxic-severity-rating/comments_to_score.csv"
val_data_path = "../input/jigsaw-toxic-severity-rating/validation_data.csv"
comment_key = "text"
comment_id_key = "comment_id"
# NOTE: batch_size and target_key rebind names already set in the first
# constants cell (same values there).
batch_size = 32
target_key = "score"

In [18]:
# Alias (not a copy) of the comments frame, used by the Detoxify cells below.
whole_df = comments_to_score_df
whole_df.shape

In [19]:
class CustomDataset(Dataset):
    """Map-style dataset over comments with optional targets.

    Args:
        comments: indexable sequence of comments (numpy array, list, ...).
        targets: sequence aligned with ``comments``; may be ``None`` when
            ``include_target`` is False.
        include_target: when truthy, items are ``(comment, target)`` pairs,
            otherwise just the comment.

    Raises:
        ValueError: if ``include_target`` is truthy but ``targets`` is None.
    """

    def __init__(self, comments, targets, include_target=True):
        if include_target and targets is None:
            # Fail at construction instead of with a TypeError on first access.
            raise ValueError("targets must be provided when include_target=True")
        self.comments = comments
        self.targets = targets
        self.include_target = include_target

    def __len__(self):
        # len() instead of .shape[0] so plain lists work as well as arrays.
        return len(self.comments)

    def __getitem__(self, idx):
        comment = self.comments[idx]
        if self.include_target:
            return comment, self.targets[idx]
        return comment


In [20]:
# Text-only dataset and loader; shuffle=False keeps batches in whole_df row order.
pytorch_dataset = CustomDataset(whole_df[comment_key].values, None, include_target=False)
test_dataloader = DataLoader(pytorch_dataset, batch_size=batch_size, shuffle=False)

In [21]:
# Instantiate one Detoxify wrapper per configured checkpoint, in dict order.
models = []
for model_name, model_cfg in model_info_dict.items():
    model = Detoxify(
        model_name,
        checkpoint=model_cfg["checkpoint"],
        huggingface_config_path=model_cfg["huggingface_config_path"],
    )
    models.append(model)

In [22]:
# One column per Detoxify model; every entry accumulates the sum of all class
# scores that model returns for the comment.
unitAI_preds = np.zeros((whole_df.shape[0], len(model_info_dict)))
for i, model_name in enumerate(model_info_dict):
    model = models[i]  # models were created in the same order as model_info_dict
    lastidx = 0
    for texts in tqdm(test_dataloader):
        batch_len = len(texts)
        batch_rows = slice(lastidx, lastidx + batch_len)
        preds_dict = model.predict(texts)  # could do combination of weights here as well
        for class_scores in preds_dict.values():
            unitAI_preds[batch_rows, i] += class_scores
        lastidx += batch_len

In [23]:
# Show the per-model summed class scores together with the array shape.
unitAI_preds, unitAI_preds.shape

In [24]:
# Drop the Detoxify models and the loader, then ask PyTorch to release its
# cached GPU memory and force a collection.
del models, pytorch_dataset, test_dataloader
torch.cuda.empty_cache()
gc.collect()

## BILSTM | ruddit_only

In [25]:
# del comments_to_score_df
# gc.collect()

In [26]:
# import pandas as pd
# import numpy as np
# import torch
# from torchtext import vocab
# from torchtext.legacy import data, datasets
# from torchtext.legacy.data import BucketIterator, TabularDataset , Dataset
# from torch.utils.data import Sampler, Subset#, Dataset
# from typing import Sequence, Optional
# from torch import nn
# from sklearn.model_selection import KFold
# from tqdm import tqdm

In [27]:
# device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

In [28]:
# ## Constants ##
# comment_key = "txt"
# target_key = "score"
# train_data_path = "./train/clean_ruddit_with_text.csv"
# embedding_name = "glove.840B.300d" #"glove.6B.100d"
# embedding_dim = 300 # 100
# hidden_dim = 256
# output_dim = 1
# n_layers = 2
# k_folds = 5
# n_epochs = 7
# batch_size = 256
# dropout_rate = 0.5
# output_model_path = "./output/bilstm_ruddit_only/model_%s_%s" #"./output/bilstm_civil_only/model_%s_%s" # loss, more info

In [29]:
# class BiLSTM(nn.Module):
#     def __init__(self, embedding_vocab: vocab, hidden_dim: int, output_dim: int, n_layers: int, # vocab_size: int, embedding_dim: int
#         bidirectional: bool, dropout: float, pad_idx: Optional[int]):
#         super().__init__()
#         vocab_size, embedding_dim = embedding_vocab.vectors.size()
#         self.embedding_layer = nn.Embedding.from_pretrained(embedding_vocab.vectors, freeze=True, padding_idx=pad_idx) # not training embeddding
#         # self.embedding_layer =  nn.Embedding(vocab_size, embedding_dim, padding_idx=pad_idx)
#         self.lstm = nn.LSTM(embedding_dim,
#                            hidden_dim,
#                            num_layers=n_layers,
#                            bidirectional=bidirectional,
#                            batch_first=True, # imp
#                            dropout=dropout)
#         self.fc = nn.Linear(hidden_dim * 2, output_dim) # bcos birectional
#         self.dropout_emb = nn.Dropout(dropout) # not sure if same layer object can be used
#         self.dropout_fc = nn.Dropout(dropout)
 
#     def forward(self, text, examples_lengths): # text is already padded
#         embedded = self.dropout_emb(self.embedding_layer(text))
#         pack_out = nn.utils.rnn.pack_padded_sequence(embedded, examples_lengths.cpu(), batch_first=True)#.to(device)
#         out_lstm, (hidden, cell) = self.lstm(embedded) # hidden -> (D∗num_layers, batch , hidden_dim) # D = 2 if bidirectional=True otherwise 1
#         h1, h2 = hidden[-2, :, :], hidden[-1, :, :]# -2, -1 is taking last hidden state (twice bcos bidirectional) # so h1,h2 -> (batch, hidden_dim)
#         x = self.dropout_fc(torch.cat((h1, h2), dim=1)) # concatenate along hidden_dim # x -> (batch, hidden_dim*2)
#         return self.fc(x) # feel like too many dropouts


In [30]:
# model_load_paths = ["../input/bilstm-ruddit-only/bilstm_not_clean_ruddit_only/bilstm_not_clean_ruddit_only/model_0.07617800012230873_fold_4",
#                     "../input/bilstm-ruddit-only/bilstm_not_clean_ruddit_only/bilstm_not_clean_ruddit_only/model_0.08099494650959968_fold_1",
#                    "../input/bilstm-ruddit-only/bilstm_not_clean_ruddit_only/bilstm_not_clean_ruddit_only/model_0.08217549547553063_fold_0",
#                    "../input/bilstm-ruddit-only/bilstm_not_clean_ruddit_only/bilstm_not_clean_ruddit_only/model_0.08438036367297172_fold_3",
#                    "../input/bilstm-ruddit-only/bilstm_not_clean_ruddit_only/bilstm_not_clean_ruddit_only/model_0.09249704629182816_fold_2",
#                    ]
# vocab_path = "../input/bilstm-ruddit-only/bilstm_not_clean_ruddit_only/bilstm_not_clean_ruddit_only/ruddit_vocab"

In [31]:
# text_field = torch.load(vocab_path) # dont build_vocab now

In [32]:
# models = []
# for model_path in model_load_paths:
#     model = BiLSTM(text_field.vocab, hidden_dim, output_dim=1, n_layers=n_layers, bidirectional=True, dropout=dropout_rate, pad_idx=text_field.vocab.stoi[text_field.pad_token])
#     model.load_state_dict(torch.load(model_path, map_location=device))
#     models.append(model)
    
# models

In [33]:
# comment_id_field = data.Field(dtype=torch.int64, batch_first=True, sequential=False, use_vocab=False, preprocessing=int)
# fields = [('comment_id', comment_id_field), ('text', text_field)]
# fields

In [34]:
# comments_to_score = TabularDataset(
#     path="../input/jigsaw-toxic-severity-rating/comments_to_score.csv",
#     format='csv',
#     fields=fields,
#     skip_header=True,
# )
# len(comments_to_score.examples)

In [35]:
# test_iter, = BucketIterator.splits((comments_to_score,),
#                                 sort_key=lambda x: len(x.text),  # sort by s attribute (quote)
#                                 sort_within_batch=True,
#                                 batch_size=32,
#                                 device=device)

In [36]:
# bilstm_ruddit_only_pred = np.zeros((len(comments_to_score.examples), len(models))) # (examples, models)
# comment_id_list = []
# last_idx = 0
# for batch in tqdm(test_iter):
#     text, examples_len = batch.text
#     batch_len = len(text)
#     for i, model in enumerate(models):
#         model.to(device)
#         preds = model(text, examples_len)
#         preds = torch.sigmoid(preds) # not necessary since ranking is needed
#         bilstm_ruddit_only_pred[last_idx:last_idx + batch_len, i] = preds.squeeze(1).cpu().detach().numpy() # alternating between gpu and cpu # better would be all gpu and then cpu
# #         model.to('cpu')
#     comment_id_list.extend(batch.comment_id.cpu().detach().numpy())
#     last_idx += batch_len
# #     print(text) # many out of vocab :/

# len(comment_id_list), bilstm_ruddit_only_pred.shape

In [37]:
# # works!!!
# del models, comments_to_score, test_iter
# torch.cuda.empty_cache()
# gc.collect()

In [38]:
# bilstm_ruddit_only_df = pd.DataFrame({"comment_id": comment_id_list, "score": np.mean(bilstm_ruddit_only_pred, axis=1)}).sort_values(by=["comment_id"])
# bert_1_biLstm_df = pd.DataFrame({"comment_id": comments_to_score_df["comment_id"].values, "score": bert_1_biLstm.squeeze(1)}).sort_values(by=["comment_id"])
# bert_2_biLstm_df = pd.DataFrame({"comment_id": comments_to_score_df["comment_id"].values, "score": bert_2_biLstm.squeeze(1)}).sort_values(by=["comment_id"])
# unitaryAI_original_df = pd.DataFrame({"comment_id": comments_to_score_df["comment_id"].values, "score": unitAI_preds[:, 0]}).sort_values(by=["comment_id"])
# unitaryAI_unbiased_df = pd.DataFrame({"comment_id": comments_to_score_df["comment_id"].values, "score": unitAI_preds[:, 1]}).sort_values(by=["comment_id"])
# unitaryAI_multilingual_df = pd.DataFrame({"comment_id": comments_to_score_df["comment_id"].values, "score": unitAI_preds[:, 2]}).sort_values(by=["comment_id"])
# bilstm_ruddit_only_df.shape, bert_1_biLstm_df.shape, bert_2_biLstm_df.shape, unitaryAI_original_df.shape, unitaryAI_unbiased_df.shape, unitaryAI_multilingual_df.shape

In [39]:
# # weights = {'bilstm_ruddit_only': 4.754776548389675, 'bert_1_bilstm': 0.0563325993719159, 'bert_2_bilstm': 0.0005159065135504051, 'multilingual': 0.08595313014460652, 'original': 0.10773060655864747, 'unbiased': 0.034223939582444046}
# weights = {'bert_1_bilstm': 0.2720956202305736, 'bert_2_bilstm': 1.3682913017691893, 'multilingual': 1.3008939861065933, 'original': 1.5262021808926747, 'unbiased': 1.8944012148054452}
# models_preds = {
# #     'bilstm_ruddit_only': bilstm_ruddit_only_df,
#     'bert_1_bilstm': bert_1_biLstm_df,
#     'bert_2_bilstm': bert_2_biLstm_df,
#     'multilingual': unitaryAI_multilingual_df,
#     'original': unitaryAI_original_df,
#     'unbiased': unitaryAI_unbiased_df, 
# } # same keys

In [40]:
# final_scores = np.zeros(len(comments_to_score_df))
# for key in models_preds:
#     final_scores += models_preds[key]["score"] #* weights[key]
    
# final_scores

## Ensemble Tuning

In [3]:
# free_gpu()  # disabled: free_gpu is only defined in the commented-out cell at the top, so calling it raises NameError on a fresh kernel

In [42]:
# np.

In [1]:
# Feature matrix for the ensemble: one row per comment, one column per base
# model (bert-1-bilstm, bert-2-bilstm, then the three Detoxify columns).
all_preds = np.column_stack((
    bert_1_biLstm.squeeze(1),
    bert_2_biLstm.squeeze(1),
    unitAI_preds[:, 0],
    unitAI_preds[:, 1],
    unitAI_preds[:, 2],
))
all_preds.shape
# all_preds[:,2], unitAI_preds[:, 0] # cross check

In [4]:
# NOTE(review): tensorflow is already imported near the top of the notebook;
# this re-import is redundant on a top-to-bottom run.
import tensorflow as tf
# Rebinds `model` to the model saved under ./output/ensemble-nn-tuning.
model = tf.keras.models.load_model("./output/ensemble-nn-tuning/final_model")

In [None]:
# Feed the stacked base-model predictions to the ensemble model; squeeze(1)
# drops axis 1 of the prediction array.
with tf.device('/device:GPU:0'):
    final_scores = model.predict(all_preds).squeeze(1)
    
final_scores.shape

In [None]:
# Build the submission from the frame the predictions were computed on.
# `bilstm_ruddit_only_df` only exists in commented-out code (NameError on a
# fresh kernel) and was sorted by comment_id, whereas `final_scores` follows
# the row order of `comments_to_score_df`.
submission_ids = comments_to_score_df["comment_id"].values
assert len(submission_ids) == len(final_scores), "one score per comment expected"
final_submission_df = pd.DataFrame({"comment_id": submission_ids, "score": final_scores})
print(final_submission_df.shape)
final_submission_df.to_csv("submission.csv", index=False)