In [None]:
import pandas as pd
import numpy as np
import tensorflow as tf
from transformers import TFAutoModelForSequenceClassification, AutoTokenizer

In [None]:
# Reset Keras' global state up front, before any model is built in the cells below.
tf.keras.backend.clear_session()

In [None]:
def regular_encode(texts, tokenizer, max_len=256):
    """Tokenize ``texts`` into fixed-length NumPy arrays.

    Every text is padded or truncated to exactly ``max_len`` tokens via
    ``tokenizer.batch_encode_plus``; token type ids are not requested.

    Args:
        texts: Batch of input texts handed to the tokenizer as-is.
        tokenizer: Object exposing ``batch_encode_plus``.
        max_len: Target sequence length used for padding and truncation.

    Returns:
        Dict with the keys ``"input_ids"`` and ``"attention_mask"``, each
        holding the tokenizer output for that key converted with ``np.array``.
    """
    encoded = tokenizer.batch_encode_plus(
        texts,
        max_length=max_len,
        padding='max_length',
        truncation=True,
        return_attention_mask=True,
        return_token_type_ids=False,
    )

    # Only these two fields are forwarded; anything else the tokenizer returns is dropped.
    wanted_keys = ("input_ids", "attention_mask")
    return {key: np.array(encoded[key]) for key in wanted_keys}

def generate_predictions(model_path, max_len, file_name, x_column="excerpt"):
    """Run a saved sequence-classification model over one text column of a CSV.

    Loads the model (converted from PyTorch weights via ``from_pt=True``) and
    its tokenizer from ``model_path``, tokenizes ``x_column`` of ``file_name``
    with ``regular_encode``, and predicts in batches of 16 through a thin
    Keras functional wrapper.

    Args:
        model_path: Directory or identifier accepted by ``from_pretrained``
            for both the model and the tokenizer.
        max_len: Token length every text is padded/truncated to; also the
            width of the Keras input layers.
        file_name: Path of the CSV file to read.
        x_column: Name of the CSV column that holds the input texts.

    Returns:
        1-D float64 ``np.ndarray`` containing the model's logits flattened in
        row order (presumably one value per row for a single-output
        regression head -- confirm against the saved model config).
    """
    model = TFAutoModelForSequenceClassification.from_pretrained(model_path, from_pt=True)
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    df = pd.read_csv(file_name)

    dataset = regular_encode(df[x_column].tolist(), tokenizer=tokenizer, max_len=max_len)

    # Wrap the transformer in a functional Keras model whose input names match
    # the keys of the dict returned by regular_encode.
    input_ids = tf.keras.layers.Input((max_len,), dtype=tf.int32, name="input_ids")
    attention_mask = tf.keras.layers.Input((max_len,), dtype=tf.int32, name="attention_mask")
    output_layer = model(input_ids=input_ids, attention_mask=attention_mask)
    reg_model = tf.keras.Model(inputs=[input_ids, attention_mask], outputs=output_layer)

    final_output = reg_model.predict(dataset, verbose=1, batch_size=16)

    # Flatten in one vectorised step instead of sum(list_of_lists, []), which
    # is quadratic in the number of rows. Casting to float64 keeps the dtype
    # the previous Python-float round trip produced.
    return np.asarray(final_output.logits, dtype=np.float64).reshape(-1)

In [None]:
# Score the same test file with each of the six saved models, in a fixed order
# that the blending weights further down rely on.
test_csv = "../input/commonlitreadabilityprize/test.csv"
model_dirs = [
    "../input/modelf1/",
    "../input/modelf2/",
    "../input/modelf3/",
    "../input/modelf4/",
    "../input/modelf5/",
    "../input/a81657/",
]
preds1, preds2, preds3, preds4, preds5, preds6 = [
    generate_predictions(model_dir, max_len=256, file_name=test_csv)
    for model_dir in model_dirs
]


In [None]:
# Blend weights, one per prediction array in the order preds1 .. preds6.
# The first and fourth entries are close to zero, so those two models
# contribute almost nothing to the blend.
weights_pos = [
    2.29301865e-08,
    9.18492143e-02,
    3.56011564e-01,
    5.34926853e-09,
    8.52853500e-02,
    4.66853844e-01,
]


In [None]:
# Weighted blend of the six per-model prediction arrays.
weights = weights_pos
model_preds = (preds1, preds2, preds3, preds4, preds5, preds6)

# Accumulate strictly left to right so the floating-point summation order
# stays the same as a single chained expression.
preds = model_preds[0] * weights[0]
for idx in range(1, len(model_preds)):
    preds = preds + model_preds[idx] * weights[idx]

In [None]:
# Fill the sample submission with the blended predictions and write it out.
# NOTE(review): assumes sample_submission.csv lists rows in the same order as
# test.csv, since predictions are assigned positionally -- confirm.
submission = pd.read_csv("../input/commonlitreadabilityprize/sample_submission.csv")
# Item assignment always targets a column; attribute assignment would only set
# a plain Python attribute if the "target" column were ever missing.
submission["target"] = preds
submission.to_csv("submission.csv", index=False)

Original notebook (PyTorch) by @abhishek: https://www.kaggle.com/abhishek/yum-yum-yum

Public Score: 0.488 