In [None]:
# Install the tensorflow_text 2.4.3 wheel (CPython 3.7, manylinux1 x86_64) from a
# local input directory instead of fetching it from a package index.
!pip install ../input/tensorflow-text243/tensorflow_text-2.4.3-cp37-cp37m-manylinux1_x86_64.whl

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd

import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_datasets as tfds
import tensorflow_text as text
import matplotlib.pyplot as plt
import gc
# Make sure automatic garbage collection is switched on (it already is by default
# in CPython); explicit gc.collect() calls are issued later after each model is deleted.
gc.enable()

In [None]:
# Load the training table (it provides the `excerpt` and `target` columns used
# by the fold loop) and preview its first five rows as the cell output.
train_df = pd.read_csv(
    filepath_or_buffer="../input/commonlitreadabilityprize/train.csv"
)
train_df.head(n=5)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM, InputLayer, Bidirectional,Dropout
from keras.optimizers import Adam

def UniversalSentenceModel():
    """Assemble the text-regression network used for every fold.

    The first layer is a TF-Hub module loaded from the local
    ``../input/universal-sentence-embedding`` directory. It takes scalar
    strings (``input_shape=[]``, ``dtype=tf.string``) and is fine-tuned
    together with the head (``trainable=True``).

    The head is dropout -> Dense(256, tanh) -> dropout -> Dense(128, tanh)
    -> dropout -> Dense(1), every dropout using rate 0.5. The final layer is
    named ``"predictions"`` and has no activation.

    Returns:
        An uncompiled ``tf.keras.Sequential`` model.
    """
    embedding = "../input/universal-sentence-embedding"
    sentence_encoder = hub.KerasLayer(embedding, input_shape=[],
                                      dtype=tf.string, trainable=True)

    layers = tf.keras.layers
    return tf.keras.Sequential([
        sentence_encoder,
        layers.Dropout(0.5),
        layers.Dense(256, activation='tanh'),
        layers.Dropout(0.5),
        layers.Dense(128, activation='tanh'),
        layers.Dropout(0.5),
        layers.Dense(1, name="predictions"),
    ])


In [None]:
from keras import backend as K
import tensorflow_addons as tfa





def root_mean_squared_error(y_true, y_pred):
    """Root-mean-squared error between targets and predictions.

    Args:
        y_true: tensor of ground-truth values.
        y_pred: tensor of predicted values, broadcast-compatible with ``y_true``.

    Returns:
        A scalar: the square root of the mean, taken over every element, of
        the squared difference ``y_pred - y_true``.
    """
    squared_error = K.square(y_pred - y_true)
    mean_squared_error = K.mean(squared_error)
    return K.sqrt(mean_squared_error)

def get_optimizer(model):
    """Build an optimizer that trains the encoder and the head at different rates.

    ``model.layers[0]`` gets its own Adam instance with a low learning rate,
    and every remaining layer (``model.layers[1:]``) shares a second Adam
    instance whose rate is ten times higher. Both rates are piecewise
    constant and step down at optimizer steps 750 and 1500.

    Args:
        model: a Keras model exposing ``layers``, with the layer that needs
            the lower rate first.

    Returns:
        A ``tfa.optimizers.MultiOptimizer`` wrapping the two Adam optimizers.
    """
    boundaries = [750, 1500]

    encoder_lr = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
        boundaries, [1e-4, 5e-5, 1e-5])

    # The last value used to be 1e-3, which sent the head back to its initial
    # rate after step 1500 while the encoder schedule kept decaying. Assuming
    # that was a typo, the head now follows the encoder schedule at 10x scale.
    head_lr = tf.keras.optimizers.schedules.PiecewiseConstantDecay(
        boundaries, [1e-3, 5e-4, 1e-4])

    optimizers_and_layers = [
        (tf.keras.optimizers.Adam(learning_rate=encoder_lr), model.layers[0]),
        (tf.keras.optimizers.Adam(learning_rate=head_lr), model.layers[1:]),
    ]
    return tfa.optimizers.MultiOptimizer(optimizers_and_layers)

In [None]:
from sklearn.model_selection import KFold
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau

# Training budget per fold and number of cross-validation folds.
epochs = 20
NUM_FOLDS = 5

# Shuffled split with a fixed seed so fold membership is the same on every run.
kfold = KFold(NUM_FOLDS, shuffle=True, random_state=1)

# Train one model per cross-validation fold and keep the best weights of each.
for fold, (train_indices, val_indices) in enumerate(kfold.split(train_df)):
    print(f"\nFold {fold + 1}/{NUM_FOLDS}")

    # Zero-based file name: this is the name the inference cell loads
    # (f"model_{index}.h5" for index in range(NUM_FOLDS)).
    model_path = f"model_{fold}.h5"

    # KFold yields positional indices, so rows are selected with .iloc.
    # .loc would only be correct while train_df keeps its default RangeIndex.
    train_rows = train_df.iloc[train_indices]
    val_rows = train_df.iloc[val_indices]
    X_train = train_rows.excerpt.to_list()
    X_test = val_rows.excerpt.to_list()
    y_train = train_rows.target.values
    y_test = val_rows.target.values

    train_ds = tf.data.Dataset.from_tensor_slices((X_train, y_train))
    # A buffer covering the whole fold gives a uniform shuffle each epoch;
    # a fixed 1024 buffer only partially shuffles larger folds.
    train_ds = train_ds.shuffle(len(X_train))
    train_ds = train_ds.batch(32)

    val_ds = tf.data.Dataset.from_tensor_slices((X_test, y_test))
    val_ds = val_ds.batch(32)

    model = UniversalSentenceModel()
    optimizer = get_optimizer(model)
    model.compile(optimizer=optimizer,
                  loss=tf.keras.losses.LogCosh(),
                  metrics=[root_mean_squared_error])

    # Only the weights of the epoch with the lowest validation loss are kept.
    checkpoint = ModelCheckpoint(
        model_path, monitor='val_loss', mode='min',
        save_best_only=True, save_weights_only=True)

    # `callbacks` is documented as a list of callback instances.
    model.fit(train_ds, validation_data=val_ds, epochs=epochs,
              callbacks=[checkpoint])

    # Release the model and Keras' global state before the next fold so memory
    # does not accumulate across the loop.
    del model
    tf.keras.backend.clear_session()
    gc.collect()

In [None]:
# Read the test table and extract its `excerpt` column as a plain Python list
# of strings, the form later passed to model.predict.
test = pd.read_csv(
    filepath_or_buffer="../input/commonlitreadabilityprize/test.csv"
)
test_text = test.excerpt.to_list()

In [None]:
# One row of predictions per fold model, one column per test excerpt.
all_predictions = np.zeros((NUM_FOLDS, len(test)))

for index in range(NUM_FOLDS):
    model_path = f"model_{index}.h5"
    print(f"\nUsing {model_path}")

    # Build a fresh network for each fold and restore that fold's best weights.
    # (No model is built before the loop: it would be thrown away immediately.)
    model = UniversalSentenceModel()
    model.load_weights(model_path)

    # predict() returns one value per excerpt in a 2-D array; flatten it to
    # fill a single row.
    all_predictions[index] = model.predict(test_text).flatten()

    # Release the model and Keras' global state before loading the next fold.
    del model
    tf.keras.backend.clear_session()
    gc.collect()

In [None]:
# Ensemble the folds: average the per-fold predictions for each test excerpt.
predictions = all_predictions.mean(axis=0)

submission_df = pd.read_csv("/kaggle/input/commonlitreadabilityprize/sample_submission.csv")

# Fail with a clear message if the template and the predictions disagree in size.
if len(submission_df) != len(predictions):
    raise ValueError(
        f"sample_submission has {len(submission_df)} rows but "
        f"{len(predictions)} predictions were produced"
    )

# Assign with [] rather than attribute access: `df.target = ...` silently
# creates a plain Python attribute instead of a column if "target" is missing.
# NOTE(review): assumes sample_submission rows are in the same order as test.csv - confirm.
submission_df["target"] = predictions

print(submission_df)
submission_df.to_csv("submission.csv", index=False)