In [None]:
import numpy as np
import pandas as pd

import matplotlib.pyplot as plt

import tensorflow as tf

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, Embedding
from tensorflow.keras.layers.experimental.preprocessing import TextVectorization

In [None]:
# Load the competition train/test tables and drop the two metadata columns
# that are not used anywhere below.
train = pd.read_csv('../input/commonlitreadabilityprize/train.csv')
test = pd.read_csv('../input/commonlitreadabilityprize/test.csv')
train = train.drop(['url_legal', 'license'], axis=1)
test = test.drop(['url_legal', 'license'], axis=1)

# 'sl' = number of whitespace-separated words in each excerpt.
# A bare split() breaks on any run of whitespace, whereas split(' ') would count
# newline/tab-joined words as one token and count empty strings between repeated
# spaces. train['sl'].max() later sizes the vectorizer output, so an undercount
# here could truncate the longest excerpts.
train['sl'] = train['excerpt'].str.split().str.len()
test['sl'] = test['excerpt'].str.split().str.len()

In [None]:
# Quick look at the training frame: first rows, schema, shape, target statistics.
print(train.head())
print('\n')
# DataFrame.info() writes its report to stdout itself and returns None, so it is
# called directly; wrapping it in print() emits an extra "None" line.
train.info()
print('\n')
print(train.shape)
print('\n')
print(train['target'].describe())

# Largest 'sl' value in the training set. Series.max() yields a NumPy scalar;
# cast to a plain Python int because this value is reused as the vectorizer's
# output_sequence_length, where a NumPy integer may not be accepted.
max_len = int(train['sl'].max())
max_len

In [None]:
# Preview the test frame: leading rows first, then its shape.
for test_view in (test.head(), test.shape):
    print(test_view)

In [None]:
# Pull the excerpt / target columns out of the frames as NumPy arrays and wrap
# them as TensorFlow tensors (x, y for training; tx for test-time prediction).
x = tf.convert_to_tensor(train['excerpt'].to_numpy())
y = tf.convert_to_tensor(train['target'].to_numpy())
tx = tf.convert_to_tensor(test['excerpt'].to_numpy())

# Show the first two entries of each tensor. Using '\n' both as an item and as
# the separator reproduces the blank lines of separate print('\n') calls.
print(x[:2], '\n', y[:2], '\n', tx[:2], sep='\n')

In [None]:
# Vocabulary cap for the vectorizer (also reused as the Embedding input size).
vocab_size = 10000
# Every excerpt is padded/truncated to this many token ids. max_len comes from a
# pandas reduction and may be a NumPy integer rather than a Python int; cast it
# explicitly so the layer's integer check on output_sequence_length cannot
# reject it.
sequence_length = int(max_len)

# Maps raw strings to fixed-length integer token-id sequences.
vectorize_layer = TextVectorization(
    max_tokens=vocab_size,
    output_mode='int',
    output_sequence_length=sequence_length)

# Build the vocabulary from the training excerpts.
vectorize_layer.adapt(x)

In [None]:
# Size of each learned token vector.
embedding_dim = 16

# Text-regression model: raw string -> token ids -> embeddings -> BiLSTM -> MLP -> scalar.
model = Sequential([
    vectorize_layer,
    # mask_zero=True marks token id 0 (the value the vectorizer pads with) as
    # padding, so the recurrent layer skips padded positions instead of running
    # over them as if they were real tokens.
    Embedding(vocab_size, embedding_dim, mask_zero=True, name="embedding"),
    tf.keras.layers.Bidirectional(tf.keras.layers.LSTM(64)),
    Dense(64, activation='relu'),
    # Single linear unit: the target is a continuous score.
    Dense(1)
])

In [None]:
# Regression setup: optimise mean squared error and track RMSE as the
# human-readable metric.
# NOTE: 'accuracy' is not a meaningful metric for a continuous target; it is kept
# only because the plotting cell further down reads history.history['accuracy'].
model.compile(optimizer='adam',
              loss=tf.keras.losses.MeanSquaredError(),
              metrics=['accuracy',
                       tf.keras.metrics.RootMeanSquaredError(name='rmse')])

In [None]:
# Train on the full training set for 200 epochs; the returned History object
# carries the per-epoch loss/metric values inspected below.
history = model.fit(x=x, y=y, epochs=200)

In [None]:
# Print the layer-by-layer summary of the model.
# NOTE(review): presumably placed after fit() because the Sequential model has no
# explicit input spec and is only built on its first call — confirm.
model.summary()

In [None]:
# Display the values recorded during fit(); the plotting cell reads the
# 'accuracy' and 'loss' entries of this mapping.
history.history

In [None]:
# Per-epoch training curves recorded by fit().
acc = history.history['accuracy']
loss = history.history['loss']

# Two stacked panels on one figure, using the explicit Axes interface so each
# panel's labels are set on the Axes they belong to.
fig, (ax_acc, ax_loss) = plt.subplots(2, 1, figsize=(8, 8))

ax_acc.plot(acc, label='Training Accuracy')
ax_acc.legend(loc='lower right')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title('Accuracy')  # fixed typo ('Acuuracy')

ax_loss.plot(loss, label='Training Loss')
ax_loss.legend(loc='upper right')
ax_loss.set_ylabel('MSE')
ax_loss.set_title('Training Loss')
ax_loss.set_xlabel('epoch')

# Keep the lower panel's title from colliding with the upper panel's tick labels.
fig.tight_layout()
plt.show()

In [None]:
# Run the model on the test excerpts; the result is indexed as y_pred[i][0] below.
y_pred = model.predict(x=tx)

In [None]:
# Flatten the predictions into a plain list: one scalar (column 0) per test row.
tp = [y_pred[row_idx][0] for row_idx in range(len(test))]

In [None]:
# Assemble the submission table: the test ids alongside the predicted targets.
data = dict(id=test['id'], target=tp)
df = pd.DataFrame(data)
print(df)

In [None]:
# Write the submission file without the DataFrame index column.
df.to_csv(path_or_buf='submission.csv', index=False)