In [1]:
import tensorflow as tf 
from tensorflow import keras 
import tensorflow_addons as tfa 
import pandas as pd
import numpy as np 
from sklearn.metrics import mean_absolute_error
import gensim.downloader as api

 The versions of TensorFlow you are currently using is 2.6.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


### Load text data

In [2]:

# Build the tf.data input pipelines from the datasets saved under ./data/text/.
#
# Training data: cache() is placed BEFORE shuffle() so that the cached elements
# are reshuffled on every epoch. With shuffle() before cache(), the order drawn
# during the first epoch is stored in the cache and replayed unchanged afterwards.
train_ds = (
    tf.data.experimental.load('./data/text/train_ds/')
    .cache()
    .shuffle(buffer_size=1000, seed=42)
    .batch(batch_size=32)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# Validation / test data are only evaluated, so they are not shuffled: the
# aggregated loss and MAE do not depend on element order, and a stable order
# keeps labels and predictions aligned when the dataset is iterated twice.
valid_ds = (
    tf.data.experimental.load('./data/text/valid_ds/')
    .batch(batch_size=32)
    .cache()
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)
test_ds = (
    tf.data.experimental.load('./data/text/test_ds/')
    .batch(batch_size=32)
    .cache()
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

train_ds, valid_ds

(<PrefetchDataset shapes: ((None, 50), (None, 5)), types: (tf.int32, tf.float32)>,
 <PrefetchDataset shapes: ((None, 50), (None, 5)), types: (tf.int32, tf.float32)>)

### Load embed_matrix

In [3]:
# Load the embedding matrix stored as a NumPy array; it is used further down as
# the constant initializer of the frozen (trainable=False) Embedding layer.
# NOTE(review): presumably shaped (vocab_size, 100) to match that layer — confirm.
embed_matrix = np.load('./data/text/embed_matrix.npy')

### Build Text model

In [4]:
# Text model: a frozen, pre-initialised embedding feeds two parallel Conv1D
# branches; their dense features are concatenated and mapped to 5 outputs.
vocab_size = 11052  # number of rows in the embedding table (Embedding input_dim)
sentlen    = 50     # token ids per input sequence
embed_dim  = 100    # size of each embedding vector (Embedding output_dim)

# `shape` must be a tuple: (sentlen) is just a parenthesised int, (sentlen,) is
# the intended 1-tuple.
inputs = keras.layers.Input(shape=(sentlen,))
embed  = keras.layers.Embedding(
    input_dim=vocab_size,
    output_dim=embed_dim,
    embeddings_initializer=keras.initializers.Constant(embed_matrix),
    input_length=sentlen,
    trainable=False,  # keep the loaded embedding vectors fixed during training
)(inputs)

# Branch 1: narrower convolution stack (16 -> 8 filters).
x = keras.layers.Conv1D(filters=16, kernel_size=3, activation='relu')(embed)
x = keras.layers.Conv1D(filters=8, kernel_size=3, activation='relu')(x)
x = keras.layers.Flatten()(x)
x = keras.layers.Dense(50, activation='relu')(x)

# Branch 2: wider convolution stack (32 -> 16 filters) over the same embedding.
y = keras.layers.Conv1D(filters=32, kernel_size=3, activation='relu')(embed)
y = keras.layers.Conv1D(filters=16, kernel_size=3, activation='relu')(y)
y = keras.layers.Flatten()(y)
y = keras.layers.Dense(50, activation='relu')(y)

# Merge both branches and regress the 5 targets.
z = keras.layers.Concatenate()([x, y])
z = keras.layers.Dense(256, activation='relu')(z)
# sigmoid bounds every output to (0, 1).
# NOTE(review): assumes the 5 targets are scaled to [0, 1] — confirm.
z = keras.layers.Dense(5, activation='sigmoid')(z)

text_model = keras.models.Model(inputs=inputs, outputs=z, name='text_model')
text_model.compile(loss='mse', optimizer=tfa.optimizers.RectifiedAdam(), metrics=['mae'])

### Train model

In [12]:
import datetime

# Timestamp (month-day_hour-minute-second) that gives every training run its
# own checkpoint directory under ./weights/text/.
t = datetime.datetime.now().strftime("%m%d_%H%M%S")

# Stop once the monitored quantity (Keras default: val_loss) has not improved
# for 10 consecutive epochs.
early_stopping = keras.callbacks.EarlyStopping(patience=10, verbose=0)

# Keep only the weights of the epoch with the lowest validation MAE.
check_point = keras.callbacks.ModelCheckpoint(
    filepath='./weights/text/' + t + '/text.t5',
    monitor='val_mae',
    mode='min',
    save_best_only=True,
    save_weights_only=True,
    verbose=0,
)

# text_model is already compiled with its optimizer, so no new optimizer
# instance is created here.
# train_ds / valid_ds are already batched tf.data pipelines, so `batch_size`
# is not passed to fit(): it must not be specified for dataset inputs.
history = text_model.fit(
    train_ds,
    validation_data=valid_ds,
    epochs=100,
    callbacks=[early_stopping, check_point],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100


### Load weights

In [5]:
# Restore previously saved weights into text_model.
# NOTE(review): the run directory (0226_072643) is hard-coded; it presumably
# names a checkpoint written by the training cell's ModelCheckpoint callback —
# point it at the run you actually want to evaluate.
text_model.load_weights('./weights/text/0226_072643/text.t5')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x1da04ec27c0>

## Evaluation

### Validation data

In [8]:

# Rebuild the evaluation pipelines with no shuffle step
# (load -> batch(32) -> cache -> prefetch), validation first, then test.
valid_ds, test_ds = (
    tf.data.experimental.load(ds_dir)
    .batch(batch_size=32)
    .cache()
    .prefetch(buffer_size=tf.data.AUTOTUNE)
    for ds_dir in ('./data/text/valid_ds/', './data/text/test_ds/')
)

valid_ds, test_ds

(<PrefetchDataset shapes: ((None, 50), (None, 5)), types: (tf.int32, tf.float32)>,
 <PrefetchDataset shapes: ((None, 50), (None, 5)), types: (tf.int32, tf.float32)>)

In [11]:
# Collect the label batches of the validation set into one array, then predict
# on the same dataset.
# NOTE(review): labels and predictions only line up if valid_ds yields its
# elements in the same order on both passes — confirm it is not reshuffled.
y_true = np.concatenate([labels for _, labels in valid_ds], axis=0)
y_pred = text_model.predict(valid_ds)

# 'raw_values' keeps one MAE per output column instead of averaging them.
mae = mean_absolute_error(y_true, y_pred, multioutput='raw_values')

# (1 - MAE) as a percentage: per output column, and averaged over the columns.
100 * (1 - mae), 100 * (1 - np.mean(mae))

(array([88.668   , 87.64489 , 88.154854, 89.908455, 88.18838 ],
       dtype=float32),
 88.51291686296463)

In [6]:
# evaluate() yields the loss followed by the compiled metric ('mae') on the
# validation set; the result is shown as (1 - MAE) in percent.
loss, mae = text_model.evaluate(x=valid_ds)
100 * (1 - mae)



88.51291239261627

### Test data

In [9]:
# Collect the label batches of the test set into one array, then predict on
# the same dataset.
# NOTE(review): labels and predictions only line up if test_ds yields its
# elements in the same order on both passes — confirm it is not reshuffled.
y_true = np.concatenate([labels for _, labels in test_ds], axis=0)
y_pred = text_model.predict(test_ds)

# 'raw_values' keeps one MAE per output column instead of averaging them.
mae = mean_absolute_error(y_true, y_pred, multioutput='raw_values')

# (1 - MAE) as a percentage: per output column, and averaged over the columns.
100 * (1 - mae), 100 * (1 - np.mean(mae))

(array([88.529396, 87.800285, 87.93243 , 89.40246 , 88.01859 ],
       dtype=float32),
 88.33663240075111)

In [7]:
# evaluate() yields the loss followed by the compiled metric ('mae') on the
# test set; the result is shown as (1 - MAE) in percent.
loss, mae = text_model.evaluate(x=test_ds)
100 * (1 - mae)



88.33663538098335