In [1]:
import tensorflow as tf 
from tensorflow import keras 
import tensorflow_addons as tfa 
import pandas as pd
import numpy as np 
from models import load_mf_model
#import gensim.downloader as api
#from utils import load_annotations, load_transcriptions, process_text, preprocess_text, loss_val_graph

 The versions of TensorFlow you are currently using is 2.6.0 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


### Load data

In [2]:
AUTOTUNE = tf.data.AUTOTUNE


def _features(ds):
    """Return a dataset holding only the first component of each (x, y) element of `ds`."""
    return ds.map(lambda x, y: x)


def _labels(ds):
    """Return a dataset holding only the second component of each (x, y) element of `ds`."""
    return ds.map(lambda x, y: y)


# Train
# Only the text dataset is batched here. The element spec displayed for the
# zipped pipeline shows a leading batch axis on every modality, so the other
# three were presumably saved pre-batched with the same size (32) -- TODO confirm.
scene_train_ds = tf.data.experimental.load('./data/fullscene/train_ds/')
face_train_ds  = tf.data.experimental.load('./data/faces/train_ds/')
audio_train_ds = tf.data.experimental.load('./data/audio/train_ds/')
text_train_ds  = tf.data.experimental.load('./data/text/train_ds/').batch(batch_size=32)

scene_xtrain = _features(scene_train_ds)
face_xtrain  = _features(face_train_ds)
audio_xtrain = _features(audio_train_ds)
text_xtrain  = _features(text_train_ds)
# Labels are taken from the scene dataset only.
y_train      = _labels(scene_train_ds)

# The stream is already batched when it reaches `shuffle`, so whole batches
# (not individual samples) are reordered.
train_ds = (
    tf.data.Dataset.zip(((scene_xtrain, face_xtrain, audio_xtrain, text_xtrain), y_train))
    .shuffle(buffer_size=1000)
    .prefetch(buffer_size=AUTOTUNE)
)


# Valid
scene_valid_ds = tf.data.experimental.load('./data/fullscene/valid_ds/')
face_valid_ds  = tf.data.experimental.load('./data/faces/valid_ds/')
audio_valid_ds = tf.data.experimental.load('./data/audio/valid_ds')
text_valid_ds  = tf.data.experimental.load('./data/text/valid_ds/').batch(batch_size=32)

scene_xvalid = _features(scene_valid_ds)
face_xvalid  = _features(face_valid_ds)
audio_xvalid = _features(audio_valid_ds)
text_xvalid  = _features(text_valid_ds)
y_valid      = _labels(scene_valid_ds)

# No shuffle for validation: loss and MAE do not depend on element order, and
# skipping it avoids filling a buffer of up to 1000 multi-modal batches.
valid_ds = (
    tf.data.Dataset.zip(((scene_xvalid, face_xvalid, audio_xvalid, text_xvalid), y_valid))
    .prefetch(buffer_size=AUTOTUNE)
)

train_ds, valid_ds

(<PrefetchDataset shapes: (((None, 10, 224, 224, 3), (None, 10, 224, 224, 3), (None, 15, 128), (None, 50)), (None, 5)), types: ((tf.float32, tf.float32, tf.float32, tf.int32), tf.float32)>,
 <PrefetchDataset shapes: (((None, 10, 224, 224, 3), (None, 10, 224, 224, 3), (None, 15, 128), (None, 50)), (None, 5)), types: ((tf.float32, tf.float32, tf.float32, tf.int32), tf.float32)>)

### Load model

In [3]:
# Instantiate the model through `load_mf_model`, imported from the local `models` module.
mf_model = load_mf_model()

### Load weights

In [5]:
# Restore previously saved weights into the model.
# NOTE(review): this path points one directory up ('../weights/...'), whereas the
# checkpoint callback and the later restore in this notebook use './weights/mf/...'
# -- confirm the location is intended.
mf_model.load_weights('../weights/mf_model/')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x2a1caa41ee0>

In [6]:
# Evaluate on the validation pipeline; the two returned values are unpacked as
# loss and MAE, and the MAE is displayed as a percentage score, 100 * (1 - MAE).
loss, mae = mf_model.evaluate(valid_ds)
100 * (1 - mae)



91.2941500544548

### Load test data

In [7]:
# Test split: every modality is loaded from its own saved dataset; only the
# text dataset is batched here.
scene_test_ds = tf.data.experimental.load('./data/fullscene/test_ds/')
face_test_ds  = tf.data.experimental.load('./data/faces/test_ds/')
audio_test_ds = tf.data.experimental.load('./data/audio/test_ds')
text_test_ds  = tf.data.experimental.load('./data/text/test_ds/').batch(batch_size=32)


# Keep only the inputs of each (x, y) element.
scene_xtest = scene_test_ds.map(lambda x,y: x)
face_xtest  = face_test_ds.map(lambda x,y: x)
audio_xtest = audio_test_ds.map(lambda x,y: x)
text_xtest  = text_test_ds.map(lambda x,y: x)

# Labels are taken from the scene dataset only.
y_test      = scene_test_ds.map(lambda x,y: y)

# No shuffle for evaluation data: the metrics do not depend on element order,
# and a fixed order keeps labels and predictions aligned when they are
# collected in separate passes. It also avoids a 1000-batch shuffle buffer.
test_ds = (
    tf.data.Dataset.zip(((scene_xtest, face_xtest, audio_xtest, text_xtest), y_test))
    .prefetch(buffer_size=AUTOTUNE)
)

test_ds

<PrefetchDataset shapes: (((None, 10, 224, 224, 3), (None, 10, 224, 224, 3), (None, 15, 128), (None, 50)), (None, 5)), types: ((tf.float32, tf.float32, tf.float32, tf.int32), tf.float32)>

In [8]:
# Evaluate on the test pipeline; the two returned values are unpacked as loss
# and MAE, and the MAE is displayed as a percentage score, 100 * (1 - MAE).
loss, mae = mf_model.evaluate(test_ds)
100 * (1 - mae)



91.41548722982407

In [8]:
# Display both pipelines to inspect their element shapes and dtypes.
train_ds, valid_ds

(<PrefetchDataset shapes: (((None, 10, 224, 224, 3), (None, 10, 224, 224, 3), (None, 15, 128), (None, 50)), (None, 5)), types: ((tf.float32, tf.float32, tf.float32, tf.int32), tf.float32)>,
 <PrefetchDataset shapes: (((None, 10, 224, 224, 3), (None, 10, 224, 224, 3), (None, 15, 128), (None, 50)), (None, 5)), types: ((tf.float32, tf.float32, tf.float32, tf.int32), tf.float32)>)

In [4]:
import datetime

# Timestamp (month+day, then hour+minute+second) that names this run's checkpoint folder.
t = datetime.datetime.now().strftime("%m%d_%H%M%S")
checkpoint_path = './weights/mf/' + t + '/mf.t5'

# Stop training after 10 epochs without improvement. `val_loss` is the Keras
# default for `monitor`; it is spelled out because the checkpoint below tracks
# a different quantity (`val_mae`).
early_stopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=0)

# Keep only the weights of the epoch with the lowest validation MAE.
check_point = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    save_best_only=True,
    monitor='val_mae',
    mode='min',
    verbose=0,
)

# Re-compile with MSE loss, the MAE metric and a default-configured RectifiedAdam optimizer.
optimizer = tfa.optimizers.RectifiedAdam()
mf_model.compile(optimizer=optimizer, loss='mse', metrics=['mae'])

### Retrain model

In [9]:

# `train_ds` and `valid_ds` are already-batched tf.data pipelines, so no
# `batch_size` is passed: Keras documents that it must not be specified for
# dataset inputs, since they produce their own batches.
# Training runs for at most 100 epochs; early stopping may end it sooner, and
# the checkpoint callback saves the best weights along the way.
history = mf_model.fit(
    train_ds,
    validation_data=valid_ds,
    epochs=100,
    callbacks=[early_stopping, check_point],
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100


### Load weights

In [11]:
# Restore weights from a saved training run (presumably the file written by the
# ModelCheckpoint callback, whose path follows the same './weights/mf/<timestamp>/mf.t5' pattern).
# NOTE(review): the run folder '0226_221130' is hard-coded; a fresh training run
# saves under a new timestamp, so this path must be updated by hand -- confirm.
mf_model.load_weights('./weights/mf/0226_221130/mf.t5')

<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x226d651ad90>

## Evaluation

### Validation data

In [12]:
AUTOTUNE = tf.data.AUTOTUNE

# Rebuild the validation pipeline without any shuffling (presumably so that
# labels and predictions gathered in separate passes stay aligned).
scene_valid_ds, face_valid_ds, audio_valid_ds = [
    tf.data.experimental.load(path)
    for path in (
        './data/fullscene/valid_ds/',
        './data/faces/valid_ds/',
        './data/audio/valid_ds',
    )
]
# The text dataset is the only one batched at load time.
text_valid_ds = tf.data.experimental.load('./data/text/valid_ds/').batch(batch_size=32)

# Inputs of every modality; labels come from the scene dataset only.
scene_xvalid, face_xvalid, audio_xvalid, text_xvalid = [
    ds.map(lambda x, y: x)
    for ds in (scene_valid_ds, face_valid_ds, audio_valid_ds, text_valid_ds)
]
y_valid = scene_valid_ds.map(lambda x, y: y)

valid_inputs = (scene_xvalid, face_xvalid, audio_xvalid, text_xvalid)
valid_ds = tf.data.Dataset.zip((valid_inputs, y_valid)).prefetch(buffer_size=AUTOTUNE)


In [9]:
# Evaluate on the validation pipeline; the result is unpacked as (loss, mae) and not displayed.
loss, mae = mf_model.evaluate(valid_ds)



In [13]:
from sklearn.metrics import mean_absolute_error

# Ground truth is read from the labels-only `y_valid` stream (the very dataset
# zipped into `valid_ds`), so the scene/face/audio/text input batches are not
# materialised just to be thrown away. Both passes rely on `valid_ds` being
# unshuffled so that row i of `y_true` matches row i of `y_pred`.
y_true = np.concatenate(list(y_valid), axis=0)
y_pred = mf_model.predict(valid_ds)

# One MAE per output column, shown as percentage scores 100 * (1 - MAE),
# followed by the same score computed from the mean MAE over all columns.
mae = mean_absolute_error(y_true, y_pred, multioutput='raw_values')
(1-mae)*100, (1-np.mean(mae))*100

(array([91.35425, 92.05732, 91.32551, 91.67088, 91.0415 ], dtype=float32),
 91.48988947272301)

### Test data

In [14]:
# Rebuild the test pipeline; the three pre-saved modalities are loaded as-is.
scene_test_ds, face_test_ds, audio_test_ds = [
    tf.data.experimental.load(path)
    for path in (
        './data/fullscene/test_ds/',
        './data/faces/test_ds/',
        './data/audio/test_ds',
    )
]
# The text dataset is the only one batched at load time.
text_test_ds = tf.data.experimental.load('./data/text/test_ds/').batch(batch_size=32)

# Inputs of every modality; labels come from the scene dataset only.
scene_xtest, face_xtest, audio_xtest, text_xtest = [
    ds.map(lambda x, y: x)
    for ds in (scene_test_ds, face_test_ds, audio_test_ds, text_test_ds)
]
y_test = scene_test_ds.map(lambda x, y: y)

# Shuffling is deliberately left out of this pipeline.
test_inputs = (scene_xtest, face_xtest, audio_xtest, text_xtest)
test_ds = tf.data.Dataset.zip((test_inputs, y_test)).prefetch(buffer_size=AUTOTUNE)

test_ds

<PrefetchDataset shapes: (((None, 10, 224, 224, 3), (None, 10, 224, 224, 3), (None, 15, 128), (None, 50)), (None, 5)), types: ((tf.float32, tf.float32, tf.float32, tf.int32), tf.float32)>

In [15]:
# Ground truth is read from the labels-only `y_test` stream (the very dataset
# zipped into `test_ds`), so the scene/face/audio/text input batches are not
# materialised just to be thrown away. Both passes rely on `test_ds` being
# unshuffled so that row i of `y_true` matches row i of `y_pred`.
y_true = np.concatenate(list(y_test), axis=0)
y_pred = mf_model.predict(test_ds)

# One MAE per output column, shown as percentage scores 100 * (1 - MAE),
# followed by the same score computed from the mean MAE over all columns.
mae = mean_absolute_error(y_true, y_pred, multioutput='raw_values')
(1-mae)*100, (1-np.mean(mae))*100

(array([91.42722, 92.14998, 91.67239, 91.44349, 91.34789], dtype=float32),
 91.6081964969635)

In [15]:
# Display the MAE as a percentage score.
# NOTE(review): the saved output under this cell is a single number, whereas
# `mae` as assigned by the metrics cell directly above is a per-column array --
# the output looks stale; re-run to refresh.
(1-mae)*100

91.60818979144096

In [None]:
import os
import pickle

# Persist `history.history` (from the object returned by `mf_model.fit`) to disk.
# `open(..., 'wb')` does not create missing folders, so make sure the target
# directory exists first.
os.makedirs('./histories', exist_ok=True)
with open('./histories/mf.pkl', 'wb') as f:
    pickle.dump(history.history, f)