In [1]:
from tensorflow import keras
import re 
import bert 
import tensorflow_hub as hub 
import tensorflow as tf 

In [2]:
# Load the saved Keras prediction models, one per GIF feature.
# Each checkpoint is stored as 'GIF models/<feature>_prediction_model.h5'.
def _load_gif_model(feature):
    """Load the saved Keras model file for the given feature name."""
    return keras.models.load_model('GIF models/' + feature + '_prediction_model.h5')


avgBrightnessModel = _load_gif_model('avg_brightness')
entropyModel = _load_gif_model('entropy')
framerateModel = _load_gif_model('framerate')
avgSharpnessModel = _load_gif_model('avg_sharpness')
contrastModel = _load_gif_model('contrast')
durationModel = _load_gif_model('duration')
faceCountModel = _load_gif_model('face_count')
numberOfPixelsModel = _load_gif_model('number_of_pixels')
pixelNoiseModel = _load_gif_model('pixel_noise')
qualityModel = _load_gif_model('quality')


In [3]:
# Text cleaning helpers
# Pre-compiled pattern for one HTML-style tag: '<', one or more characters
# other than '>', then the closing '>'.
TAG_RE = re.compile(r'<[^>]+>')


def remove_tags(text):
    """Return `text` with every substring matched by TAG_RE deleted."""
    without_tags = re.sub(TAG_RE, '', text)
    return without_tags

# Definition of the function that removes HTML tags, punctuation/special
# characters and stray single letters from a raw tweet.
def preprocess_text(raw_tweaet):
    """Clean a raw tweet so that only whitespace-separated alphabetic words remain.

    Steps, in order:
      1. Strip HTML tags with ``remove_tags``.
      2. Replace every character that is not an ASCII letter with a space.
      3. Drop single letters that are surrounded by whitespace.
      4. Collapse runs of whitespace into one space and trim both ends.

    Args:
        raw_tweaet: The raw tweet text (str). The misspelled parameter name is
            kept as-is so existing keyword callers keep working.

    Returns:
        The cleaned text (str); an empty string if nothing alphabetic is left.
    """
    # Removing html tags
    tweet = remove_tags(raw_tweaet)
    # Replacing punctuation, digits and other non-letters with a SPACE.
    # The previous code substituted '' here, which also deleted the spaces
    # between words and glued the whole tweet into a single token, turning the
    # two whitespace-based steps below into no-ops.
    # NOTE(review): if the saved models were trained on text cleaned with the
    # old behaviour, confirm against the training notebook before relying on this.
    tweet = re.sub('[^a-zA-Z]', ' ', tweet)
    # Removing single letters that stand alone between whitespace
    tweet = re.sub(r"\s+[a-zA-Z]\s+", ' ', tweet)
    # Removing multiple spaces
    tweet = re.sub(r'\s+', ' ', tweet)
    # Step 2 can leave a space where leading/trailing punctuation used to be.
    return tweet.strip()

In [4]:
# Build the tokenizer from the assets shipped with the TF-Hub BERT layer
BertTokenizer = bert.bert_tokenization.FullTokenizer
bert_layer = hub.KerasLayer(
    "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3",
    trainable=False,
)
# The hub layer's resolved object carries the vocabulary file and casing flag;
# .numpy() converts each tensor into a plain numpy value.
_bert_assets = bert_layer.resolved_object
vocabulary_file = _bert_assets.vocab_file.asset_path.numpy()
to_lower_case = _bert_assets.do_lower_case.numpy()
tokenizer = BertTokenizer(vocabulary_file, to_lower_case)

# Definition of the function that converts a tweet into vocabulary ids
def tokenize_tweets(text_tweets):
    """Tokenize `text_tweets` with the module-level `tokenizer` and return the ids of the resulting tokens."""
    tokens = tokenizer.tokenize(text_tweets)
    token_ids = tokenizer.convert_tokens_to_ids(tokens)
    return token_ids

In [5]:
# Clean, tokenize, and format the raw text
def format_text(raw_text):
    """Turn raw text into a one-row int32 ragged tensor of token ids.

    The text is passed through `preprocess_text`, then `tokenize_tweets`, and
    the resulting id list is wrapped in an outer list before being handed to
    `tf.ragged.constant`.
    """
    token_ids = tokenize_tweets(preprocess_text(raw_text))
    return tf.ragged.constant([token_ids], dtype=tf.int32)

In [12]:
# Take in a processed text, output the predicted GIF features as a dict
def predict_GIF_features(formatted_input):
    """Run every loaded GIF-feature model on `formatted_input`.

    Each model's `predict` result is indexed with `[0][0]` and stored under
    that feature's key.

    Returns:
        dict mapping feature name (e.g. 'Average_Brightness', 'Quality') to the
        value taken from the corresponding model's prediction.
    """
    # (summary key, model) pairs, listed in the order the models are run
    feature_models = (
        ('Average_Brightness', avgBrightnessModel),
        ('Entropy', entropyModel),
        ('Framerate', framerateModel),
        ('Avg_Sharpness', avgSharpnessModel),
        ('Contrast', contrastModel),
        ('Duration', durationModel),
        ('Face_Count', faceCountModel),
        ('Number_Of_Pixels', numberOfPixelsModel),
        ('Pixel_Noise', pixelNoiseModel),
        ('Quality', qualityModel),
    )
    return {
        label: model.predict(formatted_input)[0][0]
        for label, model in feature_models
    }

In [13]:
# Demonstration: run the whole pipeline on one sample sentence
demo_text = "Happy to meet you at last, Yully."
result = predict_GIF_features(format_text(demo_text))
print(result)