In [1]:
import tensorflow as tf 
import tensorflow_hub as hub 
from tensorflow.keras import layers 
import bert 
import re 
# re — Regular expression operations
import math
import csv
import pandas as pd                     
import cv2 as cv 
from PIL import Image, ImageSequence
from tensorflow.keras import losses
from plotly.subplots import make_subplots
import plotly.graph_objects as go
import sys
from tensorflow import keras


In [22]:
data = pd.read_csv("./tgif-v1.0.tsv", sep='\t')
data.isnull().values.any()
gif_links = list(data.y.values)
raw_tweets = list(data.x.values)
data.shape

(125782, 2)

# Tweet Pre-Process 

## Remove Special Char

In [17]:
# definition for function for removing html tags 
TAG_RE = re.compile(r'<[^>]+>')
def remove_tags(text):
    return TAG_RE.sub('', text)

In [18]:
# definition for function for remove any punctuations and special characters
def preprocess_text(raw_tweaet):
    # Removing html tags
    tweet = remove_tags(raw_tweaet)
    # Removing html tags
    tweet = re.sub('[^a-zA-Z]', '', tweet)
    # Removing html tags
    tweet = re.sub(r"\s+[a-zA-Z]\s+", ' ', tweet)
    # Removing multiple spaces
    tweet = re.sub(r'\s+', ' ', tweet)
    return tweet

In [23]:
# run the preprocess_text function to clean tweets list 
tweets = [] 
for tweet in raw_tweets[:10000]:
    tweets.append(preprocess_text(tweet))

## Tokenizing 

In [21]:
# Create a tokenizer 
BertTokenizer = bert.bert_tokenization.FullTokenizer
bert_layer = hub.KerasLayer("https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/3", trainable=False)
vocabulary_file = bert_layer.resolved_object.vocab_file.asset_path.numpy()
# .numpy(): converts a tensor object into an numpy.ndarray
to_lower_case = bert_layer.resolved_object.do_lower_case.numpy()
tokenizer = BertTokenizer(vocabulary_file, to_lower_case)

In [8]:
# Definition for function for convert tweet to ids 
def tokenize_tweets(text_tweets):
    return tokenizer.convert_tokens_to_ids(tokenizer.tokenize(text_tweets))

In [9]:
# run the tokenize_tweets on tweets 
tokenized_tweets = [tokenize_tweets(tweet) for tweet in tweets]

# GIF Pre-Process

In [26]:
import requests
# Requests is an elegant and simple HTTP library for Python
import os 
# os — Miscellaneous operating system interfaces¶
os.chdir('./gifs')
# !pwd

In [27]:
def gif_downloader(image_urls, status=[], filenames = []):
    
    for index, img in enumerate(image_urls):
        # We can split the file based upon / and extract the last split within the python list below:
        file_name = img.split('/')[-1]
        #print("fThis is the file name: {file_name}")
        filenames.append(file_name) 
        # Now let's send a request to the image URL:
        r = requests.get(img, stream=True)
        # We can check that the status code is 200 before doing anything else:
        if r.status_code == 200:
            # This command below will allow us to write the data to a file as binary:
            with open(file_name, 'wb') as f:
                for chunk in r:
                    f.write(chunk)
            status.append(True)
            print("index: " + str(index) + " downloaded success. Saved as "+ file_name)
        else:
            # We will write all of the images back to the broken_images list:
            print("index: " + str(index) + "downloaded failed")
            status.append(False)
    df = pd.DataFrame(filenames)
    df.to_csv('filenames.csv') 
    return filenames, status

In [28]:
download_status = []
filenames = []
filenames, downlod_status = gif_downloader(gif_links[:2000], status=download_status)
# if any gif was not downloaded successfully 

index: 0 downloaded success. Saved as tumblr_mevmyaKtDf1rgvhr8o1_500.gif
index: 1 downloaded success. Saved as tumblr_nok4eeONTv1s2yegdo1_400.gif
index: 2 downloaded success. Saved as tumblr_mllh01J96X1s9npefo1_250.gif
index: 3 downloaded success. Saved as tumblr_nqlr0rn8ox1r2r0koo1_400.gif
index: 4 downloaded success. Saved as tumblr_mvtuwlhSkE1qbnleeo1_500.gif
index: 5 downloaded success. Saved as tumblr_npw7v7W07C1tmj047o1_250.gif
index: 6 downloaded success. Saved as tumblr_mdlv9v6hE91qanrf2o1_r11_500.gif
index: 7 downloaded success. Saved as tumblr_nkcmeflaVj1u26rdio1_500.gif
index: 8 downloaded success. Saved as tumblr_mkwd0y8Poo1qlnbq8o1_400.gif
index: 9 downloaded success. Saved as tumblr_msij5q4Run1qd76t9o1_500.gif


In [None]:
print(any(status == False for status in download_status))
print(download_status.index(False))

In [148]:
# filenames
!pwd

/home/marxw/Desktop


In [10]:
# https://gist.github.com/kmohrf/8d4653536aaa88965a69a06b81bcb022
def calculate_image_brightness(image):
    greyscale_image = image.convert('L')
    histogram = greyscale_image.histogram()
    pixels = sum(histogram)
    brightness = scale = len(histogram)
    for index in range(0, scale):
        ratio = histogram[index] / pixels
        brightness += ratio * (-scale + index)
    return 1 if brightness == 255 else brightness / scale
def calculate_gif_brightness(path_to_gif_file):
    capture = Image.open(path_to_gif_file)
    gif_brightness = []
    for frame in ImageSequence.Iterator(capture):
        gif_brightness.append(calculate_image_brightness(frame))
    return sum(gif_brightness) / len(gif_brightness)
# In GIF files, each frame has its own duration. So there is no general fps for a GIF file. 
brightness = []
for gif_file_path in filenames[:50]: 
    brightness.append(calculate_gif_brightness(gif_file_path))

NameError: name 'filenames' is not defined

In [63]:
def calculate_gif_entropy(path_to_gif_file):
    capture = Image.open(path_to_gif_file)
    gif_entropy = []
    for frame in ImageSequence.Iterator(capture):
        gif_entropy.append(frame.entropy())
    return sum(gif_entropy) / len(gif_entropy)
# In GIF files, each frame has its own duration. So there is no general fps for a GIF file. 
entropy = []
for gif_file_path in filenames[:50]: 
    entropy.append(calculate_gif_entropy(gif_file_path))

In [66]:
def get_avg_fps(PIL_Image_object):
    """ Returns the average framerate of a PIL Image object """
    PIL_Image_object.seek(0)
    frames = duration = 0
    while True:
        try:
            frames += 1
            duration += PIL_Image_object.info['duration']
            PIL_Image_object.seek(PIL_Image_object.tell() + 1)
        except EOFError:
            return frames / duration * 1000
    return None
# In GIF files, each frame has its own duration. So there is no general fps for a GIF file. 
framerates = []
for gif_file_path in filenames[:50]: 
    gif_obj = Image.open(gif_file_path)
    framerates.append(get_avg_fps(gif_obj))

In [67]:
df = pd.DataFrame(filenames)
df['text'] = data['x']
df['brightness'] = brightness
df['entropy'] = entropy
df['fps'] = framerates

In [68]:
df

Unnamed: 0,0,text,brightness,entropy,fps
0,tumblr_mevmyaKtDf1rgvhr8o1_500.gif,"a man is glaring, and someone with sunglasses ...",0.233874,5.17009,5.0
1,tumblr_nok4eeONTv1s2yegdo1_400.gif,a cat tries to catch a mouse on a tablet,0.372801,5.225728,16.666667
2,tumblr_mllh01J96X1s9npefo1_250.gif,a man dressed in red is dancing.,0.265175,5.84086,5.263158
3,tumblr_nqlr0rn8ox1r2r0koo1_400.gif,an animal comes close to another in the jungle,0.475178,3.687133,11.111111
4,tumblr_mvtuwlhSkE1qbnleeo1_500.gif,a man in a hat adjusts his tie and makes a wei...,0.210181,3.205473,10.0
5,tumblr_npw7v7W07C1tmj047o1_250.gif,someone puts a cat on wrapping paper then wrap...,0.277893,5.121114,6.25
6,tumblr_mdlv9v6hE91qanrf2o1_r11_500.gif,a brunette woman is looking at the man,0.347571,6.214434,6.875
7,tumblr_nkcmeflaVj1u26rdio1_500.gif,a man on a bicycle is jumping over a fence.,0.395059,6.722588,16.666667
8,tumblr_mkwd0y8Poo1qlnbq8o1_400.gif,a group of men are standing and staring in the...,0.338757,6.339215,10.0
9,tumblr_msij5q4Run1qd76t9o1_500.gif,a man with black clothes is dancing sexy,0.864458,5.170477,12.5


# Gif Suggestion


In [79]:
##Model Import
!pwd
os.chdir('../')


/home/marxw/Desktop/AwesomeGif


In [84]:

avgBrightnessModel = keras.models.load_model('/home/marxw/Desktop/AwesomeGif/GIF-models/avg_brightness_prediction_model.h5')
entropyModel = keras.models.load_model('/home/marxw/Desktop/AwesomeGif/GIF-models/entropy_prediction_model.h5')
framerateModel = keras.models.load_model('/home/marxw/Desktop/AwesomeGif/GIF-models/framerate_prediction_model.h5')

In [85]:
def format_text(raw_text):
    clean_text = preprocess_text(raw_text)
    tokenized_text = tokenize_tweets(clean_text)
    formatted_text = tf.ragged.constant([tokenized_text], dtype=tf.int32)
    return formatted_text

In [98]:
def predict_GIF_features(formatted_input):
    brightness = avgBrightnessModel.predict(formatted_input)[0]
    entropy = entropyModel.predict(formatted_input)[0]
    framerate = framerateModel.predict(formatted_input)[0]
    summary = {'brightness': brightness, 'entropy': entropy, 'fps': framerate}
    return summary 

In [115]:
pred = predict_GIF_features(format_text("This is so stupid"))

In [116]:
pred

{'brightness': array([0.21615382], dtype=float32),
 'entropy': array([1.4850472], dtype=float32),
 'fps': array([4.627921], dtype=float32)}

In [143]:
def suggest(text, df=df):
    pred = predict_GIF_features(format_text(text))
    dfa =  df[(0.5*pred['brightness'][0] < df['brightness']) & (df['brightness'] < 1.5*pred['brightness'][0])]
    if(dfa.size==0): return "Oops no match found"
    else: return dfa[0][0]

In [145]:
suggest("what about fox")

'tumblr_mevmyaKtDf1rgvhr8o1_500.gif'

In [121]:
 df[(0.8*pred['brightness'][0] < df['brightness']) & (df['brightness'] < 1.2*pred['brightness'][0])]

Unnamed: 0,0,text,brightness,entropy,fps
0,tumblr_mevmyaKtDf1rgvhr8o1_500.gif,"a man is glaring, and someone with sunglasses ...",0.233874,5.17009,5.0
4,tumblr_mvtuwlhSkE1qbnleeo1_500.gif,a man in a hat adjusts his tie and makes a wei...,0.210181,3.205473,10.0
12,tumblr_nov5tja4g91rkmzrjo1_400.gif,a man walks into a room and sees a girl floati...,0.190003,4.738599,10.0
14,tumblr_nq3ofebQo51s4vpwjo1_500.gif,"a beatles show, paul is on the front singing v...",0.240527,4.909666,16.666667
21,tumblr_npntschfQp1qf69tyo1_500.gif,a guy are kissing a girl slowly in a swimming ...,0.234896,5.445038,10.0
42,tumblr_nnn5873T0c1uqclqmo1_500.gif,a girl is smoking with his friend in the house,0.180697,3.231666,10.0


In [118]:
 df[(0.5*pred['entropy'][0] < df['entropy']) & (df['entropy'] < 1.5*pred['entropy'][0])]

Unnamed: 0,0,text,brightness,entropy,fps


In [117]:
 df[(0.5*pred['fps'][0] < df['fps']) & (df['fps'] < 1.5*pred['fps'][0])]

Unnamed: 0,0,text,brightness,entropy,fps
0,tumblr_mevmyaKtDf1rgvhr8o1_500.gif,"a man is glaring, and someone with sunglasses ...",0.233874,5.17009,5.0
2,tumblr_mllh01J96X1s9npefo1_250.gif,a man dressed in red is dancing.,0.265175,5.84086,5.263158
5,tumblr_npw7v7W07C1tmj047o1_250.gif,someone puts a cat on wrapping paper then wrap...,0.277893,5.121114,6.25
6,tumblr_mdlv9v6hE91qanrf2o1_r11_500.gif,a brunette woman is looking at the man,0.347571,6.214434,6.875
16,tumblr_nmzcu205yo1qdj2qoo1_500.gif,"a woman is laughing and holding a man, the man...",0.369162,6.103437,5.0
32,tumblr_nj4iqcVaFQ1s26nzro1_500.gif,a man are holding a bottle taps it on his head.,0.608701,3.280546,5.0
45,tumblr_npgxznNRd31uv0y2ro1_250.gif,a cute little bulldog puppy is taking his tong...,0.412464,6.659292,4.761905


array([2.9850998], dtype=float32)

In [114]:
df['fps']

0      5.000000
1     16.666667
2      5.263158
3     11.111111
4     10.000000
5      6.250000
6      6.875000
7     16.666667
8     10.000000
9     12.500000
10     8.333333
11    10.000000
12    10.000000
13     9.090909
14    16.666667
15    12.500000
16     5.000000
17    10.000000
18     7.692308
19    11.111111
20    20.000000
21    10.000000
22    10.000000
23     9.090909
24    10.000000
25    33.333333
26    16.666667
27    33.333333
28     8.333333
29     7.932011
30    10.000000
31    12.500000
32     5.000000
33    14.285714
34    10.000000
35    20.000000
36    14.285714
37    10.000000
38    11.111111
39    20.000000
40    14.285714
41    10.000000
42    10.000000
43    25.000000
44    10.000000
45     4.761905
46    10.000000
47    11.864407
48    12.500000
49    12.500000
Name: fps, dtype: float64