<a href="https://colab.research.google.com/github/schesa/ai-memes/blob/master/ai_memes.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Mount Drive

In [1]:
try:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)
    print('Note: using Google CoLab')
    COLAB = True
except:
    print('Note: not using Google Colab')
    COLAB = False

if COLAB:
    root_path = "/content/drive/My Drive/licenta AC/Chesa/data"
else:
    root_path = "./data/captions"

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3aietf%3awg%3aoauth%3a2.0%3aoob&response_type=code&scope=email%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdocs.test%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive%20https%3a%2f%2fwww.googleapis.com%2fauth%2fdrive.photos.readonly%20https%3a%2f%2fwww.googleapis.com%2fauth%2fpeopleapi.readonly

Enter your authorization code:
··········
Mounted at /content/drive
Note: using Google CoLab


### Get Dataset
* **root_captioning** = "/content/drive/My Drive/licenta AC/Chesa/data"
* **lookup** -> dict key: Meme id, value list captions containing words & punctuations splitted by space
* remove non ascii captions
* lex -> set of unique words
* max_length -> Maximum length of a caption (in words)

### Prepare Dataset
* split into **train** and **test**
* train_images set, train_img list, string path '3001612175_53567ffb58.jpg'
* train_descriptions set, add start/stop get from lookup by train_image 
  ['2092870249_90e3f1855b': ['startseq a ... , ... a jack . endseq'],]
* all_train_captions - all merged in list
* InceptionV3



In [2]:
import os
from pathlib import Path
import json
from tqdm import tqdm
from functools import reduce

isascii_word = lambda w: len(w) == len(w.encode())
isascii_list = lambda l: reduce(lambda rez, word: rez & True if isascii_word(word) else False & rez, l, True)
dirname = os.path.join(root_path, 'ImgFlip500K_Dataset', 'memes')
lookup = dict()

def print_iterator(it):
    for x in it:
        print(x, end='\n')
    print('')  # for new line

for filename in tqdm(os.listdir(dirname)): # foreach json file in memes
    meme_name = Path(filename).stem # remove file extension
    with open(os.path.join(dirname, filename)) as json_file:
      memes = json.load(json_file)
      # lookup[meme_name] = list(map(lambda meme: ' | '.join(meme['boxes']), memes))
      lookup[meme_name] = []
      for meme in memes:
        words = ' | '.join(meme['boxes'])
        if isascii_list(words):
          lookup[meme_name].append(words)


print(f'Memes loaded: {len(lookup)}') # 99 memes, in the latest dataset 100
print(f'Meme example: {lookup["Mocking-Spongebob"][0]}') # when you off the dope | and you think you a bird

100%|██████████| 99/99 [00:50<00:00,  1.97it/s]

Memes loaded: 99
Meme example: when you off the dope | and you think you a bird





In [3]:
max_length = 0
lex = set() # set of unique words
# for key in lookup:
#   [lex.update(d.split()) for d in lookup[key]]
#   big_word = reduce(lambda l, rez: len(max(l.split(), key=lambda x: len(x))) if len(max(l.split(), key=lambda x: len(x))) > len(rez) else rez, lookup[key])
#   if(len(big_word)> max_length):
#     max_length = len(big_word)
#     max_word = big_word
#   break
for desc in lookup:
  for word in desc.split():
    lex.add(word)
    if max_length < len(word):
      max_length=max(max_length,len(word))
      max_word = word

print(f'Unique words: {len(lex)}')
print(f'Biggest word: {max_length} chars')
max_word


Unique words: 99
Biggest word: 46 chars


'Bernie-I-Am-Once-Again-Asking-For-Your-Support'

In [4]:
START = "startseq"
STOP = "endseq"
# same as lookup but captions wrapped in Start/Stop
train_descriptions = {k:v for k,v in lookup.items()}
for n,v in train_descriptions.items(): 
  for d in range(len(v)):
    v[d] = f'{START} {v[d]} {STOP}'
    
print(f'Wrapped captions in Start/Stop')

Wrapped captions in Start/Stop


In [5]:
from tensorflow.keras.applications.inception_v3 import InceptionV3
import tensorflow.keras.applications.inception_v3
from tensorflow.keras.models import Model

encode_model = InceptionV3(weights='imagenet')
encode_model = Model(encode_model.input, encode_model.layers[-2].output)
WIDTH = 299
HEIGHT = 299
OUTPUT_DIM = 2048
preprocess_input = tensorflow.keras.applications.inception_v3.preprocess_input

print('InceptionV3 loaded!')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels.h5
InceptionV3 loaded!


In [0]:
from PIL import Image
import tensorflow.keras.preprocessing.image
import numpy as np

def encodeImage(img):
  # Resize all images to a standard size (specified bythe image encoding network)
  img = img.resize((WIDTH, HEIGHT), Image.ANTIALIAS)
  # Convert a PIL image to a numpy array
  x = tensorflow.keras.preprocessing.image.img_to_array(img)
  # Expand to 2D array
  x = np.expand_dims(x, axis=0)
  # Perform any preprocessing needed by InceptionV3 or others
  x = preprocess_input(x)
  # Call InceptionV3 (or other) to extract the smaller feature set for the image.
  x = encode_model.predict(x) # Get the encoding vector for the image
  # Shape to correct form to be accepted by LSTM captioning network.
  x = np.reshape(x, OUTPUT_DIM )
  return x

In [0]:
# Nicely formatted time string
def hms_string(sec_elapsed):
    h = int(sec_elapsed / (60 * 60))
    m = int((sec_elapsed % (60 * 60)) / 60)
    s = sec_elapsed % 60
    return f"{h}:{m:>02}:{s:>05.2f}"

In [28]:
import pickle
from time import time

train_path = os.path.join(root_path,'ImgFlip500K_Dataset',"data",f'train{OUTPUT_DIM}.pkl')
if not os.path.exists(train_path):
  start = time()
  encoding_train = {}
  train_images_path = os.path.join(root_path,'ImgFlip500K_Dataset','templates','img') 
  for image_path in tqdm(os.listdir(train_images_path)):
    # print(image_path)
    img = tensorflow.keras.preprocessing.image.load_img(os.path.join(train_images_path,image_path), target_size=(HEIGHT, WIDTH))
    encoding_train[image_path] = encodeImage(img)
  with open(train_path, "wb") as fp:
    pickle.dump(encoding_train, fp)
  print(f"\nGenerating training set took: {hms_string(time()-start)}")
else:
  with open(train_path, "rb") as fp:
    encoding_train = pickle.load(fp)
print('Loaded')

Loaded
