In [1]:
import pandas as pd
import numpy as np
import time
from os import walk
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.data.experimental import AUTOTUNE
from PIL import Image

### GPU

In [2]:
# https://spltech.co.uk/how-to-install-tensorflow-2-5-with-cuda-11-2-and-cudnn-8-1-for-windows-10/

In [3]:
tf.config.list_physical_devices('GPU')

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]

### Dataset

## Creating the Meme Dataset

In [4]:
# https://ai.facebook.com/blog/hateful-memes-challenge-and-data-set/
# path_memes = r'data/memotion_dataset\memotion_dataset_7k\images'
path_memes = r'data\kaggle_meme_dataset_2018\images\kaggle_images'

# https://research.google.com/youtube8m/explore.html
path_not_memes = r'data/coco_dataset/val2017'

In [5]:
def conduct_dataframe(path_memes, path_not_memes):
    _, _, filenames_memes = next(walk(path_memes))
    _, _, filenames_not_memes = next(walk(path_not_memes))
    memes = [[path_memes + '/' + path, 'meme'] for path in filenames_memes]
    not_memes = [[path_not_memes + '/' + path, 'not_meme'] for path in filenames_not_memes]
    df = pd.DataFrame(data=memes[:5000] + not_memes[:5000], columns=['path', 'label'])
    return df

In [6]:
df = conduct_dataframe(path_memes, path_not_memes)

In [7]:
df.path.values

array(['data\\kaggle_meme_dataset_2018\\images\\kaggle_images/001i5ooagzm11_redt.jpg',
       'data\\kaggle_meme_dataset_2018\\images\\kaggle_images/002fmrbao0z01_redt.jpg',
       'data\\kaggle_meme_dataset_2018\\images\\kaggle_images/007ss62yk9u11_redt.jpg',
       ..., 'data/coco_dataset/val2017/000000581482.jpg',
       'data/coco_dataset/val2017/000000581615.jpg',
       'data/coco_dataset/val2017/000000581781.jpg'], dtype=object)

In [8]:
df.label.value_counts()

not_meme    5000
meme        5000
Name: label, dtype: int64

In [9]:
def split_dataframe(df, test_size):
    df_train, df_val = train_test_split(df, test_size=test_size)
    return df_train, df_val

In [10]:
df_train, df_val = split_dataframe(df, test_size=0.2)

In [11]:
df_train.label.value_counts()

not_meme    4031
meme        3969
Name: label, dtype: int64

In [12]:
df_val.label.value_counts()

meme        1031
not_meme     969
Name: label, dtype: int64

In [13]:
def image_dataset_from_dataframe(
                 df,
                 X,
                 y,
                 batch_size=None,
                 shuffle=True,
                 seed=None,
                 image_size=None,
                 prefetch=True,
                 ):
    
    def path_to_image(path,label):
        img = tf.io.read_file(path)
        img = tf.io.decode_image(img,channels=3,expand_animations = False)
        img = tf.image.resize(img, image_size)
        return img,label
   
    paths = df[X]
    labels_unq = np.unique(df[y])
    
    class_indices = dict(zip(labels_unq,range(len(labels_unq))))
    labels = df[y].replace(class_indices)
    
    ds = tf.data.Dataset.from_tensor_slices((paths,labels))
    
    if shuffle: 
        ds = ds.shuffle(buffer_size=len(df), seed=seed, reshuffle_each_iteration=True)
    
    ds = ds.map(path_to_image, num_parallel_calls=AUTOTUNE)
    
    if batch_size is not None:
        ds = ds.batch(batch_size, drop_remainder=True)
    if prefetch:
        ds = ds.prefetch(buffer_size=AUTOTUNE)
    
    return ds, class_indices

In [14]:
ds_train,class_indices = image_dataset_from_dataframe(df_train,
                                 X='path',
                                 y='label',
                                 batch_size=1,
                                 seed=42,
                                 image_size=(600,600),
                                 prefetch=True)

In [15]:
ds_val,_ = image_dataset_from_dataframe(df_val,
                                 X='path',
                                 y='label',
                                 batch_size=1,
                                 seed=42,
                                 image_size=(600,600),
                                 prefetch=True)

In [16]:
class_indices

{'meme': 0, 'not_meme': 1}

In [17]:
type(ds_train)

tensorflow.python.data.ops.dataset_ops.PrefetchDataset

### Metrics

In [None]:
METRICS = [
      tf.keras.metrics.TruePositives(name='tp'),
      tf.keras.metrics.FalsePositives(name='fp'),
      tf.keras.metrics.TrueNegatives(name='tn'),
      tf.keras.metrics.FalseNegatives(name='fn'), 
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      tf.keras.metrics.AUC(name='auc'),
]

### Model EfficientNet (2019)

In [None]:
# https://www.tensorflow.org/api_docs/python/tf/keras/applications/EfficientNetB7
# https://arxiv.org/abs/1905.11946
# https://www.image-net.org/about.php

In [None]:
METRICS = [
      tf.keras.metrics.TruePositives(name='tp'),
      tf.keras.metrics.FalsePositives(name='fp'),
      tf.keras.metrics.TrueNegatives(name='tn'),
      tf.keras.metrics.FalseNegatives(name='fn'), 
      tf.keras.metrics.BinaryAccuracy(name='accuracy'),
      tf.keras.metrics.Precision(name='precision'),
      tf.keras.metrics.Recall(name='recall'),
      tf.keras.metrics.AUC(name='auc'),
]

In [21]:
base_model = tf.keras.applications.EfficientNetB7(input_shape=(600,600,3),
                                               include_top=False,
                                               weights='imagenet')
base_model.trainable = False

inputs = tf.keras.Input(shape=(600,600,3))
x = base_model(inputs,training=False)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
x = tf.keras.layers.Dropout(0.2)(x)
outputs = tf.keras.layers.Dense(1,activation='sigmoid')(x)
model = tf.keras.Model(inputs, outputs)

In [22]:
base_learning_rate = 0.0001
model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=base_learning_rate),
              loss=tf.keras.losses.BinaryCrossentropy(from_logits=False),
              metrics=METRICS)

In [23]:
class TimeHistory(tf.keras.callbacks.Callback):
    def on_train_begin(self, logs={}):
        self.times = []

    def on_epoch_begin(self, batch, logs={}):
        self.epoch_time_start = time.time()

    def on_epoch_end(self, batch, logs={}):
        self.times.append(time.time() - self.epoch_time_start)

### Training

In [24]:
time_collector = TimeHistory()

initial_epochs = 5
history = model.fit(ds_train,
                    epochs=initial_epochs,
                    initial_epoch = 0,
                    validation_data=ds_val,
                    callbacks=[time_collector])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


### Results

In [25]:
df_scores_base = pd.DataFrame(dict([('epoch',history.epoch)]+list(history.history.items())+[('time',time_collector.times)]))

In [26]:
df_scores_base

Unnamed: 0,epoch,loss,tp,fp,tn,fn,accuracy,precision,recall,auc,val_loss,val_tp,val_fp,val_tn,val_fn,val_accuracy,val_precision,val_recall,val_auc,time
0,0,0.244475,3592.0,97.0,3872.0,439.0,0.933,0.973706,0.891094,0.990013,0.106141,957.0,15.0,1016.0,12.0,0.9865,0.984568,0.987616,0.998806,1282.77617
1,1,0.090975,3951.0,60.0,3909.0,80.0,0.9825,0.985041,0.980154,0.998179,0.058351,961.0,10.0,1021.0,8.0,0.991,0.989701,0.991744,0.999622,1258.562458
2,2,0.062288,3974.0,64.0,3905.0,57.0,0.984875,0.984151,0.98586,0.99896,0.042516,961.0,11.0,1020.0,8.0,0.9905,0.988683,0.991744,0.999794,1234.843171
3,3,0.048204,3984.0,50.0,3919.0,47.0,0.987875,0.987605,0.98834,0.999411,0.034795,963.0,10.0,1021.0,6.0,0.992,0.989722,0.993808,0.999834,1260.194529
4,4,0.040923,3989.0,48.0,3921.0,42.0,0.98875,0.98811,0.989581,0.999481,0.029318,962.0,8.0,1023.0,7.0,0.9925,0.991753,0.992776,0.999871,1265.858961


In [27]:
df_scores_base.to_csv('df_scores_base.csv')

### Usage

In [28]:
model.save('meme_classification_EfficientNetB7')



INFO:tensorflow:Assets written to: meme_classification_EfficientNetB7\assets


In [29]:
saved_model = tf.keras.models.load_model('meme_classification_EfficientNetB7')



In [30]:
class_indices

{'meme': 0, 'not_meme': 1}

In [31]:
def predict(img_path, class_indices=class_indices, model=saved_model):
    
    img = Image.open(img_path)
    img = img.resize((600,600))
    x = np.asarray(img)
    x = x.reshape(1,600,600,3)
    
    pred = model.predict(x)
    result = pred, list(class_indices.keys())[round(pred[0][0])]
    
    return result

In [32]:
predict(r'data/own_dataset/mixed/01235.png')

(array([[0.00010878]], dtype=float32), 'meme')

In [33]:
predict(r'data/own_dataset/653bdt.jpg')

(array([[0.00120741]], dtype=float32), 'meme')

In [34]:
predict('data/coco_dataset/val2017/000000460927.jpg')

(array([[0.95967585]], dtype=float32), 'not_meme')

In [35]:
predict('data/coco_dataset/val2017/000000459809.jpg')

(array([[0.9972875]], dtype=float32), 'not_meme')