In [1]:
!pip install transformers kaggle -q

You should consider upgrading via the '/home/ec2-user/anaconda3/envs/tensorflow2_p36/bin/python -m pip install --upgrade pip' command.[0m


In [2]:
!mkdir ~/.kaggle

In [3]:
!cp kaggle.json ~/.kaggle/

In [4]:
!chmod 600 ~/.kaggle/kaggle.json

In [5]:
!kaggle datasets list

ref                                                               title                                              size  lastUpdated          downloadCount  voteCount  usabilityRating  
----------------------------------------------------------------  ------------------------------------------------  -----  -------------------  -------------  ---------  ---------------  
jmmvutu/summer-products-and-sales-in-ecommerce-wish               Sales of summer clothes in E-commerce Wish        376KB  2020-08-23 15:16:46           1764         83  0.9705882        
Cornell-University/arxiv                                          arXiv Dataset                                     877MB  2020-08-14 23:50:57           1918        437  0.875            
agirlcoding/all-space-missions-from-1957                          All Space Missions from 1957                      101KB  2020-08-13 16:18:58           1188        123  0.85294116       
landlord/handwriting-recognition                            

In [6]:
!kaggle datasets download -d parthplc/facebook-hateful-meme-dataset

Downloading facebook-hateful-meme-dataset.zip to /home/ec2-user/SageMaker/tensorflow2-advanced-operations/Special Projects
 99%|██████████████████████████████████████▊| 3.34G/3.35G [01:07<00:00, 108MB/s]
100%|██████████████████████████████████████| 3.35G/3.35G [01:07<00:00, 53.1MB/s]


In [7]:
!unzip facebook-hateful-meme-dataset.zip;

Archive:  facebook-hateful-meme-dataset.zip
  inflating: data/LICENSE.txt        
  inflating: data/README.md          
  inflating: data/dev.jsonl          
  inflating: data/img/01235.png      
  inflating: data/img/01236.png      
  inflating: data/img/01243.png      
  inflating: data/img/01245.png      
  inflating: data/img/01247.png      
  inflating: data/img/01256.png      
  inflating: data/img/01258.png      
  inflating: data/img/01264.png      
  inflating: data/img/01268.png      
  inflating: data/img/01269.png      
  inflating: data/img/01274.png      
  inflating: data/img/01275.png      
  inflating: data/img/01276.png      
  inflating: data/img/01284.png      
  inflating: data/img/01293.png      
  inflating: data/img/01295.png      
  inflating: data/img/01324.png      
  inflating: data/img/01325.png      
  inflating: data/img/01327.png      
  inflating: data/img/01329.png      
  inflating: data/img/01348.png      
  inflating: data/img/01349.png      
  infl

In [None]:
import transformers

import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt

In [2]:
tf.__version__

'2.3.0'

In [3]:
physical_devices = tf.config.list_physical_devices('GPU')
try:
  tf.config.experimental.set_memory_growth(physical_devices[0], True)
except:
  print("Invalid device or cannot modify virtual devices once initialized.")

In [4]:
train_df = pd.read_json('data/train.jsonl', lines = True)
train_df["img"] = "data/" + train_df["img"]

In [5]:
val_df = pd.read_json('data/dev.jsonl', lines = True)
val_df["img"] = "data/" + val_df["img"]

In [6]:
img_width = 224
img_height = 224

In [7]:
tokenizer = transformers.DistilBertTokenizer.from_pretrained('distilbert-base-uncased')

In [8]:
max_len = 128

### Train

In [9]:
def build_model():
    
    encoder = transformers.TFDistilBertModel.from_pretrained("distilbert-base-uncased")
    encoder.trainable = True

    input_img = tf.keras.layers.Input(
        shape = (img_width, img_height, 3), name = "image"
    )
    
    input_ids = tf.keras.layers.Input(name = "input_ids", shape = (max_len,), dtype = tf.int32)

    attention_mask = tf.keras.layers.Input(name = "attention_mask", shape = (max_len,), dtype = tf.int32)

    y = encoder(input_ids, attention_mask = attention_mask)[0]
    
    y = tf.keras.layers.Bidirectional(tf.keras.layers.GRU(128, return_sequences = True))(y)

    extractor = tf.keras.applications.EfficientNetB7(include_top = False, \
                                                 input_tensor = input_img, weights = "imagenet")
    
    extractor.trainable = False
    
    for layer in extractor.layers:
        if(layer.name.startswith("block7") or layer.name.startswith("block6")):
            layer.trainable = True

    x = tf.keras.layers.GlobalMaxPooling2D()(extractor.output)

    y = tf.keras.layers.GlobalMaxPooling1D()(y)
    
    x = tf.keras.layers.concatenate([x, y])
    
    x = tf.keras.layers.Dense(1024)(x)
    
    x = tf.keras.layers.Dropout(0.2)(x)
    
    x = tf.keras.layers.Dense(128)(x)
    
    x = tf.keras.layers.Dropout(0.2)(x)
    
    out = tf.keras.layers.Dense(1, activation = 'sigmoid')(x)

    model = tf.keras.models.Model([input_img, input_ids, attention_mask], out)

    return model

In [10]:
model = build_model()
model.compile(optimizer = "adam", loss = "binary_crossentropy", metrics = ["accuracy"])
model.summary()

Some weights of the model checkpoint at distilbert-base-uncased were not used when initializing TFDistilBertModel: ['vocab_layer_norm', 'vocab_projector', 'vocab_transform', 'activation_13']
- This IS expected if you are initializing TFDistilBertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPretraining model).
- This IS NOT expected if you are initializing TFDistilBertModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
All the weights of TFDistilBertModel were initialized from the model checkpoint at distilbert-base-uncased.
If your task is similar to the task the model of the ckeckpoint was trained on, you can already use TFDistilBertModel for predictions without further training.


Model: "functional_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
image (InputLayer)              [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
rescaling (Rescaling)           (None, 224, 224, 3)  0           image[0][0]                      
__________________________________________________________________________________________________
normalization (Normalization)   (None, 224, 224, 3)  7           rescaling[0][0]                  
__________________________________________________________________________________________________
stem_conv_pad (ZeroPadding2D)   (None, 225, 225, 3)  0           normalization[0][0]              
_______________________________________________________________________________________

In [11]:
def encode_single_sample(img_path, label, text):
    img = tf.io.read_file(img_path)
    img = tf.io.decode_png(img, channels = 3)
    img = tf.image.convert_image_dtype(img, tf.int32)
    img = tf.image.resize(img, [img_height, img_width])
    text = tokenizer(text, return_tensors = 'tf')
    return {"image": img, "label": label, "text": text}

In [12]:
class HatefulMemes(tf.keras.utils.Sequence):
    """Helper to iterate over the data (as Numpy arrays)."""

    def __init__(self, batch_size, input_img_paths, label, text):
        self.batch_size = batch_size
        self.input_img_paths = input_img_paths
        self.text = text
        self.label = label

    def __len__(self):
        return len(self.input_img_paths) // self.batch_size

    def __getitem__(self, idx):
        """Returns tuple (input, target) correspond to batch #idx."""
        i = idx * self.batch_size
        batch_input_img_paths = self.input_img_paths[i : i + self.batch_size]
        batch_label = self.label[i : i + self.batch_size]
        batch_text = self.text[i : i + self.batch_size]
        w = np.zeros((self.batch_size,) + (img_height, img_width) + (3,), dtype = "float32")
        x = np.zeros((self.batch_size,max_len,), dtype = "int32")
        y = np.zeros((self.batch_size,max_len,), dtype = "int32")
        z = np.zeros((self.batch_size), dtype = "float32")
        
        for i,j,k,l in zip(range(self.batch_size), batch_input_img_paths, batch_label, batch_text):
            sample = encode_single_sample(j,k,l)
            w[i] = sample["image"].numpy().tolist()
            input_ids = sample["text"]['input_ids'].numpy().tolist()[0]
            x[i] = input_ids + [0]*(max_len - len(input_ids))
            attention_mask = sample["text"]['attention_mask'].numpy().tolist()[0]
            y[i] = attention_mask + [0]*(max_len - len(attention_mask))
            z[i] = sample["label"]
        
        return [w,x,y], z

In [13]:
train_gen = HatefulMemes(16, train_df["img"].values.tolist(), train_df["label"].values.tolist(), \
                         train_df["text"].values.tolist())
val_gen = HatefulMemes(4, val_df["img"].values.tolist(), val_df["label"].values.tolist(), \
                         val_df["text"].values.tolist())

In [14]:
epochs = 50
early_stopping_patience = 5

early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor = "val_accuracy", patience = early_stopping_patience, restore_best_weights = True
)

checkpoint = tf.keras.callbacks.ModelCheckpoint(
    'hateful_flatten', monitor = 'val_accuracy', verbose = 1, save_best_only = True, mode = 'max'
)

history = model.fit(train_gen, \
                    validation_data = val_gen, epochs = epochs, callbacks = [early_stopping, checkpoint])

Epoch 1/50
Epoch 00001: val_accuracy improved from -inf to 0.52400, saving model to hateful_flatten
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
INFO:tensorflow:Assets written to: hateful_flatten/assets
Epoch 2/50
Epoch 00002: val_accuracy improved from 0.52400 to 0.55200, saving model to hateful_flatten
INFO:tensorflow:Assets written to: hateful_flatten/assets
Epoch 3/50
Epoch 00003: val_accuracy did not improve from 0.55200
Epoch 4/50
Epoch 00004: val_accuracy did not improve from 0.55200
Epoch 5/50
Epoch 00005: val_accuracy did not improve from 0.55200


### Predict

In [15]:
train_df = pd.read_json('data/test.jsonl', lines = True)
train_df["img"] = "data/" + train_df["img"]

In [16]:
train_df

Unnamed: 0,id,img,text
0,16395,data/img/16395.png,handjobs sold seperately
1,37405,data/img/37405.png,introducing fidget spinner for women
2,94180,data/img/94180.png,happy pride month let's go beat up lesbians
3,54321,data/img/54321.png,laughs in [majority of u.s crime rate]
4,97015,data/img/97015.png,finds out those 72 virgins.. are goats
...,...,...,...
995,3869,data/img/03869.png,a mother's love for the child is a divine thing
996,23817,data/img/23817.png,sea monkeys
997,56280,data/img/56280.png,little miss muffet sat on her tuffet
998,29384,data/img/29384.png,they're in a row


In [17]:
def val_encode_single_sample(img_path, text):
    img = tf.io.read_file(img_path)
    img = tf.io.decode_png(img, channels = 3)
    img = tf.image.convert_image_dtype(img, tf.int32)
    img = tf.image.resize(img, [img_height, img_width])
    text = tokenizer(text, return_tensors = 'tf')
    return {"image": img, "text": text}

In [18]:
class ValHatefulMemes(tf.keras.utils.Sequence):
    """Helper to iterate over the data (as Numpy arrays)."""

    def __init__(self, batch_size, input_img_paths, text):
        self.batch_size = batch_size
        self.input_img_paths = input_img_paths
        self.text = text

    def __len__(self):
        return len(self.input_img_paths) // self.batch_size

    def __getitem__(self, idx):
        """Returns tuple (input, target) correspond to batch #idx."""
        i = idx * self.batch_size
        batch_input_img_paths = self.input_img_paths[i : i + self.batch_size]
        batch_text = self.text[i : i + self.batch_size]
        w = np.zeros((self.batch_size,) + (img_height, img_width) + (3,), dtype = "float32")
        x = np.zeros((self.batch_size,max_len,), dtype = "int32")
        y = np.zeros((self.batch_size,max_len,), dtype = "int32")
        
        for i,j,k in zip(range(self.batch_size), batch_input_img_paths, batch_text):
            sample = val_encode_single_sample(j,k)
            w[i] = sample["image"].numpy().tolist()
            input_ids = sample["text"]['input_ids'].numpy().tolist()[0]
            for _ in range(max_len - len(input_ids)):
              input_ids.append(0)
            x[i] = input_ids
            attention_mask = sample["text"]['attention_mask'].numpy().tolist()[0]
            for _ in range(max_len - len(attention_mask)):
              attention_mask.append(0)
            y[i] = attention_mask
        
        return [w,x,y]

In [19]:
pred_gen = ValHatefulMemes(8, train_df["img"].values.tolist(), train_df["text"].values.tolist())

In [20]:
preds = model.predict(pred_gen, verbose = 1)



In [21]:
def prob2pred(x):
    if x > 0.5:
        return 1
    else:
        return 0

In [22]:
train_df["proba"] = [i[0] for i in preds.tolist()]
train_df["label"] = train_df["proba"].apply(prob2pred)

In [23]:
train_df = train_df.drop(["img","text"], axis = 1)
train_df

Unnamed: 0,id,proba,label
0,16395,0.442940,0
1,37405,0.587811,1
2,94180,0.616231,1
3,54321,0.265343,0
4,97015,0.466702,0
...,...,...,...
995,3869,0.218250,0
996,23817,0.331855,0
997,56280,0.216410,0
998,29384,0.145194,0


In [24]:
train_df.to_csv("submission_new.csv", index = False)