# Drawception Reactions

This Notebook will create a model to predict the number of reactions a Drawception image will recieve. The reactions are binned into 3 classes for 0, 1-3, and 4+ reactions.

In [1]:
# Import required libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

import os
import PIL
import PIL.Image

from sklearn.metrics import accuracy_score

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras import utils

In [2]:
# Grab the data
drawception = pd.read_csv('../data/drawception_master.csv')

This function is applyied to the reaction column to create the bin variable.

In [3]:
# Function to bin my target
def my_bins(number):
    if number == 0:
        return 0
    elif number <= 3:
        return 1
    else:
        return 2
    return 3

In [4]:
# Add a total reactions feature
drawception['REACT'] = drawception['LIKE']+drawception['HAHA']+drawception['WOW']+drawception['LOVE']+drawception['DUCK']

# Add image path column
drawception['img_path'] = ['../'+'/'.join(drawception.iloc[index,2].split('/')[3:6]) for index in range(len(drawception))]

# Add target catagorical column
drawception['bin'] = drawception['REACT'].apply(my_bins)

In [5]:
drawception.head(3)

Unnamed: 0,pre_caption,post_caption,image_url,author,panel_number,LIKE,HAHA,WOW,LOVE,DUCK,game_url,player_num,game_date,game_tags,REACT,img_path,bin
0,Guitar,brown guitar,https://cdn.drawception.com/drawings/1032692/1...,Alexyeaheyaha,2,0,0,0,0,0,/game/YC8aMef8Ox/guitar/,12,"December 18th, 2020",['blitz mode'],0,../drawings/1032692/1KqoCkwSjF.png,0
1,brown guitar,Guitar,https://cdn.drawception.com/drawings/1041492/c...,celemon,4,0,0,0,0,0,/game/YC8aMef8Ox/guitar/,12,"December 18th, 2020",['blitz mode'],0,../drawings/1041492/ct5kbWktZ9.png,0
2,Guitar,acoustic guitar,https://cdn.drawception.com/drawings/681336/dz...,Sarramiah and Daughter,6,0,0,0,0,0,/game/YC8aMef8Ox/guitar/,12,"December 18th, 2020",['blitz mode'],0,../drawings/681336/dzq8daFSvo.png,0


## Create Dataset Generator

The image is loaded, reduced in resolution, and converted into a numpy array. The whole thing loops indefinitely for fitting multiple epochs. The label is converted directly to the three columns.

In [6]:
# Generator assistance provided by wiki.python.org

# The iloc call is looking for img_path and is using 15 for that index right now
def image_batch_gen(dataframe, batch_size, end):   
    count = 0
    while(1):
        image_batch = np.empty((batch_size, 250, 300, 3))
        label_batch = np.empty((batch_size, 3))
        
        for i in range(batch_size):
            # Get image data from a single .png
            my_image = PIL.Image.open(dataframe.iloc[count,15])
            # reduce the image size for faster computation
            my_image = my_image.reduce(2)
            
            # Convert to numpy array and dump into my batchs
            image_batch[i,:,:,:] = np.asarray(my_image)
            
            # Set each target to 0 then change the actual value to 1 (dummify!)
            label_batch[i,0] = 0
            label_batch[i,1] = 0
            label_batch[i,2] = 0
            label_batch[i,drawception.iloc[count, 16]] = 1
            
            # Cycle back through
            count += 1
            if count < end:
                count = 0
        yield image_batch, label_batch

In [7]:
# Train Test Split the DF
# thanks to Andy Hayden on stack exchange
split = np.random.rand(len(drawception)) < 0.8

train_df = drawception[split]
test_df = drawception[~split]

y_train = train_df['REACT']
y_test = test_df['REACT']

In [8]:
# Check that the data is split
print(train_df.shape, '  ', len(y_train))
print(test_df.shape, '  ', len(y_test))

(32602, 17)    32602
(8319, 17)    8319


## Model Creation

Here is where the model is defined.

In [22]:
cnn_model = Sequential()

# Add a convolutional layer.
cnn_model.add(Conv2D(
            filters=256,  # number of filters
            kernel_size=(3, 3),    # height/width of filter
            activation = 'relu',    # activation function 
            input_shape = (250, 300, 3))) # shape of input (image)

cnn_model.add(MaxPooling2D
             (pool_size=(2,2))) # Cut dimensions in half

cnn_model.add(Conv2D(filters = 512,
                    kernel_size = (3,3),
                    activation = 'relu'))

cnn_model.add(MaxPooling2D
             (pool_size=(2,2))) # Cut dimensions in half

cnn_model.add(Flatten())

cnn_model.add(Dense(64, activation='relu'))

cnn_model.add(Dense(32, activation='relu'))
cnn_model.add(Dropout(0.5))

cnn_model.add(Dense(32, activation='relu'))
cnn_model.add(Dropout(0.5))

cnn_model.add(Dense(16, activation='relu'))

cnn_model.add(Dense(3, activation='softmax'))

In [23]:
cnn_model.summary()

Model: "sequential_2"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_4 (Conv2D)            (None, 248, 298, 256)     7168      
_________________________________________________________________
max_pooling2d_4 (MaxPooling2 (None, 124, 149, 256)     0         
_________________________________________________________________
conv2d_5 (Conv2D)            (None, 122, 147, 512)     1180160   
_________________________________________________________________
max_pooling2d_5 (MaxPooling2 (None, 61, 73, 512)       0         
_________________________________________________________________
flatten_2 (Flatten)          (None, 2279936)           0         
_________________________________________________________________
dense_10 (Dense)             (None, 64)                145915968 
_________________________________________________________________
dense_11 (Dense)             (None, 32)               

In [24]:
# Compile model
cnn_model.compile(loss='categorical_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])

In [25]:
data_len = train_df.shape[0]
my_batch_size = 16

history = cnn_model.fit_generator(image_batch_gen(train_df, my_batch_size, data_len),
                                  steps_per_epoch=data_len//my_batch_size,
                                  epochs=1,
                                  verbose=1)



## Prediction

The prediction needs to be done in batches aswell because of the large amount of data. This function loads and converts one image at a time, makes the prediction, then outputs a dataframe. The true and predicted values as well as the probabilities of each class are recorded.

In [26]:
def predict_batch(dataframe, model):   
    
    output = []
    
    image_dat = np.empty((1, 250, 300, 3))
    for i in range(dataframe.shape[0]):
        my_image = PIL.Image.open(dataframe.iloc[i,15])
        my_image = my_image.reduce(2)
            
        image_dat[0,:,:,:] = np.asarray(my_image)
        pred = model.predict(image_dat)
        
        # Unpack the probabilities and assemble a prediction dataframe
        img_dic = {}
        img_dic['yp0'] = pred[0][0]
        img_dic['yp1'] = pred[0][1]
        img_dic['yp2'] = pred[0][2]
        if pred[0][0] == max(pred[0]):
            img_dic['y_pred'] = 0
        elif pred[0][1] == max(pred[0]):
            img_dic['y_pred'] = 1
        else:
            img_dic['y_pred'] = 2
        img_dic['y_true'] = dataframe.iloc[i,16]
        
        output.append(img_dic)
            
    return pd.DataFrame(output)

Run the prediction

In [27]:
pred_df = predict_batch(test_df, cnn_model)

In [28]:
pred_df.head()

Unnamed: 0,yp0,yp1,yp2,y_pred,y_true
0,0.999999,5.596637e-07,8.541522e-07,0,0
1,1.0,2.4032169999999998e-30,5.50079e-29,0,1
2,1.0,0.0,0.0,0,1
3,1.0,3.466679e-14,1.174502e-13,0,1
4,1.0,0.0,0.0,0,1


In [None]:
# Save prediction to a csv file   ### Specify Filename ###
pred_df.to_csv('../data/prediction_jan24.csv', index=False)

## Output Images

The following code was used to output images that had high probability in different bins. First identify the images then output them

In [57]:
# Input index values here
indexes = [5181,303,4818]
indexes

[5181, 303, 4818]

In [35]:
# Add the predicted probabilities to the dataframe
test_df['bin0'] = pred_df['yp0'].copy()
test_df['bin1'] = pred_df['yp1'].copy()
test_df['bin2'] = pred_df['yp2'].copy()

# Reset the index so I can use iloc later
test_df.reset_index(inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test_df['bin0'] = pred_df['yp0'].copy()


In [52]:
test_df.sort_values(by='bin2', ascending=False).head(5)

Unnamed: 0,pre_caption,post_caption,image_url,author,panel_number,LIKE,HAHA,WOW,LOVE,DUCK,game_url,player_num,game_date,game_tags,REACT,img_path,bin,bin1,bin2,bin0
675,naked king is sad cos banana died :(,Sexy person,https://cdn.drawception.com/drawings/835536/DM...,artist llama lizard quee,8,0,0,2,0,0,/game/aQb5Omn1f8/queen/,12,"December 18th, 2020",['blitz mode'],2,../drawings/835536/DMgG0Xp2at.png,1,0.090108,0.085851,0.824041
3878,geico lizard,geico lizard,https://cdn.drawception.com/drawings/860500/BL...,SpookiiFaith12,4,0,1,0,0,0,/game/73W5rmYEhn/praying-mantis/,12,"December 19th, 2020",[],1,../drawings/860500/BLO3NvgVZc.png,1,0.087577,0.083699,0.828724
5181,Umbrellas in love,Umbrellas so in love they defy physics,https://cdn.drawception.com/drawings/1027314/m...,evilturnip,6,0,0,0,0,0,/game/nBf7h66pNw/umbrella-sword-fight/,12,"December 20th, 2020",[],0,../drawings/1027314/mTZSNASVMY.png,0,0.082678,0.081537,0.835786
303,cake by the ocean(otherwise its a sea sponge),cupcake on the beach,https://cdn.drawception.com/drawings/1024548/H...,Funkmaster77,6,1,0,0,0,0,/game/YMGPfWP87s/cheese-on-a-beach/,12,"December 18th, 2020",[],1,../drawings/1024548/HcKRnynHms.png,1,0.083678,0.080664,0.835658
4818,the earth splitting in half,draw_last,https://cdn.drawception.com/drawings/1038529/4...,Spegtator,12,0,0,0,6,0,/game/eSLnmZWP6Y/earthy-jupiter/,12,"December 20th, 2020",[],6,../drawings/1038529/4omb1JZj8f.png,2,0.083678,0.080664,0.835658


In [None]:
# Input index values here
indexes = [5181,303,4818]
indexes

In [None]:
for i, ind in enumerate(indexes):
    copy_image = PIL.Image.open(drawception.iloc[ind,15])
    copy_image = copy_image.reduce(2)
    copy_image.save(f'../slide_images/pred_bin2_{i}.png')