In [1]:
import os
import shutil
from tqdm import tqdm
from random import shuffle
import cv2
import numpy as np

# Dataset layout: <root>/<split>/<class>, one folder per split and class.
_DATASET_ROOT = "hot-dog-not-hot-dog"
_CLASS_DIR_TEMPLATE = _DATASET_ROOT + "/{split}/{label}"

TRAIN_HOT_DOG_DIR = _CLASS_DIR_TEMPLATE.format(split="train", label="hot_dog")
TRAIN_NOT_HOT_DOG_DIR = _CLASS_DIR_TEMPLATE.format(split="train", label="not_hot_dog")

TEST_HOT_DOG_DIR = _CLASS_DIR_TEMPLATE.format(split="test", label="hot_dog")
TEST_NOT_HOT_DOG_DIR = _CLASS_DIR_TEMPLATE.format(split="test", label="not_hot_dog")

# Images are resized to IMAGE_SIZE x IMAGE_SIZE pixels before use.
IMAGE_SIZE = 72
# Learning rate handed to the optimizer.
LR = 1e-5
# Used both as the training run id and as the save target for the model.
MODEL_NAME = "hot-dog-not-hot-dog_" + "version-1"

In [2]:
# Report how many entries each training class folder currently holds.
for class_dir in (TRAIN_HOT_DOG_DIR, TRAIN_NOT_HOT_DOG_DIR):
    print(len(os.listdir(class_dir)))

1673
1308


In [3]:
def balance_data(max_per_class=1300):
    """Trim both training class folders to at most ``max_per_class`` files.

    Each folder is listed with ``os.listdir`` and walked in whatever order
    that call returns. Entries at position ``max_per_class`` and beyond are
    removed with ``os.unlink``.

    WARNING: this deletes files from disk and cannot be undone. Once a folder
    already holds ``max_per_class`` entries or fewer, calling it again removes
    nothing.

    Args:
        max_per_class: Maximum number of files to keep in each class folder.

    Raises:
        ValueError: If ``max_per_class`` is negative (which would otherwise
            delete every file in both folders).
    """
    if max_per_class < 0:
        raise ValueError("max_per_class must be >= 0, got {}".format(max_per_class))

    for class_dir in (TRAIN_HOT_DOG_DIR, TRAIN_NOT_HOT_DOG_DIR):
        for index, filename in enumerate(tqdm(os.listdir(class_dir))):
            # Keep the first `max_per_class` entries, delete the rest.
            if index >= max_per_class:
                os.unlink(os.path.join(class_dir, filename))


balance_data()

100%|██████████| 1673/1673 [00:00<00:00, 50617.26it/s]
100%|██████████| 1308/1308 [00:00<00:00, 866279.75it/s]


In [4]:
# Report the number of entries in every class folder of both splits.
for class_dir in (
    TRAIN_HOT_DOG_DIR,
    TRAIN_NOT_HOT_DOG_DIR,
    TEST_HOT_DOG_DIR,
    TEST_NOT_HOT_DOG_DIR,
):
    print(len(os.listdir(class_dir)))

1300
1300
250
250


In [5]:
def create_train_data():
    """Load the training images and return them as shuffled samples.

    Every file in the two training class folders is read as a grayscale
    image and resized to IMAGE_SIZE x IMAGE_SIZE. Files for which
    ``cv2.imread`` returns ``None`` are skipped.

    Labels are one-hot encoded:
        hot-dog     = [1,0]
        not-hot-dog = [0,1]

    Returns:
        A list of ``[image_array, label_array]`` pairs, shuffled in place
        with ``random.shuffle``.
    """
    labelled_dirs = (
        (TRAIN_HOT_DOG_DIR, [1, 0]),
        (TRAIN_NOT_HOT_DOG_DIR, [0, 1]),
    )

    training_data = []
    for class_dir, one_hot in labelled_dirs:
        for filename in tqdm(os.listdir(class_dir)):
            path = os.path.join(class_dir, filename)
            if not os.path.exists(path):
                continue
            pixels = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
            if pixels is None:
                continue
            pixels = cv2.resize(pixels, (IMAGE_SIZE, IMAGE_SIZE))
            training_data.append([np.array(pixels), np.array(one_hot)])

    shuffle(training_data)
    return training_data

In [6]:
def create_validation_and_test_data(validation_per_class=200):
    """Split the images under the test folders into validation and test sets.

    For each class folder, entries are walked in ``os.listdir`` order. The
    first ``validation_per_class`` entries go to the validation set and all
    remaining entries go to the test set. The cutoff is by position in the
    listing, so a file that cannot be read still uses up its slot.

    Every image is read as grayscale and resized to IMAGE_SIZE x IMAGE_SIZE.
    Files for which ``cv2.imread`` returns ``None`` are skipped.

    Labels are one-hot encoded:
        hot-dog     = [1,0]
        not-hot-dog = [0,1]

    Args:
        validation_per_class: Number of leading directory entries per class
            assigned to the validation set.

    Returns:
        ``[validation_data, testing_data]``; each is a shuffled list of
        ``[image_array, label_array]`` pairs.
    """
    labelled_dirs = (
        (TEST_HOT_DOG_DIR, [1, 0]),
        (TEST_NOT_HOT_DOG_DIR, [0, 1]),
    )

    testing_data = []
    validation_data = []
    for class_dir, one_hot in labelled_dirs:
        for index, filename in enumerate(tqdm(os.listdir(class_dir))):
            path = os.path.join(class_dir, filename)
            if not os.path.exists(path):
                continue
            image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                continue
            image = cv2.resize(image, (IMAGE_SIZE, IMAGE_SIZE))
            # Position in the listing decides which split receives the sample.
            target = validation_data if index < validation_per_class else testing_data
            target.append([np.array(image), np.array(one_hot)])

    shuffle(testing_data)
    shuffle(validation_data)
    return [validation_data, testing_data]

In [7]:
# Build the training samples and cache them on disk.
train_data = create_train_data()
# Each sample pairs a 2-D image with a 1-D label, so the nested list is ragged.
# NumPy >= 1.24 raises ValueError on implicit ragged conversion, so convert
# explicitly to an object array before saving.
np.save("train_data.npy", np.array(train_data, dtype=object))

100%|██████████| 1300/1300 [00:05<00:00, 246.95it/s]
100%|██████████| 1300/1300 [00:06<00:00, 202.18it/s]


In [8]:
# Build the validation and test samples and cache each split on disk.
data = create_validation_and_test_data()
validation_data, test_data = data
# Samples are ragged (2-D image + 1-D label); NumPy >= 1.24 raises ValueError
# on implicit ragged conversion, so convert explicitly to object arrays.
np.save("validation_data.npy", np.array(validation_data, dtype=object))
np.save("test_data.npy", np.array(test_data, dtype=object))

  9%|▉         | 23/250 [00:00<00:01, 226.96it/s]

[[ 2  2  1 ... 12 12 12]
 [ 2  2  1 ...  8  6  4]
 [ 2  2  1 ...  7  4  1]
 ...
 [ 5  7  9 ...  0  0  0]
 [ 5  8 10 ...  0  0  0]
 [ 6  8 11 ...  0  0  0]]
[[  4   4   4 ...   3   5   6]
 [  5   5   6 ...   3   3   4]
 [  7   7   8 ...   3   3   3]
 ...
 [171 167 167 ... 170 170 171]
 [164 168 163 ... 164 165 168]
 [170 178 168 ... 165 166 170]]
[[192 192 193 ...  11   9   5]
 [192 193 193 ...  11   8   5]
 [192 192 193 ...  10   8   6]
 ...
 [ 32  32  32 ... 151 152 153]
 [ 32  32  32 ... 152 153 153]
 [ 32  32  32 ... 154 154 154]]
[[ 2  2  2 ...  4  4  4]
 [ 2  2  2 ...  3  3  3]
 [ 2  2  2 ...  3  3  3]
 ...
 [25 30 34 ... 29 29 28]
 [24 30 35 ... 29 29 29]
 [24 29 35 ... 28 27 27]]
[[176 176 177 ...  85  87  78]
 [176 177 177 ...  83  82  85]
 [177 177 177 ...  90  81  88]
 ...
 [180 181 182 ... 176 178 180]
 [177 179 182 ... 175 177 180]
 [179 180 180 ... 176 177 178]]
[[154 153 154 ... 166 170 175]
 [154 153 153 ... 166 167 168]
 [156 154 152 ... 171 170 169]
 ...
 [ 70  73  77 

 32%|███▏      | 79/250 [00:00<00:00, 192.75it/s]

[[219 215 205 ... 208 206 204]
 [226 225 218 ... 208 208 208]
 [221 223 222 ... 207 209 210]
 ...
 [171 164 158 ... 113 115 119]
 [171 168 164 ... 106 112 116]
 [171 170 167 ...  99 102  97]]
[[103 102 101 ...   3   4   5]
 [103 102 101 ...   3   4   5]
 [102 102 101 ...   3   4   5]
 ...
 [ 12  12  11 ...   7   9  11]
 [ 13  12  10 ...   9  10  11]
 [ 13  12  10 ...  10  11  11]]
[[114 119 111 ... 106 105 105]
 [122 124 112 ... 105 106 107]
 [118 119 111 ... 105 108 110]
 ...
 [228 229 229 ... 106 112 111]
 [230 234 240 ... 109 112 108]
 [240 235 234 ... 108 110 104]]
[[10 12 16 ...  3  3  6]
 [12 15 17 ...  5  6  8]
 [13 14 16 ... 12 11 12]
 ...
 [12 13 22 ...  1  0  0]
 [14 19 25 ...  1  0  0]
 [16 25 26 ...  1  0  0]]
[[146 144 143 ...  21  21  21]
 [139 138 136 ...  24  24  24]
 [128 126 124 ...  28  27  27]
 ...
 [135 134 134 ...  63  61  59]
 [133 133 133 ...  59  57  55]
 [133 133 133 ...  55  54  53]]
[[ 60  62  64 ...  76  76  72]
 [ 61  62  62 ...  68  67  63]
 [ 68  68  68 

 48%|████▊     | 120/250 [00:00<00:00, 196.95it/s]

[[220 223 224 ... 220 220 220]
 [225 226 226 ... 231 231 230]
 [230 229 228 ... 236 235 234]
 ...
 [215 235 236 ... 219 218 217]
 [208 231 235 ... 219 218 217]
 [202 228 235 ... 218 218 218]]
[[  8   6   6 ... 196 195 194]
 [  7   6   6 ... 196 195 195]
 [  8   7   7 ... 197 196 196]
 ...
 [155 159 163 ...  26  26  24]
 [156 161 166 ...  21  20  15]
 [159 164 169 ...  13  10   4]]
[[185 187 188 ... 179 177 176]
 [183 185 189 ... 180 181 181]
 [179 183 187 ... 179 182 184]
 ...
 [139 137 136 ... 153 156 160]
 [136 135 133 ... 155 157 159]
 [136 134 132 ... 162 163 165]]
[[ 48  48  48 ...  22  21  20]
 [ 44  45  45 ...  21  20  19]
 [ 44  45  46 ...  22  22  20]
 ...
 [128 129 128 ... 143 143 142]
 [127 127 125 ... 143 143 142]
 [125 126 124 ... 142 142 142]]
[[ 14  13   8 ...   5   5   4]
 [ 18  19  15 ...   6   6   6]
 [ 25  20  18 ...   7   7   7]
 ...
 [180 179 186 ...  75  96 100]
 [183 183 183 ...  81 100 108]
 [188 186 178 ...  87 107 122]]
[[189 195 194 ... 240 237 236]
 [189 196

 57%|█████▋    | 143/250 [00:00<00:00, 204.33it/s]

[[159 159 159 ...   9   9  10]
 [160 162 163 ...   7   8   8]
 [167 169 173 ...   7   8   8]
 ...
 [ 28  28  27 ...   3   3   3]
 [ 28  28  27 ...   3   3   3]
 [ 28  28  28 ...   3   4   4]]
[[ 2  2  2 ...  7  7  7]
 [ 2  2  2 ...  4  5  5]
 [ 2  2  2 ...  3  3  4]
 ...
 [ 9  8  6 ... 10 11 13]
 [ 6  5  4 ... 17 16 20]
 [ 6  5  5 ... 17 11 11]]
[[68 68 68 ... 64 63 63]
 [70 70 70 ... 64 63 63]
 [71 71 71 ... 64 64 64]
 ...
 [51 50 49 ...  2  2  2]
 [52 51 49 ...  2  2  2]
 [53 51 49 ...  2  2  2]]
[[152 152 152 ... 154 154 154]
 [152 152 152 ... 154 154 154]
 [152 152 152 ... 154 154 154]
 ...
 [148 148 148 ... 141 141 142]
 [148 148 148 ... 141 141 142]
 [148 148 148 ... 141 141 142]]
[[249 249 249 ...  96 109 119]
 [246 247 248 ...  83  99 112]
 [242 244 247 ...  68  85  99]
 ...
 [ 34  31  37 ...  24  24  24]
 [ 54  38  30 ...  23  23  23]
 [ 63  44  33 ...  23  23  23]]
[[131 130 127 ... 205 204 202]
 [136 135 132 ... 205 206 206]
 [142 143 140 ... 204 207 209]
 ...
 [192 199 209 

 75%|███████▌  | 188/250 [00:00<00:00, 210.12it/s]

[[177 178 178 ... 121 120 122]
 [176 177 177 ... 122 121 123]
 [176 176 176 ... 123 122 123]
 ...
 [150 148 153 ...  10   9   9]
 [150 149 154 ...   9   8   8]
 [151 150 155 ...   9   8   7]]
[[ 6  6  7 ...  4  5  5]
 [ 1  2  2 ...  3  4  4]
 [ 1  1  1 ...  3  3  3]
 ...
 [ 2  2  3 ... 76 76 76]
 [ 2  2  3 ... 77 77 77]
 [ 2  3  3 ... 78 79 79]]
[[ 19  20  24 ... 218 228 236]
 [ 24  24  26 ... 226 232 231]
 [ 25  23  23 ... 234 236 228]
 ...
 [ 57  59  60 ... 251 251 252]
 [ 57  57  59 ... 251 251 252]
 [ 54  55  58 ... 251 251 252]]
[[210 208 208 ... 204 207 210]
 [204 208 209 ... 212 212 208]
 [205 212 213 ... 219 217 209]
 ...
 [183 177 180 ... 197 192 216]
 [198 188 195 ... 203 201 208]
 [181 179 191 ... 225 223 202]]
[[118 102 100 ...  80 132 183]
 [113 105 101 ...  73 115 164]
 [110 109 103 ...  70 102 132]
 ...
 [224 225 219 ...  88  89  89]
 [227 223 221 ...  89  88  87]
 [225 221 218 ...  88  86  85]]
[[106 117 123 ...   9   9   8]
 [114 126 134 ...   9   9   8]
 [130 133 135 

100%|██████████| 250/250 [00:01<00:00, 198.98it/s]
100%|██████████| 250/250 [00:01<00:00, 211.37it/s]


In [9]:
# Number of samples in the training, validation and test splits, in that order.
for split_samples in (train_data, validation_data, test_data):
    print(len(split_samples))

2530
400
100


In [None]:
import matplotlib.pyplot as plt
%matplotlib inline

fig = plt.figure(figsize=(10, 10))

# Preview the first 20 training samples in a 4x5 grid, titled by class.
for position, (image, one_hot) in enumerate(train_data[:20], start=1):
    ax = fig.add_subplot(4, 5, position)
    # Named `title` rather than `str` so the builtin stays usable in later cells.
    title = "Hot-Dog" if one_hot[0] == 1 else "Not-Hot-Dog"
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)

    ax.imshow(image, cmap='gray')
    # Set the title on this subplot directly instead of on pyplot's current axes.
    ax.set_title(title)
plt.show()

In [None]:
import tflearn
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.estimator import regression

import tensorflow as tf
tf.reset_default_graph()

# Input layer: batches of single-channel IMAGE_SIZE x IMAGE_SIZE images.
convnet = input_data(shape=[None, IMAGE_SIZE, IMAGE_SIZE, 1], name="input")

# Four conv + max-pool stages; only the number of filters differs per stage.
for stage_filters in (32, 64, 128, 64):
    convnet = conv_2d(
        convnet,
        nb_filter=stage_filters,
        filter_size=5,
        activation="relu",
        strides=1,
        padding="same",
    )
    convnet = max_pool_2d(convnet, 3)

# Dense head followed by dropout.
convnet = fully_connected(convnet, 1024, activation="relu")
convnet = dropout(convnet, 0.5)

# Two-unit softmax output, matching the two-element one-hot labels.
convnet = fully_connected(convnet, 2, activation="softmax")
convnet = regression(
    convnet,
    optimizer="rmsprop",
    learning_rate=LR,
    loss="categorical_crossentropy",
    name="targets",
)

model = tflearn.DNN(convnet, tensorboard_dir="log")

In [None]:
# Network input layout: (batch, height, width, channels) with one channel.
input_shape = (-1, IMAGE_SIZE, IMAGE_SIZE, 1)

# Each sample is an [image, label] pair; split them into separate arrays.
X = np.array([sample[0] for sample in train_data]).reshape(input_shape)
Y = np.array([sample[1] for sample in train_data])

validation_x = np.array([sample[0] for sample in validation_data]).reshape(input_shape)
validation_y = np.array([sample[1] for sample in validation_data])


for prepared in (X, Y, validation_x, validation_y):
    print(prepared.shape)

In [None]:
# Feed dicts are keyed by the layer names given in the network definition.
train_inputs = {"input": X}
train_targets = {"targets": Y}
validation_feed = ({"input": validation_x}, {"targets": validation_y})

# Train for 5 epochs, reporting metrics and evaluating on the validation data.
model.fit(
    train_inputs,
    train_targets,
    n_epoch=5,
    validation_set=validation_feed,
    show_metric=True,
    run_id=MODEL_NAME,
)

In [None]:
# Save the model; MODEL_NAME is passed as the save target.
model.save(MODEL_NAME)

In [None]:
fig1 = plt.figure(figsize=(15, 15))

# Show the last 12 test samples in a 3x4 grid, each titled with the
# predicted (P) and actual (A) class.
for position, (image, one_hot) in enumerate(test_data[-12:], start=1):
    model_input = image.reshape(IMAGE_SIZE, IMAGE_SIZE, 1)
    model_out = model.predict([model_input])[0]
    print(model_out)
    ax = fig1.add_subplot(3, 4, position)

    # Index 0 is the hot-dog class in both the one-hot label and the output.
    # Named `predicted`/`actual` rather than `str`/`str1` so the builtin `str`
    # is not shadowed.
    predicted = "Hot-Dog" if np.argmax(model_out) == 0 else "Not-Hot-Dog"
    actual = "Hot-Dog" if one_hot[0] == 1 else "Not-Hot-Dog"

    ax.imshow(image, cmap='gray')
    # Set the title on this subplot directly instead of on pyplot's current axes.
    ax.set_title("P: " + predicted + " A: " + actual)
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
plt.show()
    