<a href="https://colab.research.google.com/github/juliakay/ml-fun/blob/master/Hot_Dog_or_Not_Hot_Dog.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#### Copyright 2019 Google LLC.

In [0]:
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Hot Dog or Not Hot Dog

There are very few more important questions in life than "[Hot dog or not hot dog?](https://www.youtube.com/watch?v=ACmydtFDTGs)". For this challenge you will be tasked with creating a machine learning model that can take an input image and determine if the image is of a hot dog or not a hot dog.

Train your model with the [Kaggle Hot Dog/Not Hot Dog](https://www.kaggle.com/dansbecker/hot-dog-not-hot-dog/data) data set. Feel free to [do some background research](https://medium.com/@timanglade/how-hbos-silicon-valley-built-not-hotdog-with-mobile-tensorflow-keras-react-native-ef03260747f3) on the topic.

We have looked a regression, classification, and clustering models. We have used the Scikit Learn, TensorFlow, and Keras toolkits. Feel free to use the model and toolkit that you feel is the most appropriate.

In [0]:
import zipfile
zip_ref = zipfile.ZipFile('hot-dog-not-hot-dog.zip', 'r')
zip_ref.extractall('./')
zip_ref.close()

import glob
train_dog_images = glob.glob('train/hot_dog' + '/*.jpg')


import IPython.display as display
# display.display(display.Image(images[0]))

# for i in train_dog_images:
#   display.display(display.Image(i))

from __future__ import absolute_import, division, print_function

import tensorflow as tf
import matplotlib.pyplot as plt

tf.enable_eager_execution()
AUTOTUNE = tf.data.experimental.AUTOTUNE

# session = tf.Session()

def preprocess_image(image):
  # encode image to tensor
  image = tf.image.decode_jpeg(image, channels=3)
  
  # resize image
  image = tf.image.resize_images(image, [150, 150])
  
  # normalize
  image /= 255.0

  return image


def load_and_preprocess_image(path):
  image = tf.read_file(path)
  return preprocess_image(image)

In [0]:
# unzip files
import zipfile
zip_ref = zipfile.ZipFile('hot-dog-not-hot-dog.zip', 'r')
zip_ref.extractall('./')
zip_ref.close()

In [0]:
# image processing
from __future__ import absolute_import, division, print_function

import tensorflow as tf
import matplotlib.pyplot as plt

tf.enable_eager_execution()
AUTOTUNE = tf.data.experimental.AUTOTUNE

def preprocess_image(image):
  # encode image to tensor
  image = tf.image.decode_jpeg(image, channels=3)
  
  # resize image
  image = tf.image.resize_images(image, [160, 160])
  
  # normalize
  image /= 255.0

  return image


def load_and_preprocess_image(path):
  image = tf.read_file(path)
  return preprocess_image(image)


In [0]:
# grab all file names
import glob
train_dog_images = glob.glob('train/hot_dog' + '/*.jpg')
train_notdog_images = glob.glob('train/not_hot_dog' + '/*.jpg')

In [0]:
# Dog training data
# unpack all images from train/hot dog folder 
import numpy as np

path_ds = tf.data.Dataset.from_tensor_slices(train_dog_images)
image_ds = path_ds.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)

# initialize dog labels
is_dog = np.ones(len(train_dog_images), dtype=int)

is_dog_label_ds = tf.data.Dataset.from_tensor_slices(is_dog)
# print(label_ds)

# create dataset with image and labels
image_label_ds = tf.data.Dataset.zip((image_ds, is_dog_label_ds))

print('image shape: ', image_label_ds.output_shapes[0])
print('label shape: ', image_label_ds.output_shapes[1])
print('types: ', image_label_ds.output_types)
print()
print(image_label_ds)

# plot figures
# plt.figure(figsize=(8,8))
# for n,image in enumerate(image_ds.take(4)):
#   plt.subplot(2,2,n+1)
#   plt.imshow(image)
#   plt.grid(False)
#   plt.xticks([])
#   plt.yticks([])
# #   plt.xlabel(caption_image(train[n]))



Instructions for updating:
Use `tf.compat.v1.data.get_output_shapes(dataset)`.
image shape:  (160, 160, 3)
label shape:  ()
Instructions for updating:
Use `tf.compat.v1.data.get_output_types(dataset)`.
types:  (tf.float32, tf.int64)

<DatasetV1Adapter shapes: ((160, 160, 3), ()), types: (tf.float32, tf.int64)>


In [0]:
# Not dog training data
# unpack all images from train/not hot dog folder
path_ds = tf.data.Dataset.from_tensor_slices(train_notdog_images)
image_ds = path_ds.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)

# initialize dog labels
is_notdog = np.zeros(len(train_notdog_images), dtype=int)
label_ds = tf.data.Dataset.from_tensor_slices(is_notdog)
image_label_ds_not = tf.data.Dataset.zip((image_ds, label_ds))
final_train = image_label_ds.concatenate(image_label_ds_not)

In [0]:
# Setting a shuffle buffer size as large as the dataset ensures that the data is
# completely shuffled.
image_count = len(train_dog_images) + len(train_notdog_images)
ds = final_train.shuffle(buffer_size=image_count)
ds = ds.repeat()
ds = ds.batch(32)
# `prefetch` lets the dataset fetch batches, in the background while the model is training.
ds = ds.prefetch(buffer_size=AUTOTUNE)
ds


<DatasetV1Adapter shapes: ((?, 160, 160, 3), (?,)), types: (tf.float32, tf.int64)>

In [0]:
mobile_net = tf.keras.applications.MobileNetV2(input_shape=(160, 160, 3), include_top=False)
mobile_net.trainable=False

Downloading data from https://github.com/JonathanCMitchell/mobilenet_v2_keras/releases/download/v1.1/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_160_no_top.h5


In [0]:
#This model expects its input to be normalized to the [-1,1] range:

def change_range(image,label):
  return 2*image-1, label

keras_ds = ds.map(change_range)

# The dataset may take a few seconds to start, as it fills its shuffle buffer.
image_batch, label_batch = next(iter(keras_ds))

feature_map_batch = mobile_net(image_batch)
print(feature_map_batch.shape)

(32, 5, 5, 1280)


In [0]:
model = tf.keras.Sequential([
  mobile_net,
  tf.keras.layers.GlobalAveragePooling2D(),
  tf.keras.layers.Dense(image_count)])

logit_batch = model(image_batch).numpy()

print("min logit:", logit_batch.min())
print("max logit:", logit_batch.max())
print()

print("Shape:", logit_batch.shape)

min logit: -3.5233228
max logit: 4.312913

Shape: (32, 498)


In [0]:
model.compile(optimizer=tf.train.AdamOptimizer(), 
              loss=tf.keras.losses.sparse_categorical_crossentropy,
              metrics=["accuracy"])
len(model.trainable_variables) 


2

In [0]:
model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_160 (Model) (None, 5, 5, 1280)        2257984   
_________________________________________________________________
global_average_pooling2d (Gl (None, 1280)              0         
_________________________________________________________________
dense (Dense)                (None, 498)               637938    
Total params: 2,895,922
Trainable params: 637,938
Non-trainable params: 2,257,984
_________________________________________________________________


In [0]:
model.fit(ds, epochs=10, steps_per_epoch=3)


Epoch 1/10
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f6097936da0>

In [0]:
#TESTING-DOG
import numpy as np
test_dog_images = glob.glob('test/hot_dog' + '/*.jpg')
# unpack all images from test/hot dog folder
path_ds = tf.data.Dataset.from_tensor_slices(test_dog_images)
image_ds = path_ds.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)

# initialize dog labels
is_dog = np.ones(len(test_dog_images), dtype=int)
label_ds = tf.data.Dataset.from_tensor_slices(is_dog)

# print(label_ds)
# create dataset with image and labels
image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))

#TESTING-NOT DOG 
test_not_dog_images = glob.glob('test/not_hot_dog' + '/*.jpg')

#unpack all images from test/not hot dog folder 
path_ds = tf.data.Dataset.from_tensor_slices(test_not_dog_images)
image_ds = path_ds.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)

#initialize not dog labels 
is_notdog = np.zeros(len(test_not_dog_images), dtype=int)
label_ds = tf.data.Dataset.from_tensor_slices(is_notdog)
image_label_ds_not = tf.data.Dataset.zip((image_ds, label_ds))

final_test = image_label_ds.concatenate(image_label_ds_not)

print('image shape: ', final_test.output_shapes[0])
print('label shape: ', final_test.output_shapes[1])
print('types: ', final_test.output_types)
print()
print(final_test)

image shape:  (160, 160, 3)
label shape:  ()
types:  (tf.float32, tf.int64)

<DatasetV1Adapter shapes: ((160, 160, 3), ()), types: (tf.float32, tf.int64)>


In [0]:
model.predict(final_test, steps=3)

ValueError: ignored