<a href="https://colab.research.google.com/github/joshua-friede/cat-dog-classifier/blob/master/IE6380_Lab_03.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Run this first

In [0]:
# 1. Download the dataset
!wget https://user.engineering.uiowa.edu/~sbaek/files/cats_and_dogs.zip  # '!wget' command allows you to download files to your virtual machine
!unzip -q cats_and_dogs.zip # '!unzip' is for extracting zip compressed files. '-q' is a toggle option for 'quiet' (otherwise, it will produce a long log).


# 2. Read the dataset
import os  # import os library for reading file paths
import cv2 # import opencv library for image handling

from tqdm import tqdm   # tqdm is a useful library for visualizing a small progress bar when you run a for loop.

cat_list = ['PetImages/Cat/' + file for file in os.listdir('PetImages/Cat') if file.endswith('.jpg')]   # file paths for cat images
dog_list = ['PetImages/Dog/' + file for file in os.listdir('PetImages/Cat') if file.endswith('.jpg')]   # file paths for dog images

x = [] # empty list for storing images
y = [] # empty list for storing image labels. we will use one-hot encoding [1, 0] = cat, [0, 1] = dog

for f in tqdm(cat_list): # this is how you use tqdm
# for f in cat_list: # this is without-tqdm version
  img = cv2.imread(f)  # read an image from the file list
  if img is not None:  # if image reading successful
    x.append(cv2.resize(img, (28,28))/255.0)  # append new image to the image list. resize and normalize before to append.
    y.append([1, 0])  # append new label to the label list
  
for f in tqdm(dog_list):
  img = cv2.imread(f)
  if img is not None:
    x.append(cv2.resize(img, (28,28))/255.0)
    y.append([0,1])
 

# 3. Train-test split
from sklearn.model_selection import train_test_split
import time # this is for generating random seed

train_x, test_x, train_y, test_y = train_test_split(x, y, test_size=0.3, random_state=int(time.time()))

print(len(x), len(train_x), len(test_x))



# 4. Build a model 
import tensorflow as tf

# placeholder for the dataset
X = tf.placeholder(tf.float32, [None, 28, 28, 3])
Y = tf.placeholder(tf.float32, [None, 2])

# model parameters
W = tf.Variable(tf.zeros([2352, 2]))
b = tf.Variable(tf.zeros([2]))

# model ops
Z = tf.matmul(tf.reshape(X, [-1, 2352]), W) + b
f = tf.divide(tf.exp(Z), tf.reduce_sum(tf.exp(Z)))
# f = tf.nn.softmax(Z)  # tensorflow provides softmax function for your convenience


# 5. Design an optimizer
e = -tf.reduce_sum(Y * tf.log(f)) # cross entropy loss
optimizer = tf.train.AdamOptimizer(0.01).minimize(e) # adam optimizer

# accuracy. (cross entropy can be less intuitive. a good practice would be to trace accuracy alongside cross entropy
is_correct = tf.equal(tf.argmax(f,1), tf.argmax(Y,1))
accuracy = tf.reduce_mean(tf.cast(is_correct, tf.float32))


# 6. Train
with tf.Session() as sess:
  sess.run(tf.global_variables_initializer()) # we have variables W and b which need to be initialized
  
  for i in range(100): # for some fixed number of iterations
    sess.run(optimizer, feed_dict={X: train_x, Y: train_y}) # run one step of iteration
    acc, ce = sess.run([accuracy, e], feed_dict={X: train_x, Y: train_y}) # evaluate accuracy and cross entropy error
    print(i, acc, ce) # print them
    
  acc, ce = sess.run([accuracy, e], feed_dict={X: test_x, Y: test_y}) # when the iteration is over, test the model against the test dataset
  print('TEST', acc, ce)

--2019-02-14 18:47:06--  https://user.engineering.uiowa.edu/~sbaek/files/cats_and_dogs.zip
Resolving user.engineering.uiowa.edu (user.engineering.uiowa.edu)... 128.255.17.187
Connecting to user.engineering.uiowa.edu (user.engineering.uiowa.edu)|128.255.17.187|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 824894548 (787M) [application/zip]
Saving to: ‘cats_and_dogs.zip’


2019-02-14 18:49:34 (5.52 MB/s) - ‘cats_and_dogs.zip’ saved [824894548/824894548]



100%|██████████| 12500/12500 [00:27<00:00, 460.15it/s]
100%|██████████| 12500/12500 [00:28<00:00, 442.72it/s]


24946 17462 7484
Instructions for updating:
Colocations handled automatically by placer.
0 0.5021189 288781.5
1 0.4944451 234589.12
2 0.49811018 233657.78
3 0.53951436 192459.64
4 0.50166076 221076.34
5 0.5010881 224728.9
6 0.5052113 198849.66
7 0.49902645 204129.25
8 0.4989692 218371.47
9 0.5000573 202360.88
10 0.5256557 191567.34
11 0.50223345 205582.47
12 0.5036651 197842.03
13 0.50080174 184905.0
14 0.49891192 197510.1
15 0.49954185 190196.38
16 0.50641394 186062.9
17 0.5014889 193427.38
18 0.5397434 183715.31
19 0.49868286 190093.22
20 0.49816746 189757.22
21 0.5646547 183366.08
22 0.5087046 189825.78
23 0.5513687 184423.19
24 0.49937007 186123.69
25 0.49816746 187347.9
26 0.5805177 182808.61
27 0.51053715 186705.72
28 0.5341885 184037.33
29 0.5134578 183937.9
30 0.5026343 185405.56
31 0.5938037 182405.0
32 0.5080747 184927.7
33 0.5418623 183035.11
34 0.5174665 183707.28
35 0.5167793 183835.81
36 0.55217046 182744.7
37 0.51706564 184002.1
38 0.58395374 182315.72
39 0.517352 183720

# Lab

In [0]:
yb = []
for i in range(len(y)):  # y[i] = [1,0] or [0,1]
  yb.append( [ y[i][0] ] )
  
train_x, test_x, train_yb, test_yb = train_test_split(x, yb, test_size=0.3)

In [0]:
# Step 1. Build Perceptron
X = tf.placeholder(tf.float32, [None, 28, 28, 3])
Y = tf.placeholder(tf.float32, [None, 1])

W = tf.Variable(tf.random_normal([28*28*3, 1], stddev=0.01))
b = tf.Variable(tf.random_normal([1], stddev=0.01))

reshaped_X = tf.reshape(X, [-1, 28*28*3])
Z = tf.matmul(reshaped_X, W) + b
f = 0.5*tf.sign(Z) + 0.5

In [0]:
# Step 2. Loss and Optimizer
delta = Y - f
loss = tf.reduce_mean( tf.square(delta) ) # MSE

# optimizer = tf.train.AdamOptimizer(0.01).minimize(loss)
lr = 0.001
Xd = tf.transpose( tf.multiply(reshaped_X, delta) ) # rX = [N, d] del = [N, 1]
dW = tf.reduce_sum(Xd, axis=1, keepdims=True) # Xd = [d, N] --> [d, 1]

db = tf.reduce_sum(delta, axis=0)

Wupdate = tf.assign_sub(W, lr*dW)  # assign_sub  -=
bupdate = b.assign_sub(lr*db)

optimizer = [Wupdate, bupdate]

In [0]:
# Step 3. Train

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  
  for i in range(10):
    sess.run(optimizer, feed_dict={X: train_x, Y: train_yb})
    mse = sess.run(loss, feed_dict={X: train_x, Y: train_yb})
    print(i, mse)

In [0]:
# Model Definition

X = tf.placeholder(tf.float32, [None, 28, 28, 3])
Y = tf.placeholder(tf.float32, [None, 1])

W = tf.Variable(tf.random_normal([28*28*3,1], stddev=0.01))
b = tf.Variable(tf.random_normal([1], stddev=0.01))

reshaped_X = tf.reshape(X, [-1, 28*28*3]) - 0.5 # normalize
Z = tf.matmul(reshaped_X, W) + b

f = tf.sigmoid(Z)


In [0]:
# Loss and Optmizer

delta = Y - f
loss = tf.reduce_mean( tf.square(delta) )

optimizer = tf.train.AdamOptimizer(0.01).minimize(loss)



In [0]:
# Train

with tf.Session() as sess:
  sess.run(tf.global_variables_initializer())
  
  for i in range(100):
    sess.run(optimizer, feed_dict={X: train_x, Y: train_yb})
    loss_val = sess.run(loss, feed_dict={X:train_x, Y: train_yb})
    print(loss_val)

0.31875548
0.27743363
0.27150232
0.25736573
0.2662277
0.26318577
0.24641636
0.24734837
0.24709973
0.2417731
0.24456102
0.24410036
0.23948112
0.23873833
0.23621085
0.23611897
0.23814051
0.23452476
0.23099968
0.23275295
0.23352647
0.23320794
0.23138033
0.2285933
0.2295837
0.2305431
0.22845326
0.22733563
0.22700587
0.22736008
0.22746664
0.22585736
0.22544742
0.22549307
0.22502144
0.22486238
0.22420205
0.22403009
0.22393534
0.2235688
0.22343363
0.22281215
0.22256453
0.22254048
0.22227788
0.2219617
0.22160073
0.22166015
0.22140855
0.22100155
0.22080438
0.22070436
0.22056493
0.22021887
0.22009693
0.21996216
0.21977551
0.21956275
0.21939747
0.21928771
0.21910915
0.2189728
0.2187905
0.21866496
0.2185201
0.21837844
0.21822092
0.21810675
0.21799496
0.21784231
0.2177125
0.21760002
0.21748523
0.21734287
0.21723974
0.21713132
0.21701825
0.21689838
0.21680114
0.21669303
0.21658936
0.21648501
0.21638882
0.21628904
0.21619242
0.21609367
0.21600278
0.21591152
0.21581942
0.21572919
0.21564469
0.21555604

# Keras

In [0]:
tf.reset_default_graph()

from tensorflow import keras
import numpy as np

In [0]:
# Model Definition

model = keras.models.Sequential()
model.add( keras.layers.Flatten( input_shape=(28,28,3) ) )
model.add( keras.layers.Dense(100, activation='sigmoid') )
model.add( keras.layers.Dense(1, activation='sigmoid') )

model.summary()



_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_2 (Flatten)          (None, 2352)              0         
_________________________________________________________________
dense_3 (Dense)              (None, 100)               235300    
_________________________________________________________________
dense_4 (Dense)              (None, 1)                 101       
Total params: 235,401
Trainable params: 235,401
Non-trainable params: 0
_________________________________________________________________


In [0]:
# Loss and Optimizer

model.compile(optimizer='adam', loss='mse', metrics=['accuracy', 'mae'])

model.fit( np.array(train_x), np.array(train_yb), epochs=100, validation_split=0.3, callbacks=[tensorboard] )

Train on 12223 samples, validate on 5239 samples


TypeError: ignored

In [0]:
# This is the TensorBoard setup routine from Lab01
!wget https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
!unzip ngrok-stable-linux-amd64.zip

LOG_DIR = './log'

get_ipython().system_raw(
    'tensorboard --logdir {} --host 0.0.0.0 --port 6006 &'
    .format(LOG_DIR)
)

get_ipython().system_raw('./ngrok http 6006 &')

! curl -s http://localhost:4040/api/tunnels | python3 -c \
    "import sys, json; print(json.load(sys.stdin)['tunnels'][0]['public_url'])"

tensorboard = keras.callbacks.TensorBoard(log_dir="log/{}".format(time.time()))

--2019-02-14 19:40:28--  https://bin.equinox.io/c/4VmDzA7iaHb/ngrok-stable-linux-amd64.zip
Resolving bin.equinox.io (bin.equinox.io)... 52.71.139.107, 52.54.84.112, 52.45.111.123, ...
Connecting to bin.equinox.io (bin.equinox.io)|52.71.139.107|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 5363700 (5.1M) [application/octet-stream]
Saving to: ‘ngrok-stable-linux-amd64.zip’


2019-02-14 19:40:29 (9.44 MB/s) - ‘ngrok-stable-linux-amd64.zip’ saved [5363700/5363700]

Archive:  ngrok-stable-linux-amd64.zip
  inflating: ngrok                   
http://f18ad8bc.ngrok.io
