In [1]:
import io
import re
import zipfile
from collections import OrderedDict

import tensorflow as tf
from PIL import Image
from tqdm import tqdm
import numpy as np
from IPython.display import display

In [2]:
def extract_images_bytes(path_64='train_64.zip', path='../CatDog/train.zip'):
    labels_images = OrderedDict()
    # load 64
    z = zipfile.ZipFile(path_64, 'r')
    for file in z.filelist:
        m = re.match('.*(cat|dog).*', file.filename)
        if m:
            category = m.groups()[0]
            if category != 'cat': continue
            img = Image.open(io.BytesIO((z.open(file.filename).read())))
            label = re.sub('[^/]+/', '', file.filename)
            labels_images[label] = [img]
    # load origin
    z = zipfile.ZipFile(path, 'r')
    for file in z.filelist:
        m = re.match('.*(cat|dog).*', file.filename)
        if m:
            category = m.groups()[0]
            if category != 'cat': continue
            label = re.sub('[^/]+/', '', file.filename)
            if label not in labels_images: continue
            img = Image.open(io.BytesIO((z.open(file.filename).read())))
            labels_images[label].append(img)
    return labels_images

In [3]:
train = extract_images_bytes()

In [4]:
X_train = np.array([np.array(x[0]) for x in train.values()])
y_train = np.array([np.array(x[1]) for x in train.values()])

In [5]:
test = extract_images_bytes('test_64.zip', '../CatDog/test.zip')

In [6]:
X_test = np.array([np.array(x[0]) for x in test.values()])
y_test = np.array([np.array(x[1]) for x in test.values()])

In [7]:
# X_train = X_train / 255.0
# y_train = y_train / 255.0
# X_test = X_test / 255.0
# y_test = y_test / 255.0

In [8]:
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

(10000, 64, 64, 3) (10000, 128, 128, 3) (2500, 64, 64, 3) (2500, 128, 128, 3)


In [9]:
batch_size = 8
learning_rate = 0.1
stddev = 0.1

In [10]:
X = tf.placeholder(tf.float32, [batch_size, 64, 64, 3], name='X')
y = tf.placeholder(tf.float32, [batch_size, 128, 128, 3], name='y')

In [11]:
pitch_1 = tf.Variable(tf.random_normal([9, 9, 3, 64], stddev=stddev), name='pitch_1')
pitch_1_bias = tf.Variable(tf.random_normal([64], stddev=stddev), name='pitch_1_bias')

In [12]:
conv_1 = tf.nn.relu(
    tf.nn.bias_add(
        tf.nn.conv2d(
            X, pitch_1, strides=[1, 1, 1, 1], padding='SAME'
        ),
        pitch_1_bias,
        name='bias_add_1'
    ),
    name='relu_1'
)

In [13]:
conv_1.get_shape()

TensorShape([Dimension(8), Dimension(64), Dimension(64), Dimension(64)])

In [14]:
pitch_2 = tf.Variable(tf.random_normal([1, 1, 64, 128], stddev=stddev), name='pitch_2')
pitch_2_bias = tf.Variable(tf.random_normal([128], stddev=stddev), name='pitch_2_bias')

In [15]:
conv_2 = tf.nn.relu(
    tf.nn.bias_add(
        tf.nn.conv2d(
            conv_1, pitch_2, strides=[1, 1, 1, 1], padding='SAME'
        ),
        pitch_2_bias,
        name='bias_add_2'
    ),
    name='relu_2'
)

In [16]:
conv_2.get_shape()

TensorShape([Dimension(8), Dimension(64), Dimension(64), Dimension(128)])

In [17]:
pitch_3 = tf.Variable(tf.random_normal([5, 5, 128, 3], stddev=stddev), name='pitch_3')
pitch_3_bias = tf.Variable(tf.random_normal([3], stddev=stddev), name='pitch_3_bias')

In [18]:
conv_3 = tf.nn.relu(
    tf.nn.bias_add(
        tf.nn.conv2d(
            conv_2, pitch_3, strides=[1, 1, 1, 1], padding='SAME'
        ),
        pitch_3_bias,
        name='bias_add_3'
    ),
    name='relu_3'
)

In [19]:
conv_3.get_shape()

TensorShape([Dimension(8), Dimension(64), Dimension(64), Dimension(3)])

In [20]:
pitch_4 = tf.Variable(tf.random_normal([1, 1, 3, 3], stddev=stddev), name='pitch_2')
pitch_4_bias = tf.Variable(tf.random_normal([3], stddev=stddev), name='pitch_2_bias')

In [21]:
dconv = tf.nn.bias_add(
    tf.nn.conv2d_transpose(
        conv_3, pitch_4,
        (batch_size, 128, 128, 3),
        (1, 2, 2, 1),
        padding='VALID',
        data_format='NHWC'
    ),
    pitch_4_bias,
    name='bias_add_4'
)

In [22]:
dconv.get_shape()

TensorShape([Dimension(8), Dimension(128), Dimension(128), Dimension(3)])

In [23]:
pred = dconv

In [24]:
cost = tf.reduce_mean(tf.pow(tf.sub(dconv, y), 2.0))

In [25]:
opt = tf.train.AdamOptimizer(learning_rate=learning_rate)

In [26]:
params = tf.trainable_variables()

In [27]:
gradients = tf.gradients(cost, params)

In [28]:
clipped_gradients, norm = tf.clip_by_global_norm(gradients, 5.0)

In [29]:
global_step = tf.Variable(0, trainable=False)

In [30]:
train_step = opt.apply_gradients(zip(clipped_gradients, params), global_step=global_step)

In [31]:
init = tf.global_variables_initializer()

In [32]:
def batch_flow(inputs, targets, batch_size):
    """流动数据流"""
    flowed = 0
    total = len(inputs)
    while True:
        X_ret = []
        y_ret = []
        for i in range(total):
            X_ret.append(inputs[i])
            y_ret.append(targets[i])
            if len(X_ret) == batch_size:
                flowed += batch_size
                X, y = np.array(X_ret), np.array(y_ret)
                yield X, y
                X_ret = []
                y_ret = []
            if flowed >= total:
                break
        if flowed >= total:
            break

In [33]:
for batch_x, batch_y in batch_flow(X_train, y_train, batch_size):
    print(batch_x.shape, batch_y.shape)
    break

(8, 64, 64, 3) (8, 128, 128, 3)


In [34]:
n_epoch = 10

In [35]:
with tf.Session() as sess:
    sess.run(init)
    total = None
    for epoch in range(n_epoch):
        costs = []
        for batch_x, batch_y in tqdm(batch_flow(X_train, y_train, batch_size), total=total):
            _, c = sess.run([train_step, cost], feed_dict={X: batch_x, y: batch_y})
            costs.append(c)
        print('epoch: {}, loss: {:.4f}'.format(epoch, np.mean(costs)))
        if total is None:
            total = len(costs)
    print('calculate train accuracy')
    costs = []
    train_result = []
    for batch_x, batch_y in tqdm(batch_flow(X_train, y_train, batch_size)):
        c, p = sess.run([cost, pred], feed_dict={X: batch_x, y: batch_y})
        costs.append(c)
        train_result += list(p)
    print('test loss: {:.4f}'.format(np.mean(costs)))
    print('calculate test accuracy')
    costs = []
    test_result = []
    for batch_x, batch_y in tqdm(batch_flow(X_test, y_test, batch_size)):
        c, p = sess.run([cost, pred], feed_dict={X: batch_x, y: batch_y})
        costs.append(c)
        test_result += list(p)
    print('test loss: {:.4f}'.format(np.mean(costs)))
    print('Done')

1250it [01:00, 20.60it/s]:00,  1.16it/s]
  0%|          | 3/1250 [00:00<00:58, 21.15it/s]

epoch: 0, loss: 112824552.0000


100%|██████████| 1250/1250 [01:00<00:00, 19.15it/s]%|          | 9/1250 [00:00<00:58, 21.23it/s]
  0%|          | 3/1250 [00:00<01:01, 20.17it/s]

epoch: 1, loss: 4422.0864


100%|██████████| 1250/1250 [01:01<00:00, 20.82it/s]%|          | 7/1250 [00:00<01:03, 19.72it/s]
  0%|          | 3/1250 [00:00<01:00, 20.60it/s]

epoch: 2, loss: 4421.6816


100%|██████████| 1250/1250 [01:00<00:00, 20.53it/s]%|          | 8/1250 [00:00<01:01, 20.32it/s]
  0%|          | 3/1250 [00:00<00:58, 21.43it/s]

epoch: 3, loss: 4421.6841


 71%|███████   | 888/1250 [00:42<00:17, 20.50it/s]1%|          | 9/1250 [00:00<00:58, 21.31it/s]


KeyboardInterrupt: 

In [None]:
display(
    Image.fromarray(np.uint8(X_train[0])).resize((128, 128), Image.ANTIALIAS),
    Image.fromarray(np.uint8(train_result[0])),
    Image.fromarray(np.uint8(y_train[0]))
)