# Style Transfer Implementation

In [1]:
import os
import math
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from PIL import Image # 更方便读取存储图像
import tensorflow as tf
import time
%matplotlib inline

## 读取参数 构建VGGNET

In [2]:
''' Per-channel means subtracted from the input image before it enters VGGNet;
VGGNet.build() applies them in B, G, R order '''
VGG_MEAN = [103.939, 116.779, 123.68]

class VGGNet:
    """
    load params from vgg16.npy (pre-train models)
    data is dict form 
    need build net structure and then give params
    so need constant/trainable = False layers params
    so need to use tf.nn.conv2d to assign constant value for params
    (because of tf.layers.conv2d 自动生成变量 赋值不方便) 
    tf.nn下的fun 比tf.layers的fun少了一层封装 不方便但是更灵活
    build vgg-16 net structure
    """
    def __init__(self,data_dict):
        self.data_dict = data_dict
    '''获取参数'''
    def _get_conv_layer_weights(self,name):
        return tf.constant(self.data_dict[name][0],name='conv_w')
    def _get_fc_layer_weights(self,name):
        return tf.constant(self.data_dict[name][0],name='fc_w')
    def _get_bias(self,name):
        return tf.constant(self.data_dict[name][1],name='b')
    '''构建layer'''
    def conv_layer(self,x,name):
        with tf.name_scope(name):
            conv_w = self._get_conv_layer_weights(name)
            conv_b = self._get_bias(name)
            conv = tf.nn.conv2d(x,conv_w,[1,1,1,1],padding='SAME')
            conv = tf.nn.bias_add(conv,conv_b)
            conv = tf.nn.relu(conv)
            return conv
    def pooling_layer(self,x,name):
        with tf.name_scope(name):
            return tf.nn.max_pool(x,ksize = [1,2,2,1],
                                  strides = [1,2,2,1],
                                  padding='SAME',name = name)
    def fc_layer(self,x,name,activation=tf.nn.relu):
        with tf.name_scope(name):
            fc_w = self._get_fc_layer_weights(name)
            fc_b = self._get_bias(name)
            h = tf.matmul(x,fc_w)
            h = tf.nn.bias_add(h,fc_b)
            if activation == None:
                '''最后一层不需要激活 使用softmax即可'''
                return h
            else:
                return activation(h)
    def flatten_layer(self,x,name):
        """flatten layer(tf.layers.flatten也可以)"""
        with tf.name_scope(name):
            x_shape = x.get_shape().as_list()[1:]
            dim2 = 1
            for i in x_shape:
                dim2 *= i
            x = tf.reshape(x,[-1,dim2])
            return x
    
    def build(self,x_rgb):
        """
        给定输入rgb格式224*224 转换成bgr 并且用均值归一化
        实现VGG16输入预处理 VGG16输入为224*224 的抽样预处理图像
        并对构件时间进行统计
        """
        assert x_rgb.get_shape().as_list()[1:] == [224,224,3]
        start_time = time.time()
        print('building start')
        '''prepocessing by split and concat'''
        r,g,b = tf.split(x_rgb,[1,1,1],axis=3)
        x_bgr = tf.concat([b-VGG_MEAN[0],
                           g-VGG_MEAN[1],
                           r-VGG_MEAN[2]],axis=3)
        
        '''build network structure with five conv layers and three fc layers'''
        '''
        为了方便下面使用 所以用成员variables 从而方便外部获取 调用
        '''
        self.conv1_1 = self.conv_layer(x_bgr,'conv1_1')
        self.conv1_2 = self.conv_layer(self.conv1_1,'conv1_2')
        self.pool1 = self.pooling_layer(self.conv1_2,'pool1')
        
        self.conv2_1 = self.conv_layer(self.pool1,'conv2_1')
        self.conv2_2 = self.conv_layer(self.conv2_1,'conv2_2')
        self.pool2 = self.pooling_layer(self.conv2_2,'pool2')
        
        self.conv3_1 = self.conv_layer(self.pool2,'conv3_1')
        self.conv3_2 = self.conv_layer(self.conv3_1,'conv3_2')
        self.conv3_3 = self.conv_layer(self.conv3_2,'conv3_3')
        self.pool3 = self.pooling_layer(self.conv3_3,'pool3')
        
        self.conv4_1 = self.conv_layer(self.pool3,'conv4_1')
        self.conv4_2 = self.conv_layer(self.conv4_1,'conv4_2')
        self.conv4_3 = self.conv_layer(self.conv4_2,'conv4_3')
        self.pool4 = self.pooling_layer(self.conv4_3,'pool4')
        
        self.conv5_1 = self.conv_layer(self.pool4,'conv5_1')
        self.conv5_2 = self.conv_layer(self.conv5_1,'conv5_2')
        self.conv5_3 = self.conv_layer(self.conv5_2,'conv5_3')
        self.pool5 = self.pooling_layer(self.conv5_3,'pool5')
        
        '''fc layer 在图像风格转换中农意义不大 '''
        '''节省5s
        self.flatten5 = self.flatten_layer(self.pool5,'flatten5')
        self.fc6 = self.fc_layer(self.flatten5,'fc6')
        self.fc7 = self.fc_layer(self.fc6,'fc7')
        self.fc8 = self.fc_layer(self.fc7,'fc8',activation=None)
        self.prob = tf.nn.softmax(self.fc8,name = 'softmax')
        '''
        print('building stop,total time spends: %4ds'% (time.time()-start_time))
        # return self.prob

## Hyperparameter definitions and file paths

In [9]:
# Input files: pre-trained VGG-16 parameters plus the content and style images.
vgg16_npy_path = './style_transfer_data/vgg16.npy'
content_img_path = './style_transfer_data/content.jpg'
style_img_path = './style_transfer_data/style.jpg'

# Directory that receives the generated images.  makedirs(exist_ok=True)
# also creates a missing parent directory and does not fail when the
# directory is already there (no check-then-create race).
out_dir = './style_transfer_data/run_out'
os.makedirs(out_dir, exist_ok=True)

# Loss weights: the content loss and the style loss differ by orders of
# magnitude, so different weights bring them to a comparable scale.
lambda_content = 0.1
lambda_style = 500

# Only one variable (the result image) is optimised, through a deep
# network: Adam is used with a fixed number of steps and a fairly large
# initial learning rate.
num_steps = 100
learning_rate = 5

## model build

### cnn model

In [4]:
def read_img(img_name):
    """Load an image file as a batched RGB array.

    Args:
        img_name: path of the image file to open.

    Returns:
        np.ndarray of dtype int32 with a leading batch dimension of 1,
        i.e. shape [1, height, width, 3].
    """
    # The context manager closes the underlying file once the pixels are read.
    with Image.open(img_name) as img:
        # Force three channels so grayscale / RGBA / palette files still
        # match the [1, 224, 224, 3] tensors used by the rest of the notebook.
        img_np = np.array(img.convert('RGB'))
    # Wrap in a list to add the batch dimension.
    return np.asarray([img_np],dtype = np.int32)

def initial_result_img(shape,mean,stddev):
    """Create the trainable result image, initialised with random noise.

    The content and style images are data and enter the graph through
    placeholders; the result image is different: it is the one value being
    optimised, so it has to be declared as a tf.Variable.  Without it the
    whole graph would consist of constants only.

    Args:
        shape: shape of the image tensor.
        mean: mean of the truncated normal initial values.
        stddev: standard deviation of the truncated normal initial values.

    Returns:
        A tf.Variable holding the initial image.
    """
    return tf.Variable(tf.truncated_normal(shape,mean=mean,stddev=stddev))

# The generated image: the only variable of the whole graph.
result_img = initial_result_img([1,224,224,3],127.5,12)

# Raw content / style images and the ops that resize them to the 224x224
# input required by VGGNet.build.  "The value of a feed cannot be a
# tf.Tensor object", so these ops have to be run in a session first to
# obtain numpy arrays before anything is fed to the placeholders below.
content_img_val = read_img(content_img_path)
content_img_op = tf.image.resize_bicubic(content_img_val,[224,224])
style_img_val = read_img(style_img_path)
style_img_op = tf.image.resize_bicubic(style_img_val,[224,224])

# Entry points for the resized content and style images.
content_img = tf.placeholder(tf.float32,shape=[1,224,224,3])
style_img = tf.placeholder(tf.float32,shape=[1,224,224,3])

# An .npy file is read directly with numpy; .item() unwraps the stored
# object into a dict.
# NOTE(review): allow_pickle=True unpickles the file content - only load
# weight files from a trusted source.
data_dict = np.load(
    vgg16_npy_path,
    allow_pickle=True,
    encoding="latin1",
).item()

# A single network would be enough: run content and style through it once
# and compute the losses against those constant results.  Three networks
# are built instead because building is cheap and the code is simpler.
vggnet_for_content = VGGNet(data_dict)
vggnet_for_content.build(content_img)

vggnet_for_style = VGGNet(data_dict)
vggnet_for_style.build(style_img)

vggnet_for_result = VGGNet(data_dict)
vggnet_for_result.build(result_img)



building start
building stop,total time spends:    0s
building start
building stop,total time spends:    0s
building start
building stop,total time spends:    0s


### style transfer kernel model (loss calculate)

In [5]:
# Feature maps that enter the loss computation.
# Each feature map has shape [1, height, width, channel].
#
# The layers are selected by name so that the target list and the matching
# result list are always built from the same layers; the losses pair them
# up with zip(), which would silently drop entries if the lists differed.
# Layers exposed by VGGNet.build that are typically used here:
#   'conv1_2', 'conv2_2', 'conv3_3', 'conv4_3', 'conv5_3'
content_layer_names = ['conv1_2']
style_layer_names = ['conv4_3']

content_features = [
    getattr(vggnet_for_content, layer_name)
    for layer_name in content_layer_names
]
style_features = [
    getattr(vggnet_for_style, layer_name)
    for layer_name in style_layer_names
]
result_content_features = [
    getattr(vggnet_for_result, layer_name)
    for layer_name in content_layer_names
]
result_style_features = [
    getattr(vggnet_for_result, layer_name)
    for layer_name in style_layer_names
]
def gram_matrix(x):
    """Compute the normalised Gram matrix used by the style loss.

    Args:
        x: feature tensor extracted from VGGNet with four dimensions
           [batch, height, width, channels]; the last three must be
           statically known, the batch size may be unknown.

    Returns:
        Tensor of shape [batch, channels, channels]: for every batch entry
        the channel-by-channel inner products over all spatial positions,
        divided by height * width * channels.
    """
    _, h, w, ch = x.get_shape().as_list()
    # -1 lets TensorFlow infer the batch size instead of hard-coding the
    # static value, which would be None for a variable-size batch.
    x = tf.reshape(x, [-1, h*w, ch])
    # adjoint_a transposes the first operand per batch entry:
    # [ch, h*w] x [h*w, ch] -> [ch, ch]
    gram = tf.matmul(x, x, adjoint_a=True)
    # Explicit float32 constant: TensorFlow does not mix dtypes implicitly.
    gram = gram / tf.constant(ch*w*h, tf.float32)
    return gram

# Content loss: mean squared difference between the feature maps of the
# content image and those of the result image, summed over the layers.
content_loss = tf.zeros(1,tf.float32)
for c,c_ in zip(content_features,result_content_features):
    content_diff = tf.square(c-c_)
    content_loss = content_loss + tf.reduce_mean(content_diff,axis = [1,2,3])

# Style loss: the same comparison, but carried out on the Gram matrices
# of the feature maps.
style_loss = tf.zeros(1,tf.float32)
style_gram = list(map(gram_matrix,style_features))
result_style_gram = list(map(gram_matrix,result_style_features))
print(style_gram[0].get_shape())
print(result_style_gram[0].get_shape())
for s,s_ in zip(style_gram,result_style_gram):
    gram_diff = tf.square(s-s_)
    style_loss = style_loss + tf.reduce_mean(gram_diff,axis = [1,2])

# Weighted total loss, minimised with Adam.
loss = content_loss*lambda_content + style_loss*lambda_style
train_op = tf.train.AdamOptimizer(learning_rate).minimize(loss)

(1, 512, 512)
(1, 512, 512)


## Training process

In [8]:
init_op = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init_op)
    # A feed value cannot be a tf.Tensor, so the resize ops are evaluated
    # once here to obtain numpy arrays of shape (1, 224, 224, 3).
    content_img_val ,style_img_val = sess.run([content_img_op,style_img_op])
    # Keep the interpolated pixels as float32, the dtype of the
    # placeholders.  Casting to int32 (as done previously) truncated the
    # fractional part produced by the bicubic resize for no benefit.
    content_img_val = np.asarray(content_img_val,np.float32)
    style_img_val = np.asarray(style_img_val,np.float32)

    # The fed images never change, so the feed dict is built only once.
    feed_dict = {
        content_img:content_img_val,
        style_img:style_img_val
    }

    for step in range(num_steps):
        loss_value,content_loss_value,style_loss_value,_ = sess.run(
            [loss,content_loss,style_loss,train_op],
            feed_dict = feed_dict)
        # Each loss tensor has shape [1], hence the [0] indexing.
        print('step: %d, loss_value: %8.4f, content_loss: %8.4f, style_loss: %8.4f' \
            % (step+1,
               loss_value[0],
               content_loss_value[0],
               style_loss_value[0]))
        # Save the current result image after every step.
        result_img_path = os.path.join(out_dir,'result-%04d.jpg'%(step+1))
        # Read the variable in a separate call so that the value is the one
        # after this step's update; drop the batch dimension, clamp to the
        # valid pixel range and convert to 8-bit.
        result_img_val = result_img.eval(sess)[0]
        result_img_val = np.clip(result_img_val,0,255)
        result_img_val = np.asarray(result_img_val,np.uint8)
        # PIL writes the array to disk.
        img = Image.fromarray(result_img_val)
        img.save(result_img_path)

step: 1, loss_value: 7448.3359, content_loss: 48428.3672, style_loss:   5.2110
step: 2, loss_value: 6688.9331, content_loss: 41532.7188, style_loss:   5.0713
step: 3, loss_value: 5983.2725, content_loss: 35858.5820, style_loss:   4.7948
step: 4, loss_value: 5389.6377, content_loss: 31515.6777, style_loss:   4.4761
step: 5, loss_value: 4974.5527, content_loss: 28361.1855, style_loss:   4.2769
step: 6, loss_value: 4699.3623, content_loss: 26158.2988, style_loss:   4.1671
step: 7, loss_value: 4224.9463, content_loss: 24506.6641, style_loss:   3.5486
step: 8, loss_value: 3736.1094, content_loss: 23300.8574, style_loss:   2.8120
step: 9, loss_value: 3399.5518, content_loss: 22423.0176, style_loss:   2.3145
step: 10, loss_value: 3137.6018, content_loss: 21768.0664, style_loss:   1.9216
step: 11, loss_value: 2946.9092, content_loss: 21282.8477, style_loss:   1.6372
step: 12, loss_value: 2812.5525, content_loss: 20899.6875, style_loss:   1.4452
step: 13, loss_value: 2689.4094, content_loss: 20

## observe vgg16 param

In [None]:
'''
Alternative: patch np.load so that allow_pickle=True becomes the default.

# save np.load
np_load_old = np.load
# modify the default parameters of np.load
np.load = lambda *a,**k: np_load_old(*a, **k, allow_pickle=True)
'''
# np.load does not load pickled objects by default for safety reasons, so
# allow_pickle has to be passed explicitly.  Watch the encoding as well:
# https://www.cnblogs.com/Tom-Ren/p/11054596.html
# Reading an .npy file written by Python 2 from Python 3 may raise an
# encoding error; the accepted values are 'ASCII', 'latin1', 'bytes'.
data = np.load(vgg16_npy_path,allow_pickle=True,encoding="latin1")
print(type(data))
# print(data)

# Unwrap the stored object and list the layers it contains.
data_dict = data.item()
print(data_dict.keys())
print(len(data_dict))

# Each layer entry unpacks into (weights, bias); show one convolution layer ...
conv1_1 = data_dict['conv1_1']
print(len(conv1_1))
w, b = conv1_1
print(w.shape, b.shape, sep='\n')

# ... and one fully connected layer.
fc6 = data_dict['fc6']
print(len(fc6))
w, b = fc6
print(w.shape, b.shape, sep='\n')