# Tensorflow图像的读取

In [1]:
import tensorflow as tf
import os

In [2]:
# 1、找到文件放入列表
file_name=os.listdir('./Data/day_5/dog/')
filelist=[os.path.join('./Data/day_5/dog/',file) for file in file_name]

In [3]:
# 2、构造文件队列
file_queue=tf.train.string_input_producer(filelist)

# 构造阅读器去读取文件内容（默认读取一张图片）
reader=tf.WholeFileReader()

key,value=reader.read(file_queue)

# print(value)

In [4]:
# 3、对读取的图片进行解码
image=tf.image.decode_jpeg(value)
# print(image)

In [5]:
# 4、处理图片的大小（统一大小）
image_resize=tf.image.resize_images(image,[200,200])
# print(image_resize)

# 注意：在批处理之前要求所有数据的形状必须定义
image_resize.set_shape([200,200,3])

In [6]:
# 5、进行批处理
image_batch=tf.train.batch([image_resize],batch_size=20,num_threads=1,capacity=20)
image_batch

<tf.Tensor 'batch:0' shape=(20, 200, 200, 3) dtype=float32>

In [8]:
# 开启会话运行结构
with tf.Session() as sess:
    # 定义一个线程协调器
    coord=tf.train.Coordinator()

    # 开启读取文件的线程
    threads=tf.train.start_queue_runners(sess,coord=coord)

    # 打印读取内容
    print(sess.run([image_batch]))

    # 回收子线程
    coord.request_stop()
    coord.join(threads)

[array([[[[ 50.      ,  50.      ,  50.      ],
         [ 50.      ,  50.      ,  50.      ],
         [ 49.56    ,  49.56    ,  49.56    ],
         ...,
         [ 89.849915, 104.849915,  85.849915],
         [ 88.660095, 103.660095,  84.660095],
         [ 80.61005 ,  95.61005 ,  76.61005 ]],

        [[ 50.      ,  50.      ,  50.      ],
         [ 50.      ,  50.      ,  50.      ],
         [ 49.56    ,  49.56    ,  49.56    ],
         ...,
         [ 91.47414 , 106.47414 ,  87.47414 ],
         [ 81.152054,  96.152054,  77.152054],
         [ 75.566025,  90.566025,  71.566025]],

        [[ 50.      ,  50.      ,  50.      ],
         [ 50.      ,  50.      ,  50.      ],
         [ 49.56    ,  49.56    ,  49.56    ],
         ...,
         [ 78.23002 ,  93.23002 ,  74.23002 ],
         [ 76.26404 ,  91.26404 ,  72.26404 ],
         [ 73.8     ,  88.8     ,  69.8     ]],

        ...,

        [[252.60004 , 255.      , 254.      ],
         [252.60004 , 255.      , 254.      

# 读取二进制文件

In [1]:
import tensorflow as tf
import os

In [2]:
# 构造文件列表
file_name=os.listdir('./Data/day_5/cifar-10-batches-bin/')
filelist=[os.path.join('./Data/day_5/cifar-10-batches-bin/',file) for file in file_name if file[-3:]=='bin']
filelist

['./Data/day_5/cifar-10-batches-bin/data_batch_1.bin',
 './Data/day_5/cifar-10-batches-bin/data_batch_2.bin',
 './Data/day_5/cifar-10-batches-bin/data_batch_3.bin',
 './Data/day_5/cifar-10-batches-bin/data_batch_4.bin',
 './Data/day_5/cifar-10-batches-bin/data_batch_5.bin',
 './Data/day_5/cifar-10-batches-bin/test_batch.bin']

In [3]:
# 构造文件队列
file_queue=tf.train.string_input_producer(filelist)

# 构造阅读器
reader=tf.FixedLengthRecordReader(3073)

key,value=reader.read(file_queue)

# 解码内容
label_image=tf.decode_raw(value,tf.uint8)

# 分割出图片和标签数据
label=tf.cast(tf.slice(label_image,[0],[1]),tf.int32)
image=tf.slice(label_image,[1],[3072])

# 对图片格式进行改变
image_reshape=tf.reshape(image,[32,32,3])

# 批处理
image_batch,label_batch=tf.train.batch([image_reshape,label],batch_size=10,num_threads=1,capacity=10)

In [3]:
# label

In [4]:
# image

In [5]:
# image_reshape

In [6]:
# image_batch

In [7]:
# label_batch

In [9]:
# 开启会话运行结构
with tf.Session() as sess:
    # 定义一个线程协调器
    coord=tf.train.Coordinator()

    # 开启读取文件的线程
    threads=tf.train.start_queue_runners(sess,coord=coord)

    # 打印读取内容
    print(sess.run([image_batch,label_batch]))

    # 回收子线程
    coord.request_stop()
    coord.join(threads)

[array([[[[ 35,  27,  25],
         [ 26,  24,  22],
         [ 23,  25,  25],
         ...,
         [154, 134, 141],
         [143, 109,  57],
         [ 46,  52,  52]],

        [[111,  70,  38],
         [ 31,  36,  35],
         [ 35,  36,  38],
         ...,
         [150, 153, 154],
         [148, 118,  59],
         [ 57,  69,  77]],

        [[128, 102,  88],
         [ 63,  58,  57],
         [ 56,  60,  41],
         ...,
         [163, 143, 145],
         [150, 120,  56],
         [ 51,  60,  65]],

        ...,

        [[162, 162, 164],
         [164, 164, 165],
         [165, 165, 166],
         ...,
         [170, 170, 170],
         [172, 170, 168],
         [165, 167, 172]],

        [[165, 162, 163],
         [163, 165, 164],
         [164, 160, 160],
         ...,
         [171, 174, 173],
         [171, 172, 168],
         [168, 170, 170]],

        [[171, 166, 165],
         [169, 173, 175],
         [174, 172, 171],
         ...,
         [175, 174, 174],
       

# 将图片的特征值和目标值存入tfrecords

In [9]:
# 构造一个tfrecords文件
writer=tf.python_io.TFRecordWriter('./Data/tem/')

# 循环将所有文件写入，每个样本要构造example协议
for i in range(10):
    # 取出第i个图片的特征值和目标值
    image=image_batch[i].eval().tostring()
    label=int(label_batch[i].eval()[0])

    # 构造一个样本的example
    example=tf.train.Example(features=tf.train.Features(feature={
        'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image])),
        'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[label]))
        }))

    # 写入单独样本
    writer.write(example.SerializeToString()
    
writer.close()

SyntaxError: invalid syntax (<ipython-input-9-68086df72b72>, line 19)

# tfrecords文件的读取

In [None]:
# 构造文件队列
file_queue=tf.train.string_input_producer(filelist)

In [1]:
# 构造阅读器,读取example,value，一个样本序列化的example
reader=tf.TFRecordReader()
key,value=reader.read(file_queue)

In [2]:
# 解析example
tf.parse_single_example(value,features={
   'image':tf.FixedLenFeature([],tf.string),
   'label':tf.FixedLenFeature([],tf.int64),
})

In [None]:
# 解码内容,如果读取内容string格式，需要解码
image=tf.decode_raw(features['image'],tf.uint8)

# 设置图片形状，以便批处理
tf.reshape(image,[32,32,3])

label=tf.cast(features['label'],tf.int32)

In [None]:
# 进行批处理
image_batch,label_batch=tf.train.batch([image_reshape,label],batch_size=10,num_threads=1,capacity=10)