[How to use RecordIO in Fluid](https://github.com/PaddlePaddle/Paddle/blob/25241e9e5e8f691465a9dbdce2aa38344cbd05a0/doc/fluid/howto/cluster/fluid_recordio.md)

In [1]:
import paddle.fluid as fluid
import paddle
import numpy as np

## Fixed-Size

In [2]:
# Size of the synthetic dataset and of the batches read back from the file.
NUM_SAMPLES = 64
BATCH_SIZE = 16

# Synthetic images: NUM_SAMPLES samples of shape (3, 24, 24).
X = np.random.random((NUM_SAMPLES, 3, 24, 24)).astype('float32')
# Exactly one label per image. randint's upper bound is exclusive, so
# high=2 is needed to draw labels from {0, 1} (high=1 would always give 0).
Y = np.random.randint(0, 2, (NUM_SAMPLES, 1)).astype('int64')

In [3]:
## Before: checksum of the first BATCH_SIZE samples, to compare with the
## first batch read back from the RecordIO file further down.
np.sum(X[:BATCH_SIZE])

13821.191

In [4]:
def reader_creator():
    """Yield one ``(image, label)`` pair per sample from the module-level ``X`` and ``Y``."""
    for idx, image in enumerate(X):
        yield image, Y[idx]

reader = reader_creator()

In [5]:
# Wrap the sample generator function in a batched reader with one sample per
# batch; the samples are regrouped with fluid.layers.io.batch when read back.
batch_reader = paddle.batch(reader_creator, batch_size=1)

In [6]:
# Build a separate pair of programs that only declares the feed variables
# needed to construct the DataFeeder used for writing the file.
main_program = fluid.Program()
startup_program = fluid.Program()

with fluid.program_guard(main_program, startup_program):
    # Shapes describe a single sample (no batch dimension is listed here).
    img = fluid.layers.data(name="image", shape=[3, 24, 24], dtype='float32')
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
    # The feeder maps (image, label) tuples from the reader onto these variables.
    feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace())

In [7]:
# Serialize everything produced by batch_reader into "record.recordio".
# NOTE(review): the displayed return value (64) matches the number of
# single-sample batches, so it is presumably the record count - confirm.
fluid.recordio_writer.convert_reader_to_recordio_file(
   "record.recordio", feeder=feeder, reader_creator=batch_reader)

64

In [8]:
!ls record.recordio -s

440 record.recordio


In [9]:
# Fresh programs for reading the RecordIO file back.
main_program = fluid.Program()
startup_program = fluid.Program()

with fluid.program_guard(main_program, startup_program):
    # Describe the two fields stored in each record. The leading -1 is the
    # batch dimension (the arrays fetched below are (16, 3, 24, 24) and (16, 1)).
    # NOTE(review): pass_num is presumably the number of passes made over
    # the file - confirm against the open_recordio_file documentation.
    data_file = fluid.layers.open_recordio_file(
      filename="record.recordio",
      shapes=[[-1, 3, 24, 24], [-1, 1]],
      lod_levels=[0, 0],
      dtypes=["float32", "int64"],
      pass_num=100
    )
    # Regroup the single-sample records into batches of BATCH_SIZE.
    data_file = fluid.layers.io.batch(data_file, batch_size=BATCH_SIZE)
    # One output variable per stored field, in the order declared above.
    image, label = fluid.layers.read_file(data_file)

In [10]:
# Create a CPU executor and run the startup program once before the main
# program is executed.
exe = fluid.executor.Executor(fluid.CPUPlace())
exe.run(startup_program)

[]

In [11]:
X, y = exe.run(main_program, fetch_list=[image, label])

In [12]:
X.shape, y.shape

((16, 3, 24, 24), (16, 1))

In [13]:
# Same as before
np.sum(X)

13821.191

## Dynamic Size

In [14]:
def dynamic_creator():
    """Yield 127 ``(image, label)`` pairs whose images grow from 3x1x1 to 3x127x127.

    Each image is a float32 array of consecutive integers with shape
    ``(3, side, side)``; each label is a random integer in ``[0, side)``.
    """
    for side in range(1, 128):
        flat = np.arange(3 * side * side)
        image = flat.reshape(3, side, side).astype('float32')
        target = np.random.randint(0, side)
        yield image, target

reader = dynamic_creator()

In [15]:
# One sample per batch: every sample produced by dynamic_creator has a
# different spatial size.
batch_reader = paddle.batch(dynamic_creator, batch_size=1)

In [16]:
# Separate programs that only declare the feed variables for the writer.
main_program = fluid.Program()
startup_program = fluid.Program()

with fluid.program_guard(main_program, startup_program):
    # Height and width are left as None because they differ from sample to sample.
    img = fluid.layers.data(name="image", shape=[3, None, None], dtype='float32')
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")
    # The feeder maps (image, label) tuples from the reader onto these variables.
    feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace())

In [17]:
# Serialize everything produced by batch_reader into "dynamic_record.recordio".
# NOTE(review): the displayed return value (127) matches the number of
# samples yielded by dynamic_creator, so it is presumably the record count - confirm.
fluid.recordio_writer.convert_reader_to_recordio_file(
   "dynamic_record.recordio", feeder=feeder, reader_creator=batch_reader)

127

In [18]:
!ls dynamic_record.recordio -s

8020 dynamic_record.recordio


In [19]:
# Fresh programs for reading the variable-size records back.
main_program = fluid.Program()
startup_program = fluid.Program()

with fluid.program_guard(main_program, startup_program):
    # NOTE(review): pass_num is presumably the number of passes made over
    # the file - confirm against the open_recordio_file documentation.
    data_file = fluid.layers.open_recordio_file(
      filename="dynamic_record.recordio",
      shapes=[[-1, 3, 1, 1], # the trailing 1, 1 are placeholders; the stored samples vary in size
              [-1, 1]],
      lod_levels=[0, 0],
      dtypes=["float32", "int64"],
      pass_num=100
    )
    data_file = fluid.layers.io.batch(data_file, batch_size=1) ## Batch size must be 1 (presumably because samples of different sizes cannot be stacked)
    # One output variable per stored field, in the order declared above.
    image, label = fluid.layers.read_file(data_file)

In [20]:
# Create a CPU executor and run the startup program once before the main
# program is executed.
exe = fluid.executor.Executor(fluid.CPUPlace())
exe.run(startup_program)

[]

In [21]:
# First read: returns the first record, the 3x1x1 sample.
exe.run(main_program, fetch_list=[image, label])

[array([[[[0.]],
 
         [[1.]],
 
         [[2.]]]], dtype=float32), array([[0]])]

In [22]:
# Running the program again advances to the next record, the 3x2x2 sample.
exe.run(main_program, fetch_list=[image, label])

[array([[[[ 0.,  1.],
          [ 2.,  3.]],
 
         [[ 4.,  5.],
          [ 6.,  7.]],
 
         [[ 8.,  9.],
          [10., 11.]]]], dtype=float32), array([[1]])]