### memo
注1) tensorflowに内包されたkerasを使用することで、model.fitの入力にdatasetのiteratorを使用可能である。
https://github.com/tensorflow/tensorflow/issues/20698 

注2) tensorflowのDataset APIをkerasで活用する場合、tensorflow内のkerasを利用するとエラーが出る不具合があるため対処用。
```
ValueError: Cannot take the length of shape with unknown rank.
```
https://stackoverflow.com/questions/53851793/valueerror-cannot-take-the-length-of-shape-with-unknown-rank  

対処法: https://github.com/tensorflow/tensorflow/pull/24522

In [1]:
import tensorflow as tf
print(tf.__version__)

import tensorflow.keras as keras
print(keras.__version__)

1.14.0
2.2.4-tf


In [0]:
from tensorflow.keras import backend as K
sess = tf.Session()
K.set_session( sess )

In [0]:
import os
os.environ[ 'TF_CPP_MIN_LOG_LEVEL'] = '2'

### GoogleDriveをマウントする。

In [4]:
import os
if os.name == 'nt':
    print('OS is Windows: PASS mount google drive')
    g_dir_work = '../colab/'
else:
    from google.colab import drive
    drive.mount('/content/drive')
    g_dir_work = '/content/drive/My Drive/colab/'

# check mount point
print('\n<< Display work dir >>')
for file in os.listdir(g_dir_work):
    print( 'file/dir : ', file)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).

<< Display work dir >>
file/dir :  mylib
file/dir :  test.txt
file/dir :  data
file/dir :  template.ipynb
file/dir :  .ipynb_checkpoints
file/dir :  models
file/dir :  mnist_cams
file/dir :  mnist_pix2pix
file/dir :  tmp


### local Libraryパスを通す。

In [5]:
import sys
if os.name == 'nt':# windows
    lib_path='../colab/mylib'
else:
    print(os.getcwd())
    lib_path='/content/drive/My Drive/colab/mylib/'
sys.path.append(lib_path)

/content


### tf.data.TextLineDatasetのサンプル
https://deepage.net/tensorflow/2017/07/18/tensorflow-dataset-api.html

In [0]:
class Categories(object):
    __instance = None
    def __new__(cls, *args, **keys):
        if cls.__instance is None:
            cls.__instance = object.__new__(cls)
        return cls.__instance

    def __init__(self):
            self.items = ['0', '1', '2', '3', '4', '5', '6', '7', '8', '9' ]
            self.num   = len(self.items)

In [7]:
import datasets
import numpy as np

data = datasets.Dataset_mnist()
data.load()
input_file = data.save_for_textlinedataset(num=30)

Using TensorFlow backend.


In [8]:
def to_index(label):
    return Categories().items.index(label)

def parse_csv(line):
    [filename, category] = line.decode('utf-8').split(',')
    return filename, to_index(category)

def read_data(filename, label):
    inputs = np.load(filename.decode('utf-8')).astype(np.float32) / 255.0
    inputs = np.reshape(inputs, (inputs.shape[0], inputs.shape[1], 1))
    return inputs, label

def one_hot(data, label):
    return data, tf.one_hot( label, Categories().num  )

dataset = tf.data.TextLineDataset(input_file)
#dataset = dataset.skip(1)   # 列の読み飛ばし
if os.name == 'nt':
    dataset = dataset.map(lambda x  : tf.py_func(parse_csv, [x],    [tf.string,  tf.int32]))
    dataset = dataset.map(lambda x,y: tf.py_func(read_data, [x, y], [tf.float32, tf.int32]))
else:
    dataset = dataset.map(lambda x  : tf.py_func(parse_csv, [x],    [tf.string,  tf.int64]))
    dataset = dataset.map(lambda x,y: tf.py_func(read_data, [x, y], [tf.float32, tf.int64]))  
dataset = dataset.map(one_hot)
dataset = dataset.repeat()
dataset = dataset.shuffle(4)
dataset = dataset.batch(4)

iterator = dataset.make_one_shot_iterator()
next_elem = iterator.get_next()

W0818 08:30:54.378277 140532026447744 deprecation.py:323] From <ipython-input-8-debe3b51ba37>:22: py_func (from tensorflow.python.ops.script_ops) is deprecated and will be removed in a future version.
Instructions for updating:
tf.py_func is deprecated in TF V2. Instead, there are two
    options available in V2.
    - tf.py_function takes a python function which manipulates tf eager
    tensors instead of numpy arrays. It's easy to convert a tf eager tensor to
    an ndarray (just call tensor.numpy()) but having access to eager tensors
    means `tf.py_function`s can use accelerators such as GPUs as well as
    being differentiable using a gradient tape.
    - tf.numpy_function maintains the semantics of the deprecated tf.py_func
    (it is not differentiable, and manipulates numpy arrays). It drops the
    stateful argument making all functions stateful.
    
W0818 08:30:54.412954 140532026447744 deprecation.py:323] From <ipython-input-8-debe3b51ba37>:29: DatasetV1.make_one_shot_iter

In [9]:
# datasetテスト
val = sess.run(next_elem)

print(len(val))
print('\ndata ----------------------------------')
print(val[0].shape)
print(val[0].max())
print(val[0].min())
#print(val[0])

print('\nlabels ----------------------------------')
print(val[1].shape)
print(val[1])

2

data ----------------------------------
(4, 28, 28, 1)
1.0
0.0

labels ----------------------------------
(4, 10)
[[0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]]


### 訓練用モデル構築


In [10]:
import models
model = models.Model_mnist_classification()
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])

W0818 08:30:54.916355 140532026447744 deprecation.py:506] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/init_ops.py:1251: calling VarianceScaling.__init__ (from tensorflow.python.ops.init_ops) with dtype is deprecated and will be removed in a future version.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor


Model: "model_mnist_classification"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
imgs (InputLayer)            [(None, 28, 28, 1)]       0         
_________________________________________________________________
conv2d (Conv2D)              (None, 28, 28, 32)        320       
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 64)          0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 7, 7, 128)         73856     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 7, 7

### モデルの最適化

In [0]:
batch_size = 10
epochs = 10
steps_per_epoch = 3

In [12]:
history = model.fit(next_elem, epochs=epochs, steps_per_epoch=steps_per_epoch)

ValueError: ignored

In [13]:
history = model.fit(dataset.make_one_shot_iterator(), epochs=epochs, steps_per_epoch=steps_per_epoch)

ValueError: ignored

In [14]:
history = model.fit(dataset, epochs=epochs, steps_per_epoch=steps_per_epoch)

ValueError: ignored

#### from_tensor_slices利用

In [15]:
x_train = data.x_train.reshape((-1,28,28,1)) / 255
y_train = keras.utils.to_categorical(data.y_train, Categories().num)
print(x_train.shape, y_train.shape)
dataset2 = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(2).repeat()

model2 = models.Model_mnist_classification_train(dataset2.make_one_shot_iterator())
model2.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adam(),
              metrics=['accuracy'])
model2.fit(epochs=10, steps_per_epoch=4)

(60000, 28, 28, 1) (60000, 10)


AttributeError: ignored