### Change the degree feature for all trained labels

In [1]:
import tensorflow as tf
# Label class names; a name's position in the list is its integer class id.
LABEL_CLASS_LIST = ['NG', 'OK']
# String -> int64 id table for labels. Keys that are not in the list resolve to -1.
label_lookup = tf.lookup.StaticHashTable(
    initializer=tf.lookup.KeyValueTensorInitializer(
        keys=LABEL_CLASS_LIST,
        values=tf.constant([*range(len(LABEL_CLASS_LIST))], dtype=tf.int64),
    ),
    default_value=-1,
)
# Degree class names (zero-padded strings); the list order defines the class ids.
DEGREE_CLASS_LIST = ['000', '180', '270', '090']
# String -> int64 id table for degrees. Keys that are not in the list resolve to -1.
degree_lookup = tf.lookup.StaticHashTable(
    initializer=tf.lookup.KeyValueTensorInitializer(
        keys=DEGREE_CLASS_LIST,
        values=tf.constant([*range(len(DEGREE_CLASS_LIST))], dtype=tf.int64),
    ),
    default_value=-1,
)
def parse_img(img):
    """Decode encoded image bytes and resize them, with padding, to 75x75.

    Args:
        img: Encoded image contents, as accepted by ``tf.io.decode_image``.

    Returns:
        The decoded 3-channel float32 image after ``tf.image.resize_with_pad``
        to a height and width of 75.
    """
    decoded = tf.io.decode_image(
        img,
        channels=3,
        dtype=tf.dtypes.float32,
        expand_animations=False,
    )
    return tf.image.resize_with_pad(decoded, target_height=75, target_width=75)

def label_to_onehot(label):
    """Convert a label string tensor into a float32 one-hot vector of depth 2.

    The string is first mapped to its class id through ``label_lookup``.
    """
    class_index = label_lookup.lookup(label)
    return tf.cast(tf.one_hot(class_index, 2), dtype=tf.float32)

def degree_to_onehot(degree):
    """Convert a degree string tensor into a one-hot vector of depth 4.

    The string is first mapped to its class id through ``degree_lookup``.
    """
    class_index = degree_lookup.lookup(degree)
    return tf.one_hot(class_index, 4)

def _bytes_feature(value):
    """Wrap a single bytes value in a ``tf.train.Feature`` backed by a BytesList."""
    wrapped = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=wrapped)

def _int64_feature(value):
    """Wrap a single bool / enum / int value in a ``tf.train.Feature`` backed by an Int64List."""
    wrapped = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=wrapped)

In [2]:
def serialize_example(path, label, degree):
    """Build a serialized ``tf.train.Example`` for one image file.

    Args:
        path: Location of the image file; its raw contents are read with
            ``tf.io.read_file`` and stored under the "image" key.
        label: Label value stored under the "label" key.
        degree: Degree value stored under the "degree" key.

    Returns:
        The serialized Example, containing the bytes features
        "image", "path", "degree" and "label".
    """
    def as_bytes_feature(text):
        # Go through a tensor to obtain the bytes form that BytesList stores.
        return _bytes_feature(tf.constant(text).numpy())

    features = {
        "image": _bytes_feature(tf.io.read_file(path).numpy()),
        "path": as_bytes_feature(path),
        "degree": as_bytes_feature(degree),
        "label": as_bytes_feature(label),
    }
    wrapped = tf.train.Features(feature=features)
    return tf.train.Example(features=wrapped).SerializeToString()

In [3]:
import sqlite3
# Open the metadata database and keep one cursor (`c`) for the queries in the next cells.
conn = sqlite3.connect(database='/tf/robertnb/p1-dip-metadata.db')
c = conn.cursor()

In [5]:
# Randomly sample at most 10000 'OK' rows that have degree, width and height set,
# leaving out the label / heat_sink / screw component classes.
totfrecord = c.execute("""select path, label, degree from metadata
        where
            label = 'OK' and
            degree is not NULL and
            width is not NULL and
            height is not NULL and
            ( component_class != 'label' and
            component_class != 'heat_sink' and
            component_class != 'screw' )
            ORDER BY RANDOM() LIMIT 10000
        """).fetchall()
len(totfrecord)

10000

In [13]:
def to_ds_comp_label(label, component):
    """Fetch up to 1000 random data-center rows for one label/component pair.

    Uses the notebook-level cursor ``c``.

    Args:
        label: Value matched against the ``label`` column.
        component: Value matched against the ``component_class`` column.

    Returns:
        A list of ``(path, label, degree)`` tuples from the ``metadata`` table
        whose ``path`` contains ``/data-center/``, in random order.
    """
    # Bind the values as query parameters instead of formatting them into the
    # SQL text, so quotes or other special characters in the arguments cannot
    # break or alter the statement.
    query = """select path, label, degree from metadata
            where label = ? and
            component_class = ? and
            path like '%/data-center/%'
            ORDER BY RANDOM() LIMIT 1000
            """
    return c.execute(query, (label, component)).fetchall()

In [14]:
# Collect one list of rows per (label, component) pair, so `totfrecord`
# ends up as a list of row lists rather than a flat list of rows.
totfrecord = []
for label in ("NG", "OK"):
    for component in ("label", "heat_sink", "screw"):
        totfrecord += [to_ds_comp_label(label, component)]

In [22]:
tfrecord_filepath = '/data/aoi-wzs-p1-dip-fa-nvidia/label_heatsink_screw/tfrecord/datacenter_by_comp_label_limit1K.tfrecord'
# `totfrecord` is a list of row lists; each (path, label, degree) row becomes one record.
with tf.io.TFRecordWriter(tfrecord_filepath) as writer:
    for rows in totfrecord:
        for img_path, img_label, img_degree in rows:
            writer.write(serialize_example(img_path, img_label, img_degree))
print('Exported Successfully to ' + tfrecord_filepath)

Exported Successfully to /data/aoi-wzs-p1-dip-fa-nvidia/label_heatsink_screw/tfrecord/datacenter_by_comp_label_limit1K.tfrecord


### Other components: exported directly via SqlDataset

In [19]:
def _bytes_feature(value):
    """Return a ``tf.train.Feature`` whose bytes_list holds the single given bytes value."""
    payload = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=payload)

def _int64_feature(value):
    """Return a ``tf.train.Feature`` whose int64_list holds the single given bool / enum / int value."""
    payload = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=payload)

def parse_metadata_and_serialize_example(path, degree, label):
    """Read the image at ``path`` and pack it with its metadata into a serialized Example.

    Args:
        path: Tensor holding the image file location; passed to ``tf.io.read_file``.
        degree: Tensor whose ``.numpy()`` value is stored under the "degree" key.
        label: Tensor whose ``.numpy()`` value is stored under the "label" key.

    Returns:
        The serialized ``tf.train.Example`` with bytes features
        "path", "image", "degree" and "label".
    """
    image = tf.io.read_file(path)
    # Every field is stored as raw bytes taken from the tensor's numpy value.
    named_tensors = (
        ("path", path),
        ("image", image),
        ("degree", degree),
        ("label", label),
    )
    features = {key: _bytes_feature(tensor.numpy()) for key, tensor in named_tensors}
    wrapped = tf.train.Features(feature=features)
    return tf.train.Example(features=wrapped).SerializeToString()

def tf_serialize_example(path, degree, label):
    """Run ``parse_metadata_and_serialize_example`` through ``tf.py_function``.

    The three arguments are forwarded positionally as (path, degree, label),
    and the string result is reshaped to a scalar.
    """
    serialized = tf.py_function(
        func=parse_metadata_and_serialize_example,
        inp=(path, degree, label),
        Tout=tf.string,
    )
    return tf.reshape(serialized, shape=())

def generate_training_tfrecord_by_command(db_path, sql_command, dtype_tuple, export_path):
    """Run a SQL query against a sqlite database and write the rows as a TFRecord file.

    Args:
        db_path: Data source name of the sqlite database handed to ``SqlDataset``.
        sql_command: Query text. Each row is passed positionally to
            ``tf_serialize_example(path, degree, label)``, so the selected
            columns must come in that order.
        dtype_tuple: Tuple of dtypes, one per selected column.
        export_path: Destination file for the TFRecord output.
    """
    rows = tf.data.experimental.SqlDataset(
        driver_name="sqlite",
        data_source_name=db_path,
        query=sql_command,
        output_types=dtype_tuple,
    )
    serialized = rows.map(
        tf_serialize_example,
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
    )
    tf.data.experimental.TFRecordWriter(export_path).write(serialized)

In [20]:
import sqlite3
# Open the metadata database again so this section can be run on its own;
# `c` is the cursor used by the query cell that follows.
conn = sqlite3.connect(database='/tf/robertnb/p1-dip-metadata.db')
c = conn.cursor()

In [21]:
# NOTE: despite its name, `db_path` holds the SQL text that is later passed as the
# query to generate_training_tfrecord_by_command.
# The columns are selected as (path, degree, label) because every row is handed
# positionally to tf_serialize_example(path, degree, label); selecting label before
# degree would store each value under the other's feature key in the TFRecord.
db_path = """select path, degree, label from metadata 
              where (
                    component_class='screw' or
                    component_class='heat_sink') and
                    date < 20200503
          """
c.execute(db_path)

# Fetch the rows here only to check how many records the export will contain.
totfrecord = c.fetchall()
len(totfrecord)

4881

In [22]:
export_path = '/data/aoi-wzs-p1-dip-fa-nvidia/label_heatsink_screw/tfrecord/screw_heatsink_before_20200503.tfrecord'
# `db_path` holds the SQL command built in the previous cell; all three columns are read as strings.
generate_training_tfrecord_by_command(
    db_path='/tf/robertnb/p1-dip-metadata.db',
    sql_command=db_path,
    dtype_tuple=(tf.string, tf.string, tf.string),
    export_path=export_path,
)

### Write a TFRecord from a list of files

In [6]:
import glob, os
def preprocessed_features(path):
    """Derive the label and degree of an image from its file path.

    The label is the name of the directory that directly contains the file,
    and the degree is the third ``_``-separated field of the file name.

    Args:
        path: Path of the image file.

    Returns:
        A ``(path, label, degree)`` tuple.

    Raises:
        IndexError: If the file name has fewer than three ``_``-separated fields.
    """
    parent_dir = os.path.dirname(path)
    file_name = os.path.basename(path)
    label = parent_dir.rsplit(os.path.sep, 1)[-1]
    # maxsplit=3 leaves the third field intact while skipping needless splits.
    degree = file_name.split('_', 3)[2]
    return path, label, degree

# Glob pattern: every .jpg exactly one directory level below stylized_sh.
stylized = '/data/aoi-wzs-p1-dip-fa-nvidia/label_heatsink_screw/preprocessed/stylized_sh/*/*.jpg'
stylized_paths = glob.glob(pathname=stylized)
# Show the parsed (path, label, degree) of the first match as a sanity check.
preprocessed_features(stylized_paths[0])

('/data/aoi-wzs-p1-dip-fa-nvidia/label_heatsink_screw/preprocessed/stylized_sh/OK/8SSB27A18611W5ZS99500F0_HSPL75_090_000_000.jpg',
 'OK',
 '090')

In [7]:
tfrecord_filepath = '/data/aoi-wzs-p1-dip-fa-nvidia/label_heatsink_screw/tfrecord/stylized_screw_heatsink_before_20200503.tfrecord'
# One record per stylized image; label and degree are parsed from each file path.
with tf.io.TFRecordWriter(tfrecord_filepath) as writer:
    for img_path in stylized_paths:
        writer.write(serialize_example(*preprocessed_features(img_path)))
print('Exported Successfully to ' + tfrecord_filepath)

Exported Successfully to /data/aoi-wzs-p1-dip-fa-nvidia/label_heatsink_screw/tfrecord/stylized_screw_heatsink_before_20200503.tfrecord


### Check the TFRecord content

In [6]:
import tensorflow as tf
def parse_example(example_proto):
    """Parse one serialized Example and keep only its path and label.

    All four stored features ("path", "image", "degree", "label") are declared
    as scalar strings for parsing, but only "path" and "label" are returned.

    Args:
        example_proto: A serialized ``tf.train.Example``.

    Returns:
        A dict with the keys 'path' and 'label'.
    """
    image_feature_description = {
        key: tf.io.FixedLenFeature([], tf.string)
        for key in ("path", "image", "degree", "label")
    }
    parsed = tf.io.parse_single_example(example_proto, image_feature_description)
    return {field: parsed[field] for field in ('path', 'label')}

to_test = '/data/aoi-wzs-p1-dip-fa-nvidia/label_heatsink_screw/tfrecord/other_comps_random_1w.tfrecord'
# Parse every record in the file and print its path/label pair.
d = tf.data.TFRecordDataset(to_test).map(
    parse_example,
    num_parallel_calls=tf.data.experimental.AUTOTUNE,
)
for dd in d:
    print(dd)

{'path': <tf.Tensor: shape=(), dtype=string, numpy=b'/data/aoi-wzs-p1-dip-fa-nvidia/data-center/D4/OK/2020-08-31/CN0YTVTTWS30008T00DIA07_KJ3_000_000_000.jpg'>, 'label': <tf.Tensor: shape=(), dtype=string, numpy=b'NG'>}
{'path': <tf.Tensor: shape=(), dtype=string, numpy=b'/data/aoi-wzs-p1-dip-fa-nvidia/data-center/D5/OK/2020-08-22/CN0DK9CRWS30008L0024A03_JPWRCPU1_090_000_000.jpg'>, 'label': <tf.Tensor: shape=(), dtype=string, numpy=b'NG'>}
{'path': <tf.Tensor: shape=(), dtype=string, numpy=b'/data/aoi-wzs-p1-dip-fa-nvidia/image/20190906/OK/MBK903W20714B20_C1032_180_271_016.jpg'>, 'label': <tf.Tensor: shape=(), dtype=string, numpy=b'NG'>}
{'path': <tf.Tensor: shape=(), dtype=string, numpy=b'/data/aoi-wzs-p1-dip-fa-nvidia/data-center/D8/OK/2020-09-09/MBL831W41347C30_J76_180_000_000.jpg'>, 'label': <tf.Tensor: shape=(), dtype=string, numpy=b'NG'>}
{'path': <tf.Tensor: shape=(), dtype=string, numpy=b'/data/aoi-wzs-p1-dip-fa-nvidia/data-center/D4/OK/2020-08-12/CN0YTVTTWS30008B00FMA07_KJ3_000