# DB to dataset: building a `tf.data` pipeline from the SQLite metadata database

In [1]:
import tensorflow as tf
import sqlite3
# Short alias for the Keras layers namespace, used when building the model.
ly = tf.keras.layers

In [2]:
# Turn on memory growth for every GPU that TensorFlow can see.
physical_devices = tf.config.experimental.list_physical_devices(device_type='GPU')
for gpu in physical_devices:
    tf.config.experimental.set_memory_growth(device=gpu, enable=True)

In [3]:
# Path of the SQLite metadata database; it is also passed to the SqlDataset readers.
# NOTE(review): absolute, machine-specific path — consider making it configurable.
db_path = '/p3/metadata.db'
conn = sqlite3.connect(db_path)
# Shared cursor used by the exploratory queries in the following cells.
c = conn.cursor()

In [4]:
# Distinct orientations (degrees) present among the OK capacitor samples.
c.execute('''
    select distinct degree from metadata 
    where label = 'OK'
    and (component = 'AluCap' or component = 'ElecCap')
    and (degree = '0' or degree = '270')
    and width is not NULL
''')
ok_degree_rows = c.fetchall()

# Start the class list from scratch with the string form of each degree.
DEGREE_CLASS_LIST = [str(degree) for (degree,) in ok_degree_rows]
for class_name in DEGREE_CLASS_LIST:
    print(class_name)

270
0


In [5]:
# Add the orientation classes contributed by inverse-polarity (NG) samples.
# An inverse-polarity sample is assigned the class of its stored degree rotated
# by 180 degrees.
# NOTE(review): unlike the other queries in this notebook, this one only selects
# 'AluCap' rows and filters with `degree >= 0` — confirm that is intentional.
c.execute('''
    select distinct degree from metadata 
    where label = 'NG-InversePolarity' and
    component = 'AluCap'
    and degree >= 0 
''')
for (ng_degree,) in c.fetchall():
    d = str((int(ng_degree) + 180) % 360)
    print(d)
    # Append only classes that are not present yet, so re-running this cell does
    # not leave duplicates in DEGREE_CLASS_LIST (which would change DEGREE_NUM
    # and break the lookup tables built from the list).
    if d not in DEGREE_CLASS_LIST:
        DEGREE_CLASS_LIST.append(d)

180
90


In [6]:
# Sort the class names to get a stable class order. The entries are strings, so the
# order is lexicographic (e.g. '90' sorts after '270'); the class indices used by
# the lookup tables follow this order.
DEGREE_CLASS_LIST = sorted(DEGREE_CLASS_LIST)
DEGREE_CLASS_LIST

['0', '180', '270', '90']

In [7]:
# Class index for correctly mounted (OK) samples: the position of the degree string
# in DEGREE_CLASS_LIST.
OK_DEGREE_INDEX_LOOKUP = tf.constant(list(range(len(DEGREE_CLASS_LIST))), dtype=tf.int64)

# Class index for inverse-polarity (NG) samples: the class of the stored degree
# rotated by 180 degrees. It is derived from DEGREE_CLASS_LIST instead of being
# hard-coded, so it stays correct if the set or order of classes changes.
# A KeyError here means the rotated orientation of some class is missing from
# DEGREE_CLASS_LIST.
_degree_to_index = {degree: index for index, degree in enumerate(DEGREE_CLASS_LIST)}
NG_DEGREE_INDEX_LOOKUP = tf.constant(
    [_degree_to_index[str((int(degree) + 180) % 360)] for degree in DEGREE_CLASS_LIST],
    dtype=tf.int64)

DEGREE_NUM = len(DEGREE_CLASS_LIST)

# String -> class-index tables usable inside tf.data map functions; unknown keys map to -1.
ok_lookup = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(DEGREE_CLASS_LIST, OK_DEGREE_INDEX_LOOKUP), -1)
ng_lookup = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(DEGREE_CLASS_LIST, NG_DEGREE_INDEX_LOOKUP), -1)

In [8]:
# Sanity check: class index assigned to an NG sample stored with degree '270'.
ng_lookup.lookup(tf.constant('270'))

<tf.Tensor: shape=(), dtype=int64, numpy=3>

In [9]:
# Count the OK samples that match the filters used for the OK dataset.
c.execute('''
        select count(*) from metadata
        where (label = 'OK')
        and (component = 'AluCap' or component = 'ElecCap')
        and (degree = '0' or degree = '270')
        and width is not NULL
        ''')
# count(*) yields exactly one row with one column.
(ok_num,) = c.fetchone()
print(ok_num)

265834


In [10]:
# Count the inverse-polarity (NG) samples that match the filters used for the NG dataset.
c.execute('''
        select count(*) from metadata
        where (label = 'NG-InversePolarity')
        and (component = 'AluCap' or component = 'ElecCap')
        and (degree = '0' or degree = '270')
        and width is not NULL
        ''')
# count(*) yields exactly one row with one column.
(ng_num,) = c.fetchone()
print(ng_num)

985


In [11]:
# Every distinct (width, height) pair among the samples that will be used.
biggest_wh = c.execute('''
        select distinct width, height from metadata
        where (label = 'OK' or label = 'NG-InversePolarity')
        and (component = 'AluCap' or component = 'ElecCap')
        and (degree = '0' or degree = '270')
        and width is not NULL
        ''').fetchall()

# Largest side length over all samples; it becomes the side of the square model input.
biggest_size = 0
for w, h in biggest_wh:
    try:
        biggest_size = max(biggest_size, int(h))
        biggest_size = max(biggest_size, int(w))
    except (TypeError, ValueError):
        # Only conversion problems (missing or non-numeric dimensions) are reported
        # and skipped; anything else, e.g. KeyboardInterrupt, is allowed to propagate.
        print(w, h)

# Square, 3-channel input shape shared by the preprocessing functions and the model.
target_shape = (biggest_size, biggest_size, 3)
target_shape

(125, 125, 3)

In [12]:
batch_size = 256
# Reserve 20% of the OK sample count for validation; the rest is the training size.
valid_size = int(ok_num * 0.2)
train_size = ok_num - valid_size
# Number of whole batches per epoch for each split (the fractional part is truncated).
train_step, valid_step = (int(size / batch_size) for size in (train_size, valid_size))

In [13]:
def _path_degree_query(label):
    """Return the SQL selecting (path, degree) of the capacitor samples with `label`."""
    return f'''
        select path, degree from metadata
        where (label = '{label}')
        and (component = 'AluCap' or component = 'ElecCap')
        and (degree = '0' or degree = '270')
        and width is not NULL
        '''


# Both columns are read as strings.
_ROW_TYPES = (tf.string, tf.string)

# Rows of correctly mounted samples.
ok_ds = tf.data.experimental.SqlDataset(
    'sqlite', db_path, _path_degree_query('OK'), _ROW_TYPES)

# Rows of inverse-polarity samples.
ng_ds = tf.data.experimental.SqlDataset(
    'sqlite', db_path, _path_degree_query('NG-InversePolarity'), _ROW_TYPES)

In [14]:
# Peek at the first (path, degree) row of each raw dataset.
for raw_ds in (ok_ds, ng_ds):
    for p, d in raw_ds.take(1):
        print(p, d)

tf.Tensor(b'/data/aoi-wzs-p3-dip-prewave-saiap/Phase2-Cap/0904/AluCapacitor/Model-2/OK/270/CN01W26NWS20099302YBA00_PT4701_90_NA_NA.png', shape=(), dtype=string) tf.Tensor(b'270', shape=(), dtype=string)
tf.Tensor(b'/data/aoi-wzs-p3-dip-prewave-saiap/Phase2-Cap/0904/AluCapacitor/Model-2/NG/0/CN01W26NWS20099302GLA00_PT5101_39.bmp', shape=(), dtype=string) tf.Tensor(b'0', shape=(), dtype=string)


In [15]:
def _load_image(path):
    """Read the image file at `path` and return it cropped/padded to `target_shape`.

    The file is decoded as a float32 tensor with `target_shape[-1]` channels and
    then centrally cropped or zero-padded to the target height and width.
    """
    # target_shape is also used as the model's input shape: (height, width, channels).
    target_height, target_width, channels = target_shape
    byte_string_img = tf.io.read_file(path)
    img = tf.io.decode_image(byte_string_img, channels=channels, dtype=tf.dtypes.float32)
    # resize_with_crop_or_pad expects (image, target_height, target_width), in that order.
    return tf.image.resize_with_crop_or_pad(img, target_height, target_width)


def process_ok(path, degree):
    """Map an OK row (path, degree string) to (image, class index).

    The class index comes from `ok_lookup`, i.e. the degree is used as stored.
    """
    return _load_image(path), ok_lookup.lookup(degree)


def process_ng(path, degree):
    """Map an inverse-polarity row (path, degree string) to (image, class index).

    The class index comes from `ng_lookup`, which maps the stored degree to the
    class used for inverse-polarity samples.
    """
    return _load_image(path), ng_lookup.lookup(degree)

In [16]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Shuffle the raw rows first (cheap strings), then decode the images in parallel.
# OK rows go through a 20000-element shuffle buffer; the NG buffer is sized by ng_num.
pro_ok_ds = ok_ds.shuffle(buffer_size=20000).map(process_ok, num_parallel_calls=AUTOTUNE)
pro_ng_ds = ng_ds.shuffle(buffer_size=ng_num).map(process_ng, num_parallel_calls=AUTOTUNE)

In [17]:
# Check one processed NG sample: image shape and class index.
for image, label in pro_ng_ds.take(1):
    print(image.shape, label)

(125, 125, 3) tf.Tensor(3, shape=(), dtype=int64)


In [18]:
# Draw OK and NG samples with equal probability from endlessly repeating sources.
balanced_sources = [pro_ok_ds.repeat(), pro_ng_ds.repeat()]
pro_ds = tf.data.experimental.sample_from_datasets(balanced_sources, weights=[0.5, 0.5])

In [19]:
# Training stream. `pro_ds` repeats forever, so it must not be cached: an in-memory
# cache of an endless dataset can never be completed and keeps accumulating decoded
# images until the process runs out of memory.
train_ds = (
    pro_ds.skip(valid_size)
    .repeat()
    .batch(batch_size)
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)

# Validation stream: a finite slice of `valid_size` samples, so caching is bounded.
# NOTE(review): the cache holds `valid_size` decoded float32 images in memory —
# check that this fits on the training machine.
valid_ds = (
    pro_ds.take(valid_size)
    .cache()
    .repeat()
    .batch(batch_size)
    .prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
)

# NOTE(review): both streams are drawn from the same shuffled, randomly sampled
# source, so skip()/take() does not yield disjoint train/validation samples —
# consider splitting the rows before shuffling.

In [20]:
def model_simpleConv(conv_count, first_filter, kernel_size, strides, input_shape, degree_num, maxbool=False):
    """Build a small convolutional classifier over orientation classes.

    Args:
        conv_count: number of Conv2D + BatchNormalization stages.
        first_filter: filter count of the first stage; stage k uses `first_filter * k`.
        kernel_size: kernel size passed to every Conv2D layer.
        strides: stride applied along both spatial axes of every Conv2D layer.
        input_shape: shape of one input image.
        degree_num: number of output classes.
        maxbool: when true, a MaxPool2D layer follows every stage.

    Returns:
        A `tf.keras.Model` mapping the 'image' input to softmax class probabilities;
        the final softmax layer is explicitly created with dtype float32.
    """
    input_image = tf.keras.Input(input_shape, name='image')

    features = input_image
    for stage in range(1, conv_count + 1):
        features = ly.Conv2D(
            first_filter * stage,
            kernel_size,
            (strides, strides),
            padding='same',
            name=f'conv{stage}',
            activation=tf.nn.relu,
        )(features)
        features = ly.BatchNormalization()(features)
        if maxbool:
            features = ly.MaxPool2D()(features)

    pooled = ly.GlobalAveragePooling2D(name='GAP')(features)
    logits = ly.Dense(degree_num, name='dense_logits')(pooled)
    probabilities = ly.Activation('softmax', dtype='float32', name='predictions')(logits)
    return tf.keras.Model(inputs=input_image, outputs=probabilities)

In [21]:
# Make 'mixed_float16' the global Keras dtype policy before the model is built.
mixed_precision = tf.keras.mixed_precision.experimental
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_policy(policy)

In [22]:
# Four stride-2 conv stages starting at 8 filters, one output per orientation class.
m = model_simpleConv(
    conv_count=4,
    first_filter=8,
    kernel_size=3,
    strides=2,
    input_shape=target_shape,
    degree_num=DEGREE_NUM,
)
m.summary()

Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
image (InputLayer)           [(None, 125, 125, 3)]     0         
_________________________________________________________________
conv1 (Conv2D)               (None, 63, 63, 8)         224       
_________________________________________________________________
batch_normalization (BatchNo (None, 63, 63, 8)         32        
_________________________________________________________________
conv2 (Conv2D)               (None, 32, 32, 16)        1168      
_________________________________________________________________
batch_normalization_1 (Batch (None, 32, 32, 16)        64        
_________________________________________________________________
conv3 (Conv2D)               (None, 16, 16, 24)        3480      
_________________________________________________________________
batch_normalization_2 (Batch (None, 16, 16, 24)        96    

In [23]:
# Labels are integer class indices, hence the sparse categorical loss.
m.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['acc'],
)

In [None]:
# Observed with epochs=10: the kernel restarted during epoch 8 of 10.
# Both datasets repeat endlessly, so the epoch length is set by the step counts.
m.fit(
    train_ds,
    validation_data=valid_ds,
    epochs=10,
    steps_per_epoch=train_step,
    validation_steps=valid_step,
)

Train for 830 steps, validate for 207 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10