# Imports

In [1]:
import tensorflow as tf

import numpy as np
import os
from pathlib import Path

from tqdm.notebook import tqdm

In [2]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

# Data

In [5]:
SCALE_SIZE = 224

In [6]:
DATA_DIR = Path('../input/beeVSwasp/')
BEE_DIR = Path('../input/beeVSwasp/bee')
WASP_DIR = Path('../input/beeVSwasp/wasp')

bee_files = sorted(list(BEE_DIR.rglob('*.jpg')))
bee_files = [str(x) for x in bee_files]

wasp_files = sorted(list(WASP_DIR.rglob('*.jpg')))
wasp_files = [str(x) for x in wasp_files]

In [7]:
BEE_SIZE = len(bee_files)
WASP_SIZE = len(wasp_files)
print(len(bee_files), len(wasp_files))

2469 2126


In [8]:
def preprocess_image(image):
    image = tf.image.decode_jpeg(image, channels=3)
    image = tf.image.resize(image, [SCALE_SIZE, SCALE_SIZE])
    image /= 255.0  # normalize to [0,1] range

    return image

def load_and_preprocess_image(path):
    image = tf.io.read_file(path)
    return preprocess_image(image)

1 == Bee

0 == Wasp

In [9]:
labels = [1 for _ in range(BEE_SIZE)] + [0 for _ in range(WASP_SIZE)]
files = bee_files + wasp_files

In [10]:
path_ds = tf.data.Dataset.from_tensor_slices(files)

In [11]:
image_ds = path_ds.map(load_and_preprocess_image, num_parallel_calls=AUTOTUNE)

In [12]:
label_ds = tf.data.Dataset.from_tensor_slices(tf.cast(labels, tf.int64))

In [13]:
image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))

In [14]:
BATCH_SIZE = 32
DATASET_SIZE = BEE_SIZE+WASP_SIZE
# Установка размера буфера перемешивания, равного набору данных, гарантирует
# полное перемешивание данных.
ds = image_label_ds.shuffle(buffer_size=DATASET_SIZE)
ds = ds.repeat()


In [15]:
TEST_SIZE = int(DATASET_SIZE * 0.3)
TRAIN_SIZE = DATASET_SIZE - TEST_SIZE


In [16]:
train_ds = ds.take(TRAIN_SIZE)
test_ds = ds.skip(TRAIN_SIZE)
# `prefetch` позволяет датасету извлекать пакеты в фоновом режиме, во время обучения модели.

train_ds = train_ds.batch(BATCH_SIZE)
train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)

test_ds = test_ds.batch(BATCH_SIZE)
test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

# Model

In [17]:
dropout_rate = 0.5

# def padding2d(inputs, padding):
#     h, w = padding 
#     return tf.pad(inputs, [[0, 0], [h, h], [w, w], [0, 0]], "CONSTANT")

def conv2d(inputs, filters, stride_size):
    out = tf.nn.conv2d(inputs, filters, strides=[1, stride_size, stride_size, 1], padding='SAME') 
    return tf.nn.relu(out) 

def maxpool(inputs, pool_size, stride_size):
    return tf.nn.max_pool2d(inputs , ksize=[1, pool_size, pool_size, 1] , padding='VALID' , strides=[1, stride_size, stride_size, 1])

def dense(inputs, weights):
    x = tf.nn.relu(tf.matmul(inputs, weights))
    return tf.nn.dropout(x, rate=dropout_rate)

In [18]:
initializer = tf.initializers.glorot_uniform()
def get_weight( shape , name ):
    return tf.Variable( initializer( shape ) , name=name , trainable=True , dtype=tf.float32 )

shapes = [
    [3, 3, 3, 64], 
    [3, 3, 64, 64], 
    
    [3, 3, 64, 128], 
    [3, 3, 128, 128],
    
    [3, 3, 128, 256], 
    [3, 3, 256, 256],
    [3, 3, 256, 256],
    
    [3, 3, 256, 512],
    [3, 3, 512, 512],
    [3, 3, 512, 512],
    
    [3, 3, 512, 512],
    [3, 3, 512, 512],
    [3, 3, 512, 512],
    
    [7*7*512, 4096],
    [4096, 4096],
    [4096, 2]
]

weights = []
for i in range(len(shapes)):
    weights.append(get_weight(shapes[i], 'weight{}'.format(i)))


In [19]:
def model(x) :
    x = tf.cast(x, dtype=tf.float32)
    c1 = conv2d(x, weights[0], stride_size=1) 
    c1 = conv2d(c1, weights[1], stride_size=1) 
    p1 = maxpool(c1, pool_size=2, stride_size=2)
    
    c2 = conv2d(p1, weights[2], stride_size=1) 
    c2 = conv2d(c2, weights[3], stride_size=1) 
    p2 = maxpool(c2, pool_size=2, stride_size=2)
    
    c3 = conv2d(p2, weights[4], stride_size=1) 
    c3 = conv2d(c3, weights[5], stride_size=1) 
    c3 = conv2d(c3, weights[6], stride_size=1) 
    p3 = maxpool(c3, pool_size=2, stride_size=2)
    
    c4 = conv2d(p3, weights[7], stride_size=1) 
    c4 = conv2d(c4, weights[8], stride_size=1) 
    c4 = conv2d(c4, weights[9], stride_size=1) 
    p4 = maxpool(c4, pool_size=2, stride_size=2)
    
    c5 = conv2d(p4, weights[10], stride_size=1) 
    c5 = conv2d(c5, weights[11], stride_size=1) 
    c5 = conv2d(c5, weights[12], stride_size=1) 
    p5 = maxpool(c5, pool_size=2, stride_size=2)

    flatten = tf.reshape(p5, shape=(tf.shape(p5)[0], -1))

    d1 = dense(flatten, weights[13])
    d2 = dense(d1, weights[14])
    
    
    logits = tf.matmul(d2, weights[15])

    return tf.nn.softmax(logits)

In [20]:
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)

In [22]:
learning_rate = 1e-2
optimizer = tf.optimizers.Adam(learning_rate)

def train_step(model, inputs, outputs):
    with tf.GradientTape() as tape:
        current_loss = loss_fn(model(inputs), outputs)
    grads = tape.gradient(current_loss, weights)
    optimizer.apply_gradients(zip(grads, weights))
    # print(tf.reduce_mean(current_loss))
    return current_loss
def test_step(model, inputs, outputs):
    current_loss = loss_fn(model(inputs), outputs)
    return current_loss
    
num_epochs = 11
with tf.device('/gpu:0'):
    for e in tqdm(range(num_epochs)):
        loss_ep = 0.
        count = 0
        for features in train_ds:
            image, label = features
            loss_train = train_step(model, image, tf.one_hot(label, depth=2))
            loss_ep += loss_train
            count += 1
        
        # for features in test_ds:
        #     image, label = features
        #     loss_test = test_step(model, image, tf.one_hot(label, depth=2))
        #     loss_ep += loss_test
        #     count += 1
        print('Ep: {}, Loss:{:.2f}'.format(e, loss_ep/count))

HBox(children=(FloatProgress(value=0.0, max=11.0), HTML(value='')))

Ep: 0, Loss:0.77
Ep: 1, Loss:0.77
Ep: 2, Loss:0.77
Ep: 3, Loss:0.77
Ep: 4, Loss:0.77
Ep: 5, Loss:0.77
Ep: 6, Loss:0.77
Ep: 7, Loss:0.77
Ep: 8, Loss:0.77
Ep: 9, Loss:0.77
Ep: 10, Loss:0.77



In [23]:
x, y = iter(train_ds).next()

In [24]:
model(x)

<tf.Tensor: shape=(32, 2), dtype=float32, numpy=
array([[1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.],
       [1., 0.]], dtype=float32)>

In [25]:
y

<tf.Tensor: shape=(32,), dtype=int64, numpy=
array([0, 1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0,
       0, 0, 1, 1, 1, 0, 1, 1, 1, 0])>

Результат получился константным, модель предсказывает только второй класс. К сожалению я не успел за данное время нормально обучить модель. Пару предложений по отлаживанию модели:
+ Просмотреть классы датасетов, убедиться в том, что подаются лейблы, соответствующие изображениям
+ Более обдуманно подобрать learning rate
+ Проследить действительно ли делается шаг при градиентном спуске