In [1]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array, array_to_img
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers as L
from tensorflow.keras.models import Sequential

In [2]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

In [40]:
def change_size(image, dim=75):
    """Resize one image array to a square, 3-channel float array.

    The array is turned into a PIL image without rescaling its values,
    resized to ``dim`` x ``dim`` pixels, converted to RGB mode and turned
    back into an array.

    Args:
        image: image array accepted by ``array_to_img`` (the notebook passes
            arrays reshaped to ``(28, 28, 1)``).
        dim: side length, in pixels, of the resized square image.

    Returns:
        The resized RGB image as a ``np.float64`` array.
    """
    target_size = (dim, dim)
    rgb_image = array_to_img(image, scale=False).resize(target_size).convert(mode='RGB')
    return img_to_array(rgb_image).astype(np.float64)

In [4]:
def get_train_test_data(filename: str):
    """Read a headerless CSV and split it into train/test features and labels.

    Every row must have 785 columns: column 0 is used as the label and the
    remaining 784 columns as the features.

    Args:
        filename: path of the CSV file to read.

    Returns:
        Tuple ``(x_train, y_train, x_test, y_test)`` of NumPy arrays.

    Raises:
        AssertionError: if the file does not have exactly 785 columns.
    """
    table = pd.read_csv(filename, header=None).values
    assert table.shape[1] == 785

    # Fixed random_state keeps the split identical from run to run.
    train_rows, test_rows = train_test_split(table, random_state=42)

    x_train, y_train = train_rows[:, 1:], train_rows[:, 0]
    x_test, y_test = test_rows[:, 1:], test_rows[:, 0]
    return x_train, y_train, x_test, y_test

In [5]:
def add_model_top_and_compile(model, learning_rate):
  """Append the classifier head to ``model`` and compile it.

  The head is Flatten -> Dense(256, relu) -> Dense(26, softmax). The model is
  compiled with Adam, categorical cross-entropy loss and an accuracy metric.

  Args:
    model: model exposing ``add`` and ``compile`` (the notebook passes a
      ``Sequential`` that already holds a convolutional backbone).
    learning_rate: learning rate handed to the Adam optimizer.

  Returns:
    The same ``model`` object, with the head added and compiled.
  """
  head_layers = (
      L.Flatten(),
      L.Dense(256, activation='relu'),
      L.Dense(26, activation='softmax'),
  )
  for head_layer in head_layers:
    model.add(head_layer)

  adam = keras.optimizers.Adam(learning_rate=learning_rate)
  model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
  return model

In [6]:
def get_resnet50_emnist_model(learning_rate=0.0001):
  """Build a compiled ResNet50-based classifier with a partly frozen backbone.

  An ImageNet-pretrained ResNet50 (no top, 75x75x3 input) is used as the
  backbone. Every backbone layer that precedes 'conv5_block1_0_conv' in
  ``backbone.layers`` is frozen; that layer and the ones after it stay
  trainable. The classifier head is added by ``add_model_top_and_compile``.

  Args:
    learning_rate: learning rate for the Adam optimizer.

  Returns:
    The compiled ``Sequential`` model.
  """
  backbone = tf.keras.applications.resnet50.ResNet50(input_shape = (75, 75, 3),
                                  include_top = False,
                                  weights = 'imagenet')

  # Freeze BEFORE compiling: Keras documents that changes to `trainable`
  # made after compile() need another compile() call to be taken into account.
  for layer in backbone.layers:
    if layer.name == 'conv5_block1_0_conv':
        break
    layer.trainable = False

  model = Sequential()
  model.add(backbone)
  return add_model_top_and_compile(model, learning_rate)

In [7]:
def get_mobilenet_v2_emnist_model(learning_rate=0.001, input_size=75):
  """Build a compiled MobileNetV2-based classifier with a partly frozen backbone.

  An ImageNet-pretrained MobileNetV2 (no top) is used as the backbone. Every
  backbone layer that precedes 'block_16_expand' is frozen; that layer and
  the ones after it stay trainable. The classifier head is added by
  ``add_model_top_and_compile``.

  Args:
    learning_rate: learning rate for the Adam optimizer.
    input_size: side length of the square RGB input. Defaults to 75.
      NOTE(review): the notebook's hand-built MobileNetV2 cells use 96x96
      inputs - confirm whether 75 is the intended default here.

  Returns:
    The compiled ``Sequential`` model.
  """
  backbone = tf.keras.applications.mobilenet_v2.MobileNetV2(input_shape = (input_size, input_size, 3),
                                  include_top = False,
                                  weights = 'imagenet')

  # The boundary must be a MobileNetV2 layer name (block_N_* scheme). A
  # ResNet-style name never matches, which would freeze the whole backbone.
  # Freezing also happens BEFORE compiling: Keras documents that changes to
  # `trainable` made after compile() need another compile() call.
  for layer in backbone.layers:
    if layer.name == 'block_16_expand':
        break
    layer.trainable = False

  model = Sequential()
  model.add(backbone)
  return add_model_top_and_compile(model, learning_rate)

In [8]:
# Load the EMNIST letters CSV and split it into train/test features and labels.
x_train, y_train, x_test, y_test = get_train_test_data("emnist-letters-train.csv")

In [9]:
# Use only the first 30000 training rows. Each row becomes a 28x28
# single-channel image, which change_size resizes with its default size.
train_images = np.array(x_train[:30000]).reshape(-1, 28, 28, 1)
X_train = np.array([change_size(img) for img in train_images])
X_train.shape

(30000, 75, 75, 3)

In [10]:
# Same preparation for the whole test split: 28x28 single-channel images
# resized by change_size with its default size.
test_images = np.array(x_test).reshape(-1, 28, 28, 1)
X_test = np.array([change_size(img) for img in test_images])
X_test.shape

(22200, 75, 75, 3)

In [11]:
# Labels run 1..26; shift them to 0..25 and keep the training labels aligned
# with the 30000 rows used for X_train.
# NOTE: this cell rebinds y_train / y_test, so running it a second time
# shifts the labels again - run it exactly once per kernel session.
y_train = y_train[:30000] - 1
y_test = y_test - 1

In [12]:
# One-hot encode the labels. num_classes is pinned to 26 (the width of the
# softmax output layer) so both matrices always have 26 columns, even if the
# highest class index happens to be missing from one of the label arrays.
Y_train = tf.keras.utils.to_categorical(y_train, num_classes=26)
Y_test = tf.keras.utils.to_categorical(y_test, num_classes=26)

In [13]:
# Candidate Adam learning rates compared in the ResNet50 sweep below.
LEARNING_RATES=[0.001, 0.0001, 0.00001]

In [14]:
# Train a fresh ResNet50-based model for each candidate learning rate and
# report its accuracy on the test set.
# NOTE(review): the learning rate is being picked from test-set accuracy; a
# separate validation split would keep the test score unbiased.
for lr in LEARNING_RATES:
    # Release the Keras state left behind by the previous iteration's model so
    # memory does not keep growing while several large models are built in a loop.
    keras.backend.clear_session()
    model = get_resnet50_emnist_model(lr)
    model.fit(X_train, Y_train,
          batch_size=32, epochs=10,
          verbose=1)
    score = model.evaluate(X_test, Y_test)  # [loss, accuracy]
    print('Learning rate {} Test accuracy: {}'.format(lr, score[1]))

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Learning rate 0.001 Test accuracy: 0.916531503200531
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Learning rate 0.0001 Test accuracy: 0.9359459280967712
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Learning rate 1e-05 Test accuracy: 0.9223423600196838


# Выделим результаты
Learning rate 0.001 Test accuracy: 0.916531503200531

Learning rate 0.0001 Test accuracy: 0.9359459280967712

Learning rate 1e-05 Test accuracy: 0.9223423600196838

Похоже, что learning rate 0.0001 — лучший из трёх проверенных вариантов (точность на тесте ≈ 0.936).

In [None]:
# Build the ResNet50-based model with the function's default learning rate (0.0001).
model = get_resnet50_emnist_model()

In [None]:
# Train for 10 epochs in mini-batches of 32, printing per-epoch progress.
model.fit(X_train, Y_train,
          batch_size=32, epochs=10,
          verbose=1)

In [None]:
# Evaluate on the test set; `score` is [loss, accuracy] as configured at compile time.
score = model.evaluate(X_test, Y_test)
test_loss, test_accuracy = score[0], score[1]
print('Test score:', test_loss)
print('Test accuracy:', test_accuracy)

In [None]:
# Build the ResNet50-based model again, this time with a learning rate of 0.00001.
model = get_resnet50_emnist_model(0.00001)

In [None]:
# Train for 10 epochs in mini-batches of 32, printing per-epoch progress.
model.fit(X_train, Y_train,
          batch_size=32, epochs=10,
          verbose=1)

In [None]:
# Evaluate on the test set; `score` is [loss, accuracy] as configured at compile time.
score = model.evaluate(X_test, Y_test)
test_loss, test_accuracy = score[0], score[1]
print('Test score:', test_loss)
print('Test accuracy:', test_accuracy)

# Попробуем MobileNet V2

In [15]:
# Start from an empty Sequential model; the MobileNetV2 variant is assembled
# step by step in the following cells.
model = Sequential()

In [16]:
# Add an ImageNet-pretrained MobileNetV2 backbone (without its classification
# top) that takes 96x96 RGB inputs.
model.add(tf.keras.applications.mobilenet_v2.MobileNetV2(input_shape = (96, 96, 3), 
                                  include_top = False, 
                                  weights = 'imagenet'))

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_96_no_top.h5


In [17]:
# Classifier head: flatten the backbone output, one hidden layer, 26-way softmax.
model.add(L.Flatten()) # flattens the (3, 3, 1280) backbone output into 11520 features
model.add(L.Dense(256, activation='relu'))
model.add(L.Dense(26, activation='softmax'))

# Adam with learning rate 0.0001; one-hot labels, hence categorical cross-entropy.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])


In [18]:
# Print layer shapes and parameter counts before any backbone layer is frozen.
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_96 (Functio (None, 3, 3, 1280)        2257984   
_________________________________________________________________
flatten_3 (Flatten)          (None, 11520)             0         
_________________________________________________________________
dense_6 (Dense)              (None, 256)               2949376   
_________________________________________________________________
dense_7 (Dense)              (None, 26)                6682      
Total params: 5,214,042
Trainable params: 5,179,930
Non-trainable params: 34,112
_________________________________________________________________


In [19]:
# List the backbone's layer names in order, to pick the layer at which
# freezing should stop.
backbone = model.layers[0]
for layer in backbone.layers:
    print(layer.name)

input_4
Conv1
bn_Conv1
Conv1_relu
expanded_conv_depthwise
expanded_conv_depthwise_BN
expanded_conv_depthwise_relu
expanded_conv_project
expanded_conv_project_BN
block_1_expand
block_1_expand_BN
block_1_expand_relu
block_1_pad
block_1_depthwise
block_1_depthwise_BN
block_1_depthwise_relu
block_1_project
block_1_project_BN
block_2_expand
block_2_expand_BN
block_2_expand_relu
block_2_depthwise
block_2_depthwise_BN
block_2_depthwise_relu
block_2_project
block_2_project_BN
block_2_add
block_3_expand
block_3_expand_BN
block_3_expand_relu
block_3_pad
block_3_depthwise
block_3_depthwise_BN
block_3_depthwise_relu
block_3_project
block_3_project_BN
block_4_expand
block_4_expand_BN
block_4_expand_relu
block_4_depthwise
block_4_depthwise_BN
block_4_depthwise_relu
block_4_project
block_4_project_BN
block_4_add
block_5_expand
block_5_expand_BN
block_5_expand_relu
block_5_depthwise
block_5_depthwise_BN
block_5_depthwise_relu
block_5_project
block_5_project_BN
block_5_add
block_6_expand
block_6_expand

## Заблокируем обучение части сети

In [21]:
# Freeze every backbone layer that comes before 'block_16_expand'; that layer
# and everything after it stay trainable.
for layer in model.layers[0].layers:
    if layer.name == 'block_16_expand':
        break
    layer.trainable=False

# The model was compiled before the freeze. Keras documents that changes to
# `trainable` only take effect after compile() is called again, so recompile
# with the same settings.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

In [22]:
# Print the summary again to see how the trainable / non-trainable parameter
# counts changed after freezing.
model.summary()

Model: "sequential_3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_96 (Functio (None, 3, 3, 1280)        2257984   
_________________________________________________________________
flatten_3 (Flatten)          (None, 11520)             0         
_________________________________________________________________
dense_6 (Dense)              (None, 256)               2949376   
_________________________________________________________________
dense_7 (Dense)              (None, 26)                6682      
Total params: 5,214,042
Trainable params: 3,842,138
Non-trainable params: 1,371,904
_________________________________________________________________


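Количество тренируемых параметров упало с 5,179,930 до 3,842,138 — примерно на четверть (≈26 %). Видимо, слишком много параметров приходится на верхнюю часть сети (top): один только слой Dense(256) содержит 2,949,376 параметров.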

In [44]:
# Same architecture as the previous MobileNetV2 model, but with a smaller
# hidden layer (128 units instead of 256) to cut the size of the head.
backbone = tf.keras.applications.mobilenet_v2.MobileNetV2(input_shape = (96, 96, 3),
                                  include_top = False,
                                  weights = 'imagenet')

model = Sequential()
model.add(backbone)
model.add(L.Flatten())  # flattens the (3, 3, 1280) backbone output into 11520 features
model.add(L.Dense(128, activation='relu'))
model.add(L.Dense(26, activation='softmax'))

adam = keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])


In [34]:
# Print layer shapes and parameter counts for the smaller-head model before freezing.
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_96 (Functio (None, 3, 3, 1280)        2257984   
_________________________________________________________________
flatten_8 (Flatten)          (None, 11520)             0         
_________________________________________________________________
dense_16 (Dense)             (None, 128)               1474688   
_________________________________________________________________
dense_17 (Dense)             (None, 26)                3354      
Total params: 3,736,026
Trainable params: 3,701,914
Non-trainable params: 34,112
_________________________________________________________________


In [45]:
# Freeze every backbone layer that comes before 'block_16_expand'; that layer
# and everything after it stay trainable.
for layer in model.layers[0].layers:
    if layer.name == 'block_16_expand':
        break
    layer.trainable=False

# The model was compiled before the freeze. Keras documents that changes to
# `trainable` only take effect after compile() is called again, so recompile
# with the same settings before training.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

In [36]:
# Print the summary again to see how the trainable / non-trainable parameter
# counts changed after freezing.
model.summary()

Model: "sequential_6"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_96 (Functio (None, 3, 3, 1280)        2257984   
_________________________________________________________________
flatten_8 (Flatten)          (None, 11520)             0         
_________________________________________________________________
dense_16 (Dense)             (None, 128)               1474688   
_________________________________________________________________
dense_17 (Dense)             (None, 26)                3354      
Total params: 3,736,026
Trainable params: 2,364,122
Non-trainable params: 1,371,904
_________________________________________________________________


In [41]:
# Rebuild the training images at 96x96, the input size of the MobileNetV2
# model; still only the first 30000 rows.
train_images = np.array(x_train[:30000]).reshape(-1, 28, 28, 1)
X_train = np.array([change_size(img, 96) for img in train_images])
X_train.shape

(30000, 96, 96, 3)

In [42]:
# Rebuild the test images at 96x96 to match the MobileNetV2 input size.
test_images = np.array(x_test).reshape(-1, 28, 28, 1)
X_test = np.array([change_size(img, 96) for img in test_images])
X_test.shape

(22200, 96, 96, 3)

In [46]:
# Train the MobileNetV2-based model on the 96x96 images: 10 epochs,
# mini-batches of 32, per-epoch progress output.
model.fit(X_train, Y_train,
          batch_size=32, epochs=10,
          verbose=1)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f3da0929908>

In [None]:
# Evaluate on the test set; `score` is [loss, accuracy] as configured at compile time.
score = model.evaluate(X_test, Y_test)
test_loss, test_accuracy = score[0], score[1]
print('Test score:', test_loss)
print('Test accuracy:', test_accuracy)