# Лабораторная работа №4. Реализация приложения по распознаванию номеров домов

In [2]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist
import h5py
import numpy as np
from scipy.io import loadmat
import gc

2025-04-30 22:34:18.693164: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-04-30 22:34:18.693634: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-30 22:34:18.695775: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-04-30 22:34:18.701331: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1746041658.711091     249 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1746041658.71

Загрузим данные `MNIST` (они уже разделены на тренировочный и тестовый наборы), нормализуем их и добавим размерность канала

Загрузка данных

In [3]:
# Fetch the MNIST images and labels, already split into train and test parts.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Cast to float32, divide by 255 and append a trailing channel axis so each
# image array gets one extra dimension of size 1 at the end.
x_train = (x_train.astype(np.float32) / 255.0)[..., np.newaxis]
x_test = (x_test.astype(np.float32) / 255.0)[..., np.newaxis]

Одной из самых популярных современных архитектур моделей для распознавания изображений является `ResNet`. Благодаря остаточным (пропускающим) соединениям эта сеть передаёт информацию между слоями, сохраняя полезные данные.

In [4]:
def residual_block(x, filters, downsample=False):
    """Stack a two-convolution residual block on top of tensor ``x``.

    Main branch: Conv 3x3 -> BatchNorm -> ReLU -> Conv 3x3 -> BatchNorm.
    Skip branch: ``x`` unchanged, or a 1x1 convolution followed by batch
    normalisation when ``downsample`` is set or when the last dimension of the
    main branch differs from the last dimension of ``x``. The two branches are
    summed and passed through a ReLU.

    Args:
        x: Input tensor; only ``x.shape[-1]`` is inspected directly.
        filters: Filter count used by every convolution in the block.
        downsample: When true, the first 3x3 convolution and the 1x1
            projection use stride 2 instead of 1.

    Returns:
        The output tensor of the block.
    """
    if downsample:
        stride = 2
    else:
        stride = 1
    skip = x

    # Main branch: two 3x3 convolutions, each followed by batch normalisation.
    branch = layers.Conv2D(filters, kernel_size=(3, 3), strides=stride, padding="same")(x)
    branch = layers.BatchNormalization()(branch)
    branch = layers.Activation("relu")(branch)

    branch = layers.Conv2D(filters, kernel_size=(3, 3), padding="same")(branch)
    branch = layers.BatchNormalization()(branch)

    # The identity skip only works when both branches have matching shapes;
    # otherwise project the input with a strided 1x1 convolution.
    if downsample or branch.shape[-1] != skip.shape[-1]:
        skip = layers.Conv2D(filters, kernel_size=(1, 1), strides=stride, padding="same")(skip)
        skip = layers.BatchNormalization()(skip)

    merged = layers.Add()([branch, skip])
    return layers.Activation("relu")(merged)


Упростим остаточный блок (блок с пропускающим соединением) для работы на мобильных устройствах

In [5]:
def residual_block_light(x, filters):
    """Stack a lightweight residual block (no batch normalisation) on ``x``.

    The skip branch is always a 1x1 convolution of ``x``. The main branch is a
    3x3 convolution with ReLU followed by a 3x3 convolution without
    activation. Both branches are summed and passed through a ReLU.

    Args:
        x: Input tensor.
        filters: Filter count used by all three convolutions.

    Returns:
        The output tensor of the block.
    """
    # The skip path is projected unconditionally, so its output always has
    # ``filters`` channels regardless of the input.
    skip = layers.Conv2D(filters, kernel_size=(1, 1), padding="same")(x)

    hidden = layers.Conv2D(filters, kernel_size=(3, 3), padding="same", activation="relu")(x)
    branch = layers.Conv2D(filters, kernel_size=(3, 3), padding="same")(hidden)

    merged = layers.Add()([branch, skip])
    return layers.Activation("relu")(merged)

Построим облегченную версию `ResNet`, которая подойдет для работы с мобильными устройствами

In [6]:
# Input: 28x28 single-channel (grayscale) images.
inputs = keras.Input(shape=(28, 28, 1))

# Stage 1: stem convolution, lightweight residual block, 2x2 max pooling.
x = layers.Conv2D(filters=16, kernel_size=(3, 3), padding="same", activation="relu")(inputs)
x = residual_block_light(x, filters=16)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)

# Stage 2: full residual block with 32 filters, 2x2 max pooling.
x = residual_block(x, filters=32)
x = layers.MaxPooling2D(pool_size=(2, 2))(x)

# Classifier head: global average pooling and two dense layers.
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(units=32, activation="relu")(x)
x = layers.Dense(units=10, activation="softmax")(x)  # 10 digit classes

model = keras.Model(inputs=inputs, outputs=x)

# Sparse categorical cross-entropy: the labels are passed as integer class
# ids rather than one-hot vectors.
model.compile(
    optimizer="adam",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
model.summary()

2025-04-30 22:36:02.145447: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Обучим и протестируем модель

In [7]:
# Train for 10 epochs; note that the test split doubles as validation data
# here, so the per-epoch val_* metrics are computed on the same samples as
# the final evaluation below.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(x_test, y_test),
)

# Final loss / accuracy on the test split.
test_loss, test_acc = model.evaluate(x=x_test, y=y_test)
print(f'Точность модели на тестовых данных: {test_acc:.4f}')

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 7ms/step - accuracy: 0.6288 - loss: 1.0395 - val_accuracy: 0.9014 - val_loss: 0.3292
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 7ms/step - accuracy: 0.9404 - loss: 0.1950 - val_accuracy: 0.9395 - val_loss: 0.1934
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 9ms/step - accuracy: 0.9631 - loss: 0.1253 - val_accuracy: 0.9748 - val_loss: 0.0836
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 9ms/step - accuracy: 0.9714 - loss: 0.0960 - val_accuracy: 0.9759 - val_loss: 0.0853
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 9ms/step - accuracy: 0.9769 - loss: 0.0769 - val_accuracy: 0.9763 - val_loss: 0.0791
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 9ms/step - accuracy: 0.9799 - loss: 0.0647 - val_accuracy: 0.9843 - val_loss: 0.0500
Epoch 7/10

Очистим память

In [8]:
# Unbind the four dataset arrays in one statement, then run the garbage
# collector so their memory can be reclaimed before the next dataset is loaded.
del x_train, y_train, x_test, y_test

gc.collect()

1040

**Задание 2.** После уточнения модели на синтетических данных попробуйте обучить ее на реальных данных (набор Google Street View). Что изменилось в модели?

Загрузим тренировочный и тестовый наборы данных `SVHN` (Street View House Numbers)

In [9]:
def _load_svhn_split(mat_path):
    """Read one SVHN ``.mat`` file and return ``(images, labels)``.

    Images come from the ``X`` entry: axis 3 is moved to the front, the array
    is cast to float32 and divided by 255. Labels come from the ``y`` entry,
    cast to uint8 and flattened to 1-D, with every label equal to 10
    rewritten to 0.
    """
    raw = loadmat(mat_path)
    images = raw['X'].transpose(3, 0, 1, 2).astype(np.float32) / 255.0
    labels = raw['y'].astype(np.uint8).flatten()
    is_ten = labels == 10
    labels[is_ten] = 0
    # ``raw`` goes out of scope on return, dropping the loaded dict.
    return images, labels


# Load the training split.
x_train, y_train = _load_svhn_split('train_32x32.mat')
gc.collect()  # force a collection before reading the next file

# Load the test split.
x_test, y_test = _load_svhn_split('test_32x32.mat')
gc.collect()

0

Преобразуем оригинальные изображения (32x32, цветные) в черно-белые и уменьшим их размер до 28x28

In [10]:
def _to_gray_28x28(images):
    """Convert RGB images to a single channel and resize them to 28x28.

    Returns a NumPy array; the intermediate grayscale result is also
    materialised as a NumPy array before resizing.
    """
    gray = tf.image.rgb_to_grayscale(images).numpy()
    return tf.image.resize(gray, size=[28, 28]).numpy()


# Bring both splits to the 28x28x1 layout the model's input layer expects.
x_train = _to_gray_28x28(x_train)
x_test = _to_gray_28x28(x_test)

Дообучим модель на новых данных

In [11]:
# Continue training the already-fitted model for 10 more epochs on the
# current x_train / y_train; the test split is again used as validation data.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=10,
    validation_data=(x_test, y_test),
)

# Final loss / accuracy on the test split.
test_loss, test_acc = model.evaluate(x=x_test, y=y_test)
print(f'Точность модели на тестовых данных: {test_acc:.4f}')

Epoch 1/10
[1m2290/2290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 7ms/step - accuracy: 0.3401 - loss: 1.9277 - val_accuracy: 0.7557 - val_loss: 0.8035
Epoch 2/10
[1m2290/2290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 10ms/step - accuracy: 0.7564 - loss: 0.7781 - val_accuracy: 0.8392 - val_loss: 0.5352
Epoch 3/10
[1m2290/2290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 9ms/step - accuracy: 0.8300 - loss: 0.5491 - val_accuracy: 0.8718 - val_loss: 0.4325
Epoch 4/10
[1m2290/2290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 9ms/step - accuracy: 0.8577 - loss: 0.4612 - val_accuracy: 0.8820 - val_loss: 0.4096
Epoch 5/10
[1m2290/2290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 9ms/step - accuracy: 0.8761 - loss: 0.4011 - val_accuracy: 0.8941 - val_loss: 0.3574
Epoch 6/10
[1m2290/2290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 9ms/step - accuracy: 0.8917 - loss: 0.3580 - val_accuracy: 0.9025 - val_loss: 0.3343
Epoch 7/1

Сохраним модель

In [13]:
# Export the trained model as an inference artifact under saved_model/.
model.export(filepath="saved_model/resnet_model")

INFO:tensorflow:Assets written to: saved_model/resnet_model/assets


INFO:tensorflow:Assets written to: saved_model/resnet_model/assets


Saved artifact at 'saved_model/resnet_model'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  129075578579152: TensorSpec(shape=(), dtype=tf.resource, name=None)
  129075552935312: TensorSpec(shape=(), dtype=tf.resource, name=None)
  129075552938384: TensorSpec(shape=(), dtype=tf.resource, name=None)
  129075552937040: TensorSpec(shape=(), dtype=tf.resource, name=None)
  129075552937232: TensorSpec(shape=(), dtype=tf.resource, name=None)
  129075552933008: TensorSpec(shape=(), dtype=tf.resource, name=None)
  129075552938192: TensorSpec(shape=(), dtype=tf.resource, name=None)
  129075552938768: TensorSpec(shape=(), dtype=tf.resource, name=None)
  129075552940496: TensorSpec(shape=(), dtype=tf.resource, name=None)
  129075552942032: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1290755