# Лабораторная работа №4. Реализация приложения по распознаванию номеров домов

In [1]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.datasets import mnist
import h5py
import numpy as np
from scipy.io import loadmat
import gc

2025-05-18 01:11:29.402792: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-18 01:11:29.410862: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-18 01:11:29.464386: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2025-05-18 01:11:29.519352: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1747519889.567871    2484 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1747519889.58

Загрузим данные `MNIST` (они уже разделены на тренировочный и тестовый наборы) и нормализуем их

Загрузка данных

In [2]:
# Fetch the MNIST digits; Keras returns them already split into train/test.
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Convert pixels to float32 scaled by 1/255 and append a trailing channel axis
# so each image matches the (28, 28, 1) input expected by the model below.
x_train = (x_train.astype("float32") / 255.0)[..., np.newaxis]
x_test = (x_test.astype("float32") / 255.0)[..., np.newaxis]

Одной из самых популярных современных архитектур для распознавания изображений является `ResNet`. Благодаря обходным (skip) соединениям эта сеть передаёт информацию между слоями напрямую, сохраняя полезные данные.

In [3]:
def residual_block(x, filters, downsample=False):
    """Build a two-convolution residual block with batch normalization.

    Args:
        x: Input Keras tensor.
        filters: Number of output channels for both 3x3 convolutions.
        downsample: When true, the first convolution (and the shortcut
            projection) use stride 2 instead of 1.

    Returns:
        The ReLU-activated sum of the main path and the shortcut.
    """
    stride = 2 if downsample else 1
    skip = x

    # Main path: conv -> BN -> ReLU -> conv -> BN.
    main = layers.Conv2D(filters, (3, 3), strides=stride, padding="same")(x)
    main = layers.BatchNormalization()(main)
    main = layers.Activation("relu")(main)
    main = layers.Conv2D(filters, (3, 3), padding="same")(main)
    main = layers.BatchNormalization()(main)

    # The identity shortcut only works when spatial size and channel count
    # are unchanged; otherwise project it with a 1x1 convolution + BN.
    if downsample or main.shape[-1] != skip.shape[-1]:
        skip = layers.Conv2D(filters, (1, 1), strides=stride, padding="same")(skip)
        skip = layers.BatchNormalization()(skip)

    merged = layers.Add()([main, skip])
    merged = layers.Activation("relu")(merged)
    return merged


Упростим остаточный блок (уберём пакетную нормализацию и понижение разрешения), чтобы модель подходила для работы на мобильных устройствах

In [4]:
def residual_block_light(x, filters):
    """Build a lightweight residual block without batch normalization.

    The shortcut is always projected through a 1x1 convolution, and the main
    path is two 3x3 convolutions (the first one ReLU-activated).

    Args:
        x: Input Keras tensor.
        filters: Number of output channels for every convolution in the block.

    Returns:
        The ReLU-activated sum of the main path and the projected shortcut.
    """
    skip = layers.Conv2D(filters, (1, 1), padding="same")(x)

    main = layers.Conv2D(filters, (3, 3), padding="same", activation="relu")(x)
    main = layers.Conv2D(filters, (3, 3), padding="same")(main)

    merged = layers.Add()([main, skip])
    merged = layers.Activation("relu")(merged)
    return merged

Построим облегченную версию `ResNet`, которая подойдет для работы с мобильными устройствами

In [5]:
# Input: 28x28 single-channel (grayscale) images.
inputs = keras.Input(shape=(28, 28, 1))

# Stem convolution, then the batch-norm-free residual block at 16 channels.
x = layers.Conv2D(filters=16, kernel_size=3, padding="same", activation="relu")(inputs)
x = residual_block_light(x, filters=16)
x = layers.MaxPooling2D(pool_size=2)(x)

# Full residual block at 32 channels; the channel change triggers its
# projection shortcut.
x = residual_block(x, filters=32)
x = layers.MaxPooling2D(pool_size=2)(x)

# Classification head: global pooling -> hidden dense -> 10-way softmax
# (one output per digit class).
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(units=32, activation="relu")(x)
x = layers.Dense(units=10, activation="softmax")(x)

model = keras.Model(inputs=inputs, outputs=x)

# Labels are plain integer class ids, hence the sparse cross-entropy loss.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
model.summary()

2025-05-18 01:11:34.257389: E external/local_xla/xla/stream_executor/cuda/cuda_platform.cc:51] failed call to cuInit: INTERNAL: CUDA error: Failed call to cuInit: UNKNOWN ERROR (303)


Обучим и протестируем модель

In [6]:
# Train on MNIST. NOTE: the test split doubles as validation data here, so
# the per-epoch val_* metrics are computed on the same samples as the final
# evaluation below.
model.fit(
    x=x_train,
    y=y_train,
    batch_size=32,
    epochs=50,
    validation_data=(x_test, y_test),
)

# Final loss/accuracy on the test split.
test_loss, test_acc = model.evaluate(x=x_test, y=y_test)
print(f'Точность модели на тестовых данных: {test_acc:.4f}')

Epoch 1/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 10ms/step - accuracy: 0.7727 - loss: 0.8010 - val_accuracy: 0.9451 - val_loss: 0.1728
Epoch 2/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m18s[0m 10ms/step - accuracy: 0.9749 - loss: 0.0861 - val_accuracy: 0.8060 - val_loss: 0.5694
Epoch 3/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 10ms/step - accuracy: 0.9819 - loss: 0.0591 - val_accuracy: 0.9810 - val_loss: 0.0603
Epoch 4/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 11ms/step - accuracy: 0.9845 - loss: 0.0492 - val_accuracy: 0.9219 - val_loss: 0.2509
Epoch 5/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 11ms/step - accuracy: 0.9870 - loss: 0.0424 - val_accuracy: 0.9689 - val_loss: 0.0953
Epoch 6/50
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 11ms/step - accuracy: 0.9890 - loss: 0.0360 - val_accuracy: 0.9755 - val_loss: 0.0802
Epoc

Очистим память

In [7]:
# Drop every reference to the MNIST arrays so their memory can be reclaimed
# before the next dataset is loaded.
del x_train, y_train, x_test, y_test

# Force a collection pass; the return value is the number of objects found.
gc.collect()

1038

**Задание 2.** После уточнения модели на синтетических данных попробуйте обучить ее на реальных данных (набор Google Street View). Что изменилось в модели?

Загрузим обучающую и тестовую выборки набора данных `SVHN` (Street View House Numbers)

In [8]:
# Загружаем train
def load_svhn_split(mat_path):
    """Load one SVHN split from a MATLAB ``.mat`` file.

    The file is expected to contain an ``X`` image array whose last axis
    indexes samples and a ``y`` label array.

    Args:
        mat_path: Path to the ``.mat`` file.

    Returns:
        A ``(images, labels)`` tuple: ``images`` is float32, scaled by 1/255,
        with the sample axis moved to the front; ``labels`` is a flat uint8
        array in which label 10 has been remapped to 0.
    """
    mat = loadmat(mat_path)

    # Move the sample axis (last in the file) to the front.
    images = np.transpose(mat['X'], (3, 0, 1, 2)).astype(np.float32)
    # Scale in place to avoid allocating a second full-size float32 copy.
    images /= 255.0

    labels = mat['y'].astype(np.uint8).flatten()
    # Label 10 is remapped to class 0 so labels fit the 10-way softmax
    # (per the SVHN description, digit 0 is stored as 10 — verify against
    # the dataset documentation).
    labels[labels == 10] = 0
    return images, labels


# Load the training split; the raw .mat dict goes out of scope inside the
# helper, so collect it before loading the next file.
x_train, y_train = load_svhn_split('train_32x32.mat')
gc.collect()

# Load the test split.
x_test, y_test = load_svhn_split('test_32x32.mat')
gc.collect()

0

Преобразуем оригинальные изображения (32x32, цветные) в черно-белые и уменьшим их размер до 28x28

In [9]:
def _to_gray_28x28(images):
    """Convert a batch of images to single-channel 28x28 NumPy arrays.

    RGB input (last axis of size 3) is converted to grayscale first; input
    that is already single-channel skips that step, so re-running this cell
    on already-processed arrays does not raise.

    Args:
        images: Batch of images with the channel axis last.

    Returns:
        NumPy array of the resized images with spatial size 28x28.
    """
    if images.shape[-1] == 3:
        images = tf.image.rgb_to_grayscale(images)
    return tf.image.resize(images, [28, 28]).numpy()


x_train = _to_gray_28x28(x_train)
x_test = _to_gray_28x28(x_test)

Дообучим модель на новых данных

In [10]:
# Continue training the already-fitted model on the SVHN arrays. As in the
# MNIST run, the test split is also passed as validation data.
model.fit(
    x_train,
    y_train,
    validation_data=(x_test, y_test),
    epochs=50,
    batch_size=32,
)

# Report the final metrics on the SVHN test split.
test_loss, test_acc = model.evaluate(x=x_test, y=y_test)
print(f'Точность модели на тестовых данных: {test_acc:.4f}')

Epoch 1/50
[1m2290/2290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 11ms/step - accuracy: 0.4490 - loss: 1.9078 - val_accuracy: 0.7011 - val_loss: 0.9297
Epoch 2/50
[1m2290/2290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 12ms/step - accuracy: 0.7485 - loss: 0.7993 - val_accuracy: 0.7266 - val_loss: 0.8625
Epoch 3/50
[1m2290/2290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 12ms/step - accuracy: 0.7991 - loss: 0.6328 - val_accuracy: 0.7955 - val_loss: 0.6402
Epoch 4/50
[1m2290/2290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m26s[0m 12ms/step - accuracy: 0.8269 - loss: 0.5518 - val_accuracy: 0.8541 - val_loss: 0.4754
Epoch 5/50
[1m2290/2290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 12ms/step - accuracy: 0.8479 - loss: 0.4949 - val_accuracy: 0.8266 - val_loss: 0.5634
Epoch 6/50
[1m2290/2290[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m27s[0m 12ms/step - accuracy: 0.8593 - loss: 0.4537 - val_accuracy: 0.8820 - val_loss: 0.3964
Epoc

Сохраним модель

In [11]:
# Write the trained model out as an inference artifact under saved_model/.
model.export(filepath="saved_model/resnet_model")

INFO:tensorflow:Assets written to: saved_model/resnet_model/assets


INFO:tensorflow:Assets written to: saved_model/resnet_model/assets


Saved artifact at 'saved_model/resnet_model'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 28, 28, 1), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 10), dtype=tf.float32, name=None)
Captures:
  138427581809872: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138427581811216: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138427581813520: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138427581812176: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138427581812368: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138427581811024: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138427581813328: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138427581813904: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138427581814480: TensorSpec(shape=(), dtype=tf.resource, name=None)
  138427581815440: TensorSpec(shape=(), dtype=tf.resource, name=None)
  1384275