# Обучение с учителем для решения задач коммивояжера


In [1]:
import os
import tensorflow as tf
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Скрывает INFO-логи (оставляет WARNING и ERROR)
tf.get_logger().setLevel('ERROR')  # Скрывает большинство логов TensorFlow
import logging
logging.getLogger('tensorflow').setLevel(logging.ERROR)

import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)

2025-04-12 13:31:27.081266: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1744464687.093499    4228 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1744464687.097549    4228 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1744464687.106982    4228 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1744464687.106991    4228 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1744464687.106992    4228 computation_placer.cc:177] computation placer alr

In [2]:
import numpy as np
from tensorflow.keras.layers import Input, Dense, LayerNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tqdm import tqdm
from python_tsp.exact import solve_tsp_dynamic_programming, solve_tsp_branch_and_bound
from python_tsp.heuristics import solve_tsp_local_search, solve_tsp_simulated_annealing
from python_tsp.heuristics import solve_tsp_lin_kernighan, solve_tsp_record_to_record
from sklearn.metrics import mean_absolute_percentage_error

### Модель предсказывает какие дуги входят в маршрут, но не их последовательность

In [3]:
class TSPSolver:
    def __init__(self, num_cities, hidden_dim=256):
        self.num_cities = num_cities
        self.hidden_dim = hidden_dim
        self.model = self._build_model()
    
    def _build_model(self):
        # Вход: матрица расстояний (batch, cities, cities)
        inputs = Input(shape=(self.num_cities, self.num_cities))
        
        # Кодировщик на основе полносвязных слоев
        x = Dense(self.hidden_dim, activation='relu')(inputs)
        x = LayerNormalization()(x)
        x = Dense(self.hidden_dim, activation='relu')(x)
        x = LayerNormalization()(x)

        x = Dense(self.hidden_dim, activation='relu')(x)
        x = LayerNormalization()(x)
        
        # Выходной слой - вероятности переходов
        logits = Dense(self.num_cities)(x)
        outputs = tf.keras.activations.softmax(logits)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer=Adam(0.001), loss=self._custom_loss)
        return model
    
    def _custom_loss(self, y_true, y_pred):
        # y_true: маска посещенных городов (batch, cities, cities)
        # y_pred: вероятности переходов (batch, cities, cities)
        
        # Применяем маску к предсказаниям
        masked_pred = y_pred * y_true
        
        # Нормализуем вероятности
        masked_pred = masked_pred / (K.sum(masked_pred, axis=-1, keepdims=True) + K.epsilon())
        
        # Вычисляем кросс-энтропию
        loss = -K.sum(y_true * K.log(masked_pred + K.epsilon()), axis=-1)
        return K.mean(loss)
    
    def train(self, X_train, routes, epochs=50, batch_size=128):
        """
        X_train: матрицы расстояний (samples, cities, cities)
        routes: оптимальные маршруты (samples, cities)
        """
        # Создаем маски переходов для обучения
        y_masks = np.zeros_like(X_train)
        
        for i, route in enumerate(routes):
            for j in range(len(route)-1):
                from_city = route[j]
                to_city = route[j+1]
                y_masks[i, from_city, to_city] = 1
        
        self.model.fit(
            X_train,
            y_masks,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=0.1
        )

In [4]:
print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]


### Подготовка исходных данных и поиск точного решения методом динамического программирования занимает час

In [5]:
N = 12
nlen = N*N
rand = np.random.RandomState(1)

In [None]:
cnt = 50000
lx = []
ly = []
ld = []
for i in tqdm(range(cnt)):
    a = rand.normal(20, 5, size=(N, N))
    #a = np.round((a + a.T)/2,2)
    a[np.eye(N)==1] = 0
    permutation, distance = solve_tsp_dynamic_programming(a)
    lx.append(a)
    ly.append(permutation)
    ld.append(distance)
X_train = np.array(lx)
Y_train = np.array(ly)
D_train = np.array(ld)

In [6]:
#np.save('X_train', X_train)
#np.save('Y_train', Y_train)
X_train = np.load('X_train.npy')
Y_train = np.load('Y_train.npy')

### Инициализируем и обучаем модель

In [7]:
solver = TSPSolver(num_cities=N)
solver.train(X_train, Y_train, epochs=40)

I0000 00:00:1744464701.683900    4228 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 22335 MB memory:  -> device: 0, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:01:00.0, compute capability: 8.6
I0000 00:00:1744464701.685246    4228 gpu_device.cc:2019] Created device /job:localhost/replica:0/task:0/device:GPU:1 with 22335 MB memory:  -> device: 1, name: NVIDIA GeForce RTX 3090, pci bus id: 0000:06:00.0, compute capability: 8.6


Epoch 1/40


I0000 00:00:1744464703.952465    4334 service.cc:152] XLA service 0x7498e400cb60 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1744464703.952477    4334 service.cc:160]   StreamExecutor device (0): NVIDIA GeForce RTX 3090, Compute Capability 8.6
I0000 00:00:1744464703.952479    4334 service.cc:160]   StreamExecutor device (1): NVIDIA GeForce RTX 3090, Compute Capability 8.6
2025-04-12 13:31:43.985846: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:269] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
I0000 00:00:1744464704.183780    4334 cuda_dnn.cc:529] Loaded cuDNN version 90800


[1m119/352[0m [32m━━━━━━[0m[37m━━━━━━━━━━━━━━[0m [1m0s[0m 1ms/step - loss: 1.6464e-06

I0000 00:00:1744464705.440756    4334 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 7ms/step - loss: 1.1534e-06 - val_loss: 6.7128e-07
Epoch 2/40
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 6.4378e-07 - val_loss: 5.5125e-07
Epoch 3/40
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 5.1821e-07 - val_loss: 4.3618e-07
Epoch 4/40
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 4.1789e-07 - val_loss: 3.8215e-07
Epoch 5/40
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 3.7408e-07 - val_loss: 3.5985e-07
Epoch 6/40
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 3.5458e-07 - val_loss: 3.4532e-07
Epoch 7/40
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 3.4266e-07 - val_loss: 3.3801e-07
Epoch 8/40
[1m352/352[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 3.3552e-07 - val_loss

### Проверка обученной модели

In [8]:
def predict_route(solver, dist_matrix, is_rnd = False, num_iter = 500):
    ap = solver.model.predict(dist_matrix[np.newaxis, ...], verbose=0)[0]
    num_cities = dist_matrix.shape[0]
    best_route = []
    best_dist = np.inf
    for _ in range(num_iter):
        current = 0
        route = [current]
        total_dist = 0
        for _ in range(num_cities-1):
            probs = ap[current].copy()
            if is_rnd: probs = np.full_like(probs, 1) 
            # Маскируем посещенные города
            probs[route] = 0
            # Выбираем следующий город
            next_city = rand.choice(range(num_cities), p=(probs / np.sum(probs)))
            route.append(next_city)
            total_dist += dist_matrix[current, next_city]
            current = next_city
        total_dist += dist_matrix[route[-1], route[0]]
        if total_dist < best_dist:
            best_dist = total_dist
            best_route = route
    return np.array(best_route), best_dist

In [9]:
def predict_geedy_route(dist_matrix):
    num_cities = dist_matrix.shape[0]
    current = 0
    route = [current]
    next_city = np.argmax(dist_matrix[current])
    route.append(next_city)
    total_dist = dist_matrix[current, next_city]
    current = next_city
    for _ in range(num_cities-2):
        a = dist_matrix[current].copy()
        # Маскируем посещенные города
        a[route] = np.inf
        # Выбираем следующий город
        next_city = np.argmin(a)
        route.append(next_city)
        total_dist += dist_matrix[current, next_city]
        current = next_city
    total_dist += dist_matrix[route[-1], route[0]]
    return np.array(route), total_dist

In [10]:
cnt = 50
ld = []
lp = []
lg = []
lr = []
for i in tqdm(range(cnt)):
    a = rand.normal(20, 5, size=(N, N))
    # a = np.round((a + a.T)/2,2)
    a[np.eye(N)==1] = 0
    permutation, distance = solve_tsp_dynamic_programming(a)
    route, total_dist = predict_route(solver, a, False, 500)
    route, dist = predict_geedy_route(a)
    route, rdist = predict_route(solver, a, True, 500)
    ld.append(distance)
    lp.append(total_dist)
    lg.append(dist)
    lr.append(rdist)
Y_predict = np.array(lp)
Y_true = np.array(ld)
Y_greedy = np.array(lg)
Y_rnd = np.array(lr)

100%|██████████| 50/50 [00:31<00:00,  1.56it/s]


In [11]:
mean_absolute_percentage_error(Y_true, Y_predict), \
mean_absolute_percentage_error(Y_true, Y_greedy), \
mean_absolute_percentage_error(Y_true, Y_rnd),

(0.027700233725187675, 0.22477120035236425, 0.18746048605617763)

In [12]:
# Сколько случаев действительно плохого прогноза
sum(((Y_predict - Y_true) / Y_true) > 0.2), \
sum(((Y_greedy - Y_true) / Y_true) > 0.2), \
sum(((Y_rnd - Y_true) / Y_true) > 0.2)

(np.int64(0), np.int64(32), np.int64(20))

In [37]:
%timeit predict_geedy_route(a)

49 μs ± 3.68 μs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [166]:
%timeit predict_route(solver, a, False)

261 ms ± 759 μs per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [39]:
%timeit solve_tsp_dynamic_programming(a)

52.7 ms ± 582 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [52]:
%timeit predict_route(solver, a, True)

105 ms ± 323 μs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [70]:
%timeit solve_tsp_branch_and_bound(a)

13 ms ± 142 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [82]:
%timeit solve_tsp_local_search(a)

1.58 ms ± 22.9 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [86]:
%timeit solve_tsp_simulated_annealing(a)

51.2 ms ± 12.9 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [None]:
# Не стабильно
# solve_tsp_lin_kernighan(a), solve_tsp_record_to_record(a[:5,:5])