# Обучение с учителем для решения задач коммивояжера


In [None]:
import os
import tensorflow as tf
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Скрывает INFO-логи (оставляет WARNING и ERROR)
tf.get_logger().setLevel('ERROR')  # Скрывает большинство логов TensorFlow
import logging
logging.getLogger('tensorflow').setLevel(logging.ERROR)

import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)

In [2]:
import numpy as np
from tensorflow.keras.layers import Input, Dense, LayerNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tqdm import tqdm
from python_tsp.exact import solve_tsp_dynamic_programming
from sklearn.metrics import mean_absolute_percentage_error

### Модель предсказывает какие дуги входят в маршрут, но не их последовательность

In [3]:
class TSPSolver:
    def __init__(self, num_cities, hidden_dim=128):
        self.num_cities = num_cities
        self.hidden_dim = hidden_dim
        self.model = self._build_model()
    
    def _build_model(self):
        # Вход: матрица расстояний (batch, cities, cities)
        inputs = Input(shape=(self.num_cities, self.num_cities))
        
        # Кодировщик на основе полносвязных слоев
        x = Dense(self.hidden_dim, activation='relu')(inputs)
        x = LayerNormalization()(x)
        x = Dense(self.hidden_dim, activation='relu')(x)
        x = LayerNormalization()(x)
        
        # Выходной слой - вероятности переходов
        logits = Dense(self.num_cities)(x)
        outputs = tf.keras.activations.softmax(logits)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer=Adam(0.001), loss=self._custom_loss)
        return model
    
    def _custom_loss(self, y_true, y_pred):
        # y_true: маска посещенных городов (batch, cities, cities)
        # y_pred: вероятности переходов (batch, cities, cities)
        
        # Применяем маску к предсказаниям
        masked_pred = y_pred * y_true
        
        # Нормализуем вероятности
        masked_pred = masked_pred / (K.sum(masked_pred, axis=-1, keepdims=True) + K.epsilon())
        
        # Вычисляем кросс-энтропию
        loss = -K.sum(y_true * K.log(masked_pred + K.epsilon()), axis=-1)
        return K.mean(loss)
    
    def train(self, X_train, routes, epochs=50, batch_size=32):
        """
        X_train: матрицы расстояний (samples, cities, cities)
        routes: оптимальные маршруты (samples, cities)
        """
        # Создаем маски переходов для обучения
        y_masks = np.zeros_like(X_train)
        
        for i, route in enumerate(routes):
            for j in range(len(route)-1):
                from_city = route[j]
                to_city = route[j+1]
                y_masks[i, from_city, to_city] = 1
        
        self.model.fit(
            X_train,
            y_masks,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=0.1
        )
    
    def predict_route(self, dist_matrix):
        current = 0
        visited = {current}
        route = [current]
        
        for _ in range(self.num_cities-1):
            # Получаем предсказания
            probs = self.model.predict(dist_matrix[np.newaxis, ...], verbose=0)[0, current]
            
            # Маскируем посещенные города
            probs[list(visited)] = -np.inf
            
            # Выбираем следующий город
            next_city = np.argmax(probs)
            route.append(next_city)
            visited.add(next_city)
            current = next_city
        
        return np.array(route)

In [4]:
print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]


### Подготовка исходных данных и поиск точного решения методом динамического программирования занимает час

In [36]:
N = 12
nlen = N*N
rand = np.random.RandomState(1)

In [None]:
cnt = 50000
lx = []
ly = []
ld = []
for i in tqdm(range(cnt)):
    a = rand.normal(20, 5, size=(N, N))
    #a = np.round((a + a.T)/2,2)
    a[np.eye(N)==1] = 0
    permutation, distance = solve_tsp_dynamic_programming(a)
    lx.append(a)
    ly.append(permutation)
    ld.append(distance)
X_train = np.array(lx)
Y_train = np.array(ly)
D_train = np.array(ld)

In [45]:
#np.save('X_train', X_train)
#np.save('Y_train', Y_train)

### Инициализируем и обучаем модель

In [52]:
solver = TSPSolver(num_cities=N)
solver.train(X_train, Y_train, epochs=40)

Epoch 1/40
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 2ms/step - loss: 9.4134e-07 - val_loss: 5.9144e-07
Epoch 2/40
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 993us/step - loss: 5.2172e-07 - val_loss: 3.9544e-07
Epoch 3/40
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 3.7994e-07 - val_loss: 3.5521e-07
Epoch 4/40
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 3.4963e-07 - val_loss: 3.4190e-07
Epoch 5/40
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 990us/step - loss: 3.3694e-07 - val_loss: 3.3089e-07
Epoch 6/40
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 983us/step - loss: 3.3046e-07 - val_loss: 3.2723e-07
Epoch 7/40
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 963us/step - loss: 3.2611e-07 - val_loss: 3.2397e-07
Epoch 8/40
[1m1407/1407[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 964

### Проверка обученной модели

In [53]:
cnt = 30
ld = []
lp = []
lr = []
for i in tqdm(range(cnt)):
    a = rand.normal(20, 5, size=(N, N))
    # a = np.round((a + a.T)/2,2)
    a[np.eye(N)==1] = 0
    permutation, distance = solve_tsp_dynamic_programming(a)
    route = solver.predict_route(a)
    total_dist = sum(a[route[i], route[i+1]] for i in range(N-1))
    total_dist += a[route[-1], route[0]]
    ld.append(distance)
    lp.append(total_dist)
Y_predict = np.array(lp)
Y_true = np.array(ld)

100%|██████████| 30/30 [00:24<00:00,  1.25it/s]


In [54]:
mean_absolute_percentage_error(Y_true,Y_predict)

0.11158602195485692

In [61]:
# Сколько случаев действительно плохого прогноза
sum(((Y_predict - Y_true) / Y_true) > 0.2)

np.int64(4)