# Обучение с учителем для решения задач коммивояжера


In [1]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Скрывает INFO-логи (оставляет WARNING и ERROR)
import logging
logging.getLogger('tensorflow').setLevel(logging.ERROR)
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)
import tensorflow as tf
tf.get_logger().setLevel('ERROR')  # Скрывает большинство логов TensorFlow


2025-05-10 06:17:06.361022: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [2]:
import numpy as np
from tensorflow.keras.layers import Input, Dense, LayerNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tqdm import tqdm
#from python_tsp.exact import solve_tsp_dynamic_programming, solve_tsp_branch_and_bound
#from python_tsp.heuristics import solve_tsp_local_search, solve_tsp_simulated_annealing
#from python_tsp.heuristics import solve_tsp_lin_kernighan, solve_tsp_record_to_record
from sklearn.metrics import mean_absolute_percentage_error
import matplotlib.pyplot as plt
import pandas as pd

### Модель предсказывает какие дуги входят в маршрут, но не их последовательность

In [3]:
class TSPSolver:
    def __init__(self, num_cities, hidden_dim=256):
        self.num_cities = num_cities
        self.hidden_dim = hidden_dim
        self.model = self._build_model()
    
    def _build_model(self):
        # Вход: матрица расстояний (batch, cities, cities)
        inputs = Input(shape=(self.num_cities, self.num_cities))
        
        # Кодировщик на основе полносвязных слоев
        x = Dense(self.hidden_dim, activation='relu')(inputs)
        x = LayerNormalization()(x)
        x = Dense(self.hidden_dim, activation='relu')(x)
        x = LayerNormalization()(x)
        
        # Выходной слой - вероятности переходов
        logits = Dense(self.num_cities)(x)
        outputs = tf.keras.activations.softmax(logits)
        
        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer=Adam(0.001), loss=self._custom_loss)
        return model
    
    def _custom_loss(self, y_true, y_pred):
        # y_true: маска посещенных городов (batch, cities, cities)
        # y_pred: вероятности переходов (batch, cities, cities)
        
        # Применяем маску к предсказаниям
        masked_pred = y_pred * y_true
        
        # Нормализуем вероятности
        masked_pred = masked_pred / (K.sum(masked_pred, axis=-1, keepdims=True) + K.epsilon())
        
        # Вычисляем кросс-энтропию
        loss = -K.sum(y_true * K.log(masked_pred + K.epsilon()), axis=-1)
        return K.mean(loss)
    
    def train(self, X_train, routes, epochs=50, batch_size=128):
        """
        X_train: матрицы расстояний (samples, cities, cities)
        routes: оптимальные маршруты (samples, cities)
        """
        # Создаем маски переходов для обучения
        y_masks = np.zeros_like(X_train)
        
        for i, route in enumerate(routes):
            for j in range(len(route)-1):
                from_city = route[j]
                to_city = route[j+1]
                y_masks[i, from_city, to_city] = 1
        
        self.model.fit(
            X_train,
            y_masks,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=0.1
        )

In [4]:
print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]


### Подготовка исходных данных 

In [6]:
X = np.load('X_20x20_fixed.npy')
Y = np.load('Y_20x20_fixed.npy')
border = 60000
X_train = X[:border]
Y_train = Y[:border]
X_test = X[border:]
Y_test = Y[border:]

In [7]:
# normalize the dataset
#X_train = X_train/X_train.max(axis=(1,2))[:,np.newaxis,np.newaxis]
#X_test = X_test/X_test.max(axis=(1,2))[:,np.newaxis,np.newaxis]

In [8]:
N = X_train.shape[1]
nlen = N*N
cnt = X_train.shape[0]
rand = np.random.RandomState(1)
N, cnt

(20, 60000)

### Инициализируем и обучаем модель

In [18]:
solver = TSPSolver(num_cities=N, hidden_dim = 512)
solver.train(X_train, Y_train, epochs=200, batch_size=3064)
#solver.model.load_weights('./my.weights.h5')

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

In [22]:
# solver.model.save_weights('./my.weights.h5')


### Проверка обученной модели

In [45]:
def predict_route(solver, dist_matrix, is_rnd = False, num_iter = 500):
    ap = solver.model(distance_matrix[np.newaxis, ...], training=False)[0].numpy()
    num_cities = dist_matrix.shape[0]
    best_route = []
    best_dist = np.inf
    for _ in range(num_iter):
        current = 0
        route = [current]
        total_dist = 0
        for _ in range(num_cities-1):
            probs = ap[current].copy()
            if is_rnd: probs = np.full_like(probs, 1) 
            # Маскируем посещенные города
            probs[route] = 0
            # Выбираем следующий город
            next_city = rand.choice(range(num_cities), p=(probs / np.sum(probs)))
            route.append(next_city)
            total_dist += dist_matrix[current, next_city]
            current = next_city
        total_dist += dist_matrix[route[-1], route[0]]
        if total_dist < best_dist:
            best_dist = total_dist
            best_route = route
    return np.array(best_route), best_dist

In [28]:
def predict_geedy_route(dist_matrix):
    num_cities = dist_matrix.shape[0]
    current = 0
    route = [current]
    next_city = np.argmax(dist_matrix[current])
    route.append(next_city)
    total_dist = dist_matrix[current, next_city]
    current = next_city
    for _ in range(num_cities-2):
        a = dist_matrix[current].copy()
        # Маскируем посещенные города
        a[route] = np.inf
        # Выбираем следующий город
        next_city = np.argmin(a)
        route.append(next_city)
        total_dist += dist_matrix[current, next_city]
        current = next_city
    total_dist += dist_matrix[route[-1], route[0]]
    return np.array(route), total_dist

In [10]:
def predict_beam_exp(solver, distance_matrix, beam_width=3):
    ap = solver.model(distance_matrix[np.newaxis, ...], training=False)[0].numpy() # solver.model.predict(distance_matrix[np.newaxis, ...], verbose=0)[0]
    num_cities = distance_matrix.shape[0]
    beams = [([0], set(range(0, num_cities)) - {0}, 0, 0)]
    delta = 1 / num_cities
    w = 0.8 # (i+1) * delta
    for i in range(1, num_cities):
        new_beams = []
        for route, remaining, dist, sap in beams:
            # Топ-K городов
            w = 0
            top_cities = sorted(remaining, key=lambda x: w*distance_matrix[route[-1],x] +
                                (1-w)*(1-ap[route[-1],x])
                               )[:beam_width] #ap[route[-1],x], reverse=True
            for city in top_cities:
                new_route = route + [city]
                new_remaining = remaining - {city}
                new_dist = dist + distance_matrix[new_route[-2], new_route[-1]]
                new_sap = sap + 1.0 - ap[new_route[-2], new_route[-1]]
                if i == num_cities-1:
                    new_dist += distance_matrix[new_route[-1], new_route[0]]
                    new_sap += 1.0-ap[new_route[-1], new_route[0]]
                new_beams.append((new_route, new_remaining, new_dist, new_sap))
        # Выбираем лучшие beam_width вариантов
        w = 0.8
        beams = sorted(new_beams, key=lambda x: w*x[2] + (1-w)*x[3])[:beam_width]
        
    # Лучший маршрут
    best_route, _, best_dist, _ = min(beams, key=lambda x: x[2])
    return best_route, best_dist

In [19]:
ld = []
lp = []
lg = []
lr = []
lb = []
lls = []
lsa = []
for i in tqdm(range(X_test.shape[0])):
    a = X_test[i]
    route = Y_test[i]
    distance = sum(a[route[j],route[j+1]] for j in range(N-1))+a[route[-1],route[0]]
    #_, total_dist = predict_route(solver, a, False, 500)
    #route, dist = predict_geedy_route(a)
    #route, rdist = predict_route(solver, a, True, 500)
    _, bdist = predict_beam_exp(solver, a, beam_width=4)
    #_, ls_dist = solve_tsp_local_search(a)
    #_, sa_dist = solve_tsp_simulated_annealing(a)
    ld.append(distance)
    #lp.append(total_dist)
    #lg.append(dist)
    #lr.append(rdist)
    lb.append(bdist)
    #lls.append(ls_dist)
    #lsa.append(sa_dist)
    
#Y_predict = np.array(lp)
Y_true = np.array(ld)
#Y_greedy = np.array(lg)
#Y_rnd = np.array(lr)
Y_beam = np.array(lb)
#Y_ls = np.array(lls)
#Y_sa = np.array(lsa)

100%|██████████| 3000/3000 [00:24<00:00, 121.17it/s]


In [20]:
#mean_absolute_percentage_error(Y_true, Y_predict), \
mean_absolute_percentage_error(Y_true, Y_beam) #, \
#mean_absolute_percentage_error(Y_true, Y_greedy), \
#mean_absolute_percentage_error(Y_true, Y_rnd), \
#mean_absolute_percentage_error(Y_true, Y_ls), \
#mean_absolute_percentage_error(Y_true, Y_sa)

0.07922057356307602

In [21]:
# Сколько случаев действительно плохого прогноза
#sum(((Y_predict - Y_true) / Y_true) > 0.2), \
sum(((Y_beam - Y_true) / Y_true) > 0.2) #, \
#sum(((Y_greedy - Y_true) / Y_true) > 0.2), \
#sum(((Y_rnd - Y_true) / Y_true) > 0.2), \
#sum(((Y_ls - Y_true) / Y_true) > 0.2), \
#sum(((Y_sa - Y_true) / Y_true) > 0.2)

6

In [33]:
Y_gr = np.argsort(solver.model(X_test, training=False).numpy()[:,0,:], axis=1)

In [44]:
sum((Y_gr[:,-1] == Y_test[:,1]) | (Y_gr[:,-2] == Y_test[:,1]) | (Y_gr[:,-3] == Y_test[:,1]) | (Y_gr[:,-4] == Y_test[:,1]))/3000

0.9116666666666666

In [255]:
%timeit predict_beam_exp(solver, a, beam_width=4)

6.62 ms ± 124 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [None]:
%timeit predict_geedy_route(a)

In [50]:
%timeit predict_route(solver, a, False, 500)

358 ms ± 1.63 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
%timeit solve_tsp_dynamic_programming(a)

In [43]:
%timeit solve_tsp_branch_and_bound(a)

888 ms ± 5.48 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [221]:
%timeit solve_tsp_local_search(a)

6.02 ms ± 149 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [None]:
%timeit solve_tsp_simulated_annealing(a)

In [284]:
Y_test[795], Y_true[795]

(array([ 0, 13,  8,  9, 17,  2, 19, 14, 12, 10, 16,  5,  3,  1,  6, 18, 11,
         4, 15,  7]),
 np.float64(6.568715342983737))

In [283]:
predict_beam_exp(solver, X_test[795], beam_width=4)

([0, 12, 10, 16, 15, 7, 19, 1, 6, 18, 5, 3, 13, 8, 9, 14, 4, 2, 17, 11],
 np.float64(7.588053043444236))

In [294]:
a = X_test[795]
ap = np.round(solver.model(a[np.newaxis, ...], training=False)[0].numpy(),2)
route = Y_test[795]
pd.DataFrame(ap)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,0.0,0.0,0.01,0.0,0.03,0.01,0.0,0.05,0.01,0.02,0.25,0.21,0.12,0.22,0.0,0.0,0.0,0.0,0.0,0.06
1,0.0,0.0,0.08,0.05,0.1,0.01,0.35,0.0,0.12,0.0,0.0,0.04,0.0,0.0,0.0,0.0,0.17,0.06,0.0,0.0
2,0.0,0.01,0.01,0.01,0.06,0.0,0.02,0.08,0.02,0.01,0.0,0.0,0.0,0.01,0.0,0.0,0.03,0.04,0.0,0.72
3,0.0,0.27,0.06,0.01,0.0,0.0,0.01,0.03,0.15,0.0,0.0,0.08,0.0,0.25,0.0,0.0,0.01,0.0,0.06,0.05
4,0.0,0.0,0.06,0.01,0.04,0.05,0.0,0.01,0.16,0.3,0.0,0.0,0.0,0.0,0.0,0.34,0.0,0.01,0.03,0.0
5,0.0,0.0,0.0,0.36,0.03,0.0,0.05,0.01,0.05,0.0,0.0,0.0,0.42,0.0,0.01,0.02,0.01,0.02,0.03,0.0
6,0.0,0.01,0.0,0.0,0.05,0.17,0.0,0.05,0.0,0.0,0.01,0.09,0.0,0.01,0.12,0.0,0.07,0.0,0.42,0.0
7,0.0,0.02,0.02,0.06,0.04,0.02,0.0,0.0,0.07,0.0,0.25,0.0,0.02,0.0,0.0,0.04,0.05,0.0,0.0,0.4
8,0.0,0.0,0.0,0.28,0.0,0.13,0.03,0.0,0.0,0.13,0.04,0.0,0.09,0.16,0.0,0.0,0.08,0.04,0.01,0.01
9,0.0,0.17,0.03,0.0,0.0,0.03,0.16,0.0,0.01,0.0,0.03,0.01,0.0,0.0,0.27,0.05,0.07,0.03,0.02,0.1


In [290]:
sum(ap[0] > 0.2)

np.int64(3)

In [296]:
[(i,j,ap[i,j]) for i,j in zip(route[:-1],route[1:])] 

[(np.int64(0), np.int64(13), np.float32(0.22)),
 (np.int64(13), np.int64(8), np.float32(0.29)),
 (np.int64(8), np.int64(9), np.float32(0.13)),
 (np.int64(9), np.int64(17), np.float32(0.03)),
 (np.int64(17), np.int64(2), np.float32(0.1)),
 (np.int64(2), np.int64(19), np.float32(0.72)),
 (np.int64(19), np.int64(14), np.float32(0.18)),
 (np.int64(14), np.int64(12), np.float32(0.42)),
 (np.int64(12), np.int64(10), np.float32(0.15)),
 (np.int64(10), np.int64(16), np.float32(0.18)),
 (np.int64(16), np.int64(5), np.float32(0.24)),
 (np.int64(5), np.int64(3), np.float32(0.36)),
 (np.int64(3), np.int64(1), np.float32(0.27)),
 (np.int64(1), np.int64(6), np.float32(0.35)),
 (np.int64(6), np.int64(18), np.float32(0.42)),
 (np.int64(18), np.int64(11), np.float32(0.27)),
 (np.int64(11), np.int64(4), np.float32(0.14)),
 (np.int64(4), np.int64(15), np.float32(0.34)),
 (np.int64(15), np.int64(7), np.float32(0.55))]

In [298]:
pd.DataFrame(np.round(a,2))

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
0,0.0,0.74,0.62,0.69,0.55,0.6,0.77,0.52,0.63,0.56,0.37,0.42,0.47,0.4,0.69,0.69,0.67,0.72,0.79,0.51
1,0.76,0.0,0.43,0.5,0.45,0.6,0.36,0.68,0.44,0.76,0.75,0.51,0.64,0.77,0.87,0.66,0.4,0.49,0.74,0.74
2,0.51,0.63,0.0,0.64,0.51,0.83,0.59,0.44,0.56,0.61,0.85,0.81,0.68,0.62,0.63,0.91,0.53,0.57,0.78,0.2
3,0.47,0.29,0.44,0.0,0.62,0.63,0.53,0.52,0.39,0.98,0.6,0.46,0.76,0.37,0.59,0.63,0.53,0.68,0.43,0.43
4,0.57,0.72,0.41,0.57,0.0,0.46,0.63,0.56,0.36,0.29,0.75,0.7,0.71,0.58,0.72,0.12,0.97,0.56,0.5,0.7
5,0.77,0.69,0.71,0.3,0.54,0.0,0.5,0.63,0.52,0.76,0.71,0.69,0.34,1.0,0.62,0.61,0.67,0.58,0.54,0.71
6,0.53,0.64,0.77,0.81,0.53,0.39,0.0,0.52,0.64,0.74,0.6,0.5,0.8,0.62,0.43,0.86,0.49,0.69,0.33,0.78
7,0.42,0.53,0.57,0.5,0.55,0.59,0.7,0.0,0.49,0.82,0.38,0.68,0.55,0.67,0.61,0.54,0.48,0.67,0.74,0.34
8,0.51,0.85,0.64,0.42,0.65,0.45,0.57,0.71,0.0,0.47,0.54,0.64,0.51,0.46,0.67,0.65,0.5,0.55,0.63,0.61
9,0.75,0.44,0.5,0.69,0.85,0.5,0.36,0.61,0.55,0.0,0.54,0.52,0.69,0.74,0.36,0.53,0.46,0.47,0.53,0.44
