# Обучение с учителем для решения задач коммивояжера


In [1]:
# Quiet down TensorFlow / absl logging. The environment variable is assigned
# before `import tensorflow` below, so this cell keeps its statement order.
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # Hides INFO logs (original note: keeps WARNING and ERROR). NOTE(review): level '2' is usually documented as hiding WARNING too - confirm
import logging
logging.getLogger('tensorflow').setLevel(logging.ERROR)
import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)
import tensorflow as tf
tf.get_logger().setLevel('ERROR')  # Hides most TensorFlow logs


2025-04-29 10:05:35.262219: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1745921135.322048    4397 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1745921135.336534    4397 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1745921135.435289    4397 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1745921135.435305    4397 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1745921135.435307    4397 computation_placer.cc:177] computation placer alr

In [2]:
import numpy as np
from tensorflow.keras.layers import Input, Dense, LayerNormalization
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import backend as K
from tqdm import tqdm
from python_tsp.exact import solve_tsp_dynamic_programming, solve_tsp_branch_and_bound
from python_tsp.heuristics import solve_tsp_local_search, solve_tsp_simulated_annealing
from python_tsp.heuristics import solve_tsp_lin_kernighan, solve_tsp_record_to_record
from sklearn.metrics import mean_absolute_percentage_error

### Модель предсказывает, какие дуги входят в маршрут, но не их последовательность

In [3]:
class TSPSolver:
    """Dense network that scores, for every city, which city follows it in a tour.

    The model maps a distance matrix of shape (batch, cities, cities) to a
    tensor of the same shape; a softmax over the last axis turns row ``i`` into
    a distribution over the successor of city ``i``. The model predicts which
    arcs belong to the tour, not their order; decoding is done by the caller.

    Args:
        num_cities: number of cities (rows/columns of the distance matrix).
        hidden_dim: width of every hidden Dense layer.
    """

    def __init__(self, num_cities, hidden_dim=256):
        self.num_cities = num_cities
        self.hidden_dim = hidden_dim
        self.model = self._build_model()

    def _build_model(self):
        """Build and compile the Keras model (Adam, lr=0.001, custom loss)."""
        # Input: distance matrix (batch, cities, cities).
        inputs = Input(shape=(self.num_cities, self.num_cities))

        # Encoder: three Dense + LayerNormalization pairs. Layers are created
        # in the same order as before so auto-generated layer names (and thus
        # previously saved weight files) keep matching.
        x = inputs
        for _ in range(3):
            x = Dense(self.hidden_dim, activation='relu')(x)
            x = LayerNormalization()(x)

        # Output layer: transition probabilities per source city.
        logits = Dense(self.num_cities)(x)
        outputs = tf.keras.activations.softmax(logits)

        model = Model(inputs=inputs, outputs=outputs)
        model.compile(optimizer=Adam(0.001), loss=self._custom_loss)
        return model

    def _custom_loss(self, y_true, y_pred):
        """Cross-entropy between the transition mask and predicted probabilities.

        Args:
            y_true: transition mask (batch, cities, cities) as built by
                ``train``: 1 at [from_city, to_city] for every arc of the
                reference route, 0 elsewhere.
            y_pred: predicted transition probabilities (batch, cities, cities).

        Returns:
            Scalar: mean over all (batch, city) rows of the per-row
            cross-entropy. Rows without a target arc contribute 0.
        """
        # The log is taken of the raw prediction on purpose. Multiplying y_pred
        # by the mask and renormalising first would turn the target probability
        # into ~1 for any prediction, collapsing the loss (and its gradient)
        # to ~0.
        row_loss = -K.sum(y_true * K.log(y_pred + K.epsilon()), axis=-1)
        return K.mean(row_loss)

    def train(self, X_train, routes, epochs=50, batch_size=128):
        """Fit the model on distance matrices and their reference routes.

        Args:
            X_train: distance matrices (samples, cities, cities).
            routes: reference routes (samples, cities), each a sequence of
                city indices. Only consecutive arcs route[j] -> route[j+1]
                are used as targets; the closing arc back to the start is
                not added.
            epochs: number of training epochs.
            batch_size: mini-batch size.

        Returns:
            Whatever ``self.model.fit`` returns.

        Raises:
            ValueError: if ``routes`` and ``X_train`` differ in sample count.
        """
        if len(routes) != len(X_train):
            raise ValueError(
                "routes and X_train must contain the same number of samples "
                f"(got {len(routes)} and {len(X_train)})"
            )

        # Build the transition masks used as training targets.
        y_masks = np.zeros_like(X_train)
        for i, route in enumerate(routes):
            path = np.asarray(route, dtype=np.intp)
            # Mark every arc path[j] -> path[j+1] of this route at once.
            y_masks[i, path[:-1], path[1:]] = 1

        return self.model.fit(
            X_train,
            y_masks,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=0.1
        )

In [4]:
# Show the GPU devices TensorFlow reports as available.
print(tf.config.list_physical_devices('GPU'))

[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]


### Подготовка исходных данных 

In [5]:
# Load the stored arrays and split both at the same fixed index:
# the first `border` samples form the training part, the rest the test part.
X = np.load('X_20x20.npy')
Y = np.load('Y_20x20.npy')
border = 60000
X_train, X_test = X[:border], X[border:]
Y_train, Y_test = Y[:border], Y[border:]

In [6]:
# Scale every sample by its own maximum entry (max over axes 1 and 2),
# so the largest value in each matrix becomes 1.
X_train = X_train / X_train.max(axis=(1, 2), keepdims=True)
X_test = X_test / X_test.max(axis=(1, 2), keepdims=True)

In [7]:
# Sample count and matrix size taken from the training array, plus the
# seeded random generator shared by the sampling-based decoder.
cnt, N = X_train.shape[:2]
nlen = N * N
rand = np.random.RandomState(1)
N, cnt

(20, 60000)

### Инициализируем и обучаем модель

In [15]:
# Build the solver for N-city instances and restore weights from a local file;
# the training call is kept commented out.
solver = TSPSolver(num_cities=N)
#solver.train(X_train, Y_train, epochs=50)
solver.model.load_weights('./my.weights.h5')

  saveable.load_own_variables(weights_store.get(inner_path))


In [13]:
#solver.model.save_weights('./my.weights.h5')


### Проверка обученной модели

In [45]:
def predict_route(solver, dist_matrix, is_rnd = False, num_iter = 500, rng=None):
    """Sample tours from the model's transition probabilities and keep the shortest.

    Every tour starts at city 0. At each step the next city is drawn from the
    current city's row of transition probabilities with already visited cities
    masked out and the rest renormalised.

    Args:
        solver: object whose ``model`` is callable on a (1, cities, cities)
            batch and returns something whose first element has ``.numpy()``.
            Not used when ``is_rnd`` is true.
        dist_matrix: square distance matrix (cities, cities).
        is_rnd: if true, ignore the model and draw uniformly among unvisited
            cities (random baseline).
        num_iter: number of tours to sample.
        rng: ``numpy.random.RandomState``-like object providing ``choice``.
            Defaults to the notebook-level ``rand``.

    Returns:
        (route, length): the best route as an array of city indices and its
        closed-tour length. With ``num_iter == 0`` this is an empty array and
        ``np.inf``.
    """
    if rng is None:
        rng = rand  # notebook-level seeded generator

    num_cities = dist_matrix.shape[0]
    if is_rnd:
        # Uniform weights; no need to evaluate the model for the baseline.
        transition = np.ones((num_cities, num_cities))
    else:
        # Use the function argument, not a notebook global, as model input.
        transition = solver.model(dist_matrix[np.newaxis, ...], training=False)[0].numpy()

    best_route = []
    best_dist = np.inf
    for _ in range(num_iter):
        current = 0
        route = [current]
        total_dist = 0
        for _step in range(num_cities - 1):
            probs = transition[current].astype(np.float64)  # astype copies
            # Mask out the cities that were already visited.
            probs[route] = 0
            mass = probs.sum()
            if not np.isfinite(mass) or mass <= 0:
                # The model put no usable mass on unvisited cities:
                # fall back to a uniform draw among them.
                probs = np.ones(num_cities)
                probs[route] = 0
                mass = probs.sum()
            # Draw the next city.
            next_city = int(rng.choice(num_cities, p=probs / mass))
            route.append(next_city)
            total_dist += dist_matrix[current, next_city]
            current = next_city
        # Close the tour.
        total_dist += dist_matrix[route[-1], route[0]]
        if total_dist < best_dist:
            best_dist = total_dist
            best_route = route
    return np.array(best_route), best_dist

In [28]:
def predict_geedy_route(dist_matrix):
    """Build a nearest-neighbour tour starting at city 0.

    From the current city the closest not-yet-visited city is chosen at every
    step, including the first one; the tour is then closed back to city 0.

    Args:
        dist_matrix: square distance matrix (cities, cities); integer input is
            accepted and converted to float.

    Returns:
        (route, length): array of city indices in visiting order and the
        closed-tour length.
    """
    # Float copy so visited cities can be masked with +inf.
    dist = np.asarray(dist_matrix, dtype=float)
    num_cities = dist.shape[0]
    current = 0
    route = [current]
    total_dist = 0.0
    for _ in range(num_cities - 1):
        candidates = dist[current].copy()
        # Mask out visited cities (this also excludes the zero diagonal entry).
        candidates[route] = np.inf
        # Pick the nearest remaining city.
        next_city = int(np.argmin(candidates))
        route.append(next_city)
        total_dist += dist[current, next_city]
        current = next_city
    # Close the tour.
    total_dist += dist[route[-1], route[0]]
    return np.array(route), total_dist


# Correctly spelled alias; the original name is kept for existing callers.
predict_greedy_route = predict_geedy_route

In [40]:
def predict_beam_search(solver, distance_matrix, beam_width=3):
    """Decode a tour with beam search guided by the model's transition scores.

    Every partial route starts at city 0. At each step a beam is extended by
    its ``beam_width`` highest-scoring unvisited successors (scores come from
    the model output row of the route's last city); then the ``beam_width``
    shortest partial routes are kept. On the last step the closing arc back to
    the start is added before ranking.

    Args:
        solver: object whose ``model`` is callable on a (1, cities, cities)
            batch and returns something whose first element has ``.numpy()``.
        distance_matrix: square distance matrix (cities, cities).
        beam_width: number of successors tried per beam and number of beams
            kept per step; must be at least 1.

    Returns:
        (route, length): the best route as a list of city indices and its
        closed-tour length.

    Raises:
        ValueError: if ``beam_width`` is smaller than 1.
    """
    if beam_width < 1:
        raise ValueError("beam_width must be at least 1")

    ap = solver.model(distance_matrix[np.newaxis, ...], training=False)[0].numpy()
    num_cities = distance_matrix.shape[0]
    # Each beam: (route so far, set of unvisited cities, accumulated length).
    beams = [([0], set(range(0, num_cities)) - {0}, 0)]

    for step in range(1, num_cities):
        is_last_step = step == num_cities - 1
        candidates = []
        for route, remaining, dist in beams:
            last = route[-1]
            # Top-K successors by predicted transition score.
            top_cities = sorted(remaining, key=lambda city: ap[last, city], reverse=True)[:beam_width]
            for city in top_cities:
                # Beams hold distinct routes, so every extension is unique;
                # no duplicate check is needed.
                new_dist = dist + distance_matrix[last, city]
                if is_last_step:
                    # Close the tour back to the start city.
                    new_dist += distance_matrix[city, route[0]]
                candidates.append((route + [city], remaining - {city}, new_dist))
        # Keep the beam_width shortest candidates.
        beams = sorted(candidates, key=lambda beam: beam[2])[:beam_width]

    # Best complete route.
    best_route, _, best_dist = min(beams, key=lambda beam: beam[2])
    return best_route, best_dist

In [42]:
# Per-instance tour lengths collected over the test set.
# Only `ld` (reference routes) and `lb` (beam search) are filled by the active
# code; the other lists are appended to only by the commented-out lines below.
ld = []
lp = []
lg = []
lr = []
lb = []
lls = []
lsa = []
for i in tqdm(range(X_test.shape[0])):
    # NOTE: `a` stays defined after the loop and is reused by the %timeit cells.
    a = X_test[i]
    route = Y_test[i]
    # Length of the closed tour along the reference route
    # (N-1 consecutive arcs plus the arc from the last city back to the first).
    distance = sum(a[route[j],route[j+1]] for j in range(N-1))+a[route[-1],route[0]]
    #_, total_dist = predict_route(solver, a, False, 500)
    #route, dist = predict_geedy_route(a)
    #route, rdist = predict_route(solver, a, True, 500)
    _, bdist = predict_beam_search(solver, a, beam_width=4)
    #_, ls_dist = solve_tsp_local_search(a)
    #_, sa_dist = solve_tsp_simulated_annealing(a)
    ld.append(distance)
    #lp.append(total_dist)
    #lg.append(dist)
    #lr.append(rdist)
    lb.append(bdist)
    #lls.append(ls_dist)
    #lsa.append(sa_dist)
    
# Convert the collected lengths to arrays for the metric cells.
#Y_predict = np.array(lp)
Y_true = np.array(ld)
#Y_greedy = np.array(lg)
#Y_rnd = np.array(lr)
Y_beam = np.array(lb)
#Y_ls = np.array(lls)
#Y_sa = np.array(lsa)

100%|██████████| 3000/3000 [00:26<00:00, 113.32it/s]


In [43]:
# Mean absolute percentage error of the beam-search tour lengths relative to
# the reference tour lengths; the other methods are commented out.
#mean_absolute_percentage_error(Y_true, Y_predict), \
mean_absolute_percentage_error(Y_true, Y_beam) #, \
#mean_absolute_percentage_error(Y_true, Y_greedy), \
#mean_absolute_percentage_error(Y_true, Y_rnd), \
#mean_absolute_percentage_error(Y_true, Y_ls), \
#mean_absolute_percentage_error(Y_true, Y_sa)

0.08296926792485804

In [44]:
# Сколько случаев действительно плохого прогноза
#sum(((Y_predict - Y_true) / Y_true) > 0.2), \
sum(((Y_beam - Y_true) / Y_true) > 0.25) #, \
#sum(((Y_greedy - Y_true) / Y_true) > 0.2), \
#sum(((Y_rnd - Y_true) / Y_true) > 0.2), \
#sum(((Y_ls - Y_true) / Y_true) > 0.2), \
#sum(((Y_sa - Y_true) / Y_true) > 0.2)

np.int64(0)

In [53]:
%timeit predict_beam_search(solver, a, beam_width=4)

8.78 ms ± 8.53 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [None]:
%timeit predict_geedy_route(a)

In [50]:
%timeit predict_route(solver, a, False, 500)

358 ms ± 1.63 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
%timeit solve_tsp_dynamic_programming(a)

In [43]:
%timeit solve_tsp_branch_and_bound(a)

888 ms ± 5.48 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
%timeit solve_tsp_local_search(a)

In [None]:
%timeit solve_tsp_simulated_annealing(a)