In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import random
from collections import deque
import numpy as np

def is_sorted(arr):
    """Return True when ``arr`` is in non-decreasing order.

    Empty and single-element sequences count as sorted.
    """
    for idx in range(len(arr) - 1):
        # Stop at the first adjacent pair that is out of order.
        if not arr[idx] <= arr[idx + 1]:
            return False
    return True

class PermutationTransformer(nn.Module):
    """Transformer encoder that maps a length-``n`` sequence to one scalar.

    Every position is described by three hand-crafted features, which are
    linearly embedded, passed through a Transformer encoder, flattened and
    reduced to a single value by an MLP head.

    Args:
        n: Sequence length; the MLP head is sized for exactly ``n`` tokens.
        d_model: Width of the token embeddings.
        nhead: Number of attention heads per encoder layer.
        num_layers: Number of stacked encoder layers.
        dim_feedforward: Hidden width of the encoder feed-forward sublayer
            and of every hidden layer of the MLP head.
    """

    def __init__(self, n, d_model=64, nhead=4, num_layers=1, dim_feedforward=128):
        super().__init__()

        self.n = n
        self.d_model = d_model

        # Linear embedding of the three per-position features.
        self.feature_embedding = nn.Linear(3, d_model)

        # Learnable positional table. It is registered (and therefore part of
        # the state dict) but forward() does not add it to the embeddings.
        self.pos_encoder = nn.Parameter(torch.zeros(1, n, d_model))

        # Transformer encoder operating on (batch, n, d_model) tensors.
        layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            batch_first=True,
        )
        self.transformer_encoder = nn.TransformerEncoder(layer, num_layers=num_layers)

        # MLP head: flattened encoder output -> one scalar per sample.
        hidden = dim_feedforward
        self.output = nn.Sequential(
            nn.Linear(n * d_model, hidden),
            nn.SELU(),
            nn.Linear(hidden, hidden),
            nn.SELU(),
            nn.Linear(hidden, hidden),
            nn.SELU(),
            nn.Linear(hidden, 1),
        )

    def forward(self, x):
        """Compute one scalar per row of ``x``.

        Args:
            x: Tensor of shape ``(batch, n)`` holding the sequence values.

        Returns:
            Tensor of shape ``(batch, 1)``.
        """
        batch_size = x.size(0)

        # Feature 0: signed offset x[i] - i. Offsets whose magnitude exceeds
        # n // 2 are replaced by the complementary offset (n - |offset|) with
        # the opposite sign, i.e. the offset measured the other way round.
        index = torch.arange(self.n, device=x.device).expand(batch_size, -1)
        offset = x - index
        distance = torch.abs(offset)
        complement = self.n - distance
        cyclic_offset = torch.where(
            distance > self.n // 2,
            torch.where(offset > 0, -complement, complement),
            offset,
        )

        # Feature 1: x[i] - x[i - 1] (roll by +1 brings the previous element,
        # cyclically, to position i).
        prev_delta = x - torch.roll(x, shifts=1, dims=1)

        # Feature 2: x[i] - x[i + 1] (roll by -1 brings the next element,
        # cyclically, to position i).
        next_delta = x - torch.roll(x, shifts=-1, dims=1)

        # Assemble the (batch, n, 3) feature tensor in the default float dtype.
        features = torch.stack(
            (cyclic_offset, prev_delta, next_delta), dim=2
        ).to(torch.get_default_dtype())

        tokens = self.feature_embedding(features)
        encoded = self.transformer_encoder(tokens)

        # Flatten all token representations and apply the MLP head.
        return self.output(encoded.reshape(batch_size, -1))

In [None]:
import torch
import torch.nn as nn
import numpy as np
from collections import defaultdict

def apply_action(perm, action):
    """Return a new permutation obtained by applying one move to ``perm``.

    ``action == 0`` swaps the first two entries, ``action == 1`` rotates the
    sequence one step to the right, and any other value rotates it one step
    to the left. The input is copied first and is never modified.
    """
    result = perm.copy()
    if action == 0:
        result[0], result[1] = result[1], result[0]
        return result
    # Remaining moves are cyclic shifts: +1 is "right", -1 is "left".
    shift = 1 if action == 1 else -1
    return np.roll(result, shift)

def tuple_perm(perm):
    """Return the elements of ``perm`` as a tuple (usable as a set member or dict key)."""
    frozen = tuple(perm)
    return frozen

def generate_training_sample(n, k):
    """
    Scramble the identity permutation with a random walk that never revisits a state.

    Starting from ``np.arange(n)``, at most ``k`` moves are applied. At every
    step a move is drawn uniformly (via ``np.random.choice``) from the moves
    that lead to a state not visited before; the swap move is additionally
    offered only while the first entry is not greater than the second. The
    walk stops early when no such move exists.

    Returns:
        A pair ``(perms, targets)``: ``perms`` is the list of states reached
        after each applied move and ``targets`` is the matching list of move
        counts ``1, 2, ...`` (the number of moves applied so far).
    """
    current = np.arange(n)
    visited = {tuple_perm(current)}
    perms = []

    for _ in range(k):
        # Moves are examined in the fixed order 0, 1, 2.
        candidates = [
            move
            for move in range(3)
            if not (move == 0 and current[0] > current[1])
            and tuple_perm(apply_action(current, move)) not in visited
        ]

        # Dead end: every permitted move leads to an already visited state.
        if not candidates:
            break

        chosen = np.random.choice(candidates)
        current = apply_action(current, chosen)
        visited.add(tuple_perm(current))
        perms.append(current)

    # The i-th recorded state was reached after exactly i moves (1-based).
    targets = list(range(1, len(perms) + 1))
    return perms, targets


# Batched variant: gathers the states of several random walks into one tensor pair.
def generate_batch(n, max_moves, batch_size):
    """Build a training batch from ``batch_size`` random walks.

    For every walk the move limit is drawn from ``{max_moves - 1, max_moves}``
    and passed to ``generate_training_sample``; all states of all walks are
    concatenated.

    Returns:
        A pair of float tensors: the stacked states and their move counts.
    """
    states, labels = [], []

    for _ in range(batch_size):
        # randint's upper bound is exclusive, so this yields max_moves - 1 or max_moves.
        walk_limit = np.random.randint(max_moves - 1, max_moves + 1)
        walk_states, walk_counts = generate_training_sample(n, walk_limit)
        states += walk_states
        labels += walk_counts

    state_tensor = torch.FloatTensor(np.array(states))
    label_tensor = torch.FloatTensor(labels)
    return state_tensor, label_tensor

def train_model(model, n, max_moves, num_epochs, batch_size, learning_rate,
                batches_per_epoch=10, log_every=10, eval_every=100):
    """Train ``model`` to predict the move counts produced by ``generate_batch``.

    The model is moved to CUDA when available (CPU otherwise) and optimised
    with Adam and a Smooth-L1 loss on freshly generated batches.

    Args:
        model: Network taking a ``(batch, n)`` float tensor and returning a
            ``(batch, 1)`` tensor.
        n: Permutation length passed to the data generators.
        max_moves: Move limit passed to the data generators.
        num_epochs: Number of epochs to run.
        batch_size: Number of random walks per generated batch.
        learning_rate: Adam learning rate.
        batches_per_epoch: Number of generated batches (optimizer steps) per
            epoch. Must be at least 1.
        log_every: Print the average epoch loss every this many epochs;
            a falsy value disables the printout.
        eval_every: Every this many epochs, generate one walk with limit
            ``max_moves`` and print actual vs. predicted move counts for each
            of its states; a falsy value disables the check.

    Raises:
        ValueError: If ``batches_per_epoch`` is smaller than 1.
    """
    if batches_per_epoch < 1:
        raise ValueError("batches_per_epoch must be at least 1")

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.SmoothL1Loss()

    for epoch in range(num_epochs):
        model.train()
        total_loss = 0.0

        # Every batch is generated on the fly; no dataset is stored.
        for _ in range(batches_per_epoch):
            batch_perms, batch_targets = generate_batch(n, max_moves, batch_size)
            batch_perms = batch_perms.to(device)
            # Targets become (batch, 1) to match the model output shape.
            batch_targets = batch_targets.to(device).unsqueeze(1)

            optimizer.zero_grad()
            outputs = model(batch_perms)
            loss = criterion(outputs, batch_targets)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()

        # Epoch statistics
        avg_loss = total_loss / batches_per_epoch
        if log_every and epoch % log_every == 0:
            print(f"Epoch {epoch}, Average Loss: {avg_loss:.4f}")

        # Spot-check the model on one freshly generated walk
        if eval_every and epoch % eval_every == 0:
            model.eval()
            with torch.no_grad():
                test_perm, actual_moves = generate_training_sample(n, max_moves)
                test_tensor = torch.FloatTensor(np.array(test_perm)).to(device)
                predicted_moves = model(test_tensor)
                for j in range(len(test_perm)):
                    print(f"Permutation: {test_perm[j]}")
                    print(f"Actual moves: {actual_moves[j]}, Predicted: {predicted_moves[j].item():.2f}")
                print()

# Seed the NumPy and PyTorch random generators so the sampled walks and the
# weight initialisation do not change from run to run.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

n = 100  # permutation length
max_moves = n*(n-1)//2  # upper limit on the number of scrambling moves per walk
num_epochs = 1000
batch_size = 1
learning_rate = 0.0003

# train_model() moves the network to CUDA when it is available and to the CPU
# otherwise (nn.Module.to works in place), so no explicit .cuda() call is made
# here; an unconditional .cuda() fails on machines without a GPU.
model = PermutationTransformer(n)
train_model(model, n, max_moves, num_epochs, batch_size, learning_rate)

In [6]:
from collections import deque

def beam_search_sorting(permutation, beam_width=5, max_path_len=40000, value_model=None):
    """Search for a move sequence that sorts ``permutation`` using beam search.

    Three moves are available: ``'L'`` (cyclic shift left), ``'R'`` (cyclic
    shift right) and ``'X'`` (swap the first two elements). Every newly
    reached state is scored by a value network; at each depth only the
    ``beam_width`` entries with the lowest cumulative cost are kept. The first
    path that reaches ``[0, 1, ..., n - 1]`` is returned; because the beam is
    pruned, it is not guaranteed to be the shortest one.

    Args:
        permutation: Sequence (list, tuple, NumPy array, ...) holding the
            values to sort. It is copied into a list of ints and not modified.
        beam_width: Maximum number of entries kept per depth.
        max_path_len: The search gives up and returns ``None`` as soon as it
            dequeues a path longer than this.
        value_model: Callable that receives a float tensor of shape ``(1, n)``
            and returns a single-element tensor. When omitted, the
            notebook-level ``model`` variable is used.

    Returns:
        The list of moves (``'L'``, ``'R'``, ``'X'``) leading to the sorted
        order, or ``None`` when the beam runs empty or ``max_path_len`` is
        exceeded.
    """
    # Fall back to the notebook-level network when none is passed explicitly.
    net = model if value_model is None else value_model
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Work on plain Python lists: states are combined with list slicing and
    # compared with ``==``, neither of which behaves this way for arrays.
    start = [int(value) for value in permutation]
    target = list(range(len(start)))

    def step(state, action):
        """Return the state reached from ``state`` by one move."""
        if action == 'R':
            return state[-1:] + state[:-1]
        if action == 'L':
            return state[1:] + state[:1]
        swapped = state[:]
        swapped[0], swapped[1] = swapped[1], swapped[0]
        return swapped

    # Each beam entry is (cumulative cost, path so far, current state).
    beam = [(0, [], start)]
    # The start state counts as visited so the search never walks back to it.
    seen = {tuple(start)}

    max_actions = 0
    q_values = []  # scores of the states generated at the previous depth

    while beam:
        # Keep only the beam_width cheapest entries (stable sort on cost).
        beam = sorted(beam, key=lambda entry: entry[0])[:beam_width]
        next_beam = []

        for cost, path, current in beam:
            if len(path) > max_actions:
                # First entry of a new depth: report progress every 100 levels.
                max_actions = len(path)
                if max_actions % 100 == 0:
                    print('new max len', len(path), len(beam), len(seen), np.min(q_values))
                q_values = []
            if len(path) > max_path_len:
                return None
            if current == target:
                return path

            for action in ('L', 'R', 'X'):
                # A shift that undoes the previous shift is pointless.
                if action == 'L' and path and path[-1] == 'R':
                    continue
                if action == 'R' and path and path[-1] == 'L':
                    continue
                # Swap only when the first two elements are out of order.
                if action == 'X' and (len(current) < 2 or current[0] < current[1]):
                    continue

                next_state = step(current, action)
                state_key = tuple(next_state)
                if state_key in seen:
                    continue
                seen.add(state_key)

                with torch.no_grad():
                    t_state = torch.tensor(
                        next_state, dtype=torch.float32, device=device
                    ).unsqueeze(0)
                    q_value = net(t_state).squeeze().item()
                q_values.append(q_value)

                # The cost accumulates one unit per move plus the network's
                # score of every state visited along the path.
                total_cost = cost + 1 + q_value
                next_beam.append((total_cost, path + [action], next_state))

        beam = next_beam

    return None  # the beam ran empty without reaching the sorted order

# Usage example
n = 100
# Test permutation: [1, 0] followed by the remaining values in descending order.
p = np.arange(n)
p[[0, 1]] = p[[1, 0]]
p[2:] = p[2:][::-1].copy()
permutation = p.tolist()

k = 3000  # beam width
model.eval()
path = beam_search_sorting(permutation, beam_width=k)
# The search returns None when it gives up; report that instead of failing on len(None).
if path is None:
    print("No path found")
else:
    print("Кратчайший путь:", len(path))
    print(path)

new max len 100 3000 487511 4466.4130859375
new max len 200 3000 955409 4271.8447265625
new max len 300 3000 1387369 4165.74755859375
new max len 400 3000 1824721 4076.861083984375
new max len 500 3000 2292207 3815.860595703125
new max len 600 3000 2721406 3716.184326171875
new max len 700 3000 3133897 3666.57177734375
new max len 800 3000 3552861 3584.751220703125
new max len 900 3000 4028008 3283.881103515625
new max len 1000 3000 4460160 3182.252685546875
new max len 1100 3000 4886775 3054.591064453125
new max len 1200 3000 5307003 2945.5712890625
new max len 1300 3000 5710563 2885.424560546875
new max len 1400 3000 6121108 2694.521240234375
new max len 1500 3000 6536970 2695.101318359375
new max len 1600 3000 6927630 2545.593505859375
new max len 1700 3000 7312037 2276.962890625
new max len 1800 3000 7696977 2181.089599609375
new max len 1900 3000 8079028 2186.112548828125
new max len 2000 3000 8456150 2097.048095703125
new max len 2100 3000 8828585 1978.5848388671875
new max len 2

In [7]:
def perform_action(state, action):
    """Return the list obtained by applying one move to a copy of ``state``.

    ``'L'`` moves the first element to the end, ``'R'`` moves the last element
    to the front and ``'X'`` swaps the first two elements. Any other action
    yields an unchanged copy. ``state`` itself is not modified.
    """
    result = state.copy()
    if action == 'L':
        return result[1:] + [result[0]]
    if action == 'R':
        return [result[-1]] + result[:-1]
    if action == 'X':
        result[0], result[1] = result[1], result[0]
    return result

# Replay the found path on a copy of the start permutation to verify it.
p = permutation.copy()
s = p.copy()
for action in path:
    s = perform_action(s, action)
print(p)
print(s)
# Do not rely on eyeballing the printout: the replayed result must be sorted.
assert s == sorted(p), "replaying the path did not sort the permutation"


[1, 0, 99, 98, 97, 96, 95, 94, 93, 92, 91, 90, 89, 88, 87, 86, 85, 84, 83, 82, 81, 80, 79, 78, 77, 76, 75, 74, 73, 72, 71, 70, 69, 68, 67, 66, 65, 64, 63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, 50, 49, 48, 47, 46, 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 33, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2]
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99]


In [8]:
# Persist only the model's state dict (not the module object) to 'model_100.pth'.
torch.save(model.state_dict(), 'model_100.pth')

In [10]:
# Number of swap ('X') moves in the found path.
sum(1 for move in path if move == 'X')

2450