In [31]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np


# Define the classifier network
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        self.fc1 = nn.Linear(2, 10)
        self.fc2 = nn.Linear(10, 1)
        self.sigmoid = nn.Sigmoid()

    def forward(self, x):
        x = torch.relu(self.fc1(x))
        x = self.sigmoid(self.fc2(x))
        return x

In [57]:
# dataset of lists of length 10, with random values between 0 and 1, and the labels are the sorted lists
def generate_data(n, seq_len):
    data = []
    labels = []
    for i in range(n):
        x = np.random.rand(seq_len)
        y = np.sort(x)
        data.append(x)
        labels.append(y)
    return (torch.from_numpy(np.array(data)).float(),
            torch.from_numpy(np.array(labels)).float())


def get_dataloader(seq_len):
    _data, _labels = generate_data(1000, seq_len=seq_len)
    # make a dataset and dataloader
    dataset = torch.utils.data.TensorDataset(_data, _labels)
    dataloader = torch.utils.data.DataLoader(dataset, batch_size=32, shuffle=True)
    return dataloader


In [30]:
classifier = Classifier()
optimizer = optim.Adam(classifier.parameters())
loss_fn = nn.MSELoss()

# Initialize lists for storing accuracy and loss values
accuracy_values = []
loss_values = []
dataloader = get_dataloader(2)
for epoch in range(20):
    epoch_accuracy = 0
    epoch_loss = 0
    for i, (unsorted, sorted) in enumerate(dataloader):
        optimizer.zero_grad()
        outputs = classifier(unsorted)
        # 1 if unsorted == sorted, 0 otherwise
        labels = torch.all(torch.eq(unsorted, sorted), dim=1).float().view(-1, 1)

        loss = loss_fn(outputs, labels)
        epoch_loss += loss.item()

        correct_predictions = torch.eq(outputs.round(), labels).sum().item()
        total_predictions = labels.numel()
        accuracy = correct_predictions / total_predictions
        epoch_accuracy += accuracy

        loss.backward()
        optimizer.step()

    # Calculate average accuracy and loss for the epoch
    epoch_accuracy /= len(dataloader)
    epoch_loss /= len(dataloader)

    # Store the average accuracy and loss
    accuracy_values.append(epoch_accuracy)
    loss_values.append(epoch_loss)

    # Print the average accuracy and loss for this epoch
    print(f'Epoch {epoch + 1}: Accuracy = {epoch_accuracy}, Loss = {epoch_loss}')

Epoch 1: Accuracy = 0.697, Loss = 0.23447467714548112
Epoch 2: Accuracy = 0.88, Loss = 0.18960616970062255
Epoch 3: Accuracy = 0.941, Loss = 0.14904254581034185
Epoch 4: Accuracy = 0.961, Loss = 0.11744172981381416
Epoch 5: Accuracy = 0.966, Loss = 0.09576213874667883
Epoch 6: Accuracy = 0.974, Loss = 0.08102585458569228
Epoch 7: Accuracy = 0.984, Loss = 0.07098157062381506
Epoch 8: Accuracy = 0.986, Loss = 0.06334030017349869
Epoch 9: Accuracy = 0.985, Loss = 0.057589791821315885
Epoch 10: Accuracy = 0.987, Loss = 0.05315470756031573
Epoch 11: Accuracy = 0.99, Loss = 0.049476609668228776
Epoch 12: Accuracy = 0.989, Loss = 0.046587928170803934
Epoch 13: Accuracy = 0.986, Loss = 0.043780564752058125
Epoch 14: Accuracy = 0.995, Loss = 0.04144174537039362
Epoch 15: Accuracy = 0.992, Loss = 0.0393932817876339
Epoch 16: Accuracy = 0.985, Loss = 0.03766566501588386
Epoch 17: Accuracy = 0.988, Loss = 0.036099636463914064
Epoch 18: Accuracy = 0.992, Loss = 0.03497345769218009
Epoch 19: Accurac

In [63]:
# Zeroth order optimization
classifier = Classifier()
optimizer = optim.Adam(classifier.parameters())
loss_fn = nn.MSELoss()

dataloader = get_dataloader(3)

def predict_sorted_list_batch(ann, unsorted, seq_len):
    assert seq_len == 3

    # Initialize the list of sorted values. array of length seq_len
    sorted_list = []
    for i in range(unsorted.shape[0]):
        unsorted_list = unsorted[i].tolist()
        sorted_list.append(predict_sorted_list(ann, torch.tensor(unsorted_list), seq_len))

    return torch.stack(sorted_list)

def predict_sorted_list(ann, unsorted, seq_len):
    assert seq_len == 3

    unsorted = unsorted.tolist()

    # Initialize the list of sorted values. array of length seq_len
    sorted_list = []
    first = unsorted.pop()
    sorted_list.append(first)

    second = unsorted.pop()
    # Use the neural network to predict if second < first
    if ann(torch.tensor([second, first])).item() > 0.5:
        sorted_list.insert(0, second)
    else:
        sorted_list.append(second)

    third = unsorted.pop()
    # Use the neural network to predict if third < first
    if ann(torch.tensor([third, sorted_list[0]])).item() > 0.5:
        sorted_list.insert(0, third)
    # Use the neural network to predict if third < second
    elif ann(torch.tensor([third, sorted_list[1]])).item() > 0.5:
        sorted_list.insert(1, third)
    else:
        sorted_list.append(third)

    return torch.tensor(sorted_list)


# Initialize lists for storing accuracy and loss values
accuracy_values = []
loss_values = []

# Initialize best loss to infinity
best_loss = float('inf')

for epoch in range(20):
    epoch_accuracy = 0
    epoch_loss = 0
    for i, (unsorted, sorted) in enumerate(dataloader):
        predicted_sorted = predict_sorted_list_batch(classifier, unsorted, 3)

        loss = loss_fn(predicted_sorted, sorted)
        epoch_loss += loss.item()

        correct_predictions = torch.eq(predicted_sorted, sorted).sum().item()
        total_predictions = sorted.numel()
        accuracy = correct_predictions / total_predictions
        epoch_accuracy += accuracy

        # Zeroth order optimization
        for param in classifier.parameters():
            # Save current parameters
            param_data_orig = param.data.clone()

            # Generate random direction
            random_direction = torch.randn_like(param)

            # Evaluate loss at a point in the direction of the vector
            param.data = param_data_orig + 0.001 * random_direction
            predicted_sorted = predict_sorted_list_batch(classifier, unsorted, 3)
            loss_new_direction = loss_fn(predicted_sorted, sorted)

            # If the loss decreases, move in that direction
            if loss_new_direction.item() < best_loss:
                best_loss = loss_new_direction.item()
            else:  # If the loss increases, move in the opposite direction
                param.data = param_data_orig - 0.001 * random_direction
                predicted_sorted = predict_sorted_list_batch(classifier, unsorted, 3)
                loss_new_direction = loss_fn(predicted_sorted, sorted)

                # If the loss decreases, move in that direction
                if loss_new_direction.item() < best_loss:
                    best_loss = loss_new_direction.item()
                else:  # If the loss increases, do not move
                    param.data = param_data_orig

    # Calculate average accuracy and loss for the epoch
    epoch_accuracy /= len(dataloader)
    epoch_loss /= len(dataloader)

    # Store the average accuracy and loss
    accuracy_values.append(epoch_accuracy)
    loss_values.append(epoch_loss)

    # Print the average accuracy and loss for this epoch
    print(f'Epoch {epoch + 1}: Accuracy = {epoch_accuracy}, Loss = {epoch_loss}')

Epoch 1: Accuracy = 0.32682291666666663, Loss = 0.11083674291148782
Epoch 2: Accuracy = 0.32682291666666674, Loss = 0.11047123605385423
Epoch 3: Accuracy = 0.32291666666666663, Loss = 0.11114918836392462
Epoch 4: Accuracy = 0.3238932291666667, Loss = 0.11278517101891339
Epoch 5: Accuracy = 0.3277994791666666, Loss = 0.11181307106744498
Epoch 6: Accuracy = 0.3248697916666667, Loss = 0.11041212268173695
Epoch 7: Accuracy = 0.3268229166666667, Loss = 0.11034230468794703
Epoch 8: Accuracy = 0.3238932291666667, Loss = 0.11260808259248734
Epoch 9: Accuracy = 0.32682291666666663, Loss = 0.10981816879939288
Epoch 10: Accuracy = 0.32486979166666663, Loss = 0.11370224133133888
Epoch 11: Accuracy = 0.32096354166666663, Loss = 0.11214820574969053


KeyboardInterrupt: 