In [1]:
!git clone https://github.com/marvosyntactical/swarms/

fatal: destination path 'swarms' already exists and is not an empty directory.


In [2]:
import os
os.chdir("swarms")

In [3]:
!git pull

remote: Enumerating objects: 5, done.[K
remote: Counting objects:  20% (1/5)[Kremote: Counting objects:  40% (2/5)[Kremote: Counting objects:  60% (3/5)[Kremote: Counting objects:  80% (4/5)[Kremote: Counting objects: 100% (5/5)[Kremote: Counting objects: 100% (5/5), done.[K
remote: Compressing objects: 100% (1/1)[Kremote: Compressing objects: 100% (1/1), done.[K
remote: Total 3 (delta 2), reused 3 (delta 2), pack-reused 0[K
Unpacking objects:  33% (1/3)Unpacking objects:  66% (2/3)Unpacking objects: 100% (3/3)Unpacking objects: 100% (3/3), 430 bytes | 430.00 KiB/s, done.
From https://github.com/marvosyntactical/swarms
   636c85b..6c89be5  main       -> origin/main
Updating 636c85b..6c89be5
Fast-forward
 swarm.py | 25 [32m++++++++++++[m[31m-------------[m
 1 file changed, 12 insertions(+), 13 deletions(-)


In [4]:
!pip install neptune



In [5]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
import contextlib
from pprint import pprint


from swarm import Swarm, PSO, SwarmGrad, SwarmGradAccel, CBO, EGICBO, PlanarSwarm

import argparse
import neptune

In [6]:
class SmallLinear(nn.Module):
    def __init__(self):
        super(SmallLinear, self).__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(28 * 28, 100)
        self.sigmoid = nn.Sigmoid()
        self.fc2 = nn.Linear(100, 10)

    def forward(self, x):
        x = self.flatten(x)
        x = self.sigmoid(self.fc1(x))
        x = self.fc2(x)
        x = F.log_softmax(x, dim=1)
        return x

In [7]:
def freqs(t):
    fs = {}
    n = t.nelement()
    for i in range(10):
        fs[i] = f"{100*(t == i).sum().item()/n}%"
    return fs


def preprocess():

    # Load and preprocess the MNIST dataset
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,))
    ])

    train_dataset = datasets.MNIST('data', train=True, download=True, transform=transform)
    test_dataset = datasets.MNIST('data', train=False, transform=transform)

    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=64, shuffle=True)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=1000, shuffle=False)

    return transform, train_dataset, test_dataset, train_loader, test_loader

In [8]:
def init_neptune(args):

    # TODO figure out how to hide this ...
    tok = "eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiIzNjk4NmM5ZC01ZGZiLTQzZGYtYTBlOC1hM2M2MTU4NWQyODAifQ=="

    run = neptune.init_run(
        project="halcyon/swarm",
        api_token=tok,
    )

    run["parameters/gradient"] = args.gradient
    run["parameters/optim"] = args.optim
    run["parameters/N"] = args.N
    run["parameters/epochs"] = args.epo
    run["parameters/stop"] = args.stop

    return run

In [9]:
class Args:
  # argparse workaround / replacement
  def __init__(
      self,
      gradient: bool = False,
      optim: str = "sga",
      N: int = 10,
      epo: int = 1,
      stop: int = 1e15,
      neptune: bool = False,
      c1: float = 1.0,
      c2: float = 1.0,
      inertia: float = 0.1,
      beta1: float = 0.9,
      beta2: float = 0.99,
      lr: float = 1.0,
      lamda: float = 1.5,
      sigma: float = .5,
      noise: str = "component",
      kappa: float = 1e5,
      slack: float = 10.,
      tau: float = .2,
      hess: bool = False,
    ):
    self.gradient = gradient
    assert optim in [
      "cbo",
      "egi",
      "pso",
      "sg",
      "sga",
      "pla",
    ]
    self.optim = optim
    self.N = N
    self.epo = epo
    self.stop = stop
    self.neptune = neptune
    self.c1 = c1
    self.c2 = c2
    self.inertia = inertia
    self.beta1 = beta1
    self.beta2 = beta2
    self.lr = lr
    self.lamda = lamda
    self.sigma = sigma
    self.noise = noise
    self.kappa = kappa
    self.slack = slack
    self.tau = tau
    self.hess = hess

In [10]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'
print(device)

cuda


In [11]:
args = Args(
    N = 50,
    epo = 10,
    optim = "sga",
    c1 = 1.0,
    c2 = 0.,
    beta1 = 0.8,
    beta2 = 0.95,
    neptune = True
)


if args.neptune:
    run = init_neptune(args)
else:
    run = {}

# Initialize the model and optimizer

if args.gradient:
    model = SmallLinear().to(device)
    optimizer = torch.optim.Adam(
        model.parameters(),
        lr=0.01,
    )

else:
    models = [SmallLinear().to(device) for _ in range(args.N)]
    model = models[0]

    opt = args.optim
    if opt == "cbo":
        optimizer = CBO(
            models,
            lambda_=args.lamda,
            sigma=args.sigma,
            noise_type=args.noise,
            device=device
        )
        run["parameters/lambda"] = args.lamda
        run["parameters/sigma"] = args.sigma
        run["parameters/noise"] = args.noise

    elif opt == "pso":
        optimizer = PSO(
            models,
            c1=args.c1,
            c2=args.c2,
            inertia=args.inertia,
            device=device
        )
        run["parameters/c1"] = args.c1
        run["parameters/c2"] = args.c2
        run["parameters/inertia"] = args.inertia

    elif opt == "egi":
        optimizer = EGICBO(
            models,
            lambda_=args.lamda,
            sigma=args.sigma,
            noise_type=args.noise,
            kappa=args.kappa,
            tau=args.tau,
            slack=args.slack,
            extrapolate=args.hess,
            device=device
        )
        run["parameters/lambda"] = args.lamda
        run["parameters/sigma"] = args.sigma
        run["parameters/kappa"] = args.kappa
        run["parameters/tau"] = args.tau
        run["parameters/slack"] = args.slack
        run["parameters/hess"] = args.hess

    elif opt == "sg":
        optimizer = SwarmGrad(
            models,
            c1=args.c1,
            c2=args.c2,
            inertia=args.inertia,
            device=device
        )
        run["parameters/c1"] = args.c1
        run["parameters/c2"] = args.c2
        run["parameters/inertia"] = args.inertia

    elif opt == "sga":
        optimizer = SwarmGradAccel(
            models,
            c1=args.c1,
            c2=args.c2,
            beta1=args.beta1,
            beta2=args.beta2,
            lr=args.lr,
            device=device
        )
        run["parameters/c1"] = args.c1
        run["parameters/c2"] = args.c2
        run["parameters/beta1"] = args.beta1
        run["parameters/beta2"] = args.beta2

    elif opt == "pla":
        optimizer = PlanarSwarm(
            models,
            device=device
        )
    else:
        raise NotImplementedError(f"Optim={opt}")

# Prep Data
transform, train_dataset, test_dataset, train_loader, test_loader = preprocess()



[neptune] [info   ] Neptune initialized. Open in the app: https://app.neptune.ai/halcyon/swarm/e/SWRM-49


In [None]:
# Train the model

# Dont compute gradients in case of Swarm optimizer
train_context = torch.no_grad if not args.gradient else contextlib.nullcontext

with train_context():
    for epoch in range(args.epo):
        model.train()
        for batch_idx, (data, target) in enumerate(train_loader):

            if batch_idx+len(train_loader)*epoch > args.stop:
                break

            data, target = data.to(device), target.to(device)

            if args.gradient:
                optimizer.zero_grad()

                output = model(data)
                loss = F.nll_loss(output, target)
                loss.backward()

                optimizer.step()
            else:
                loss = optimizer.step(
                    F.nll_loss,
                    model,
                    data,
                    target,
                    # lambda: F.nll_loss(model(data), target)
                )

                if args.neptune:
                    for stat, val in optimizer.stats().items():
                        run[f"train/{stat}"].append(val)

            if args.neptune:
                run["train/loss"].append(loss.item())


            # if batch_idx % 20 == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

        # Evaluate the model after each epoch
        model.eval()
        test_loss = 0
        correct = 0

        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

        # Sanity Check: To see if net just learned to output one digit always
        pprint(freqs(pred))

        test_loss /= len(test_loader.dataset)
        accuracy = 100. * correct / len(test_loader.dataset)

        print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
            test_loss, correct, len(test_loader.dataset), accuracy))

        if args.neptune:
            run["test/loss"].append(test_loss)
            run["test/acc"].append(accuracy)

if args.neptune:
    run.stop()

{0: '16.4%',
 1: '15.0%',
 2: '7.9%',
 3: '6.5%',
 4: '5.5%',
 5: '6.2%',
 6: '8.8%',
 7: '15.4%',
 8: '10.9%',
 9: '7.4%'}

Test set: Average loss: 3.4404, Accuracy: 4582/10000 (46%)

