Commit

wip neural architecture search
tartavull committed May 13, 2023
1 parent 4d7e0e6 commit 068bc0c
Showing 12 changed files with 838 additions and 16 deletions.
4 changes: 4 additions & 0 deletions flake.nix
@@ -26,6 +26,7 @@
        #mujoco = callPackage ./nix/mujoco.nix { };
        trimesh = callPackage ./nix/trimesh.nix { };
        brax = callPackage ./nix/brax.nix { };
        mplcursors = callPackage ./nix/mplcursors.nix { };
      };

      core-python = pkgs.python3.withPackages (ps: with ps; [
@@ -34,15 +35,18 @@
        pandas
        matplotlib
        pytorch
        torchvision
        pytest
        tqdm
        rich
        networkx
        # wandb # test failing

        # only supported on linux
        jaxlib
        jax
        overlay.brax
        overlay.mplcursors
      ]);


31 changes: 31 additions & 0 deletions nix/mplcursors.nix
@@ -0,0 +1,31 @@
{ lib
, buildPythonPackage
, fetchPypi
, setuptools_scm
, pytest
, pytestCheckHook
, matplotlib
}:

buildPythonPackage rec {
  pname = "mplcursors";
  version = "0.5";

  src = fetchPypi {
    inherit pname version;
    hash = "sha256-w92Ej+z0b7xIdAikBgouEDpVZ8EERN8OrIswjEBU5U0=";
  };

  nativeBuildInputs = [ setuptools_scm ];
  propagatedBuildInputs = [ matplotlib ];
  checkInputs = [ pytest pytestCheckHook ];

  doCheck = false;

  meta = with lib; {
    description = "Interactive data selection cursors for Matplotlib";
    homepage = "https://github.com/anntzer/mplcursors";
    license = licenses.mit;
    maintainers = with maintainers; [ ];
  };
}
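
The mplcursors package is presumably being added for interactive inspection of matplotlib plots (for example, fitness curves from the search). A minimal usage sketch, with made-up data purely for illustration and not taken from this commit:

import matplotlib.pyplot as plt
import mplcursors

# Draw any matplotlib artists; once a cursor is attached, clicking or
# hovering shows an annotation for the nearest data point.
fig, ax = plt.subplots()
ax.scatter([1, 2, 3], [4, 1, 3])
mplcursors.cursor(hover=True)
plt.show()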
3 changes: 0 additions & 3 deletions src/__init__.py

This file was deleted.

12 changes: 0 additions & 12 deletions src/core.py

This file was deleted.

201 changes: 201 additions & 0 deletions src/mnist/main.py
@@ -0,0 +1,201 @@
from __future__ import print_function

import argparse

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.optim.lr_scheduler import StepLR
from torchvision import datasets, transforms


class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.dropout1 = nn.Dropout(0.25)
        self.dropout2 = nn.Dropout(0.5)
        # 9216 = 64 channels * 12 * 12 spatial, after the two convs and 2x2 max-pool on a 28x28 input
        self.fc1 = nn.Linear(9216, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.conv1(x)
        x = F.relu(x)
        x = self.conv2(x)
        x = F.relu(x)
        x = F.max_pool2d(x, 2)
        x = self.dropout1(x)
        x = torch.flatten(x, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.dropout2(x)
        x = self.fc2(x)
        output = F.log_softmax(x, dim=1)
        return output


def train(args, model, device, train_loader, optimizer, epoch):
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()
        if batch_idx % args.log_interval == 0:
            print(
                "Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                    epoch,
                    batch_idx * len(data),
                    len(train_loader.dataset),
                    100.0 * batch_idx / len(train_loader),
                    loss.item(),
                )
            )
            if args.dry_run:
                break


def test(model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += F.nll_loss(
                output, target, reduction="sum"
            ).item()  # sum up batch loss
            pred = output.argmax(
                dim=1, keepdim=True
            )  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    print(
        "\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n".format(
            test_loss,
            correct,
            len(test_loader.dataset),
            100.0 * correct / len(test_loader.dataset),
        )
    )


def main():
    # Training settings
    parser = argparse.ArgumentParser(description="PyTorch MNIST Example")
    parser.add_argument(
        "--batch-size",
        type=int,
        default=64,
        metavar="N",
        help="input batch size for training (default: 64)",
    )
    parser.add_argument(
        "--test-batch-size",
        type=int,
        default=1000,
        metavar="N",
        help="input batch size for testing (default: 1000)",
    )
    parser.add_argument(
        "--epochs",
        type=int,
        default=14,
        metavar="N",
        help="number of epochs to train (default: 14)",
    )
    parser.add_argument(
        "--lr",
        type=float,
        default=1.0,
        metavar="LR",
        help="learning rate (default: 1.0)",
    )
    parser.add_argument(
        "--gamma",
        type=float,
        default=0.7,
        metavar="M",
        help="Learning rate step gamma (default: 0.7)",
    )
    parser.add_argument(
        "--no-cuda", action="store_true", default=False, help="disables CUDA training"
    )
    parser.add_argument(
        "--no-mps",
        action="store_true",
        default=False,
        help="disables macOS GPU training",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        default=False,
        help="quickly check a single pass",
    )
    parser.add_argument(
        "--seed", type=int, default=1, metavar="S", help="random seed (default: 1)"
    )
    parser.add_argument(
        "--log-interval",
        type=int,
        default=10,
        metavar="N",
        help="how many batches to wait before logging training status",
    )
    parser.add_argument(
        "--save-model",
        action="store_true",
        default=False,
        help="For Saving the current Model",
    )
    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    use_mps = not args.no_mps and torch.backends.mps.is_available()

    torch.manual_seed(args.seed)

    if use_cuda:
        device = torch.device("cuda")
    elif use_mps:
        device = torch.device("mps")
    else:
        device = torch.device("cpu")

    train_kwargs = {"batch_size": args.batch_size}
    test_kwargs = {"batch_size": args.test_batch_size}
    if use_cuda:
        cuda_kwargs = {"num_workers": 1, "pin_memory": True, "shuffle": True}
        train_kwargs.update(cuda_kwargs)
        test_kwargs.update(cuda_kwargs)

    transform = transforms.Compose(
        [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
    )
    dataset1 = datasets.MNIST("../data", train=True, download=True, transform=transform)
    dataset2 = datasets.MNIST("../data", train=False, transform=transform)
    train_loader = torch.utils.data.DataLoader(dataset1, **train_kwargs)
    test_loader = torch.utils.data.DataLoader(dataset2, **test_kwargs)

    model = Net().to(device)
    optimizer = optim.Adadelta(model.parameters(), lr=args.lr)

    scheduler = StepLR(optimizer, step_size=1, gamma=args.gamma)
    for epoch in range(1, args.epochs + 1):
        train(args, model, device, train_loader, optimizer, epoch)
        test(model, device, test_loader)
        scheduler.step()

    if args.save_model:
        torch.save(model.state_dict(), "mnist_cnn.pt")


if __name__ == "__main__":
    main()
57 changes: 57 additions & 0 deletions src/module.py
@@ -0,0 +1,57 @@
class Region:
    """
    This is the basic unit of computation, which evolves through:
    - Random mutation
    - A global reward signal
    - Backpropagation
    """

    def __init__(self) -> None:

        self._input = None
        # The module optionally receives a tensor as an input. The module will
        # define its architecture to be able to process any input shape.

        self._reward = None
        # The module's learnable parameters can be updated through
        # backpropagation and from a global reward signal.
        # This must be a scalar.

        self._gradient = None
        # The gradient used to update the parameters.
        # This must have the same shape as the output.

        self._mutation = 0.5
        # [0, 1]: this is the global mutation rate, which affects how quickly the
        # module evolves. Zero means the module is frozen; one means the module
        # changes at every opportunity.

    def __str__(self):
        return f"""
        Region:
            Mutation rate: {self._mutation}
        """


class Brain:
    """
    This is a directed acyclic graph of `MutableModule`s.
    It randomly creates new nodes and edges between them.
    It is also responsible for making the shapes work out.
    We'll probably want a Tensorboard-like visualization of this graph.
    """

    def __init__(self) -> None:

        self._mutation = 0.5
        # [0, 1]: this is the global mutation rate, which affects how quickly the
        # module evolves. Zero means the module is frozen; one means the module
        # changes at every opportunity.


# The simplest thing we can implement is a fully connected network where we mutate
# the weights, and that's the only learning method.

if __name__ == "__main__":
    print(Region())
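
The closing comment points at the simplest possible starting point: a plain fully connected network whose only learning mechanism is weight mutation. A hypothetical sketch of what such a module could look like (MutableMLP and its mutate method are illustrations, not part of this commit):

import torch
import torch.nn as nn


class MutableMLP(nn.Module):
    """A fully connected network whose only learning mechanism is weight mutation."""

    def __init__(self, mutation: float = 0.5) -> None:
        super().__init__()
        self.layers = nn.Sequential(
            nn.Linear(28 * 28, 128), nn.ReLU(), nn.Linear(128, 10)
        )
        self.mutation = mutation  # [0, 1] mutation rate, mirroring Region/Brain above

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.layers(x.flatten(1))

    @torch.no_grad()
    def mutate(self) -> "MutableMLP":
        # Perturb every parameter with Gaussian noise scaled by the mutation rate,
        # and return self so callers can collect the mutated module.
        for p in self.parameters():
            p.add_(self.mutation * torch.randn_like(p))
        return self

Returning self from mutate keeps the sketch compatible with the evolutionary loop in src/nas/evolve.py below, which appends the result of individual.mutate() to the next generation.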
32 changes: 32 additions & 0 deletions src/nas/evolve.py
@@ -0,0 +1,32 @@
import random
from copy import deepcopy
from typing import Callable

from nas import Net


def select_parents(population, fitness_scores):
    """
    Select the top 20% of the population, treating the fitness score as a loss
    to be minimized (lower is better).
    """
    sorted_population = sorted(
        population,
        key=lambda individual: fitness_scores[population.index(individual)],
        reverse=False,
    )
    return sorted_population[: len(sorted_population) // 5]


def evolve(constructor, fitness_fn: Callable, population_size: int, steps: int):
    """
    Run a simple evolutionary loop: score the population, keep the fittest 20%,
    and refill the population with mutated copies of the survivors.
    """
    population = [constructor() for _ in range(population_size)]
    for i in range(steps):
        fitness_scores = [fitness_fn(individual) for individual in population]
        print(fitness_scores)
        next_gen = select_parents(population, fitness_scores)
        while len(next_gen) < population_size:
            individual = deepcopy(random.choice(next_gen))
            next_gen.append(individual.mutate())
        population = next_gen

    return min(population, key=fitness_fn)
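
A hedged usage sketch of this loop on a toy problem, to show the expected interface: the constructor returns an individual, the individual exposes mutate() returning the mutated instance, and the fitness function returns a scalar where lower is better. The Guess class and import path are illustrative only; in this commit the intended constructor is Net from nas.

import random

from evolve import evolve  # assuming src/nas is on the path


class Guess:
    """Toy individual: a single number that mutates by a random step."""

    def __init__(self) -> None:
        self.value = random.uniform(-10.0, 10.0)

    def mutate(self) -> "Guess":
        self.value += random.gauss(0.0, 1.0)
        return self


# Fitness is treated as a loss: distance from the target value 3.0.
best = evolve(Guess, fitness_fn=lambda g: abs(g.value - 3.0), population_size=20, steps=50)
print(best.value)  # converges towards 3.0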