In [84]:
import gym
from gym import error, spaces, utils
from gym.utils import seeding
import numpy as np
import math
import matplotlib.pyplot as plt
from matplotlib import colors
import random
import sys
import torch
from torch import nn
from torch.nn import functional as F
import torchvision.datasets as dsets
import torchvision.transforms as transforms
from tqdm import tqdm
from torch.utils.data import Dataset

In [85]:
batch_size=32
device='cpu'
lr=0.0001

In [103]:
class env():
  metadata = {'render_modes': ['human']}

  actions_set = {
                # increase channels with kernel 3
                'conv=channel_factor:2,kernel_size:3,stride:1,padding:0-',
                'conv=channel_factor:4,kernel_size:3,stride:1,padding:0-',
                'conv=channel_factor:8,kernel_size:3,stride:1,padding:0-',
                'conv=channel_factor:16,kernel_size:3,stride:1,padding:0-',
                # increase channels with kernel 5
                'conv=channel_factor:2,kernel_size:5,stride:1,padding:0-',
                'conv=channel_factor:4,kernel_size:5,stride:1,padding:0-',
                'conv=channel_factor:8,kernel_size:5,stride:1,padding:0-',
                'conv=channel_factor:16,kernel_size:5,stride:1,padding:0-',
                # increase channels with kernel 7
                'conv=channel_factor:2,kernel_size:7,stride:1,padding:0-',
                'conv=channel_factor:4,kernel_size:7,stride:1,padding:0-',
                'conv=channel_factor:8,kernel_size:7,stride:1,padding:0-',
                'conv=channel_factor:16,kernel_size:7,stride:1,padding:0-',

                # decrease channels with kernel 3
                'conv=channel_factor:0.1,kernel_size:3,stride:1,padding:0-',
                'conv=channel_factor:0.2,kernel_size:3,stride:1,padding:0-',
                'conv=channel_factor:0.4,kernel_size:3,stride:1,padding:0-',
                'conv=channel_factor:0.8,kernel_size:3,stride:1,padding:0-',
                # decrease channels with kernel 5
                'conv=channel_factor:0.1,kernel_size:5,stride:1,padding:0-',
                'conv=channel_factor:0.2,kernel_size:5,stride:1,padding:0-',
                'conv=channel_factor:0.4,kernel_size:5,stride:1,padding:0-',
                'conv=channel_factor:0.8,kernel_size:5,stride:1,padding:0-',
                # decrease channels with kernel 7
                'conv=channel_factor:0.1,kernel_size:3,stride:1,padding:0-',
                'conv=channel_factor:0.2,kernel_size:3,stride:1,padding:0-',
                'conv=channel_factor:0.4,kernel_size:3,stride:1,padding:0-',
                'conv=channel_factor:0.8,kernel_size:3,stride:1,padding:0-',


                 'batchnorm=eps:0.00001-',

                 'avgpool=kernel_size:2,stride:2,padding:0-',
                 'avgpool=kernel_size:3,stride:3,padding:0-',
                 'avgpool=kernel_size:5,stride:5,padding:0-',
                 'avgpool=kernel_size:7,stride:7,padding:0-',

                 'maxpool=kernel_size:2,stride:2,padding:0-',
                 'maxpool=kernel_size:3,stride:3,padding:0-',
                 'maxpool=kernel_size:3,stride:5,padding:0-',
                 'maxpool=kernel_size:3,stride:7,padding:0-',

                 'dropout=p:0.1-',
                 'dropout=p:0.2-',
                 'dropout=p:0.3-',
                 'dropout=p:0.4-',
  }

  NN_CREATE_SUCCESS_REWARD = 5
  NN_CREATE_NOT_SUCCESS_PENALTY = -5


  def __init__(self, render_mode=None):
      self.max_its = 10
      self.current_it = 1
      self.episode_reward = 0
      #self.last_layer_shape = None

      self.ENV_PARAMS = {
          'iteration': 1,
          'max_its': 10,
          'episode_reward': 0,
          'actions_amount': len(self.actions_set),
      }

      self.NN_PARAMS = {
          'train_epochs': 10,
          'last_nets_metrics_memory_len': 10,
          'max_layers_amount': 10,
          'min_layers_amount': 3,
          'in_channels': 1,
      }
      self.NN_PARAMS['metrics'] = {'valid_losses': np.array([0] * self.NN_PARAMS['train_epochs']),
                      'train_losses': np.array([0] * self.NN_PARAMS['train_epochs']),
                     }


      self.Net = self.NN(self.NN_PARAMS['in_channels'])
      self.train_dataloader = None
      self.train_dataloader = None
      self.optimizer = None
      self.criterion = None

      self.nngenerator = self.nnGenerator()

      self.last_obs = None

      self.action_space = spaces.Text(
          max_length=self.NN_PARAMS['max_layers_amount'],
          min_length=self.NN_PARAMS['min_layers_amount'],
          charset=self.actions_set)

      self.observation_space = spaces.Dict(
          {
          'last_nets_metrics_memory': spaces.Box(
              low=0,
              high=1,
              shape=(self.NN_PARAMS['last_nets_metrics_memory_len'],
                     self.NN_PARAMS['train_epochs'],
                     len(self.NN_PARAMS['metrics'].keys()),
                     )),

          }
      )
      self.statistics = {
        'episode_rewards': [],
        'global_rewards': [],
        'made_steps': [],
      }

      self.seed()
      assert render_mode is None or render_mode in self.metadata["render_modes"]
      self.render_mode = render_mode


  class NN(nn.Module):

    def __init__(self, in_channels):
      super().__init__()

      self.first_block = nn.Sequential(
          nn.Conv2d(in_channels=in_channels,
                    out_channels=32,
                    kernel_size=3,
                    padding='same'),

          nn.ReLU(inplace=True),

          nn.Conv2d(in_channels=32,
                    out_channels=4,
                    kernel_size=3,
                    padding='same'),

          nn.ReLU(inplace=True),
      )

      self.backbone = nn.Sequential()
      self.classifier = nn.Linear(4 * 28 *28, 10)
      self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
      x = self.first_block(x)
      #x = self.backbone(x)
      x = torch.flatten(x, start_dim=1)
      out = self.classifier(x)
      out = self.softmax(out)
      return out

    def __call__(self, x):
      return self.forward(x)

  def set_train_dataloader(self, train_dataloader):
    self.train_dataloader = train_dataloader

  def set_valid_dataloader(self, valid_dataloader):
    self.valid_dataloader = valid_dataloader

  def set_criterion(self, criterion):
    self.criterion = criterion

  def set_optimizer(self, optimizer):
    self.optimizer = optimizer

  def train(self):
    train_losses = []
    valid_losses = []
    # TODO calculate metrics and return them after train
    def CalcValLoss(self):
        with torch.no_grad():
            losses = []
            for X, Y in self.valid_dataloader:
                X = X.float().to(device)
                Y = Y.float().to(device)
                preds = self.Net(X)
                preds, _ = torch.max(preds,1)
                loss = self.criterion(preds,Y)
                losses.append(loss.item())
            #print("Valid Loss : {:.6f}".format(torch.tensor(losses).mean()))
            valid_losses.append(torch.tensor(losses).mean())

    for i in range(1, self.NN_PARAMS['train_epochs']):
        losses = []
        for X, Y in tqdm(self.train_dataloader):
            X = X.float().to(device)
            Y = Y.float().to(device)
            preds = self.Net(X)
            preds, _ = torch.max(preds,1)
            loss = self.criterion(preds, Y)
            losses.append(loss.item())

            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()

        #print("Train Loss : {:.6f}".format(torch.tensor(losses).mean()))
        train_losses.append(torch.tensor(losses).mean())

    # update all required NN metrics
    self.NN_PARAMS['metrics']['train_losses'] = train_losses
    self.NN_PARAMS['metrics']['valid_losses'] = valid_losses

  class nnGenerator():
    def __init__(self):
      self.text_layers_dict = dict({})
      self.nn_len = -1

    def parse_action(self, action):
      if action[-1] == '-':
        action = action[:-1]
      text_layers = action.split('-')
      for text_layer in text_layers:
        tmp = text_layer.split('=')
        layer_name, layer_params = tmp[0], tmp[1].split(',')
        layer_params_dict = dict({})
        for param in layer_params:
          param = param.split(':')
          param_name, param_value = param[0], param[1]
          layer_params_dict[param_name] = param_value
        self.text_layers_dict[layer_name] = layer_params_dict

    def get_text_layers_dict(self):
      return self.text_layers_dict

    def get_nn_len(self):
      return self.nn_len

    def generateNN(self): # -> nn.Sequential
      # returns success_state, backbone and classifier of nn, nn.Sequential
      # if impossible to create nn, success_state = False
      success_state = False
      # TODO

      # update self.nn_len if created correctly

      backbone = nn.Sequential()
      classifier = nn.Sequential()

      return success_state, backbone, classifier


  def seed(self, seed=None):
      self.np_random, seed = seeding.np_random(seed)
      return [seed]


  def calc_reward(self, nn_created_correctly_flag, nn_len, last_train_metrics):
    reward = 0
    # reward for decreasing nn depth
    optimal_depth_reward = 0
    # reward by metrics
    metrics_optimization_reward = 0
    # reward for successfull nn creation
    creation_successfull_reward = 0
    if nn_created_correctly_flag == True:
      # do not reward agent if creation is not succeed
      creation_successfull_reward += self.NN_CREATE_SUCCESS_REWARD
      # TODO
      metrics_optimization_reward += 1
      # TODO
      optimal_depth_reward

    else:
      creation_successfull_reward += self.NN_CREATE_NOT_SUCCESS_PENALTY

    reward += optimal_depth_reward
    reward += metrics_optimization_reward
    reward += creation_successfull_reward

    return reward

  def create_obs(self):
    # TODO create new observation using
    # self.NN_PARAMS['metrics'] and self.last_obs
    obs = np.zeros((
              self.NN_PARAMS['last_nets_metrics_memory_len'],
              self.NN_PARAMS['train_epochs'],
              len(self.NN_PARAMS['metrics'].keys()),
             ) )
    return obs

  def step(self, action):
      reward = 0
      done = False
      info = {} # You may need to add some extra information

      '''
      1) parse action
      2) generate NN
      3) (optionally) update optimizer, prepare for training
      4) train NN, collect metrics
      5) calculate reward
      6) collect statistics
      7) create new observation
      8) return obs, reward, done, info
      '''
      reward = 0

      self.nngenerator.parse_action(action)
      success_state, backbone, classifier = self.nngenerator.generateNN()
      if success_state == True: # NN created_correctly
        self.Net.backbone = backbone
        self.Net.classifier = classifier
        self.train()


      reward = self.calc_reward(success_state,
                                self.nngenerator.get_nn_len(),
                                self.NN_PARAMS['metrics'],
                                )

      # TODO generate new observation
      new_obs = self.create_obs()

      current_obs = {
          'last_nets_metrics_memory': new_obs
          }

      self.last_obs = new_obs

      self.episode_reward = reward
      self.current_it += 1
      return current_obs, reward, done, info


  def reset(self):
      '''
      Reset the env
      '''
      # clearify all changable vars
      self.Net = None
      if self.optimizer is not None:
        self.optimizer = self.optimizer.zero_grad()
      # self.criterion = None
      # may be it's better to set random number over zeros
      current_obs = {
          'last_nets_metrics_memory': np.zeros((
              self.NN_PARAMS['last_nets_metrics_memory_len'],
              self.NN_PARAMS['train_epochs'],
              len(self.NN_PARAMS['metrics'].keys()),
             ) )
          }
      self.last_obs = current_obs
      self.episode_reward = 0
      self.current_it = 1

      self.statistics['episode_rewards'] = []
      self.statistics['made_steps'] = []

      return current_obs



  def render(self, mode=None):
    #TODO visualization
    pass


  def close(self):
      pass

In [104]:
env = env()

In [105]:
env.reset()

{'last_nets_metrics_memory': array([[[0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.]],
 
        [[0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.]],
 
        [[0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.]],
 
        [[0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.]],
 
        [[0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.],
         [0., 0.]],
 
       

In [106]:
env.step("dropout=p:0.2-dropout=p:0.2-dropout=p:0.2-")

({'last_nets_metrics_memory': array([[[0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.]],
  
         [[0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.]],
  
         [[0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.]],
  
         [[0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.]],
  
         [[0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0.],
          [0., 0

In [None]:
train_data = dsets.MNIST(root = './data', train = True,
                        transform = transforms.ToTensor(), download = True)

test_data = dsets.MNIST(root = './data', train = False,
                       transform = transforms.ToTensor())

In [None]:
train_samples = np.expand_dims(np.array(train_data.data), axis=1)[:5000]

In [None]:
train_labels = np.array(train_data.targets)[:5000]

In [None]:

class myDataset(Dataset):
    def __init__(self, X, y):

      self.X = X
      self.y = y

    def __len__(self):
        return len(self.y)

    def __getitem__(self, idx):
        x_ = self.X[idx]
        y_ = self.y[idx]
        return x_, y_

In [None]:
dataset = myDataset(X=train_samples, y=train_labels)

In [None]:
train_set, valid_set = torch.utils.data.random_split(dataset, [0.8, 0.2], generator=torch.Generator().manual_seed(42))

In [None]:

train_dataloader = torch.utils.data.DataLoader(
  train_set,
  batch_size=batch_size,
  shuffle=True,
  drop_last=True)

valid_dataloader = torch.utils.data.DataLoader(
  valid_set,
  batch_size=batch_size,
  drop_last=True,
  shuffle=True)

In [None]:
env.set_train_dataloader(train_dataloader)
env.set_valid_dataloader(valid_dataloader)

In [None]:
env.set_criterion(torch.nn.CrossEntropyLoss())
env.set_optimizer(torch.optim.SGD(env.NN.parameters(), lr=lr))

In [None]:
env.train()

100%|██████████| 125/125 [00:01<00:00, 67.06it/s]


Train Loss : 490.178558


100%|██████████| 125/125 [00:02<00:00, 60.31it/s]


Train Loss : 485.136169


100%|██████████| 125/125 [00:01<00:00, 74.70it/s]


Train Loss : 481.926880


100%|██████████| 125/125 [00:01<00:00, 75.41it/s]


Train Loss : 480.305450


100%|██████████| 125/125 [00:01<00:00, 76.00it/s]


Train Loss : 479.286804


100%|██████████| 125/125 [00:01<00:00, 69.38it/s]


Train Loss : 478.420776


100%|██████████| 125/125 [00:02<00:00, 60.14it/s]


Train Loss : 477.736420


100%|██████████| 125/125 [00:02<00:00, 59.43it/s]


Train Loss : 477.123596


100%|██████████| 125/125 [00:01<00:00, 74.26it/s]


Train Loss : 476.843506


100%|██████████| 125/125 [00:01<00:00, 77.46it/s]

Train Loss : 476.379364





([tensor(490.1786),
  tensor(485.1362),
  tensor(481.9269),
  tensor(480.3055),
  tensor(479.2868),
  tensor(478.4208),
  tensor(477.7364),
  tensor(477.1236),
  tensor(476.8435),
  tensor(476.3794)],
 [])

In [None]:
# PyTorch 2D layers with parameters
# conv2d() + relu
# torch.nn.Conv2d(
#     in_channels,
#     out_channels,
#     kernel_size,
#     stride=1,
#     padding=0,
#     dilation=1,
#     groups=1,
#     bias=True,
#     padding_mode='zeros',
#     device=None,
#     dtype=None
#     )

# batchnorm2d()
# torch.nn.BatchNorm2d(
#     num_features,
#     eps=1e-05,
#     momentum=0.1,
#     #affine=True,
#     #track_running_stats=True,
#     #device=None,
#     #dtype=None
#     )

# avgpool2d()
# torch.nn.AvgPool2d(
#     kernel_size,
#     stride=None,
#     padding=0,
#     #ceil_mode=False,
#     #count_include_pad=True,
#     #divisor_override=None
#     )

# maxpool2d()
# torch.nn.MaxPool2d(
#     kernel_size,
#     stride=None,
#     padding=0,
#     dilation=1,
#     return_indices=False,
#     ceil_mode=False)

# dropout()
# torch.nn.Dropout2d(
#     p=0.5,
#     #inplace=False
#     )

In [70]:
t = spaces.Text(max_length=10, min_length=3,charset={'conv=channel_factor:2,kernel_size:3,stride:1,padding:0-',
                                                     'conv=channel_factor:0.5,kernel_size:3,stride:1,padding:0-',
                                                     'batchnorm=eps:0.00001-',
                                                     'avgpool=kernel_size:2,stride:2,padding:0-',
                                                     'avgpool=kernel_size:3,stride:2,padding:0-',
                                                     'maxpool=kernel_size:2,stride:2,padding:0-',
                                                     'maxpool=kernel_size:3,stride:2,padding:0-',
                                                     'dropout=p:0.1-',
                                                     'dropout=p:0.2-',
                                                     })

In [71]:
action = t.sample()

In [72]:
action

'batchnorm=eps:0.00001-maxpool=kernel_size:2,stride:2,padding:0-avgpool=kernel_size:3,stride:2,padding:0-maxpool=kernel_size:2,stride:2,padding:0-'

In [73]:
class actionParser():
  def __init__(self):
    self.text_layers_dict = dict({})
    pass

  def parse_action(self, action):
    if action[-1] == '-':
      action = action[:-1]
    text_layers = action.split('-')
    for text_layer in text_layers:
      tmp = text_layer.split('=')
      layer_name, layer_params = tmp[0], tmp[1].split(',')
      layer_params_dict = dict({})
      for param in layer_params:
        param = param.split(':')
        param_name, param_value = param[0], param[1]
        layer_params_dict[param_name] = param_value
      self.text_layers_dict[layer_name] = layer_params_dict

  def get_text_layers_dict(self):
    return self.text_layers_dict

In [74]:
parser = actionParser()
parser.parse_action(action)
res = parser.get_text_layers_dict()
for r in res.keys():
  print(r, res[r])

batchnorm {'eps': '0.00001'}
maxpool {'kernel_size': '2', 'stride': '2', 'padding': '0'}
avgpool {'kernel_size': '3', 'stride': '2', 'padding': '0'}
