<a href="https://colab.research.google.com/github/MichelePresti/NAS_MachineLearningDeepLearning/blob/main/NASWOT_Project8.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Define Config


In [1]:
# Global settings dictionary; the later cells read their parameters from it.
config = {}
# Exclusive upper bound of the architecture uids drawn by the random search
# (used as random.sample(range(max_uid), n)).
max_uid = 15625
#@title ##Configuration Info { run: "auto" }
#configuration by param
api_loading_mode = "Lite" #@param {type: "string"} ["Lite", "Full"]
dataset = "ImageNet16" #@param {type:"string"} ["cifar10", "cifar100", "ImageNet16"]
run_id =  1# @param {type:"integer"}
trial =  2#@param {type:"integer"}
n_random =  10#@param {type:"integer"}
point = '2a' # @param ['2a', '2b']
imagenet_path = 'Use only if dataset=Imagenet16' #@param{type:"string"}
use_default_path = True #@param{type:"boolean"}
n_evolution = 2#@param{type: "integer"}
n_arch_distance = 2#@param{type: "integer"}
n_survivor = 1#@param{type:"integer"}
population_size = 10#@param{type:"integer"}
proxy_type = "ReLU" #@param {type: "string"} ["ReLU", "SynFlow"]


# A score name containing 'hook_' makes naswot_search register the ReLU hooks.
config['score'] = 'hook_logdet'
config['nasspace'] = 'nasbench201'
config['augtype'] = 'none'
config['dataset'] = dataset
# Number of batches scored per architecture; naswot_search averages the scores.
config['maxofn'] = 3
# Batch size of the DataLoader built by get_dataset.
config['batch_size'] = 128
# NOTE(review): 'seed' is not read by any cell visible in this notebook - confirm
# whether the random sampling is meant to be seeded.
config['seed'] = 1
config['run_id'] = run_id
# NOTE(review): hard-coded to 'CIFAR10' regardless of `dataset`, and not read by
# the visible cells - confirm it is still needed.
config['dataset_id'] = 'CIFAR10'
# NOTE(review): start_uid / stop_uid are not read by the visible cells.
config['start_uid'] = 0 
config['stop_uid'] =  15000 
config['trial'] = trial
config['n_random'] = n_random
config['point'] = point
# The form value `imagenet_path` is only honoured when `use_default_path` is False.
config['imagenet_path'] = '/content/drive/MyDrive/ImageNet16' if use_default_path else imagenet_path
config['n_evolution'] = n_evolution
config['n_arch_distance'] = n_arch_distance
config['n_survivor'] = n_survivor
config['population_size'] = population_size
config['proxy_type'] = proxy_type
config['api_loading_mode'] = api_loading_mode

#max 15625 stop_uid

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [3]:
%%capture
!git clone https://github.com/MichelePresti/NAS_MachineLearningDeepLearning

In [4]:
!cp -r /content/NAS_MachineLearningDeepLearning/neural_model .
!cp -r /content/NAS_MachineLearningDeepLearning/ZeroCostNas .

# Importing NasBenchAPI ✍

In [5]:
import pandas as pd

def get_arch_config_by_dataset(dataset) -> pd.DataFrame:
    """
    Load the nas_bench_201 architecture-config CSV that matches a dataset name.

    PARAMETERS:
       dataset= one of 'cifar10', 'cifar100', 'ImageNet16' (exact match)
    RETURNS:
       the CSV content as a pandas DataFrame, or None (after printing a
       message) when the name is not one of the three above.
    """
    repo_dir = '/content/NAS_MachineLearningDeepLearning/'
    known_sources = (
        ('cifar10', repo_dir + 'nas_bench_201__CIFAR10_config.csv'),
        ('cifar100', repo_dir + 'nas_bench_201__CIFAR100_config.csv'),
        ('ImageNet16', repo_dir + 'nas_bench_201__ImageNet16_config.csv'),
    )
    for known_name, csv_path in known_sources:
        if dataset == known_name:
            return pd.read_csv(csv_path, header=0)

    print('Dataset name not valid')
    return None

def get_standard_config(csv_config: pd.DataFrame) -> dict:
    """
    Build a network-config dict from the first row of `csv_config`.

    The 'name', 'C', 'N' and 'arch_str' columns are copied from row 0;
    'num_classes' is always set to 1.
    """
    first_row = csv_config.iloc[0]
    res = {column: first_row[column] for column in ('name', 'C', 'N', 'arch_str')}
    res['num_classes'] = 1
    return res

In [6]:
import pandas as pd

# Load the table of architecture configurations that naswot_search looks up by uid.
if config['api_loading_mode'] == 'Lite':
    # Lite mode: read the per-dataset config CSV shipped with the cloned repository.
    searchspace = get_arch_config_by_dataset(config['dataset'])
else:
    # The full version (loading the NASBench201 API) is not implemented yet.
    # Fail here instead of leaving `searchspace` undefined and reporting success.
    raise NotImplementedError(
        "api_loading_mode '{}' is not implemented; use 'Lite'".format(config['api_loading_mode'])
    )

# get_arch_config_by_dataset returns None for an unknown dataset name.
if searchspace is None:
    raise ValueError("No search space available for dataset '{}'".format(config['dataset']))

print('SearchSpace Loaded')

SearchSpace Loaded


# Import Dataset

In [7]:
##################################################
# Copyright (c) Xuanyi Dong [GitHub D-X-Y], 2019 #
##################################################
import os, sys, hashlib, torch
import numpy as np
from PIL import Image
import torch.utils.data as data

if sys.version_info[0] == 2:
    import cPickle as pickle
else:
    import pickle


def calculate_md5(fpath, chunk_size=1024 * 1024):
    """Return the hex MD5 digest of the file at `fpath`, read `chunk_size` bytes at a time."""
    digest = hashlib.md5()
    with open(fpath, "rb") as stream:
        while True:
            block = stream.read(chunk_size)
            if block == b"":
                # An empty read marks the end of the file.
                break
            digest.update(block)
    return digest.hexdigest()


def check_md5(fpath, md5, **kwargs):
    """Return True when the MD5 of `fpath` equals `md5`; extra kwargs go to calculate_md5."""
    actual_digest = calculate_md5(fpath, **kwargs)
    return md5 == actual_digest


def check_integrity(fpath, md5=None):
    """
    Check that `fpath` is an existing file and, when `md5` is given, that its
    MD5 digest matches. Returns a bool.
    """
    if not os.path.isfile(fpath):
        return False
    # Without a reference digest, existence is all that is verified.
    return True if md5 is None else check_md5(fpath, md5)


class ImageNet16(data.Dataset):
    """
    Dataset over the pickled 16x16 ImageNet batches stored under `root`.

    References:
      http://image-net.org/download-images
      A Downsampled Variant of ImageNet as an Alternative to the CIFAR datasets
      https://arxiv.org/pdf/1707.08819.pdf
    """

    # [file name, expected md5] for every training batch file.
    train_list = [
        ["train_data_batch_1", "27846dcaa50de8e21a7d1a35f30f0e91"],
        ["train_data_batch_2", "c7254a054e0e795c69120a5727050e3f"],
        ["train_data_batch_3", "4333d3df2e5ffb114b05d2ffc19b1e87"],
        ["train_data_batch_4", "1620cdf193304f4a92677b695d70d10f"],
        ["train_data_batch_5", "348b3c2fdbb3940c4e9e834affd3b18d"],
        ["train_data_batch_6", "6e765307c242a1b3d7d5ef9139b48945"],
        ["train_data_batch_7", "564926d8cbf8fc4818ba23d2faac7564"],
        ["train_data_batch_8", "f4755871f718ccb653440b9dd0ebac66"],
        ["train_data_batch_9", "bb6dd660c38c58552125b1a92f86b5d4"],
        ["train_data_batch_10", "8f03f34ac4b42271a294f91bf480f29b"],
    ]
    # [file name, expected md5] for the validation file.
    valid_list = [
        ["val_data", "3410e3017fdaefba8d5073aaa65e4bd6"],
    ]

    def __init__(self, root, train, transform, use_num_of_class_only=None):
        """
        Load every batch file of the selected split into memory.

        root: directory containing the batch files.
        train: truthy loads `train_list`, falsy loads `valid_list`.
        transform: optional callable applied to each PIL image in __getitem__.
        use_num_of_class_only: if given, an int in (0, 1000); only samples whose
            raw label lies in [1, use_num_of_class_only] are kept.

        Raises RuntimeError when any train/valid file is missing or fails its md5.
        """
        self.root = root
        self.transform = transform
        self.train = train  # training set or valid set
        if not self._check_integrity():
            raise RuntimeError("Dataset not found or corrupted.")

        selected_files = self.train_list if self.train else self.valid_list
        self.data = []
        self.targets = []

        # now load the pickled numpy arrays
        for file_name, _checksum in selected_files:
            file_path = os.path.join(self.root, file_name)
            with open(file_path, "rb") as handle:
                # NOTE: pickle.load can execute arbitrary code; only use trusted files.
                if sys.version_info[0] == 2:
                    entry = pickle.load(handle)
                else:
                    entry = pickle.load(handle, encoding="latin1")
                self.data.append(entry["data"])
                self.targets.extend(entry["labels"])

        stacked = np.vstack(self.data).reshape(-1, 3, 16, 16)
        self.data = stacked.transpose((0, 2, 3, 1))  # convert to HWC

        if use_num_of_class_only is not None:
            assert (
                isinstance(use_num_of_class_only, int)
                and 0 < use_num_of_class_only < 1000
            ), "invalid use_num_of_class_only : {:}".format(use_num_of_class_only)
            kept_pairs = [
                (image, label)
                for image, label in zip(self.data, self.targets)
                if 1 <= label <= use_num_of_class_only
            ]
            # After filtering, `data` is a plain list of per-image arrays.
            self.data = [image for image, _ in kept_pairs]
            self.targets = [label for _, label in kept_pairs]

    def __repr__(self):
        """Summarise the number of images and of distinct labels held."""
        n_images = len(self.data)
        n_classes = len(set(self.targets))
        return "{name}({num} images, {classes} classes)".format(
            name=self.__class__.__name__,
            num=n_images,
            classes=n_classes,
        )

    def __getitem__(self, index):
        """Return (image, label) for `index`; the stored label is shifted down by one."""
        raw_image = self.data[index]
        target = self.targets[index] - 1

        img = Image.fromarray(raw_image)
        if self.transform is not None:
            img = self.transform(img)

        return img, target

    def __len__(self):
        """Number of samples currently held."""
        return len(self.data)

    def _check_integrity(self):
        """Return True only if every train and valid file exists with the expected md5."""
        return all(
            check_integrity(os.path.join(self.root, filename), md5)
            for filename, md5 in self.train_list + self.valid_list
        )



In [8]:
from torch.utils.data import DataLoader
import torchvision.transforms as transforms
import torchvision
import torch

def get_dataset(dataset) -> DataLoader:
    """
    Build the shuffled training DataLoader for the named dataset.

    PARAMETERS:
       dataset= 'cifar10', 'cifar100', or a name starting with 'ImageNet16'
    RETURNS:
       a torch DataLoader over the training split, using config['batch_size'].
    RAISES:
       TypeError when the dataset name is not recognised.
    """
    # Per-dataset channel statistics (0-255 scale) and random-crop geometry.
    if dataset == 'cifar10':
        raw_mean, raw_std = [125.3, 123.0, 113.9], [63.0, 62.1, 66.7]
        crop_size, crop_padding = 32, 4
    elif dataset == 'cifar100':
        raw_mean, raw_std = [129.3, 124.1, 112.4], [68.2, 65.4, 70.4]
        crop_size, crop_padding = 32, 4
    elif dataset.startswith('ImageNet16'):
        raw_mean, raw_std = [122.68, 116.66, 104.01], [63.22, 61.26, 65.09]
        crop_size, crop_padding = 16, 2
    else:
        raise TypeError("Unknow dataset : {:}".format(dataset))

    mean = [value / 255 for value in raw_mean]
    std = [value / 255 for value in raw_std]
    transform = transforms.Compose([
        transforms.RandomHorizontalFlip(),
        transforms.RandomCrop(crop_size, padding=crop_padding),
        transforms.ToTensor(),
        transforms.Normalize(mean, std),
    ])

    if dataset == 'cifar10':
        trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                                download=True, transform=transform)
    elif dataset == 'cifar100':
        trainset = torchvision.datasets.CIFAR100(root='./data', train=True,
                                                 download=True, transform=transform)
    else:
        # ImageNet16 is read from config['imagenet_path'], keeping raw labels 1..120.
        trainset = ImageNet16(config['imagenet_path'], True, transform, 120)

    return torch.utils.data.DataLoader(trainset, batch_size=config['batch_size'],
                                       shuffle=True, num_workers=2)

In [9]:
# Build the training DataLoader once; naswot_search reads this module-level
# `train_dt` when it draws the batches used for scoring.
train_dt = get_dataset(config['dataset'])

In [10]:
import pandas as pd

# Load the table of reference accuracies that naswot_search indexes by uid.
if config['dataset'] == 'cifar10':
    df = pd.read_csv('/content/NAS_MachineLearningDeepLearning/Cifar10Result.csv')
elif config['dataset'] == 'cifar100':
    # NOTE(review): this branch reads the CIFAR-10 results file, so the reported
    # accuracies for cifar100 runs are CIFAR-10 ones. Looks like a copy-paste
    # slip - point it at the CIFAR-100 results file once its name is confirmed.
    df = pd.read_csv('/content/NAS_MachineLearningDeepLearning/Cifar10Result.csv')
else:
    df = pd.read_csv('/content/NAS_MachineLearningDeepLearning/ImageNet16Result.csv')

# Drop the column left behind by a previously saved index, if present.
# An explicit membership test replaces the former bare `except:`, which would
# also have hidden unrelated errors.
if 'Unnamed: 0' in df.columns:
    df = df.drop(columns=['Unnamed: 0'])
else:
    print("Already dropped")

acc_df = df


# NASWOT Scoring Algorithm ➗

In [11]:
import numpy as np

def get_batch_jacobian(net, x, target, device, args=None):
    """
    Run `net` on `x`, back-propagate a tensor of ones through its first output
    and return the resulting gradient with respect to `x`.

    `net(x)` must return a pair. `device` and `args` are accepted but not used.
    Returns (input gradient, target, first output, second output), all detached.
    Note: `x` is switched to requires_grad in place.
    """
    net.zero_grad()
    x.requires_grad_(True)
    primary_out, secondary_out = net(x)
    primary_out.backward(torch.ones_like(primary_out))
    input_grad = x.grad.detach()
    return input_grad, target.detach(), primary_out.detach(), secondary_out.detach()


def hooklogdet(K, labels=None):
    """Return the log of the absolute determinant of `K`; `labels` is ignored."""
    # slogdet returns (sign, log|det|); only the second element is used.
    return np.linalg.slogdet(K)[1]


def score_network(network, x, x2, target, device):
    """
    Score `network`: one forward/backward pass on `x`, one more forward pass on
    `x2`, then the log-determinant of `network.K`.

    `network.K` is expected to have been filled in by forward hooks registered
    elsewhere; this function only reads it.
    """
    # Called for its side effects on the network; the returned tensors are not needed.
    get_batch_jacobian(network, x, target, device, config)
    network(x2.to(device))
    return hooklogdet(network.K, target)

In [12]:
import time
import random
import pandas as pd
from neural_model.neural_model import get_cell_net

"""
NAS WOT Algorithm
"""

def naswot_search(dataset, device, population, run_id=-1) -> pd.DataFrame:
    """
    Score every architecture in `population` with the hook-based log-determinant.

    PARAMETERS:
       dataset= dataset name; accepted for interface compatibility but not read
                here (batches come from the module-level `train_dt`)
       device= torch device the networks and batches are moved to
       population= iterable of architecture uids present in `searchspace`
       run_id= value stored in the 'run_id' column of every row
    RETURNS:
       DataFrame with columns uid, score, elapsed_time, accuracy, run_id.
       Architectures whose scoring raised are skipped (the error is printed).

    Relies on module-level `searchspace`, `config`, `train_dt` and `acc_df`.
    """
    result = {'uid': [], 'score': [], 'elapsed_time': [], 'accuracy': [], 'run_id': []}
    for uid in population:
        net_config: dict = get_standard_config(searchspace.loc[searchspace['uid'] == uid])
        network = get_cell_net(net_config)
        try:
            start = time.time()
            if 'hook_' in config['score']:

                def counting_forward_hook(module_hook, inp, out):
                    """Accumulate the binary activation-pattern kernel into network.K."""
                    try:
                        # NOTE(review): `visited_backwards` is only ever assigned True in
                        # this function, so unless get_cell_net predefines it as False this
                        # early return never fires and the forward pass that precedes the
                        # first backward also contributes to K. Confirm this is intended.
                        if hasattr(module_hook, 'visited_backwards') and not module_hook.visited_backwards:
                            return
                        if isinstance(inp, tuple):
                            inp = inp[0]
                        inp = inp.view(inp.size(0), -1)
                        x = (inp > 0).float()
                        K = x @ x.t()
                        K2 = (1. - x) @ (1. - x.t())
                        # K is created on first use and then summed over every hooked
                        # module and every forward pass of this network.
                        if hasattr(network, 'K'):
                            network.K = network.K + K.cpu().numpy() + K2.cpu().numpy()
                        else:
                            network.K = K.cpu().numpy() + K2.cpu().numpy()
                    except Exception as exception:
                        print(exception)

                def counting_backward_hook(module_hook, inp, out):
                    """Mark the module as having taken part in a backward pass."""
                    module_hook.visited_backwards = True

                for _name, module in network.named_modules():
                    if 'ReLU' in str(type(module)):
                        module.register_forward_hook(counting_forward_hook)
                        module.register_backward_hook(counting_backward_hook)

            network = network.to(device)
            s = []
            for _ in range(config['maxofn']):
                # A fresh iterator per repeat, so each repeat takes the first batch
                # of a new pass over the loader.
                x, target = next(iter(train_dt))
                x2 = torch.clone(x).to(device)
                x, target = x.to(device), target.to(device)
                # Called for its forward/backward side effects (hooks); outputs unused.
                get_batch_jacobian(network, x, target, device, config)
                if 'hook_' in config['score']:
                    network(x2)
                s.append(hooklogdet(network.K, target))

            # assumes acc_df rows are ordered so that position == uid - TODO confirm
            acc = acc_df['valid-accuracy'].iloc[uid]
            score = np.mean(s)
            print(f'Score (uid {uid}): {score}, Accuracy: {acc}')
            stop = time.time()
            result['uid'].append(uid)
            result['score'].append(score)
            result['accuracy'].append(acc)
            result['run_id'].append(run_id)
            result['elapsed_time'].append(stop - start)
        except Exception as e:
            # Best effort: report the failure and continue with the next architecture.
            print(e)

    return pd.DataFrame.from_dict(result)

# Point 2 Project 8 ➰

---

A. Run 30 random-search experiments with the NASWOT algorithm, storing the score, accuracy and elapsed time of each.

B. For each of the above experiments store the best performing architecture.

In [None]:
!mkdir ResultToSave

In [27]:
#@title ##Point 2 Configuration { run: "auto" }
#configuration by param
# Number of random-search rounds run by the next cell.
n_trial =  2#@param {type:"integer"}
# Number of architectures sampled in each round.
n_population =  10#@param {type:"integer"}
# File-name prefix of the per-round result CSVs written under ResultToSave/.
run_save_path = 'NASWOT_Point2a'#@param {type:"string"}
# File-name prefix of the CSV holding the best architecture of each round.
best_save_path = 'NASWOT_Point2b'#@param {type:"string"}



In [28]:
import time
import requests
import random
import os
import pandas as pd
from tabulate import tabulate
# Random-search driver: score `n_population` random architectures per round for
# `n_trial` rounds, save every round, and keep the top-scoring one of each round.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
os.environ['WANDB_CONSOLE'] = 'off'
start = time.time()

run_id = config['run_id']
dataset = config['dataset']
n = n_population   # N size of random sample
trial = n_trial

if trial < 1:
    # Without at least one round there is no result to rank or save.
    raise ValueError(f'n_trial must be at least 1, got {trial}')

# The result CSVs are written here; create it in case the mkdir cell was skipped.
os.makedirs('ResultToSave', exist_ok=True)

print('*******************************')
print('Running Random Search algorithm')
print('Parameters:')
print(f'Dataset: {dataset}')
print(f'Num Round: {trial}')
print(f'Population Size: {n}')
print('*******************************')

round_bests = []
for i in range(trial):
    print(f"Round {i}")

    # Sample N Random architectures among the searchspace
    population = random.sample(range(max_uid), n)

    # Score Population
    trained_population = naswot_search(dataset=dataset, device=device, population=population, run_id=i)

    # Save the round's results (in scoring order), then rank them by score
    trained_population.to_csv(f'ResultToSave/{run_save_path}_RunID_{i}_Dataset_{dataset}.csv')
    trained_population = trained_population.sort_values(by=['score'], ascending=False)

    # Keep the single best architecture of every round. Previously round 0 kept
    # config['n_survivor'] rows while later rounds kept one; both agree for the
    # default n_survivor = 1.
    round_bests.append(trained_population.head(1))

# DataFrame.append was removed in pandas 2.0; concatenate the per-round rows instead.
best = pd.concat(round_bests, ignore_index=True).sort_values(by=['score'], ascending=False)
best_of_all = best.head(1)

best.to_csv(f'ResultToSave/{best_save_path}_Dataset_{dataset}.csv')
stop = time.time()

total_time = stop - start
print('*****************************************************************')
print('Best performing net with RandomSearch')
print(tabulate(best_of_all, headers='keys', tablefmt='psql', showindex=False))
print(f'Total time for search over all searchspace: {total_time}')
print('*****************************************************************')


*******************************
Running Random Search algorithm
Parameters:
Dataset: ImageNet16
Num Round: 2
Population Size: 10
*******************************
Round 0




Score (uid 1442): 1470.3031005859375, Accuracy: 13.56666664123535
Score (uid 2372): 1622.1953125, Accuracy: 25.066666625976563
Score (uid 14651): 1703.5279541015625, Accuracy: 22.43333325195313
Score (uid 1169): 1638.5196533203125, Accuracy: 24.76666661071777
Score (uid 13998): 1477.0904541015625, Accuracy: 13.933333315531414
Score (uid 11642): 1583.4346923828125, Accuracy: 12.93333331044515
Score (uid 5252): 1572.8642578125, Accuracy: 23.73333330790202
Score (uid 11442): 1590.5576171875, Accuracy: 22.66666665649414
Score (uid 8505): 1641.9443359375, Accuracy: 24.9666666208903
Score (uid 5773): 1494.6463623046875, Accuracy: 21.06666663360596
Round 1
Score (uid 12033): 1503.84814453125, Accuracy: 23.23333326212565
Score (uid 1681): 1597.1517333984375, Accuracy: 26.46666662597656
Score (uid 8390): 1536.8912353515625, Accuracy: 15.5333333307902
Score (uid 14959): 1494.7535400390625, Accuracy: 24.533333338419595
Score (uid 12379): 1679.8277587890625, Accuracy: 25.666666666666668
Score (uid

In [29]:
# Save Results
import pandas as pd
import os, fnmatch

def find(pattern, path):
    """
    Recursively collect the files under `path` whose name matches the
    fnmatch-style `pattern`. Returns a list of joined paths in os.walk order.
    """
    return [
        os.path.join(folder, file_name)
        for folder, _subdirs, file_names in os.walk(path)
        for file_name in file_names
        if fnmatch.fnmatch(file_name, pattern)
    ]

# Gather the per-round CSVs (point 2a) and the best-of-round CSVs (point 2b).
files_2a = find(f'{run_save_path}_*', '/content/ResultToSave/')
# Was globbing with run_save_path, which returned the 2a files a second time.
files_2b = find(f'{best_save_path}_*', '/content/ResultToSave/')
print(files_2a)

if not files_2a:
    # pd.concat would fail on an empty list with a less helpful message.
    raise FileNotFoundError(f"No files matching '{run_save_path}_*' in /content/ResultToSave/")

# The loop body was missing in the source; reading each per-round CSV is the
# reconstruction implied by the pd.concat call below.
# NOTE(review): the CSVs were saved with their index, so each frame carries an
# 'Unnamed: 0' column - confirm whether it should be dropped here.
df = pd.concat([pd.read_csv(file) for file in files_2a])
df = df.sort_values(by='uid')

# NOTE(review): the output name hard-codes "run7" and "ImageNet" regardless of
# the configured run_id / dataset - confirm before reusing for other runs.
merged_csv_path = '/content/drive/MyDrive/RisultatiNASWOT/NASWOT_result_run7_datasetImageNet.csv'
os.makedirs(os.path.dirname(merged_csv_path), exist_ok=True)
df.to_csv(merged_csv_path)




['/content/ResultToSave/NASWOT_Point2a_RunID_0_Dataset_ImageNet16.csv', '/content/ResultToSave/NASWOT_Point2a_RunID_1_Dataset_ImageNet16.csv']


In [30]:
import os

# Move the scratch CSV into the results folder. Guarded so that a fresh
# "Run all" does not stop here when the file was never created.
scratch_csv = '/content/test.csv'
if os.path.isfile(scratch_csv):
    os.renames(scratch_csv, '/content/ResultToSave/test.csv')
else:
    print(f'{scratch_csv} not found, nothing to move')