<a href="https://colab.research.google.com/github/marsggbo/AutoMLDemos/blob/master/ch3/BO-NAS.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 1. 搜索空间

In [1]:

import random
import torch
from torch import nn

# Search space: six independent choices with three options each,
# i.e. 3**6 = 729 candidate models in total.
# Key order matters: it defines the positional layout of an encoding.
spaces = dict(
    channels_1=[16, 32, 64],
    channels_2=[16, 32, 64],
    channels_3=[16, 32, 64],
    kernel_size_1=[3, 5, 7],
    kernel_size_2=[3, 5, 7],
    kernel_size_3=[3, 5, 7],
)

# Randomly sample one model encoding from the search space.
def sample_encoding():
    """Draw one random option per search dimension.

    Returns:
        list: One value per entry of the module-level ``spaces`` dict, in the
        dict's iteration order.
    """
    encoding = []
    for choices in spaces.values():
        encoding.append(random.choice(choices))
    return encoding

# Model constructor.
def build_model(c1, c2, c3, ks1, ks2, ks3):
    """Build a three-stage CNN classifier from an architecture encoding.

    Args:
        c1, c2, c3: Output channel counts of the three convolution stages.
        ks1, ks2, ks3: Kernel sizes of the three convolution stages.

    Returns:
        nn.Sequential: Three Conv-BatchNorm-ReLU stages (strides 1, 2, 2, with
        padding ``kernel_size // 2``), followed by global average pooling,
        flattening and a linear layer with 10 outputs.
    """
    # (in_channels, out_channels, kernel_size, stride) for each stage;
    # the first stage takes 3 input channels.
    stage_specs = [
        (3, c1, ks1, 1),
        (c1, c2, ks2, 2),
        (c2, c3, ks3, 2),
    ]

    layers = []
    for in_ch, out_ch, ksize, stride in stage_specs:
        layers += [
            nn.Conv2d(in_ch, out_ch, kernel_size=ksize, stride=stride, padding=ksize // 2),
            nn.BatchNorm2d(out_ch),
            nn.ReLU(),
        ]

    # Classification head; the final layer assumes a 10-class task.
    layers += [nn.AdaptiveAvgPool2d(1), nn.Flatten(1), nn.Linear(c3, 10)]
    return nn.Sequential(*layers)

def evaluate_model(model):
    """Return a performance score for ``model``.

    Placeholder implementation: the real evaluation is omitted and a uniform
    random number in [0, 1) is returned instead; ``model`` is not used.
    Replace this with task-specific training/evaluation.

    Returns:
        float: The (simulated) accuracy.
    """
    simulated_score = torch.rand(1)
    return simulated_score.item()

  from .autonotebook import tqdm as notebook_tqdm


## 2. 代理模型：高斯过程

In [2]:

import numpy as np

class GaussianKernel:
    """Exponential kernel on the Manhattan (L1) distance between encodings.

    k(a, b) = variance * exp(-0.5 * ||a - b||_1 / length_scale**2)
    """

    def __init__(self, length_scale=1.0, variance=1.0):
        self.length_scale = length_scale
        self.variance = variance

    def compute(self, X1, X2):
        """Return the kernel matrix between the rows of ``X1`` and ``X2``.

        Both inputs are indexed as 2-D arrays (one row per point); the result
        has one row per row of ``X1`` and one column per row of ``X2``.
        """
        # Broadcast to all row pairs, then sum absolute differences over the
        # feature axis to obtain the Manhattan distance.
        deltas = np.expand_dims(X1, 1) - X2
        l1_dist = np.sum(np.abs(deltas), axis=2)
        exponent = -0.5 * l1_dist / self.length_scale**2
        return self.variance * np.exp(exponent)
    
class GaussianProcess:
    """Minimal Gaussian-process regressor used as the surrogate model.

    Args:
        kernel: Object exposing ``compute(X1, X2)`` that returns a kernel
            matrix. Defaults to ``GaussianKernel()``.
        noise: Value added to the diagonal of the training covariance matrix
            before inversion (keeps the matrix invertible).

    Attributes set by ``fit``: ``X``, ``y``, ``K`` (noisy training covariance)
    and ``K_inv`` (its inverse).
    """

    def __init__(self, kernel=None, noise=1e-6):
        self.kernel = GaussianKernel() if kernel is None else kernel
        self.noise = noise
        self.X = None
        self.y = None
        self.K = None
        self.K_inv = None

    def fit(self, X, y):
        """Store the observations and precompute the inverse covariance.

        Args:
            X: 2-D array-like, one encoding per row.
            y: Array-like with one target per row of ``X``.

        Raises:
            ValueError: If ``X`` is not 2-D or ``X`` and ``y`` disagree in length.
        """
        # Accept plain lists as well as arrays.
        X = np.asarray(X)
        y = np.asarray(y)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-D (n_samples, n_features), got ndim={X.ndim}")
        if len(X) != len(y):
            raise ValueError(f"X and y have inconsistent lengths: {len(X)} != {len(y)}")

        self.X = X
        self.y = y
        # Covariance matrix of the training points plus diagonal noise.
        self.K = self.kernel.compute(self.X, self.X) + self.noise * np.eye(len(self.X))
        # Inverse of the covariance matrix, reused by every predict() call.
        self.K_inv = np.linalg.inv(self.K)

    def predict(self, X_new):
        """Return the posterior mean vector and covariance matrix at ``X_new``.

        Args:
            X_new: 2-D array-like of query points; a single 1-D encoding is
                treated as one query point.

        Returns:
            tuple: ``(mean, cov)``.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.K_inv is None:
            raise RuntimeError("GaussianProcess.predict() called before fit()")
        X_new = np.atleast_2d(np.asarray(X_new))

        # Posterior mean: K_*^T K^{-1} y
        K_star = self.kernel.compute(self.X, X_new)
        mean = np.dot(K_star.T, np.dot(self.K_inv, self.y))
        # Posterior covariance: K_** - K_*^T K^{-1} K_*
        K_star_star = self.kernel.compute(X_new, X_new)
        cov = K_star_star - np.dot(K_star.T, np.dot(self.K_inv, K_star))
        return mean, cov

## 3. 采集函数：期望改进

In [3]:

from scipy.stats import norm

def expected_improvement(X_candidates, X_init, y_init, gp, xi=0.01):
    """Pick the candidate with the largest Expected Improvement (EI).

    Args:
        X_candidates: Indexable collection of candidate encodings, passed to
            ``gp.predict``.
        X_init: Already evaluated encodings. Unused; kept so the call
            signature stays the same.
        y_init: Observed performances; the maximum is the incumbent best.
        gp: Fitted surrogate whose ``predict`` returns ``(mean, cov)``.
        xi: Exploration margin subtracted from the predicted improvement.

    Returns:
        The element of ``X_candidates`` with the highest EI.
    """
    # Posterior mean and standard deviation at the candidate points.
    mean, cov = gp.predict(X_candidates)
    mean = np.asarray(mean, dtype=float)
    # Round-off can push the posterior variance slightly below zero; clip it
    # so sqrt() cannot yield NaN (np.argmax would otherwise select the NaN).
    std = np.sqrt(np.clip(np.diag(cov), 0.0, None))

    best_performance = np.max(y_init)
    improvement = mean - best_performance - xi

    # Divide only where std > 0 to avoid 0/0 -> NaN.
    has_spread = std > 0
    z = np.zeros_like(improvement)
    z[has_spread] = improvement[has_spread] / std[has_spread]
    ei = improvement * norm.cdf(z) + std * norm.pdf(z)
    # With zero predictive uncertainty EI reduces to max(improvement, 0).
    ei[~has_spread] = np.maximum(improvement[~has_spread], 0.0)

    # Select the architecture with the largest expected improvement.
    next_idx = np.argmax(ei)
    next_architecture = X_candidates[next_idx]
    return next_architecture


## 4. 主函数

### 4.1 初始化

In [4]:

def init(num=20):
    """Create the initial observation set from randomly sampled models.

    Args:
        num: Number of random architectures to sample and evaluate.

    Returns:
        tuple: ``(X, y)`` NumPy arrays holding the sampled encodings and their
        evaluated performances.
    """
    observations = []
    for _ in range(num):
        encoding = sample_encoding()
        # Build the model and evaluate its performance.
        score = evaluate_model(build_model(*encoding))
        observations.append((encoding, score))

    encodings = [enc for enc, _ in observations]
    scores = [score for _, score in observations]
    return np.array(encodings), np.array(scores)

### 4.2 生成候选模型

In [5]:

def generate_candidates(num_candidates):
    """Sample ``num_candidates`` random encodings and stack them into an array.

    Returns:
        np.ndarray: One sampled encoding per row.
    """
    pool = []
    for _ in range(num_candidates):
        pool.append(sample_encoding())
    return np.array(pool)

### 4.3 Main

In [6]:

def main(n_iters=50):
    """Run the Bayesian-optimisation architecture search loop.

    Each iteration refits the surrogate on all observations, samples 10 random
    candidates, evaluates the one chosen by the acquisition function and adds
    the result to the observation set.

    Args:
        n_iters: Number of optimisation iterations.

    Returns:
        tuple: ``(best_architecture, best_performance)`` over all observations.
    """
    archs, scores = init()             # initial observation set
    surrogate = GaussianProcess()      # Gaussian-process surrogate model
    acquire = expected_improvement     # Expected Improvement acquisition

    for _ in range(n_iters):
        # Refit the surrogate on everything observed so far.
        surrogate.fit(archs, scores)

        # Sample a candidate pool and let the acquisition function choose.
        pool = generate_candidates(num_candidates=10)
        chosen = acquire(pool, archs, scores, surrogate)

        # Evaluate the chosen architecture.
        chosen_score = evaluate_model(build_model(*chosen))

        # Record the new observation.
        archs = np.vstack((archs, chosen))
        scores = np.append(scores, chosen_score)

    # Report the best observation.
    winner = np.argmax(scores)
    best_architecture, best_performance = archs[winner], scores[winner]
    print('最优架构：', best_architecture)
    print('最优性能：', best_performance)
    return best_architecture, best_performance
main()

最优架构： [16 64 16  7  7  3]
最优性能： 0.9965501427650452


(array([16, 64, 16,  7,  7,  3]), 0.9965501427650452)