In [None]:
import math
import random
from copy import deepcopy

import torch
import torch.nn as nn
import torch.optim as optim

# Candidate values for every hyperparameter explored by the search strategies.
# Insertion order is preserved; code that iterates the keys sees them in the
# order written here.
search_space = dict(
    num_hidden_layers=[1, 2, 3, 4, 5],
    hidden_layer_size=[32, 64, 128, 256, 512],
    activation_function=['ReLU', 'LeakyReLU', 'Tanh'],
    learning_rate=[0.1, 0.01, 0.001, 0.0001],
    optimizer=['Adam', 'SGD', 'RMSprop'],
    dropout_rate=[0.0, 0.2, 0.4, 0.6],
)

# Name -> activation module class; each class is keyed by its own ``__name__``.
activation_map = {
    activation_cls.__name__: activation_cls
    for activation_cls in (nn.ReLU, nn.LeakyReLU, nn.Tanh)
}

# Name -> optimizer class; each class is keyed by its own ``__name__``.
optimizer_map = {
    optimizer_cls.__name__: optimizer_cls
    for optimizer_cls in (optim.Adam, optim.SGD, optim.RMSprop)
}


def create_dataset(num_samples=1000):
    """Generate a noisy sine-wave regression dataset.

    Args:
        num_samples: Number of evenly spaced points taken on [-5, 5].

    Returns:
        A tuple ``(x, y)`` of tensors, each of shape ``(num_samples, 1)``,
        where ``y`` is ``sin(x)`` plus standard-normal noise scaled by 0.1.
    """
    inputs = torch.linspace(-5, 5, num_samples).unsqueeze(1)
    noise = torch.randn(inputs.size())
    targets = torch.sin(inputs) + 0.1 * noise
    return inputs, targets


def build_model(architecture):
    """Assemble a fully connected 1-input / 1-output network from a spec.

    The network is ``Linear -> activation`` followed by
    ``num_hidden_layers - 1`` repetitions of
    ``Linear -> activation -> Dropout`` and a final ``Linear`` to one output.
    Note that dropout is only inserted in the repeated blocks, so the first
    hidden layer (and therefore a one-hidden-layer network) has none.

    Args:
        architecture: Mapping with the keys ``hidden_layer_size``,
            ``activation_function`` (a key of ``activation_map``),
            ``num_hidden_layers`` and ``dropout_rate``.

    Returns:
        An ``nn.Sequential`` holding the layers in order.
    """
    width = architecture['hidden_layer_size']
    make_activation = activation_map[architecture['activation_function']]
    extra_blocks = architecture['num_hidden_layers'] - 1

    modules = [nn.Linear(1, width), make_activation()]
    for _ in range(extra_blocks):
        modules += [
            nn.Linear(width, width),
            make_activation(),
            nn.Dropout(p=architecture['dropout_rate']),
        ]
    modules.append(nn.Linear(width, 1))

    return nn.Sequential(*modules)


def evaluate_architecture(architecture, X_train, y_train, X_val, y_val, num_epochs=50):
    """Train a freshly built model and report its validation MSE.

    A new model is created with ``build_model`` and trained for
    ``num_epochs`` steps, each step using the whole of ``X_train`` in a
    single forward/backward pass.

    Args:
        architecture: Spec accepted by ``build_model``; additionally needs
            ``optimizer`` (a key of ``optimizer_map``) and ``learning_rate``.
        X_train: Training inputs fed to the model.
        y_train: Training targets compared against the model output.
        X_val: Validation inputs.
        y_val: Validation targets.
        num_epochs: Number of optimisation steps to run.

    Returns:
        The validation mean-squared error as a Python float.
    """
    net = build_model(architecture)
    mse = nn.MSELoss()

    make_optimizer = optimizer_map[architecture['optimizer']]
    opt = make_optimizer(net.parameters(), lr=architecture['learning_rate'])

    net.train()
    for _epoch in range(num_epochs):
        opt.zero_grad()
        train_loss = mse(net(X_train), y_train)
        train_loss.backward()
        opt.step()

    # Switch to eval mode so dropout is disabled while scoring.
    net.eval()
    with torch.no_grad():
        return mse(net(X_val), y_val).item()


def run_evolutionary_search(search_space, population_size=10, num_generations=5, num_epochs=10):
    """Search for a good architecture with a simple genetic algorithm.

    A fresh dataset is generated with ``create_dataset`` and split in order:
    the first 80% of samples are used for training, the remainder for
    validation. Each generation every individual is trained and scored with
    ``evaluate_architecture``; the better half survives unchanged and the
    rest of the next population is filled with children produced by uniform
    crossover of two random elites followed by re-sampling one random gene.

    Args:
        search_space: Mapping from hyperparameter name to its candidate values.
        population_size: Number of architectures per generation (>= 1).
        num_generations: Number of generations to run.
        num_epochs: Training steps used to score each architecture.

    Returns:
        A tuple ``(best_architecture, best_loss)``. ``best_architecture`` is
        ``None`` and ``best_loss`` is ``inf`` when no evaluation produced a
        finite loss (or when ``num_generations`` is 0).

    Raises:
        ValueError: If ``population_size`` is smaller than 1.
    """
    if population_size < 1:
        raise ValueError("population_size must be at least 1")

    best_loss = float('inf')
    best_architecture = None

    X, y = create_dataset()
    split_idx = int(len(X) * 0.8)
    X_train, X_val = X[:split_idx], X[split_idx:]
    y_train, y_val = y[:split_idx], y[split_idx:]

    print(f"Starting Evolutionary Search with a population of {population_size} for {num_generations} generations...")

    population = [
        {key: random.choice(choices) for key, choices in search_space.items()}
        for _ in range(population_size)
    ]

    # Always keep at least one elite; otherwise a population of one would
    # leave no parents to draw from.
    num_elites = max(1, population_size // 2)
    gene_names = list(search_space)

    for gen in range(num_generations):
        print(f"\n--- Generation {gen+1}/{num_generations} ---")

        fitness = []
        for arch in population:
            loss = evaluate_architecture(arch, X_train, y_train, X_val, y_val, num_epochs=num_epochs)
            # Diverged training can yield NaN, which compares False against
            # everything and makes the sort order arbitrary; rank it last.
            if not math.isfinite(loss):
                loss = float('inf')
            fitness.append((loss, arch))

            if loss < best_loss:
                best_loss = loss
                best_architecture = arch

        fitness.sort(key=lambda entry: entry[0])
        print(f"  Best loss in this generation: {fitness[0][0]:.4f}")

        elites = [arch for _, arch in fitness[:num_elites]]
        new_population = list(elites)

        while len(new_population) < population_size:
            parent1 = random.choice(elites)
            parent2 = random.choice(elites)

            # Uniform crossover: each gene comes from either parent.
            child = {key: random.choice((parent1[key], parent2[key])) for key in parent1}

            # Mutation: re-sample exactly one gene from the search space.
            mutation_key = random.choice(gene_names)
            child[mutation_key] = random.choice(search_space[mutation_key])

            new_population.append(child)

        population = new_population

    return best_architecture, best_loss


class ArchitectureController(nn.Module):
    """Recurrent controller that scores the options of every hyperparameter.

    A single-layer ``nn.RNN`` (input size 1, hidden size 64) feeds one linear
    head per search-space key; head ``k`` emits one logit per candidate value
    of key ``k``.
    """

    def __init__(self, search_space):
        """Build the RNN and one policy head per key of ``search_space``.

        Args:
            search_space: Mapping from hyperparameter name to its candidate
                values.
        """
        super().__init__()
        self.search_space = search_space
        self.keys = list(search_space)
        self.vocab_size = [len(search_space[name]) for name in self.keys]
        self.num_actions = len(self.keys)
        self.rnn = nn.RNN(input_size=1, hidden_size=64, num_layers=1)
        heads = [nn.Linear(64, num_choices) for num_choices in self.vocab_size]
        self.policy_heads = nn.ModuleList(heads)

    def forward(self, input, hidden):
        """Advance the RNN and evaluate every policy head on its output.

        Args:
            input: Tensor passed to the RNN as its input.
            hidden: Tensor passed to the RNN as its initial hidden state.

        Returns:
            A tuple ``(logits, hidden)`` where ``logits`` is a list with one
            tensor per policy head (in key order) and ``hidden`` is the
            RNN's new hidden state.
        """
        rnn_out, next_hidden = self.rnn(input, hidden)
        # Drop the leading dimension of the RNN output before the heads.
        features = rnn_out.squeeze(0)
        return [head(features) for head in self.policy_heads], next_hidden

   
def run_rl_search(search_space, X_train, y_train, X_val, y_val, num_epochs=10, num_episodes=5):
    """Search for a good architecture with a REINFORCE-trained controller.

    Each episode the controller samples one value per search-space key, the
    resulting architecture is trained and scored with
    ``evaluate_architecture``, and the controller is updated with the policy
    gradient using ``reward = -val_loss``.

    Args:
        search_space: Mapping from hyperparameter name to its candidate values.
        X_train: Training inputs forwarded to ``evaluate_architecture``.
        y_train: Training targets forwarded to ``evaluate_architecture``.
        X_val: Validation inputs forwarded to ``evaluate_architecture``.
        y_val: Validation targets forwarded to ``evaluate_architecture``.
        num_epochs: Training steps used to score each sampled architecture.
        num_episodes: Number of architectures to sample.

    Returns:
        A tuple ``(best_architecture, best_loss)``. ``best_architecture`` is
        ``None`` and ``best_loss`` is ``inf`` when no episode produced a
        finite validation loss.
    """
    controller = ArchitectureController(search_space)
    controller_optimizer = optim.Adam(controller.parameters(), lr=0.01)

    # Read the sizes from the controller instead of repeating its constants.
    hidden_size = controller.rnn.hidden_size
    input_size = controller.rnn.input_size

    best_loss = float('inf')
    best_architecture = None

    print(f"Starting RL Search with {num_episodes} episodes...")

    for episode in range(num_episodes):
        controller_optimizer.zero_grad()
        hidden = torch.zeros(1, 1, hidden_size)
        log_probs = []
        architecture = {}

        # One RNN step per hyperparameter; step i uses the i-th policy head.
        for i, key in enumerate(controller.keys):
            logits, hidden = controller(torch.zeros(1, 1, input_size), hidden)
            dist = torch.distributions.Categorical(logits=logits[i])
            action_index = dist.sample()

            architecture[key] = search_space[key][action_index.item()]
            log_probs.append(dist.log_prob(action_index))

        val_loss = evaluate_architecture(architecture, X_train, y_train, X_val, y_val, num_epochs=num_epochs)
        reward = -val_loss

        if math.isfinite(val_loss):
            policy_loss = torch.sum(torch.stack(log_probs) * -reward)
            policy_loss.backward()
            controller_optimizer.step()
            print(f"  Episode {episode+1}: Loss = {val_loss:.4f}, Reward = {reward:.4f}")
        else:
            # A NaN/inf reward would produce NaN/inf gradients; stepping on
            # them corrupts the controller weights and makes the next
            # Categorical(logits=...) fail. Skip the update instead.
            print(f"  Episode {episode+1}: Loss = {val_loss:.4f}, Reward = {reward:.4f} (non-finite; controller update skipped)")

        if val_loss < best_loss:
            best_loss = val_loss
            best_architecture = architecture

    return best_architecture, best_loss


class DifferentiableCell(nn.Module):
    """Mixed operation: a weighted sum of parallel ``Linear -> activation`` branches."""

    def __init__(self, in_features, out_features, ops):
        """Create one ``Linear(in_features, out_features) -> op()`` branch per op.

        Args:
            in_features: Input width of every branch's linear layer.
            out_features: Output width of every branch's linear layer.
            ops: Iterable of zero-argument callables, each returning the
                activation module for one branch.
        """
        super().__init__()
        branches = []
        for make_activation in ops:
            branches.append(
                nn.Sequential(nn.Linear(in_features, out_features), make_activation())
            )
        self.ops = nn.ModuleList(branches)

    def forward(self, x, weights):
        """Return the sum of each branch's output scaled by its weight.

        Args:
            x: Input passed to every branch.
            weights: Iterable of per-branch scale factors, paired with the
                branches in order.

        Returns:
            The weighted sum of the branch outputs (the integer 0 when there
            are no branches).
        """
        mixed = 0
        for weight, branch in zip(weights, self.ops):
            mixed = mixed + weight * branch(x)
        return mixed

class DifferentiableModel(nn.Module):
    """Over-parameterised network whose activations are chosen by learned weights.

    The model is a plain ``Linear(1, width)`` followed by
    ``num_hidden_layers - 1`` ``DifferentiableCell`` layers and a final
    ``Linear(width, 1)``. ``alphas`` holds one row of architecture logits per
    hidden layer; the cell at position ``i`` of ``layers`` is mixed with the
    softmax of row ``i - 1``, so the last row is not read by ``forward``.
    """

    def __init__(self, search_space):
        """Size the model from ``search_space``.

        Args:
            search_space: Mapping providing ``activation_function`` (names
                present in ``activation_map``), ``num_hidden_layers`` (the
                maximum is used) and ``hidden_layer_size`` (the first entry
                is used).
        """
        super().__init__()

        self.ops_list = [activation_map[name] for name in search_space['activation_function']]
        self.num_ops = len(self.ops_list)

        self.num_hidden_layers = max(search_space['num_hidden_layers'])
        self.hidden_layer_size = search_space['hidden_layer_size'][0]

        # nn.Parameter already marks its tensor as requiring gradients.
        self.alphas = nn.Parameter(torch.randn(self.num_hidden_layers, self.num_ops))

        width = self.hidden_layer_size
        stack = [nn.Linear(1, width)]
        for _ in range(self.num_hidden_layers - 1):
            stack.append(DifferentiableCell(width, width, self.ops_list))
        self.layers = nn.ModuleList(stack)
        self.output_layer = nn.Linear(width, 1)

    def forward(self, x):
        """Run ``x`` through the layers using the softmax of ``alphas`` as mixing weights."""
        mixing = nn.functional.softmax(self.alphas, dim=-1)

        hidden = x
        for position, layer in enumerate(self.layers):
            if isinstance(layer, DifferentiableCell):
                # Cells start at position 1, hence the offset into the rows.
                hidden = layer(hidden, mixing[position - 1])
            elif isinstance(layer, nn.Linear):
                hidden = layer(hidden)

        return self.output_layer(hidden)

    def discretize(self):
        """Convert the learned architecture weights into a discrete spec.

        Returns:
            A dict with ``num_hidden_layers`` and ``hidden_layer_size`` taken
            from the model, fixed ``learning_rate`` 0.001, ``optimizer``
            ``'Adam'`` and ``dropout_rate`` 0.0, plus ``activation_function``
            set to the name of the highest-scoring op in the first row of
            ``alphas``.
        """
        winners = self.alphas.argmax(dim=-1)
        op_names = [self.ops_list[index].__name__ for index in winners]

        return {
            'num_hidden_layers': self.num_hidden_layers,
            'hidden_layer_size': self.hidden_layer_size,
            'learning_rate': 0.001,
            'optimizer': 'Adam',
            'dropout_rate': 0.0,
            'activation_function': op_names[0],
        }

def run_gradient_based_search(search_space, X_train, y_train, X_val, y_val, num_epochs=50):
    """Search for an architecture by gradient descent on mixing weights.

    A ``DifferentiableModel`` is trained with two Adam optimisers: each epoch
    the ordinary weights take one step on the training loss, then the
    architecture parameters (``model.alphas``) take one step on the
    validation loss. Afterwards the model is discretised and the resulting
    architecture is trained from scratch for 50 epochs with
    ``evaluate_architecture`` to obtain its score.

    Args:
        search_space: Mapping from hyperparameter name to its candidate values.
        X_train: Training inputs.
        y_train: Training targets.
        X_val: Validation inputs.
        y_val: Validation targets.
        num_epochs: Number of alternating weight/architecture update rounds.

    Returns:
        A tuple ``(best_architecture, final_loss)`` with the discretised
        architecture dict and its validation MSE.
    """
    model = DifferentiableModel(search_space)
    criterion = nn.MSELoss()

    # Split the parameters so each optimiser only touches its own group.
    arch_params = [model.alphas]
    arch_param_ids = {id(p) for p in arch_params}
    weight_params = [p for p in model.parameters() if p.requires_grad and id(p) not in arch_param_ids]

    optimizer_w = optim.Adam(weight_params, lr=0.01)
    optimizer_alpha = optim.Adam(arch_params, lr=0.001)

    print(f"Starting Gradient-based Search with {num_epochs} epochs...")

    for epoch in range(num_epochs):
        # Weight step on the training split.
        optimizer_w.zero_grad()
        outputs = model(X_train)
        loss_w = criterion(outputs, y_train)
        loss_w.backward()
        optimizer_w.step()

        # Architecture step on the validation split. zero_grad() here also
        # discards the alpha gradients accumulated by the weight step above.
        optimizer_alpha.zero_grad()
        val_outputs = model(X_val)
        loss_alpha = criterion(val_outputs, y_val)
        loss_alpha.backward()
        optimizer_alpha.step()

        if (epoch+1) % 10 == 0:
            print(f"  Epoch {epoch+1}/{num_epochs}: Train Loss = {loss_w.item():.4f}, Arch Loss = {loss_alpha.item():.4f}")

    best_architecture = model.discretize()

    print("\n--- Gradient-based Search Complete ---")
    print("Found architecture from continuous search:")
    print(best_architecture)

    # evaluate_architecture builds and trains its own model, so no separate
    # model needs to be constructed here.
    final_loss = evaluate_architecture(best_architecture, X_train, y_train, X_val, y_val, num_epochs=50)

    return best_architecture, final_loss


if __name__ == "__main__":
    def _report(header, architecture, validation_mse):
        """Print one strategy's header, winning architecture and validation MSE."""
        print(header)
        print("Best Architecture Found:")
        for key, value in architecture.items():
            print(f"  {key.replace('_', ' ').title()}: {value}")
        print(f"Best Validation MSE: {validation_mse:.4f}")

    # Shared data for the RL and gradient-based searches: first 80% of the
    # samples for training, the rest for validation.
    X, y = create_dataset()
    split_idx = int(len(X) * 0.8)
    X_train, X_val = X[:split_idx], X[split_idx:]
    y_train, y_val = y[:split_idx], y[split_idx:]

    print("--- Running all three search strategies for comparison ---")

    best_arch_ea, best_perf_ea = run_evolutionary_search(search_space, population_size=10, num_generations=5)
    _report("\n--- Evolutionary Search Complete ---", best_arch_ea, best_perf_ea)

    print("\n" + "="*50 + "\n")

    best_arch_rl, best_perf_rl = run_rl_search(search_space, X_train, y_train, X_val, y_val, num_episodes=5)
    _report("\n--- RL Search Complete ---", best_arch_rl, best_perf_rl)

    print("\n" + "="*50 + "\n")

    best_arch_gb, best_perf_gb = run_gradient_based_search(search_space, X_train, y_train, X_val, y_val)
    _report("\n--- Gradient-based Search Complete ---", best_arch_gb, best_perf_gb)


--- Running all three search strategies for comparison ---
Starting Evolutionary Search with a population of 10 for 5 generations...

--- Generation 1/5 ---
  Best loss in this generation: 0.3794

--- Generation 2/5 ---
  Best loss in this generation: 0.3080

--- Generation 3/5 ---
  Best loss in this generation: 0.3199

--- Generation 4/5 ---
  Best loss in this generation: 0.3684

--- Generation 5/5 ---
  Best loss in this generation: 0.2911

--- Evolutionary Search Complete ---
Best Architecture Found:
  Num Hidden Layers: 4
  Hidden Layer Size: 32
  Activation Function: ReLU
  Learning Rate: 0.001
  Optimizer: SGD
  Dropout Rate: 0.6
Best Validation MSE: 0.2911


Starting RL Search with 5 episodes...
  Episode 1: Loss = 1.3425, Reward = -1.3425
  Episode 2: Loss = 0.9457, Reward = -0.9457
  Episode 3: Loss = 0.5463, Reward = -0.5463
  Episode 4: Loss = 1.0160, Reward = -1.0160
  Episode 5: Loss = 0.7519, Reward = -0.7519

--- RL Search Complete ---
Best Architecture Found:
  Num Hi