In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import math
import matplotlib.pyplot as plt
import torch.nn.init as init

In [2]:
def np_to_torch(arr):
    
    arr = torch.FloatTensor(arr)
    arr = arr.unsqueeze(-1)
    arr = arr.clone().detach().requires_grad_(True)
    
    return arr

def x_train_data(N_x, N_t, x_l, x_r):
    
    x_train = np.linspace(x_l, x_r, N_x)
    x_train = np.tile(x_train, N_t)
    x_train = np_to_torch(x_train)
    
    return x_train

def t_train_data(N_x, N_t, t_i, t_f):
    
    t_train = np.linspace(t_i, t_f, N_t)
    t_train = np.repeat(t_train, N_x)
    t_train = np_to_torch(t_train)

    return t_train

def generate_genome(mean, std, num_weights, num_biases):
    return np.random.normal(mean, std, num_weights), np.random.normal(mean, std, num_biases)

def generate_population(num_pop, mean, std, num_weights, num_biases):
    pop_wts = []
    pop_bia = []
    
    for i in range(num_pop):
        a, b = generate_genome(mean, std, num_weights, num_biases)
        pop_wts.append(a)
        pop_bia.append(b)
    
    return pop_wts, pop_bia

class ANN(nn.Module):
    def __init__(self, layer_size):
        super(ANN, self).__init__()
        
        self.l1 = nn.Linear(2, 5)
        self.l2 = nn.Linear(5, 5)
        self.l3 = nn.Linear(5, 1)
        
    def forward(self, x_train, t_train):
        
        tanh = nn.Tanh()
        a = tanh( self.l1(torch.cat((x_train, t_train),1)) )
        b = tanh( self.l2(a) )
        T = self.l2(b)
        
#         print(T[0:10])
        dTdx = torch.autograd.grad(T, x_train, grad_outputs=torch.ones_like(T), create_graph=True)[0]
        d2Tdx2 = torch.autograd.grad(dTdx, x_train, grad_outputs=torch.ones_like(dTdx), create_graph=True)[0]

        dTdt = torch.autograd.grad(T, t_train, grad_outputs=torch.ones_like(T), create_graph=True)[0]
        
        return T, dTdx, d2Tdx2, dTdt

def num_weights_biases(model):
    
    num_weights = 0
    num_biases = 0

    for param_tensor in model.state_dict():
        if 'weight' in param_tensor:
            num_weights += model.state_dict()[param_tensor].numel()
        elif 'bias' in param_tensor:
            num_biases += model.state_dict()[param_tensor].numel()

    return num_weights, num_biases

def eval_fitness_loss(x_train, t_train, k1, mat_1, mat_2, mat_3, N, N_1, N_2, N_3):
    
    T, dTdx, d2Tdx2, dTdt  = model(x_train, t_train)
    eq1 = torch.sum( torch.square( dTdt - k1*d2Tdx2 )  )/(N) 
    bc1 = torch.sum(torch.square(torch.mul( mat_1,(T - 1) ) ) )/N_1
    bc2 = torch.sum(torch.square(torch.mul( mat_2,(T) ) ) )/N_2
    ic1 = torch.sum(torch.square(torch.mul( mat_3,(T) ) ) )/N_3
    
    fitness = 1/(eq1 + bc1 + bc2 + ic1)
    loss = eq1 + bc1 + bc2 + ic1
    
    return fitness.detach().numpy(), loss

def tour_selection(scores, num_pop, selection_k):
    
    a = np.random.randint( 0, num_pop, size=selection_k, dtype=int)
    
    # Initialise first and second index
    first_idx = 1
    second_idx = 0
    if a[0]>a[1]:
        first_idx = 0
        second_idx = 1
       
    # Tournament selection loop
    for i in range(2, selection_k):
        if a[i]>a[first_idx]:
            first_idx = i
            continue
        if a[i]<a[first_idx] and a[i]>a[second_idx]:
            second_idx = i
            continue
            
    return first_idx, second_idx

def crossover(pop_wts, pop_bia, first_idx, second_idx):
    
    # choose crossover index for weights and biases
    weight_cut = np.random.randint( 0, num_weights, dtype=int)
    bias_cut = np.random.randint( 0, num_biases, dtype=int)
    
    # Crossover of weights
    a = pop_wts[first_idx].copy()
    b = pop_wts[second_idx].copy()
    pop_wts[first_idx] = np.concatenate( (a[0:weight_cut], b[weight_cut:]) )
    pop_wts[second_idx] = np.concatenate( (b[0:weight_cut], a[weight_cut:]) )
    
    # Crossover of biases
    a = pop_bia[first_idx].copy()
    b = pop_bia[second_idx].copy()
    pop_bia[first_idx] = np.concatenate( (a[0:bias_cut], b[bias_cut:]) )
    pop_bia[second_idx] = np.concatenate( (b[0:bias_cut], a[bias_cut:]) )
    
    return pop_wts, pop_bia

def scramble_mutation(pop_wts, pop_bia, first_idx, second_idx, mutation_wts, mutation_bia, num_weights, num_biases):
    
    # mutate first child
    a = np.random.randint( 0, num_weights - mutation_wts, dtype=int)
    np.random.shuffle( pop_wts[first_idx][a:a + mutation_wts] )
    a = np.random.randint( 0, num_biases - mutation_bia, dtype=int)
    np.random.shuffle( pop_bia[first_idx][a:a + mutation_bia] )
    
    # mutate second child
    a = np.random.randint( 0, num_weights - mutation_wts, dtype=int)
    np.random.shuffle( pop_wts[second_idx][a:a + mutation_wts] )
    a = np.random.randint( 0, num_biases - mutation_bia, dtype=int)
    np.random.shuffle( pop_bia[second_idx][a:a + mutation_bia] )
    
    return pop_wts, pop_bia

def load_wts_bia(model, wts, bia):
    
    model.l1.weight.data = torch.FloatTensor( wts[0:10].reshape((5, 2)) )
    model.l2.weight.data = torch.FloatTensor( wts[10:35].reshape((5, 5)) )
    model.l3.weight.data = torch.FloatTensor( wts[35:].reshape((1, 5)) )
    
    model.l1.bias.data = torch.FloatTensor( bia[0:5].reshape((5)) )
    model.l2.bias.data = torch.FloatTensor( bia[5:10].reshape((5)) )
    model.l3.bias.data = torch.FloatTensor( bia[10].reshape((1)) )
    
def get_wts_bia(model, wts, bia):
    
    w1 = model.l1.weight.data.flatten().detach().numpy() 
    w2 = model.l2.weight.data.flatten().detach().numpy() 
    w3 = model.l3.weight.data.flatten().detach().numpy() 
    wts = np.concatenate((w1, w2, w3))
    
    b1 = model.l1.bias.data.flatten().detach().numpy() 
    b2 = model.l2.bias.data.flatten().detach().numpy() 
    b3 = model.l3.bias.data.flatten().detach().numpy() 
    bia = np.concatenate((b1, b2, b3))
    
    return wts, bia

In [3]:
N_x = 40
N_t = 40

x_l = 0
x_r = 0.5

t_i = 0
t_f = 0.6

# Genetic algorithm
num_pop = 50
num_gen = 200
selection_k = 10
mutation_wts = 7
mutation_bia = 3

# material params
k1 = 0.05

# Neural network params
layer_size = [2, 2, 1]
mean = 0
std = 0.3

# Neural Network architecture
model = ANN(layer_size)
print(model)
total_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
print("Total trainable parameters in the model:", total_trainable_params)
num_weights, num_biases = num_weights_biases(model)
print(f"Number of weights: {num_weights}")
print(f"Number of biases: {num_biases}")

# Setup Loss function and Optimiser
lr = 5e-3
epochs = 150
optimiser1 = torch.optim.Adam(model.parameters(), lr=lr)

ANN(
  (l1): Linear(in_features=2, out_features=5, bias=True)
  (l2): Linear(in_features=5, out_features=5, bias=True)
  (l3): Linear(in_features=5, out_features=1, bias=True)
)
Total trainable parameters in the model: 51
Number of weights: 40
Number of biases: 11


In [4]:
best_fitness = []

# initialise population
pop_wts, pop_bia = generate_population(num_pop, mean, std, num_weights, num_biases)

# initialise training data and matrices 
x_train = x_train_data(N_x, N_t, x_l, x_r)
t_train = t_train_data(N_x, N_t, t_i, t_f)
N = x_train.shape[0]
print("N = ", N)
mat_3 = torch.mul( torch.where(t_train == t_i,1,0), torch.where(x_train != 0,1,0) )
N_3 = torch.sum( mat_3 )
print("N_3 = ", N_3)
mat_2 = torch.where(x_train == x_r,1,0)
N_2 = torch.sum(mat_2)
print("N_2 = ", N_2)
mat_1 = torch.where(x_train == x_l,1,0)
N_1 = torch.sum(mat_1)
print("N_1 = ", N_1)

for gen in range(num_gen):
    
    ################### Gradient Based Algorithm #####################
    for i in range(num_pop):
        
        # Load wts and biases
        load_wts_bia(model, pop_wts[i], pop_bia[i])
        
        for j in range(epochs):
        
            # Evaluate loss
            _, loss = eval_fitness_loss(x_train, t_train, k1, mat_1, mat_2, mat_3, N, N_1, N_2, N_3) 
            optimiser1.zero_grad()
            loss.backward()
            optimiser1.step()
            
        # get weights and store them
#         print('i = ',i,', loss = ', loss.detach().numpy())
        wts, bia = get_wts_bia(model, pop_wts[i], pop_bia[i])
        pop_wts[i] = wts
        pop_bia[i] = bia
        
    ################### Genetic Algorithm #####################
    scores = []
    for i in range(num_pop):

        # Load wts and biases
        load_wts_bia(model, pop_wts[i], pop_bia[i])

        # Evaluate fitness
        fitness, _ = eval_fitness_loss(x_train, t_train, k1, mat_1, mat_2, mat_3, N, N_1, N_2, N_3) 
        scores.append(fitness)
        
    # Store results
    best_fitness.append(max(scores))
    sol_idx = scores.index(best_fitness[-1])
    
    if gen%10==0:
        print('Generation = ',gen,', Best Fitness = ', best_fitness[-1])
        print('Solution Index = ', sol_idx)
    
    # Tournament selection
    first_idx, second_idx = tour_selection(scores, num_pop, selection_k)

    # Crossover
    pop_wts, pop_bia = crossover(pop_wts, pop_bia, first_idx, second_idx)
    
    # Mutation
    pop_wts, pop_bia = scramble_mutation(pop_wts, pop_bia, first_idx, second_idx, mutation_wts, mutation_bia, num_weights, num_biases)

N =  1600
N_3 =  tensor(39)
N_2 =  tensor(40)
N_1 =  tensor(40)
Generation =  0 , Best Fitness =  0.94889164
Solution Index =  1
Generation =  10 , Best Fitness =  4.321183
Solution Index =  48
Generation =  20 , Best Fitness =  5.467894
Solution Index =  48
Generation =  30 , Best Fitness =  6.2849503
Solution Index =  48
Generation =  40 , Best Fitness =  6.6559963
Solution Index =  48
Generation =  50 , Best Fitness =  6.9747305
Solution Index =  46
Generation =  60 , Best Fitness =  7.2436576
Solution Index =  46
Generation =  70 , Best Fitness =  7.5133176
Solution Index =  26


KeyboardInterrupt: 