In [1]:
import numpy as np 
import random 
from network import Network
from sklearn.utils.extmath import softmax
from statistics import mean

def _create_population(nn_sturcture, size=100):
    """
    size: population size, number of chromesomes 

    return population, which is a list of random created network graphs
    """
    population = []
    for _ in range(size):
        nn = Network(nn_sturcture)
        population.append(nn)
#         print("population id", _)
    return population

def encode(weights, bias):
    """
    encode genes to a chromosome
    """
    gene=[]
#     assert len(weights)==len(bias), "length of weiths not equal to length of bias"
    n_layers = len(bias)
    for i in range(n_layers):
        gene.extend(weights[i].flatten())
        gene.extend(bias[i].flatten())
    return gene 
         
def decode(gene,structure):
    """
    decoed genes of a chromosome
    structure is the network structure, e.g.
    structure = [dim_input[,],5,10,n_classes]
    """
    Weights ={}
    Bias={}
    
    gene_index = 0
    input_dim = structure[0][1]
    input_num = structure[0][0]
    
    w_stuct = [input_dim] #.extend(structure[1:])
    
    w_stuct.extend(structure[1:])
#     print("weights structure", w_stuct)
    for index in range(len(w_stuct)-1):
        weight = np.zeros((w_stuct[index],w_stuct[index+1]))
        bias=[]
        
        for i in range(w_stuct[index]):
            for j in range(w_stuct[index+1]):
                weight[i][j] = gene[gene_index]
                gene_index+=1
                
        bias=np.array(gene[gene_index:gene_index+input_num*w_stuct[index+1]])
        bias = np.reshape(bias,(input_num, w_stuct[index+1]))
        gene_index+= input_num*w_stuct[index+1]
        Weights[index] = weight
        Bias[index] = bias
    print("w shape", weight.shape, "bias shape", bias.shape)
        
    return Weights, Bias
           
    
def mutate(chro, mutation_rate):
    #for each gene, it can decide to change itself to the mean of its nearby genes or not
    new_chro = []
        
    for i in range(len(chro)-1):
        xx = (chro[i]+chro[i-1]+chro[i+1] )/3
        new_chro.append(xx)
    return new_chro
    

def crossover(chro1, chro2):
    # create a random crossover point
    assert len(chro1) ==len(chro2), "chromosome not the same length"
    index = np.random.randint(low=1, high=len(chro1))
    print("index", index)
    temp1 = chro1[index:]
    temp3 = chro1[:index]
    temp2 = chro2[:index]
    temp4 = chro2[index:]
    
#     print("temp1",temp1)
    #random combine half of chro1 genes and half of chro2 genes below
    rd = np.random.choice([1,2,3,4])
#     print("rd", rd)
    new_chro=[]
    
    if rd ==1:
        new_chro = temp1+temp2
#         print(new_chro)
    if rd ==2:
        new_chro = temp2+temp1
    if rd ==3:
        new_chro = temp3+temp4
    if rd==4:
        new_chro = temp4+temp3
    
    temp1,temp2,temp3,temp4=[],[],[],[]
        
#     print("new chro", len(new_chro), len(chro1), len(chro2))
    return new_chro
    

def selection(parents,pop):
    chirldren=[[],[]] #get two chirldren
    chrome={}
#     print("length parents", len(parents))
    for i in range(len(parents)):
        parent_id = parents[i][0]
        nn=pop[parent_id]
        weights = nn.getWeights()
        bias = nn.getBias()
        gene = encode(weights, bias)
        chrome[i] = gene
#     print("chrome 1", chrome[1])
    chirldren[0] = crossover(chrome[0], chrome[1])
    chirldren[1] = crossover(chrome[2],chrome[3])
    
#     print("chirldren", chirldren)
    
    return chirldren
           
         
def GeneticAlgorithmTrainner(X,y, nn_structure, episodes = 10000, mutation_rate = 0.1):
    # first generation 
    pop = _create_population(nn_structure)
    fit_score= []
    
    avg_fit_scores=[]
    for i in range(len(pop)):
        nn = pop[i]
        score = fitness(nn,X,y)
        fit_score.append([i,score])
    
    
    fit_score=np.array(fit_score)
    fit_score.sort(axis=0)
    temp_ = []
    fit_score =np.flip(fit_score,0)
    for ele in fit_score:
        temp_.append([int(ele[0]),ele[1]])
#     print("score",temp_)
    fit_score = temp_
#     print("fit score", fit_score)
    
    len_pop=[]
    
    # select the nn with hightest 4 fitness scores
    for epi in range(episodes):
        
        sc = np.sum(fit_score,axis=0)[1]/len(fit_score)
        avg_fit_scores.append(sc)
        print("fitness score",  sc)
        luckyest = fit_score[:4]
#         print("luck 4",luckyest)
        
        #remove two worst case
#         worst2= fit_score[-2:]
#         for s in worst2:
#             del pop[s[0]]
        offspring = selection(luckyest, pop)
        
        for kid in offspring:
            #mutation
#             print("kid chrom", kid)
            p = np.random.random_sample()
            if p <mutation_rate:
                kid = mutate(kid, mutation_rate )
            Weights, Bias = decode(kid,structure)
            nn = Network(nn_structure)
            nn.setWeights(Weights)
            nn.setBias(Bias)
            pop.append(nn)
        fit_score= []
#         len_pop.append(len(pop))
        
#         if len_pop[epi] ==len_pop[epi-1]:
#             #converged 
#             print("converged")
#             break 
        #calculate new fitness score 
    
        print("here")
        for i in range(len(pop)):
            nn = pop[i]
            score = fitness(nn,X,y)
            fit_score.append([i,score])
            
        fit_score=np.array(fit_score)
        fit_score.sort(axis=0)
        
        score = []
        fit_score =np.flip(fit_score,0)
        for ele in fit_score:
            score.append([int(ele[0]),ele[1]])
        fit_score = score
        print("episodes end", epi)
    return avg_fit_scores
        
def fitness(nn, X, y):
    ## 1/ cross_entropy 
    W = nn.getWeights()  #W is a dict, W[i] = [[n_layer_nodes, n-1_layer_nodes]]
    b = nn.getBias() # b is a dict, b[i] = [bias of layer n]
    z=[]
    a = []
    probs = []
    n_layer = len(W)
    

    
    for i in range(n_layer):
#         print("x shape", X.shape, "y shape", y.shape)
#         print("w shape", W[i].shape, "bshape", b[i].shape)
#         print("w shape", W[i].shape, "b shape", b[i].shape)
        if i ==0: 
            z = np.matmul(X, W[i])+b[i]
            a = np.tanh(z)
        elif i< n_layer-1:
            z = np.matmul(a,W[i])+b[i]
            a = np.tanh(z)
        else: # softmax
            z = np.matmul(a, W[i])+b[i]
            probs = softmax(z)
    probs = np.array(probs)
#     print("probs shape", probs.shape)
    # use softmax to calculate cross entropy below:
    ones = np.ones([1, len(y[0])])
    m = len(y[0])
    loss = -(1.0/m) * np.sum((np.dot(y, np.log(probs)) + np.dot(ones-y, np.log(1-probs))))
#     log_likelihood = -np.log(probs[range(m),y])
#     loss = np.sum(log_likelihood) / m
    fit = 1/loss
    return fit
    
                

In [2]:
#load data below

with open('seeds_dataset.txt', 'r') as f:
    x = f.readlines()
data=[]

for line in x:
    temp=[]
    temp.append(line.strip().split())
    l = []
    
    for ele in temp[0]:
        l.append(float(ele))
    data.append(l)
#     print(l)
data = np.array(data)
np.random.shuffle(data)
print(data)

[[20.97   17.25    0.8859 ...  4.677   6.316   2.    ]
 [12.74   13.67    0.8564 ...  2.504   4.869   1.    ]
 [18.14   16.12    0.8772 ...  3.619   6.011   2.    ]
 ...
 [17.12   15.55    0.8892 ...  2.858   5.746   2.    ]
 [14.33   14.28    0.8831 ...  3.328   5.224   1.    ]
 [11.75   13.52    0.8082 ...  4.378   5.31    3.    ]]


In [3]:
train_X = []
train_y = []
test_X=[]
test_y=[]
len_data = len(data)
train_data = data[:int(len_data*0.8)]
test_data = data[int(len_data*0.8):]
train_X = np.array(train_data[:, [column for column in range(len(train_data[0])-1)]])
test_X  = np.array(test_data[:, [column for column in range(len(train_data[0])-1)]])
train_y = np.array([train_data[:,len(train_data[0])-1 ]])
test_y = np.array([test_data[:,len(train_data[0])-1 ]])

print(train_X.shape)
print(test_y.shape)

(168, 7)
(1, 42)


In [4]:
print(train_X[:10])

[[20.97   17.25    0.8859  6.563   3.991   4.677   6.316 ]
 [12.74   13.67    0.8564  5.395   2.956   2.504   4.869 ]
 [18.14   16.12    0.8772  6.059   3.563   3.619   6.011 ]
 [10.79   12.93    0.8107  5.317   2.648   5.462   5.194 ]
 [18.94   16.49    0.875   6.445   3.639   5.064   6.362 ]
 [12.76   13.38    0.8964  5.073   3.155   2.828   4.83  ]
 [13.34   13.95    0.862   5.389   3.074   5.995   5.307 ]
 [10.8    12.57    0.859   4.981   2.821   4.773   5.063 ]
 [11.83   13.23    0.8496  5.263   2.84    5.195   5.307 ]
 [11.48   13.05    0.8473  5.18    2.758   5.876   5.002 ]]


In [5]:
n_classes = 3
n_examples = len(train_X)
dim_input = train_X.shape
print(dim_input)
structure = [dim_input,5,10,n_classes]

(168, 7)


In [6]:
fit_data = GeneticAlgorithmTrainner(train_X,train_y,structure)

fitness score 0.07794583397144715
index 624
index 2778
w shape (10, 3) bias shape (168, 3)
w shape (10, 3) bias shape (168, 3)
here
episodes end 0
fitness score 0.07809676010185584
index 58
index 1425
w shape (10, 3) bias shape (168, 3)
w shape (10, 3) bias shape (168, 3)
here
episodes end 1
fitness score 0.07815847238769448
index 860
index 1891
w shape (10, 3) bias shape (168, 3)
w shape (10, 3) bias shape (168, 3)
here
episodes end 2
fitness score 0.0782565048784615
index 1358
index 472


ValueError: cannot reshape array of size 503 into shape (168,3)

In [None]:
print(fit_data)