In [135]:
import copy
import math
import random
from random import randint

import numpy as np
import pandas as pd

In [136]:
def _readfile(file):

    # ----- read data -----
    df = pd.read_csv(file)
    df = df.fillna(method = 'ffill')

    # ----- Create Features -----
    X = df.drop(['id','class'], axis=1).copy(deep=False)
    X = (X-X.min())/(X.max()-X.min())

    # ----- Create Desired outputs -----
    Y = df[['class']].copy(deep=False)

    Input = X.to_numpy()
    Output = Y.to_numpy()

    return Input,Output

# Load the scaled feature matrix and the class column used by the cells below.
Input,Output = _readfile('data/wdbc.csv')

In [137]:
def cross_validations_split(shape, folds):
    """Split ``shape`` samples into ``folds`` contiguous index ranges.

    Each fold holds ``shape // folds`` samples; the last fold additionally
    absorbs the remainder so the ranges cover ``0 .. shape`` without gaps.

    Parameters
    ----------
    shape : int
        Total number of samples.
    folds : int
        Number of folds to create.

    Returns
    -------
    list of [int, int]
        One ``[start, stop]`` pair per fold, where ``stop`` is exclusive.
        An empty list is returned when ``folds`` is smaller than 1.
    """
    if folds < 1:
        return []

    # The previous ``int(shape * folds / 100)`` only matched one fold's size
    # when ``folds`` was exactly 10.
    fold_size = shape // folds

    bounds = []
    start = 0
    for i in range(1, folds + 1):
        # The final fold runs to the end so leftover samples are not dropped.
        stop = i * fold_size if i < folds else shape
        bounds.append([start, stop])
        start = stop
    return bounds

In [138]:
class GA(object):
    """Feed-forward network whose weight matrices form one GA chromosome.

    The network is fully connected, uses a sigmoid after every layer and has
    no bias terms. ``genes`` is a list with one weight matrix per pair of
    consecutive layers, initialised uniformly in ``[-2, 2)``.

    Parameters
    ----------
    hiddenSize : sequence of int
        Number of units in each hidden layer (may be empty).
    inputSize : int
        Number of input features.
    outputSize : int
        Number of output units.
    """

    def __init__(self, hiddenSize, inputSize, outputSize):

        # initiate layers
        self.inputSize = inputSize
        self.outputSize = outputSize
        self.hiddenSize = hiddenSize

        # list() lets callers pass the hidden sizes as a tuple as well.
        layers = [self.inputSize] + list(self.hiddenSize) + [self.outputSize]

        # initiate genes: one (n_in, n_out) matrix per layer transition
        self.genes = [
            np.random.uniform(-2, 2, (n_in, n_out))
            for n_in, n_out in zip(layers[:-1], layers[1:])
        ]
        self.layers = layers

    def feedForward(self, X):
        """Propagate ``X`` through every layer and return the final activations.

        ``X`` may be a single sample (1-D) or a batch with one sample per row.
        """
        activation = X
        for weights in self.genes:
            activation = self.sigmoid(np.dot(activation, weights))
        return activation

    def sigmoid(self, s, deriv=False):
        """Logistic function of ``s``.

        With ``deriv=True`` the method returns ``s * (1 - s)``, which is the
        sigmoid derivative only if ``s`` is already a sigmoid output.
        """
        if deriv:
            return s * (1 - s)
        # exp(-s) overflows to inf for very negative s; 1 / (1 + inf) is the
        # correct limit 0.0, so only the overflow warning is suppressed.
        with np.errstate(over='ignore'):
            return 1 / (1 + np.exp(-s))

    def object_funct(self, X, Y):
        """Evaluate this chromosome on ``X``/``Y``.

        The fitness is ``1 / log(1 + mean squared error)`` and a sample counts
        as correct when every output is within 0.5 of its target. Both values
        are also stored on the instance as ``self.fx`` and ``self.acc``
        (``self.acc`` holds the raw count of correct samples).

        Neither metric depends on sample order, so the inputs are no longer
        shuffled in place and the global NumPy RNG is no longer reseeded;
        the earlier behaviour silently permuted the caller's arrays and reset
        the random stream used by the rest of the algorithm.

        Parameters
        ----------
        X : array-like, shape (n_samples, inputSize)
        Y : array-like, shape (n_samples,) or (n_samples, n_targets)

        Returns
        -------
        (float, float)
            Fitness and accuracy in percent. The fitness is ``inf`` when the
            error is exactly zero.

        Raises
        ------
        ValueError
            If ``X`` contains no samples.
        """
        X = np.asarray(X)
        n_samples = len(X)
        if n_samples == 0:
            raise ValueError("object_funct needs at least one sample")

        # Force 2-D so a flat label vector cannot broadcast to (n, n).
        targets = np.asarray(Y).reshape(n_samples, -1)
        outputs = self.feedForward(X).reshape(n_samples, -1)

        abs_err = np.abs(outputs - targets)
        self.acc = int(np.count_nonzero((abs_err < 0.5).all(axis=1)))

        # Per-sample mean squared error, summed over samples.
        sum_err = float(np.mean(abs_err ** 2, axis=1).sum())

        log_term = math.log(1 + sum_err / n_samples)
        # A perfect fit gives log(1) == 0; report unbounded fitness instead of
        # raising ZeroDivisionError.
        self.fx = float('inf') if log_term == 0 else 1 / log_term

        return self.fx, self.acc * 100 / n_samples

    def _mse(self, target, output):
        """Mean squared error between one target and one output vector."""
        return np.average(abs(target - output)**2)


In [139]:
class selection(object):
    """Linear-ranking selection followed by stochastic universal sampling.

    Parameters
    ----------
    chromosomes : sequence of [fitness, individual]
        Candidates ordered by rank. The position in the sequence is the rank:
        the first entry receives the lowest selection probability and the
        last entry the highest.
    """

    def __init__(self, chromosomes):

        # initiate fitness
        self.chromosomes = chromosomes
        # Expected number of copies of the best (Max) and worst (Min)
        # individual; Min = 2 - Max keeps the probabilities summing to 1.
        self.Max = 1.2
        self.Min = 2-self.Max

    # initiate probability
    def prob(self):
        """Return the linear-ranking selection probability of every entry.

        For 0-based rank ``r`` out of ``N`` entries the probability is
        ``(Min + (Max - Min) * r / (N - 1)) / N``, so the values sum to 1.
        """
        # Use the instance's own population, not a same-named global.
        N = len(self.chromosomes)
        if N == 0:
            return []
        if N == 1:
            # A single candidate is always selected; avoids dividing by N - 1.
            return [1.0]

        Max = self.Max
        Min = self.Min
        # ``r`` is already 0-based, so it stands in for ``rank - 1`` of the
        # textbook formula.
        return [(Min + (Max - Min) * (r / (N - 1))) / N for r in range(N)]

    def expect_values(self):
        """Return the expected number of copies of every entry (``p * N``)."""
        N = len(self.chromosomes)
        return [p * N for p in self.prob()]

    # Stochastic universal sampling
    def Stochastic_sampling(self):
        """Build the mating pool with stochastic universal sampling.

        A single random offset in ``[0, 1)`` places equally spaced pointers
        (distance 1) over the cumulative expected values; each pointer selects
        the entry whose cumulative interval it falls into.

        Returns
        -------
        list
            The selected individuals (second element of each input pair).
        """
        ni = self.expect_values()
        ptr = np.random.uniform(0, 1)
        answers = []
        sum_pi = 0

        for i, expected in enumerate(ni):
            sum_pi += expected
            # Every pointer that lies inside this entry's interval selects it.
            while sum_pi > ptr:
                answers.append(self.chromosomes[i][1])
                ptr += 1

        return answers

In [140]:
def Crossover_chromosomes(mating_pool,no_chromosomes):
    """Create the next generation from ``mating_pool`` by uniform crossover.

    A random number of children (between 80% of the pool size and
    ``no_chromosomes``) is bred; each child takes every weight column of every
    layer from one of two randomly chosen parents. If fewer than
    ``no_chromosomes`` children were bred, the generation is padded with
    copies of randomly chosen pool members.

    Parameters
    ----------
    mating_pool : list of GA
        Selected parents; all are expected to share one architecture.
    no_chromosomes : int
        Size of the generation to return.

    Returns
    -------
    list of GA
        ``no_chromosomes`` new individuals (more only if the random child
        count exceeds it, which the bounds below prevent). No returned object
        is shared with ``mating_pool``.

    Raises
    ------
    ValueError
        From ``random.sample`` if the pool holds fewer than two individuals
        and at least one child has to be bred.
    """
    new_generation = []
    template = mating_pool[0]
    hidden, n_in, n_out = template.hiddenSize, template.inputSize, template.outputSize

    # randint requires low <= high; clamp in case the pool is much larger
    # than the requested generation.
    lower = min(int(len(mating_pool)*0.8), no_chromosomes)
    n_children = random.randint(lower, no_chromosomes)

    for _ in range(n_children):

        # random couple
        couple = random.sample(mating_pool, 2)

        # crossover: every column (one unit's incoming weights) comes from
        # one of the two parents
        child = GA(hidden, n_in, n_out)
        for layer, g in enumerate(child.genes):
            for j in range(g.shape[1]):
                k = random.randint(0, 1)
                g[:,j] = couple[k].genes[layer][:,j]

        new_generation.append(child)

    # if size of new_gen != mating_pool
    while len(new_generation) < no_chromosomes:
        survivor = random.choice(mating_pool)
        # Clone instead of appending the parent itself: the same parent can be
        # drawn repeatedly, and later in-place mutation must not touch
        # several "individuals" (or the recorded best chromosome) at once.
        clone = GA(hidden, n_in, n_out)
        clone.genes = [g.copy() for g in survivor.genes]
        new_generation.append(clone)

    return new_generation


In [141]:
def Mutation(chromosomes, mutation_rate=0.2):
    """Return mutated copies of ``chromosomes``.

    Every weight column of every layer is, with probability
    ``mutation_rate``, shifted by noise drawn uniformly from ``[-1, 1)``.
    The individuals passed in are deep-copied first, so neither they nor any
    other reference to them is modified.

    Parameters
    ----------
    chromosomes : list
        Individuals exposing a ``genes`` list of 2-D NumPy arrays.
    mutation_rate : float, optional
        Per-column mutation probability. The default 0.2 equals the previous
        hard-coded ``randint(0, 4) == 1`` test (1 in 5, not the 25% its
        comment claimed).

    Returns
    -------
    list
        New individuals, in the same order as ``chromosomes``.
    """
    # A shallow list copy would still share the gene arrays with the input.
    next_generation = [copy.deepcopy(c) for c in chromosomes]
    for c in next_generation:
        for g in c.genes:
            for j in range(g.shape[1]):
                if random.random() < mutation_rate:  # is mutate
                    g[:,j] += np.random.uniform(-1, 1, g.shape[0])

    return next_generation

In [142]:
# Run settings shared by the cells below.
no_chromosomes = 100   # individuals per generation
no_generations = 100   # generations evolved per cross-validation fold

# Current population; starts empty and is filled by the initialisation cell.
chromosomes = []

In [143]:
# initiate chromosomes P(0)
# The list is rebuilt rather than appended to, so running this cell again
# cannot grow the population beyond ``no_chromosomes``.
chromosomes = [
    GA([30,18,9,4,2],Input.shape[1],Output.shape[1])
    for _ in range(no_chromosomes)
]

# initiate best objective function
# [best fitness seen so far, chromosome that achieved it]
best_chromosome = [float('-inf'),chromosomes[0]]

In [144]:

# Cross-validated GA training.
# For every fold: evolve a fresh population on the training rows for
# ``no_generations`` generations, then score the final population on the
# held-out rows. Per-fold summaries are collected in the four lists below.
obj_per_fold = []
best_per_fold = []

test_per_fold = []
acc_per_fold = []

# Architecture of the existing population; reused to restart every fold.
prototype = chromosomes[0]

for a,b in cross_validations_split(Input.shape[0],10):
    # Rows [a, b) are held out; all remaining rows are used for training.
    # (Slicing from ``b`` rather than ``b+1`` keeps row ``b`` in the training set.)
    x_train = np.concatenate((Input[:a],Input[b:]))
    y_train = np.concatenate((Output[:a],Output[b:]))
    # Copies, so evaluation can never reorder the underlying dataset.
    x_test = Input[a:b,:].copy()
    y_test = Output[a:b].copy()

    # Start each fold from a new random population and a reset best tracker.
    # Carrying the population over would evaluate a fold on rows it was
    # already trained on during earlier folds.
    chromosomes = [
        GA(prototype.hiddenSize, prototype.inputSize, prototype.outputSize)
        for _ in range(no_chromosomes)
    ]
    best_chromosome = [float('-inf'),chromosomes[0]]

    for t in range(1,no_generations+1):

        # evaluation objective function
        ranks = []
        mean_obj = []
        # train neural network with GA
        for c in chromosomes:

            fx,_ = c.object_funct(x_train, y_train)
            mean_obj.append(fx)
            ranks.append([fx,c])

            if fx > best_chromosome[0]:
                print("-- update best fitenss ",round(fx,6))
                best_chromosome[0] = fx
                best_chromosome[1] = c

        # Linear ranks selection (ascending fitness: worst first, best last)
        ranks.sort(key=lambda pair: pair[0])
        mating_pool = selection(ranks).Stochastic_sampling()

        # Crossover
        c_chromosomes = Crossover_chromosomes(mating_pool,no_chromosomes)

        # Mutation
        new_generations = Mutation(c_chromosomes)

        # P(t) = P(t+1)
        chromosomes = new_generations.copy()

        print("---- average fitness",t," : ", np.mean(mean_obj))

    # Score the final population of this fold on the held-out rows.
    mean_test = []
    acc_test = []
    for c in chromosomes:
        fx,acc = c.object_funct(x_test, y_test)
        mean_test.append(fx)
        acc_test.append(acc)

    print("** test fitness : ",np.mean(mean_test))
    print("** aac test fitness : ",np.mean(acc_test))

    test_per_fold.append(np.mean(mean_test))
    acc_per_fold.append(np.mean(acc_test))
    # ``mean_obj`` holds the training fitness values of the last generation.
    obj_per_fold.append(np.mean(mean_obj))
    best_per_fold.append(best_chromosome[0])

    # Rewrite the summary after every fold so partial results survive an
    # interrupted run.
    print("##################### save csv. #####################")
    result = pd.DataFrame({'best_chromosome': best_per_fold,'train_average': obj_per_fold,'test_average': test_per_fold,'test_accuracy': acc_per_fold})
    result.to_csv('GA2.csv', encoding='utf-8')


---- average fitness 81  :  4.681475335445953
---- average fitness 82  :  4.635427359141776
---- average fitness 83  :  4.779757644535923
---- average fitness 84  :  4.8234957133953165
---- average fitness 85  :  4.8056403057224735
---- average fitness 86  :  4.836322857175248
---- average fitness 87  :  4.868134755268714
---- average fitness 88  :  4.83331900305816
---- average fitness 89  :  4.725722521430836
---- average fitness 90  :  4.89072748095992
---- average fitness 91  :  4.797623522302488
---- average fitness 92  :  4.8640422667703085
---- average fitness 93  :  4.922905854243804
-- update best fitenss  8.284556
---- average fitness 94  :  5.038538383822145
-- update best fitenss  8.903505
---- average fitness 95  :  4.88774108393865
---- average fitness 96  :  4.855754490909925
---- average fitness 97  :  4.977478611474251
---- average fitness 98  :  4.934871510429819
---- average fitness 99  :  4.973656164379582
---- average fitness 100  :  5.083313145287887
** test fitness :  5.7068239

In [145]:
# Write the per-fold summary (one row per completed fold) to GA2.csv.
result = pd.DataFrame(
    {
        'best_chromosome': best_per_fold,
        'train_average': obj_per_fold,
        'test_average': test_per_fold,
        'test_accuracy': acc_per_fold,
    }
)
result.to_csv('GA2.csv', encoding='utf-8')