In [1]:
# Python 3.8.3
import deap # 1.3.1
from deap import gp, creator, base, tools, algorithms

import numpy as np # 1.18.5

import operator, random

import matplotlib.pyplot as plt

filename = "./Datasets/yeast.data"

with open(filename) as f:
    content = f.read().splitlines()
    
data = [] # data for saving the features from file
names = [] # names of the protein classes
    
for line in content:
    tmp = line.split()
    tmp.pop(0) # pops the first element - we dont need it
    
    names.append(tmp.pop()) # pops the last element - name of protein
    
    data.append(np.asarray(tmp, float)) # extracting features to data as float values


# Protected division
def div(x, y):
    if(y == 0):
        return 1
    else:
        return x/y

# If then operator - promijenila sam naziv iz if_then u lf (tako je tražilo cxSemantic)
def lf(a, b, c):
    if a < 0:
        return b
    else:
        return c

pset = gp.PrimitiveSet("main", arity=8)

# Adding primitive operators
pset.addPrimitive(operator.add, 2)
pset.addPrimitive(operator.sub, 2)
pset.addPrimitive(operator.mul, 2)
pset.addPrimitive(div, 2) 
pset.addPrimitive(lf, 3, name="lf") #promijenila naziv

expr = gp.genHalfAndHalf(pset, min_=1, max_=8)
tree = gp.PrimitiveTree(expr)

# print(tree)

creator.create("Fitness", base.Fitness, weights=(1.0, 1.0))

# Create an individual of type primitive tree and our fitness function, pset is a previously defined
creator.create("Individual", gp.PrimitiveTree, fitness=creator.Fitness, pset=pset)



# Creating toolbox
toolbox = base.Toolbox()


toolbox.register("expr", gp.genHalfAndHalf, pset=pset, min_=1, max_=8) # genHalfAndHalf - Generate an expression with a PrimitiveSet pset. Half the time, the expression is generated with genGrow(), the other half, the expression is generated with genFull()
toolbox.register("individual", tools.initIterate, creator.Individual, toolbox.expr) # Register individual from expression

toolbox.register("population", tools.initRepeat, list, toolbox.individual)

# toolbox.population(n=10)

#toolbox.register("population", tools.initRepeat, gp.PrimitiveTree, toolbox.individual)
population = toolbox.population(n=20)

# ind1 = toolbox.individual()
# print (ind1)       #test printanje jedinke       
# print (ind1.fitness.valid)

# Evaluation function
def evaluateAccuracy(individual, data, names, pset): # po predlosku
    
    #Compiling the expression
    classificator = gp.compile(individual, pset)
    
    minority_count = names.count("MIT")
    true_positive_count = 0
    
    majority_count = len(names) - minority_count
    true_negative_count = 0
    
    for (d, n) in zip(data, names):
        # feeding data to individual classificator
        result = classificator(*d)
        
        #counting right answers
        if (n=="MIT") and (result >= 0): true_positive_count += 1
        elif (n!="MIT") and (result < 0): true_negative_count += 1
        
    # return accuracy on minority class and majority class
    return (true_positive_count/minority_count, true_negative_count/majority_count)


toolbox.register('select', tools.selTournament, tournsize = 7)
# toolbox.register('select', tools.selSPEA2)
# Register mate function
toolbox.register('mate', gp.cxSemantic, pset=pset, min=1, max=8) #pisalo je da je ovaj OK za stabla

#Register mutate function
toolbox.register('mutate', gp.mutSemantic, pset=pset, min=1, max=8)#i ovaj je kao OK za stabla

toolbox.register('evaluate', evaluateAccuracy, data=data, names=names, pset=pset)

hof = tools.HallOfFame(10) #hall of fame

stats = tools.Statistics(lambda ind: ind.fitness.values) #statistika koju baš i ne kužim
stats.register("avg", np.mean)
stats.register("std", np.std)
stats.register("min", np.min)
stats.register("max", np.max)

algorithms.eaSimple(population, toolbox, cxpb=0.5, mutpb=0.2, ngen=10, stats=stats, halloffame=hof)
# log = algorithms.eaSimple(population, toolbox, cxpb=0.5, mutpb=0.2, ngen=7, stats=stats, halloffame=hof)
#print(hof)

#printaj 3 hall of fame jedinke

for i in hof:
    print(i)
    print(i.fitness.values)
    print("\n")

#print(log)


gen	nevals	avg     	std     	min	max
0  	20    	0.483242	0.406366	0  	1  
1  	11    	0.5     	0.5     	0  	1  
2  	18    	0.5     	0.5     	0  	1  
3  	12    	0.499874	0.499516	0  	1  
4  	13    	0.499963	0.499557	0  	1  
5  	9     	0.501119	0.497781	0  	1  
6  	11    	0.498935	0.486647	0  	1  
7  	11    	0.502419	0.497598	0  	1  
8  	11    	0.511022	0.483268	0  	1  
9  	16    	0.502285	0.483332	0  	1  
10 	12    	0.508731	0.479505	0  	1  
add(sub(ARG4, ARG0), lf(ARG0, ARG3, ARG2))
(1.0, 0.012096774193548387)


add(sub(ARG4, ARG0), lf(ARG0, ARG3, ARG2))
(1.0, 0.012096774193548387)


add(sub(ARG4, ARG0), lf(ARG0, ARG3, ARG2))
(1.0, 0.012096774193548387)


add(sub(ARG4, ARG0), lf(ARG0, ARG3, ARG2))
(1.0, 0.012096774193548387)


add(sub(ARG4, ARG0), lf(ARG0, ARG3, ARG2))
(1.0, 0.012096774193548387)


add(sub(ARG4, ARG0), lf(ARG0, ARG3, ARG2))
(1.0, 0.012096774193548387)


add(sub(ARG4, ARG0), lf(ARG0, ARG3, ARG2))
(1.0, 0.012096774193548387)


add(sub(ARG4, ARG0), lf(ARG0, ARG3, ARG2))
(1