-
Notifications
You must be signed in to change notification settings - Fork 6
/
CUDA_parallel_GA.py
189 lines (145 loc) · 6.61 KB
/
CUDA_parallel_GA.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
#-------- Implementation of basic GPU-parallel Genetic Algorithm for with python CUDA using Numba ---------#
import numpy as np
from numba import cuda
import time
import random
import math
#-------- Verify CUDA access ---------#
print(cuda.gpus)
#-------- Parallel kernel function using CUDA ---------#
@cuda.jit
def eval_genomes_kernel(chromosomes, fitnesses, pop_length, chrom_length):
# Thread id in a 1D block
tx = cuda.threadIdx.x
# Block id in a 1D grid
ty = cuda.blockIdx.x
# Block width, i.e. number of threads per block
bw = cuda.blockDim.x
# Compute flattened index inside the array
pos = tx + ty * bw
if pos < pop_length: # Check array boundaries
# in this example the fitness of an individual is computed by an arbitary set of algebraic operations on the chromosome
num_loops = 3000
for i in range(num_loops):
fitnesses[pos] += chromosomes[pos*chrom_length + 1] # do the fitness evaluation
for i in range(num_loops):
fitnesses[pos] -= chromosomes[pos*chrom_length + 2]
for i in range(num_loops):
fitnesses[pos] += chromosomes[pos*chrom_length + 3]
if (fitnesses[pos] < 0):
fitnesses[pos] = 0
#-------- Plain evaluation function, not parallel ---------#
def eval_genomes_plain(chromosomes, fitnesses):
for i in range(len(chromosomes)):
# in this example the fitness of an individual is computed by an arbitary set of algebraic operations on the chromosome
num_loops = 3000
for j in range(num_loops):
fitnesses[i] += chromosomes[i][1] # do the fitness evaluation
for j in range(num_loops):
fitnesses[i] -= chromosomes[i][2]
for j in range(num_loops):
fitnesses[i] += chromosomes[i][3]
if (fitnesses[i] < 0):
fitnesses[i] = 0
#-------- Function to compute next generation in Genetic Algorithm ---------#
#-------- Performs Selection, Crossover, and Mutation operations ---------#
def next_generation(chromosomes, fitnesses):
fitness_pairs = []
fitnessTotal = 0.0
for i in range(len(chromosomes)):
fitness_pairs.append( [chromosomes[i], fitnesses[i]] )
fitnessTotal += fitnesses[i]
fitnesses = list(reversed(sorted(fitnesses))) #fitnesses now in descending order
sorted_pairs = list(reversed(sorted(fitness_pairs, key=lambda x: x[1])))
new_chromosomes = np.zeros(shape=(pop_size, chrom_size), dtype = np.float32)
#new_brains_fitnesses = []
#create roulette wheel from relative fitnesses for fitness-proportional selection
rouletteWheel = []
fitnessProportions = []
for i in range(len(chromosomes)):
fitnessProportions.append( float( fitnesses[i]/fitnessTotal ) )
if(i == 0):
rouletteWheel.append(fitnessProportions[i])
else:
rouletteWheel.append(rouletteWheel[i - 1] + fitnessProportions[i])
#Generate new population with children of selected chromosomes
for i in range(len(chromosomes)):
#Fitness Proportional Selection
spin1 = random.uniform(0, 1) # A random float from 0.0 to 1.0
spin2 = random.uniform(0, 1) # A random float from 0.0 to 1.0
j = 0
while( rouletteWheel[j] <= spin1 ):
j += 1
k = 0
while( rouletteWheel[k] <= spin2 ):
k += 1
genome_copy = sorted_pairs[j][0] #Genome of parent 1
genome_copy2 = sorted_pairs[k][0] #Genome of parent 2
#create child genome from parents (crossover)
index = random.randint(0, len(genome_copy) - 1)
index2 = random.randint(0, len(genome_copy2) - 1)
child_sequence = []
for y in range(math.floor(len(genome_copy) / 2)):
child_sequence.append( genome_copy[ (index + y) % len(genome_copy) ] )
for y in range(math.floor(len(genome_copy2)/ 2)):
child_sequence.append( genome_copy2[ (index2 + y) % len(genome_copy2) ] )
child_genome = np.zeros(len(chromosomes[0]), dtype=np.float32)
#mutate genome
for a in range(len(child_sequence)):
if random.uniform(0,1) < 0.01: # 1% chance of a random mutation
child_genome[a] = random.uniform(0,1)
else:
child_genome[a] = child_sequence[a]
#Add add new chromosome to next population
new_chromosomes[i] = child_genome
#Replace old chromosomes with new
for i in range(len(chromosomes)):
for j in range(len(chromosomes[0])):
chromosomes[i][j] = new_chromosomes[i][j]
#-------- Initialize Population ---------#
random.seed(1111)
pop_size = 5000
chrom_size = 10
num_generations = 5
fitnesses = np.zeros(pop_size, dtype=np.float32)
chromosomes = np.zeros(shape=(pop_size, chrom_size), dtype = np.float32)
for i in range(pop_size):
for j in range(chrom_size):
chromosomes[i][j] = random.uniform(0,1) #random float between 0.0 and 1.0
#-------- Measure time to perform some generations of the Genetic Algorithm without CUDA ---------#
print("NO CUDA:")
start = time.time()
# Genetic Algorithm on CPU
for i in range(num_generations):
print("Gen " + str(i) + "/" + str(num_generations))
eval_genomes_plain(chromosomes, fitnesses)
next_generation(chromosomes, fitnesses) #Performs selection, mutation, and crossover operations to create new generation
fitnesses = np.zeros(pop_size, dtype=np.float32) #Wipe fitnesses
end = time.time()
print("time elapsed: " + str((end-start)))
print("First chromosome: " + str(chromosomes[0])) #To show computations were the same between both tests
#-------- Prepare kernel ---------#
# Set block & thread size
threads_per_block = 256
blocks_per_grid = (chromosomes.size + (threads_per_block - 1))
#--------- Initialize population again for a new run -------------- #
random.seed(1111)
fitnesses = np.zeros(pop_size, dtype=np.float32)
chromosomes = np.zeros(shape=(pop_size, chrom_size), dtype = np.float32)
for i in range(pop_size):
for j in range(chrom_size):
chromosomes[i][j] = random.uniform(0,1) #random float between 0.0 and 1.0
#-------- Measure time to perform some generations of the Genetic Algorithm with CUDA ---------#
print("CUDA:")
start = time.time()
# Genetic Algorithm on GPU
for i in range(num_generations):
print("Gen " + str(i) + "/" + str(num_generations))
chromosomes_flat = chromosomes.flatten()
eval_genomes_kernel[blocks_per_grid, threads_per_block](chromosomes_flat, fitnesses, pop_size, chrom_size)
next_generation(chromosomes, fitnesses) #Performs selection, mutation, and crossover operations to create new generation
fitnesses = np.zeros(pop_size, dtype=np.float32) # Wipe fitnesses
end = time.time()
print("time elapsed: " + str((end-start)))
print("First chromosome: " + str(chromosomes[0])) #To show computations were the same between both tests
#-------------------------------------------------------#