In [None]:
# 使用《复杂》中罗比扫地机器人的例子，进行遗传算法编程
# 场景说明：
# 场地总共是10×10个格子，每个格子可能有三种情况：
#    0. 空
#    1. 罐子
#    2. 墙
# 罗比是一个机器人，他每次都只能看到周围和当前格子的情况
# 也就是其周围“上下左右中”的情况，按照顺序记入一个列表：
#    [0, 0, 0, 0, 0] -> 周围都是空的
#    [2, 0, 0, 1, 0] -> 上面是墙，右面有罐子
# 针对不同情况他的策略是:
#    0:向上走
#    1:向下走
#    2:向左走
#    3:向右走
#    4:随机走
#    5:什么都不做
#    6:捡罐子
# 总共7种策略
#
# 由于需要计算染色体(chromosome)的代表性，所以我们需要确定染色体的长度：
#    罗比能看到范围的情况 3 ^ 5 = 243 种，所以染色体需要243位的长度
#    我们对应染色体列表的chromosome[0]~chromosome[242]分别对应
#    [0,0,0,0,0], [0,0,0,0,0,1], [0,0,0,0,2], ~ ,[2,2,2,2,1], [2,2,2,2,2]
#    染色体的值 chromosome[n] 取值于 0 ~ 6 共 7 种策略。
# 所以需要在总共 7 ^ 243 种策略中寻找最优策略。
# 比如：
#    染色体m为chromosom[m] = "56132....62312" 长度为243, 其意义为
#    [0,0,0,0,0] 采取 5 也就是 什么都不做的策略
#    [0,0,0,0,1] 采取 6 也就是 拣罐子
#    [0,0,0,0,2] 采取 1 也就是 向右走
#    [0,0,0,1,0] 采取 3 也就是 向后走
#    [0,0,0,1,1] 采取 2 也就是 向前走
# 以此类推
# 优化的目的是找到最佳个体，其实就是染色体序列，使其的表现最佳。染色体是基因组合，种群其实就是染色体组合。
#
# 评分策略：
#    有罐子捡起 +10
#    没罐子却捡 -1
#    撞墙 -5

In [1]:
import math, random

In [279]:
class Population:
    def __init__(self, map_size, size, chrom_size, cp, mp, gen_max, max_step=200, g_loop=100):
        # 种群信息
        self.individuals = [] # 个体集合
        self.fitness = [] # 个体适应度集合
        self.selector_probability = [] # 个体选择概率集合
        self.new_individuals = [] # 新一代个体集合
        self.elitists = {}
        
        self.elitist = {'chromosome': [0, 0], 'fitness': 0, 'age': 0} # 最佳个体信息
        
        self.size = size # 种群包含个体数量
        self.chromosome_size = chrom_size # 染色体长度
        self.crossover_probability = cp # 个体间染色体交叉概率
        self.mutation_probability = mp # 个体变异概率
        
        self.generation_max = gen_max # 进化最大世代数
        self.age = 0 # 种群当前世代
        self.maxStep = max_step
        self.gen_loop = g_loop
        
        self.map_dim = map_size
        self.map = self.genMap()
        self.actions = 7
        self.sample_space = 243
        self.strategyMX = {} # 用strategyMX来指定染色体中的基因
        self._genStrategy(self.sample_space)
        self.log = []
        
        self.act = {
            0: "up",
            1: "down",
            2: "left",
            3: "right",
            4: "rand",
            5: "none",
            6: "pick"
        }
        self.status = {
            0: "none",
            1: "can",
            2: "wall"
        }
        self.pickpunish = {
            0: -1,
            # 应该不会出现人在墙上
            2: -1,
            1: 10
        }
        
        for i in range(self.size):
            tmpArr = []
            for n in range(self.sample_space):
                tmpArr.append(random.randint(0, self.actions-1))
            self.individuals.append(tmpArr)
            self.fitness.append(0)
            self.selector_probability.append(0)
            self.new_individuals.append([])

    def _genStrategy(self, sNum):
            # act = list(range(self.actions))
            tmp = [0, 0, 0, 0, 0]
            # 3^5=243种枚举策略
            # self.strategyMX[tuple(tmp)] = act
            self.strategyMX[tuple(tmp)] = 0
            # sNum -= 1
            for i in range(1, sNum):
                tmp = self._addEle(tmp)
                self.strategyMX[tuple(tmp)] = i
            return self.strategyMX

    def _addEle(self, tmp):
        m = len(tmp)
        while m >=0 :
            tmp[m-1] += 1
            if tmp[m-1] > 2:
                tmp[m-1] = 0
                m = m - 1
            else:
                return tmp
            
    def _setItem(self, items=[2, 1, 0], probDis=[0, 0.5, 0.5]):
        # 参数是概率分布数组
        p = random.uniform(0, 1)
        cumulative_probability = 0.0
        for item, item_prob in zip(items, probDis):
            cumulative_probability += item_prob
            if p < cumulative_probability:
                return item
    def showMap(self, tmpmap):
        for i in tmpmap:
            print(" ".join(map(str, i)))
    
    def genMap(self):
        tmpmap = []
        # 四边为墙
        upper = [2] * self.map_dim
        bottom = [2] * self.map_dim
        tmpmap.append(upper)
        for row in range(self.map_dim-2):
            tmpmap.append([0] * self.map_dim)
            tmpmap[row + 1][0] = 2
            for col in range(self.map_dim-2):
                tmpmap[row + 1][col+1] = self._setItem()
            tmpmap[row + 1][col+2] = 2
        tmpmap.append(bottom)
        return tmpmap
    
    def lookAround(self, pos, nowMap):
        # 只能看到上下左右中, pos是(x, y), 但要注意其实 x是纵轴，y是横轴
        x, y = pos
        return nowMap[x-1][y], nowMap[x+1][y], nowMap[x][y-1], nowMap[x][y+1], nowMap[x][y]
    
    def score(self, act, pos, nowMap):
        # 这里实际上是完成了解码和评估两步
        # 解决从染色体到具体得分的过程
        pos = list(pos)
#         print(pos, act)
        if act == "rand":
            act = ["up", "down", "left", "right", "pick", "none"][random.randint(0, 5)]
        scoreHash = {
            # “动作”: [x或者y, -1或者1] 用-1乘以这个值完成加减的转换
            "up": [0,1],
            "down": [0, -1],
            "left": [1, 1],
            "right": [1, -1],
            "none": [0, 0],
            #"rand": [random.randint(0, 1), [-1, 1][random.randint(0, 1)]],
            "pick": [0, 0]
        }
        posIDX = scoreHash[act][0]
        posDelta = scoreHash[act][1]
        pos[posIDX] += -1 * posDelta
        if act == "pick":
            sc = self.pickpunish[nowMap[pos[0]][pos[1]]]
            if nowMap[pos[0]][pos[1]] == 1:
#                 print("Picked", nowMap[pos[0]][pos[1]])
                nowMap[pos[0]][pos[1]] = 0
            return sc, tuple(pos)
        elif nowMap[pos[0]][pos[1]] == 2:
            # 墙壁无法走过去，所以要还原位置，其实很简单只要把乘数-1去掉就好
            pos[posIDX] += posDelta
            return -15, tuple(pos)
        return 0, tuple(pos)
    
    def _initPos(self):
#         return 1, 1
        return random.randint(1, self.map_dim-2), random.randint(1, self.map_dim-2)
    
    
    def genLife(self, nowChromo):
        newMap = self.genMap()
#         print("".join(map(str, nowChromo)))
#         self.showMap(newMap)
        pos = self._initPos()
        # print(pos)
        score = 0
        for i in range(self.maxStep):
            envStatus = self.lookAround(pos, newMap)
            genePOS = self.strategyMX[envStatus]
            score_gen, pos = self.score(self.act[nowChromo[genePOS]], pos, newMap)
            score += score_gen
#             print(envStatus, genePOS, nowChromo[genePOS], score)
        return score

    def fitness_func(self):
        for i in range(self.generation_max):
            for no, individual in enumerate(self.individuals):
                score = 0
                for n in range(self.gen_loop):
                    score += self.genLife(individual)
                self.fitness[no] = score / self.gen_loop
            self.evaluate()
            self.getElitist(i)
            # print(self.selector_probability)
            # print(sum(self.fitness)/len(self.fitness))
            self.log.append([i, max(self.fitness), sum(self.fitness)/len(self.fitness), min(self.fitness)])
            self.evolve()
            print(i, ": ".join(map(str, self.log[-1])))
            
        return True
    
    def evaluate(self):
        sp = self.selector_probability
        worst_score = 15 * self.maxStep
        minFIT = abs(min(self.fitness))
#         ft_sum = sum(self.fitness) + worst_score * self.size
        ft_sum = sum(self.fitness) + minFIT * self.size
        
        for i in range(self.size):
#             sp[i] = (self.fitness[i] + worst_score) / ft_sum
            sp[i] = (self.fitness[i] + minFIT) / ft_sum
        
        old = 0
#         for n, p in sorted(enumerate(sp), key= lambda x: x[1]):
#             old += p
#             sp[n] = old 
        for i in range(self.size):
            sp[i] += sp[i-1]
    
    def select(self):
        t = random.random()
        for n, p in enumerate(self.selector_probability):
            if p > t:
                break
        return n
    
    def showCh(self, chromo):
        return "".join(map(str, chromo))
    
    def cross(self, chromo1, chromo2):
        p = random.random()
        cross_pos = random.randint(0, self.sample_space-1)
        new_chromo1 = chromo1[:]
        new_chromo2 = chromo2[:]
        if chromo1 != chromo2 and p < self.crossover_probability:
            # 按照书上的交叉，是随机的点进行交换
            new_chromo1, new_chromo2 = chromo1[: cross_pos], chromo2[: cross_pos]
            new_chromo1.extend(chromo2[cross_pos:])
            new_chromo2.extend(chromo1[cross_pos:])
        return new_chromo1, new_chromo2
    
    def mutate(self, chromo):
        new_chromo = chromo[:]
        p = random.random()
        # print(p, self.mutation_probability)
        if p < self.mutation_probability:
            mutate_idx = random.randint(0, self.sample_space-1)
            mutate_val = list(range(self.actions))[random.randint(0, self.actions-1)]
            # print(mutate_idx, mutate_val, chromo[mutate_idx])
            new_chromo[mutate_idx] = mutate_val
        return new_chromo
    
    def evolve_double(self):
        i = 2
        while True:
            s_chromo1 = self.select()
            s_chromo2 = self.select()
#             print(s_chromo1, s_chromo2)
            (n_chromo1, n_chromo2) = self.cross(
                self.individuals[s_chromo1],
                self.individuals[s_chromo2])
            self.new_individuals[i] = self.mutate(n_chromo1)
            self.new_individuals[i+1] = self.mutate(n_chromo2)
            i += 2
            if i >= self.size:
                break
        self.new_individuals[0] = self.elitists["chromosome"][0][:]
        self.new_individuals[1] = self.elitists["chromosome"][1][:]
        for i in range(self.size):
            self.individuals[i] = self.new_individuals[i][:]
            
    def evolve(self):
        i = 1
        self.new_individuals[0] = self.elitists["chromosome"][:]
        while True:
            s_chromo1 = self.select()
            s_chromo2 = self.select()
            (n_chromo1, n_chromo2) = self.cross(
            self.individuals[s_chromo1],
            self.individuals[s_chromo2])
            if random.randint(0, 1) == 0:
                self.new_individuals[i] = self.mutate(n_chromo1)
            else:
                self.new_individuals[i] = self.mutate(n_chromo2)
            
            i += 1
            if i >= self.size:
                break
        for i in range(self.size):
            self.individuals[i] = self.new_individuals[i][:]
    
    def getElitist(self, age):
        self.elitists["chromosome"] = []
        self.elitists["age"] = age
        bestIndividual = [[idx, fit] for idx, fit in sorted(
            enumerate(self.fitness), key=lambda x: x[1], reverse=True
        )][0]
        self.elitists["chromosome"].extend(self.individuals[bestIndividual[0]])
        self.elitists["fitness"] = self.fitness[bestIndividual[0]]


In [277]:
m = Population(10, 200, 243, 0.85, 0.15, 500)
# len(m.individuals)
# n = m.genMap()
# for i in m.map:
#     print("".join(map(str, i)))
# m.lookAround((1,3))

In [278]:
mOld = m.fitness_func()

[37, -154.91]
0 0: -154.91: -562.6636: -992.86
[90, -191.01]
1 1: -191.01: -522.6552499999999: -969.63
[0, -120.51]
2 2: -120.51: -502.99869999999993: -878.25
[78, -143.4]
3 3: -143.4: -453.0675500000002: -852.9
[37, -149.81]
4 4: -149.81: -439.54040000000003: -812.7
[0, -107.35]
5 5: -107.35: -385.5177499999998: -817.95
[71, -86.79]
6 6: -86.79: -381.1198000000001: -753.92
[67, -95.47]
7 7: -95.47: -340.7535: -742.19
[147, -71.35]
8 8: -71.35: -321.6458500000001: -726.83
[110, -83.99]
9 9: -83.99: -301.06030000000004: -783.28
[124, -67.63]
10 10: -67.63: -274.0481000000002: -605.69
[81, -53.1]
11 11: -53.1: -254.14930000000007: -658.45
[187, -40.09]
12 12: -40.09: -250.43649999999997: -678.12
[65, -16.39]
13 13: -16.39: -224.39765000000003: -532.74
[78, -43.72]
14 14: -43.72: -227.32339999999994: -617.14
[100, -35.08]
15 15: -35.08: -204.06915000000004: -475.38
[167, -18.54]
16 16: -18.54: -206.70394999999982: -455.15
[159, -51.47]
17 17: -51.47: -193.61394999999996: -444.12
[18, -27.

[105, 2.86]
156 156: 2.86: -2.8267000000000007: -32.44
[127, 3.65]
157 157: 3.65: -2.2604000000000006: -27.96
[164, 3.0]
158 158: 3.0: -2.4049500000000013: -59.84
[40, 3.13]
159 159: 3.13: -2.8511499999999974: -134.38
[22, 3.19]
160 160: 3.19: -3.4402500000000007: -62.75
[190, 3.45]
161 161: 3.45: -2.720449999999999: -31.75
[48, 3.28]
162 162: 3.28: -2.609850000000002: -17.43
[115, 2.76]
163 163: 2.76: -2.6125000000000007: -63.75
[192, 2.75]
164 164: 2.75: -2.755250000000001: -87.18
[122, 2.45]
165 165: 2.45: -2.152800000000001: -20.04
[88, 2.98]
166 166: 2.98: -2.800100000000002: -32.13
[105, 3.56]
167 167: 3.56: -3.0286999999999984: -88.44
[66, 2.69]
168 168: 2.69: -3.4083500000000013: -37.69
[8, 3.1]
169 169: 3.1: -2.45815: -94.41
[174, 3.05]
170 170: 3.05: -2.4114999999999998: -34.58
[116, 3.39]
171 171: 3.39: -2.6330500000000017: -33.8
[6, 4.21]
172 172: 4.21: -2.3643000000000014: -33.37
[197, 3.07]
173 173: 3.07: -2.378949999999999: -30.75
[37, 4.28]
174 174: 4.28: -2.71415000000

[152, 5.12]
312 312: 5.12: 0.6736000000000004: -29.78
[139, 4.86]
313 313: 4.86: 0.932: -58.16
[29, 5.21]
314 314: 5.21: 0.49019999999999997: -28.72
[30, 4.93]
315 315: 4.93: -0.17635000000000026: -123.17
[49, 4.15]
316 316: 4.15: 0.2067500000000003: -113.62
[69, 4.96]
317 317: 4.96: -0.12824999999999967: -122.0
[191, 4.56]
318 318: 4.56: 0.33235000000000015: -59.25
[17, 4.87]
319 319: 4.87: -0.7126000000000005: -146.98
[111, 5.12]
320 320: 5.12: 0.5677499999999998: -58.31
[52, 4.97]
321 321: 4.97: 1.01645: -27.07
[185, 4.54]
322 322: 4.54: 0.7991000000000006: -57.02
[56, 5.03]
323 323: 5.03: 0.11900000000000002: -85.91
[128, 5.09]
324 324: 5.09: 0.7565000000000002: -30.84
[84, 4.43]
325 325: 4.43: 1.0707499999999992: -15.24
[195, 5.1]
326 326: 5.1: 0.3304999999999999: -42.43
[138, 4.48]
327 327: 4.48: -0.03854999999999999: -89.44
[26, 4.56]
328 328: 4.56: -0.19520000000000018: -58.96
[148, 5.04]
329 329: 5.04: 0.61505: -58.07
[57, 5.22]
330 330: 5.22: -0.13094999999999993: -85.03
[120

[125, 8.57]
471 471: 8.57: 2.9818000000000002: -113.93
[31, 8.64]
472 472: 8.64: 3.1000499999999995: -86.4
[116, 8.66]
473 473: 8.66: 3.864550000000001: -25.76
[182, 8.25]
474 474: 8.25: 3.5908499999999988: -114.98
[99, 7.74]
475 475: 7.74: 3.856549999999999: -26.31
[117, 7.56]
476 476: 7.56: 4.1686999999999985: -24.37
[175, 8.75]
477 477: 8.75: 3.913450000000003: -25.51
[109, 8.65]
478 478: 8.65: 4.103650000000001: -23.79
[15, 9.33]
479 479: 9.33: 3.00525: -114.83
[77, 8.53]
480 480: 8.53: 3.223950000000002: -56.05
[38, 7.85]
481 481: 7.85: 3.88975: -28.81
[105, 9.22]
482 482: 9.22: 3.079500000000002: -56.5
[45, 7.88]
483 483: 7.88: 3.5042500000000003: -84.41
[178, 7.83]
484 484: 7.83: 3.334650000000002: -82.87
[176, 8.17]
485 485: 8.17: 3.8330999999999977: -55.87
[59, 8.99]
486 486: 8.99: 4.0383: -52.22
[59, 7.82]
487 487: 7.82: 3.8192000000000013: -52.3
[0, 8.72]
488 488: 8.72: 4.015349999999999: -54.95
[77, 9.07]
489 489: 9.07: 4.326999999999999: -5.06
[27, 8.05]
490 490: 8.05: 3.7

In [280]:
m.log[-1]

[499, 9.99, 3.1149500000000008, -84.09]

In [267]:
m.log

[]

In [133]:
m.lookAround((6, 3)), m.strategyMX[m.lookAround((6, 3))]

((0, 1, 0, 0, 1), [0, 1, 2, 3, 4, 5, 6])

In [193]:
m.initPos()

AttributeError: 'Population' object has no attribute 'initPos'