# Imports

In [2]:
# import cma # library of numpy evolutionary algorithms
# import cma.test as tst

import deap
from deap import creator, base, tools, benchmarks, cma, algorithms
import numpy as np
import gym
from time import time

from core.agents import Linear, MLP, rnn
from core.microbial import Microbial
# from core.optimizers import microbial

In [3]:
import sys, os

# Bookkeeping shared by blockPrint()/enablePrint():
#   _saved_stdout   - the stream that was installed as sys.stdout when printing
#                     was blocked (None while printing is enabled).
#   _devnull_stdout - the os.devnull handle currently standing in for stdout
#                     (None while printing is enabled).
_saved_stdout = None
_devnull_stdout = None


# Disable
def blockPrint():
    """Silence ``print`` by pointing ``sys.stdout`` at ``os.devnull``.

    The stream that was active beforehand is remembered so that
    ``enablePrint`` can put the same stream back. Calling this while output
    is already blocked is a no-op, so repeated calls do not leak file handles
    or lose the remembered stream.
    """
    global _saved_stdout, _devnull_stdout
    if _devnull_stdout is not None:
        if sys.stdout is _devnull_stdout:
            return  # already blocked; keep the originally saved stream
        # stdout was swapped by someone else since the last block; drop our stale handle
        _devnull_stdout.close()
    _saved_stdout = sys.stdout
    _devnull_stdout = open(os.devnull, 'w')
    sys.stdout = _devnull_stdout


# Restore
def enablePrint():
    """Undo ``blockPrint`` and close the devnull handle it opened.

    Restores the stream that was active when ``blockPrint`` was called. That
    stream is not necessarily ``sys.__stdout__`` (hosts may install their own
    ``sys.stdout``). If ``blockPrint`` was never called, falls back to
    ``sys.__stdout__``, matching the previous behaviour.
    """
    global _saved_stdout, _devnull_stdout
    sys.stdout = _saved_stdout if _saved_stdout is not None else sys.__stdout__
    if _devnull_stdout is not None:
        _devnull_stdout.close()
    _saved_stdout = None
    _devnull_stdout = None

# Global Evo Algorithm Parameters & Functions

In [4]:
# Seed forwarded as `seed=` to CMAES() in the run cells below. CMAES() only calls
# np.random.seed when the seed is not None, so with None the runs are unseeded.
RANDOM_SEED = None

ENV_NAME = 'Pendulum-v0'
EPISODES = 8  # Number of times to run the environment when evaluating one individual
STEPS = 200  # Max number of steps to run each simulated episode

# Single shared environment instance; evaluate() reads this global directly.
env = gym.make(ENV_NAME)

# Controller Parameters
in_dim = env.observation_space.shape[0]  # Input to controller (observ.)
out_dim = env.action_space.shape[0]  # Output from controller (action)

# Controllers
# One instance per architecture; each is passed both to toolbox.register(...) and to
# the optimiser in the run cells below.
linear_agent = Linear(in_dim, out_dim)
mlp_agent_1 = MLP(in_dim, out_dim, layers=[3,3])
mlp_agent_2 = MLP(in_dim, out_dim, layers=[10,5])
rnn_agent_1 = rnn(in_dim, out_dim, nodes=5)
rnn_agent_2 = rnn(in_dim, out_dim, nodes=10)

# ------------------------------------------------------------------------------
#                               SET UP GA PARAMETERS
# ------------------------------------------------------------------------------
# NOTE(review): CMAES() below has its own POPULATION_SIZE / NUM_GEN parameters and the
# visible run cells pass NUM_GEN=200 explicitly, so these module-level values are not
# used by the CMA-ES runs shown in this notebook. DEME_SIZE is not referenced in any
# visible cell - confirm whether these are still needed.
POPULATION_SIZE = 40
NUM_GEN = 300   # Number of generations
DEME_SIZE = 3  # from either side

# ------------------------------------------------------------------------------
#                               CREATE GA
# ------------------------------------------------------------------------------


# Single-objective fitness with a positive weight, and an ndarray-based individual
# that carries that fitness. CMAES() hands creator.Individual to strategy.generate.
creator.create("FitnessMax", base.Fitness, weights=(1.0,))
creator.create("Individual", np.ndarray, fitness=creator.FitnessMax)

# NUM_PARAMS = agent.num_params

# toolbox.register("attr_floats", lambda n: (np.random.rand(n).astype(
#     np.float32)-0.5)*2, NUM_PARAMS)
# toolbox.register("individual", tools.initIterate, creator.Individual,
#                  toolbox.attr_floats)

def evaluate(individual, agent=None):
    """Return the mean episode reward obtained by `agent` using `individual` as weights.

    Lends heavily from evaluate.py.

    Args:
        individual: Weight vector handed to ``agent.set_weights``.
        agent: Controller exposing ``set_weights`` and ``percieve``. Required,
            even though it defaults to ``None`` so it can be bound by keyword.

    Returns:
        A one-element list ``[mean_reward]``, averaged over ``EPISODES`` rollouts
        of at most ``STEPS`` steps each in the module-level ``env``.

    Raises:
        Exception: If ``agent`` is ``None``.
    """
    if agent is None:
        raise Exception('No agent set')

    # Load the candidate weights into the controller
    agent.set_weights(individual)

    # Per-component divisor applied to every observation before it reaches the agent
    obs_divisor = np.array([1,1,8])

    episode_returns = []
    for _episode in range(EPISODES):
        obs = env.reset()
        ep_return = 0

        for _step in range(STEPS):
            scaled_obs = obs / obs_divisor
            # Agent output is multiplied by 2 before being sent to the environment
            obs, reward, done, _ = env.step(agent.percieve(scaled_obs) * 2)
            ep_return += reward
            if done:
                break

        episode_returns.append(ep_return)

    # Average reward over the episodes that were run
    mean_reward = sum(episode_returns) / EPISODES
    return [mean_reward]

# Default toolbox: evaluate() bound to the linear controller. Each run cell below
# rebinds `toolbox` to a fresh Toolbox for the agent it trains.
toolbox = base.Toolbox()
toolbox.register("evaluate", evaluate, agent=linear_agent)

[2018-04-25 11:48:20,483] Making new env: Pendulum-v0


# Evolved using CMAES

In [5]:
def CMAES(agent, toolbox, sigma=1, POPULATION_SIZE=None, NUM_GEN=100, seed=None):
    """Optimise `agent`'s parameters with DEAP's CMA-ES strategy.

    Args:
        agent: Controller; only ``agent.num_params`` is read here.
        toolbox: DEAP toolbox. "generate" and "update" are registered on it by
            this function; the callers in this notebook register "evaluate"
            on it beforehand.
        sigma: Initial step size given to ``cma.Strategy``.
        POPULATION_SIZE: Offspring per generation (``lambda_``). When ``None``,
            ``20 * agent.num_params`` is used.
        NUM_GEN: Number of generations passed to ``eaGenerateUpdate``.
        seed: If not ``None``, seeds NumPy's global RNG before the run.

    Returns:
        Tuple ``(pop, logbook, hof, elapsed_seconds)`` where ``hof`` is a
        3-entry hall of fame and ``elapsed_seconds`` is the wall-clock time
        spent inside ``eaGenerateUpdate``.
    """
    if seed is not None:
        np.random.seed(seed)

    lambda_ = POPULATION_SIZE
    if lambda_ is None:
        lambda_ = 20*agent.num_params

    print('Begining training using CMA-ES')
    print('Parameters per agent = %s' % agent.num_params)

    # Search starts from the all-zero parameter vector
    start_point = [0.0]*agent.num_params
    strategy = cma.Strategy(centroid=start_point, sigma=sigma, lambda_=lambda_)
    toolbox.register("generate", strategy.generate, creator.Individual)
    toolbox.register("update", strategy.update)

    hof = tools.HallOfFame(3, np.ma.allequal)

    # Per-generation fitness statistics, in the column order shown in the logbook
    stats = tools.Statistics(lambda ind: ind.fitness.values)
    for label, reducer in (("avg", np.mean), ("std", np.std),
                           ("min", np.min), ("max", np.max)):
        stats.register(label, reducer, axis=0)

    started_at = time()
    pop, logbook = algorithms.eaGenerateUpdate(toolbox, ngen=NUM_GEN, stats=stats, halloffame=hof)
    elapsed = time() - started_at

    return pop, logbook, hof, elapsed

In [7]:
# CMA-ES on the linear controller, 200 generations, fresh toolbox.
# Index 3 of the CMAES() result is the elapsed time in seconds.
toolbox = base.Toolbox()
toolbox.register("evaluate", evaluate, agent=linear_agent)
result_1 = CMAES(linear_agent, toolbox, NUM_GEN=200, seed=RANDOM_SEED)
print('Time to evaluate %0.2f s' % result_1[3])

Begining training using CMA-ES
Parameters per agent = 4
gen	nevals	avg             	std           	min             	max             
0  	80    	[-1412.27450751]	[118.95191795]	[-1737.04889644]	[-1150.59133342]
1  	80    	[-1350.84248453]	[126.33162596]	[-1614.41997456]	[-1032.87043682]
2  	80    	[-1288.21299295]	[126.87622143]	[-1624.05624179]	[-1017.84608831]
3  	80    	[-1279.19016694]	[125.88889364]	[-1601.00447498]	[-983.56287961] 
4  	80    	[-1246.87025476]	[99.63140912] 	[-1455.46674479]	[-1018.00534719]
5  	80    	[-1212.00303624]	[107.09202237]	[-1480.74114645]	[-906.37204048] 
6  	80    	[-1210.86434258]	[94.35460015] 	[-1478.81369373]	[-985.93765559] 
7  	80    	[-1196.08567339]	[126.13905536]	[-1569.22806927]	[-844.15545827] 
8  	80    	[-1189.08780496]	[139.20723215]	[-1488.10916455]	[-817.67560517] 
9  	80    	[-1137.78507613]	[125.97590714]	[-1397.87771359]	[-630.84603254] 
10 	80    	[-1143.05023756]	[140.06400535]	[-1525.15379304]	[-797.85417726] 
11 	80    	[-1096.67

In [None]:
# CMA-ES on the small MLP (layers=[3,3]), 200 generations, fresh toolbox.
# Index 3 of the CMAES() result is the elapsed time in seconds.
toolbox = base.Toolbox()
toolbox.register("evaluate", evaluate, agent=mlp_agent_1)
result_2 = CMAES(mlp_agent_1, toolbox, NUM_GEN=200, seed=RANDOM_SEED)
print('Time to evaluate %0.2f s' % result_2[3])

Begining training using CMA-ES
Parameters per agent = 24
gen	nevals	avg            	std           	min             	max            
0  	480   	[-1400.0188781]	[107.66170665]	[-1809.49177072]	[-921.15838536]
1  	480   	[-1391.79392376]	[115.73762437]	[-1751.78915355]	[-928.3439629] 
2  	480   	[-1402.40767763]	[107.25252874]	[-1832.51656998]	[-1025.29863726]
3  	480   	[-1399.81440492]	[120.93188672]	[-1786.2418578] 	[-895.93984384] 
4  	480   	[-1395.44283693]	[117.532756]  	[-1800.8339044] 	[-1016.0311078] 
5  	480   	[-1376.08958531]	[123.21357027]	[-1838.15533763]	[-997.85253807] 
6  	480   	[-1371.25455688]	[117.37264764]	[-1680.50635142]	[-941.42737199] 
7  	480   	[-1375.25636733]	[112.30133956]	[-1787.03736987]	[-1074.27289312]
8  	480   	[-1375.09742857]	[121.14162018]	[-1756.01501986]	[-882.06127426] 
9  	480   	[-1367.97898208]	[124.2079601] 	[-1840.35103869]	[-973.60246549] 
10 	480   	[-1370.93133736]	[122.66547586]	[-1857.86922102]	[-930.22994861] 
11 	480   	[-1361.836126

In [None]:
# CMA-ES on the larger MLP (layers=[10,5]), 200 generations, fresh toolbox.
# Index 3 of the CMAES() result is the elapsed time in seconds.
toolbox = base.Toolbox()
toolbox.register("evaluate", evaluate, agent=mlp_agent_2)
result_3 = CMAES(mlp_agent_2, toolbox, NUM_GEN=200, seed=RANDOM_SEED)
print('Time to evaluate %0.2f s' % result_3[3])

In [26]:
# CMA-ES on the 5-node RNN controller, fresh toolbox.
# Index 3 of the CMAES() result is the elapsed time in seconds.
# NOTE(review): the recorded output below shows only two generations although
# NUM_GEN=200 is passed here - the output looks stale relative to this code; re-run to confirm.
toolbox = base.Toolbox()
toolbox.register("evaluate", evaluate, agent=rnn_agent_1)
result_4 = CMAES(rnn_agent_1, toolbox, NUM_GEN=200, seed=RANDOM_SEED)
print('Time to evaluate %0.2f s' % result_4[3])

Begining training using CMA-ES
Parameters per agent = 51
gen	nevals	avg             	std           	min             	max            
0  	1020  	[-1218.39051368]	[130.01053708]	[-1602.68612349]	[-728.56876873]
1  	1020  	[-1220.70502885]	[127.4017654] 	[-1623.19591669]	[-828.04056706]
Time to evaluate 158.29 s


In [None]:
# CMA-ES on the 10-node RNN controller, 200 generations, fresh toolbox.
# Index 3 of the CMAES() result is the elapsed time in seconds.
toolbox = base.Toolbox()
toolbox.register("evaluate", evaluate, agent=rnn_agent_2)
result_5 = CMAES(rnn_agent_2, toolbox, NUM_GEN=200, seed=RANDOM_SEED)
print('Time to evaluate %0.2f s' % result_5[3])

Begining training using CMA-ES
Parameters per agent = 151
gen	nevals	avg             	std           	min             	max           
0  	3020  	[-1219.76443128]	[125.90238625]	[-1652.93547778]	[-769.0709582]
1  	3020  	[-1218.172442]  	[125.14082289]	[-1690.17557616]	[-765.62710396]
2  	3020  	[-1218.84045763]	[122.99351975]	[-1673.55548066]	[-819.18525022]
3  	3020  	[-1215.88987263]	[130.80288336]	[-1757.3804445] 	[-748.12889001]
4  	3020  	[-1217.54325015]	[128.91827369]	[-1643.52983805]	[-811.41512325]
5  	3020  	[-1217.63615592]	[127.97850897]	[-1628.59562655]	[-761.90949525]
6  	3020  	[-1221.86133821]	[126.58215551]	[-1635.4215257] 	[-813.77377146]
7  	3020  	[-1215.05062392]	[125.65926275]	[-1675.57835652]	[-814.49855658]
8  	3020  	[-1218.09194426]	[126.86473726]	[-1648.94278719]	[-783.61621937]
9  	3020  	[-1216.04957269]	[124.9576285] 	[-1626.9150134] 	[-743.06145617]
10 	3020  	[-1216.77232384]	[124.05357461]	[-1686.79411577]	[-749.86857887]
11 	3020  	[-1221.56908803]	[126

# Microbial GA

In [7]:
# Microbial GA on the linear controller.
# NOTE(review): the argument to run() is presumably the number of generations - confirm
# against core.microbial. The recorded output ends in KeyboardInterrupt, so result_6
# was likely never assigned in that session.
m = Microbial(linear_agent,evaluate)
result_6 = m.run(200)

<deap.tools.support.Statistics object at 0x1150f6400>


KeyboardInterrupt: 

In [8]:
# Microbial GA on the small MLP (layers=[3,3]).
# NOTE(review): run(23) differs from the run(20) used in the later cells - confirm it is intentional.
m = Microbial(mlp_agent_1,evaluate)
result_7 = m.run(23)

<deap.tools.support.Statistics object at 0x11517f588>
gen	nevals	avg             	std          	min             	max             
0  	40    	[-1358.03772982]	[94.05077883]	[-1529.07963426]	[-1112.90501523]
1  	20    	[-1340.91558664]	[89.24914767]	[-1529.07963426]	[-1112.90501523]
2  	20    	[-1333.71209613]	[95.41704514]	[-1570.30746754]	[-1112.90501523]
3  	20    	[-1314.75230913]	[96.60226336]	[-1535.53979141]	[-1068.00145462]
4  	20    	[-1296.88062982]	[93.07847926]	[-1535.53979141]	[-1068.00145462]
5  	20    	[-1301.90631253]	[104.21765145]	[-1601.06370498]	[-1068.00145462]
6  	20    	[-1289.70667028]	[96.16601333] 	[-1457.31504625]	[-1068.00145462]
7  	20    	[-1271.93534867]	[93.64472762] 	[-1457.31504625]	[-1068.00145462]
8  	20    	[-1284.39167079]	[113.11480283]	[-1648.07881261]	[-1068.00145462]
9  	20    	[-1289.13853165]	[122.18619765]	[-1593.66811538]	[-1068.00145462]
10 	20    	[-1297.34195184]	[117.10602232]	[-1493.63823127]	[-1068.00145462]
11 	20    	[-1266.44990081]	

In [9]:
# Index 3 of the Microbial.run() result is printed as the elapsed time, the same
# position as in the CMAES() result.
print('Time to evaluate %0.2f s' % result_7[3])

Time to evaluate 36.20 s


In [11]:
# Microbial GA on the larger MLP (layers=[10,5]); prints index 3 of the result as elapsed time.
m = Microbial(mlp_agent_2,evaluate)
result_8 = m.run(20)
print('Time to evaluate %0.2f s' % result_8[3])

<deap.tools.support.Statistics object at 0x1151afd30>
gen	nevals	avg             	std          	min             	max             
0  	40    	[-1379.20723355]	[98.71762914]	[-1568.50932023]	[-1129.01332781]
1  	20    	[-1355.26929754]	[96.96495677]	[-1568.50932023]	[-1129.01332781]
2  	20    	[-1373.83948333]	[136.27844903]	[-1711.4971812] 	[-1129.01332781]
3  	20    	[-1366.82404562]	[140.80475887]	[-1711.4971812] 	[-1129.01332781]
4  	20    	[-1349.63154751]	[152.5278739] 	[-1711.4971812] 	[-899.42931065] 
5  	20    	[-1319.96217036]	[139.94404355]	[-1593.60316691]	[-899.42931065] 
6  	20    	[-1318.5731551] 	[143.12950323]	[-1674.6105568] 	[-899.42931065] 
7  	20    	[-1304.55808753]	[146.00169304]	[-1596.34837142]	[-899.42931065] 
8  	20    	[-1301.88550147]	[161.59877227]	[-1772.39766953]	[-899.42931065] 
9  	20    	[-1319.80594958]	[182.29385859]	[-1739.50335299]	[-899.42931065] 
10 	20    	[-1296.92461581]	[150.52999534]	[-1627.64775472]	[-899.42931065] 
11 	20    	[-1279.4115775

In [14]:
# Microbial GA on the 5-node RNN controller.
m = Microbial(rnn_agent_1,evaluate)
result_9 = m.run(20)

<deap.tools.support.Statistics object at 0x1151c5ac8>
gen	nevals	avg             	std           	min             	max            
0  	40    	[-1199.00473424]	[133.08352934]	[-1459.62938003]	[-984.14178689]
1  	20    	[-1203.33984108]	[132.78232697]	[-1459.62938003]	[-984.14178689]
2  	20    	[-1191.83129736]	[130.67752766]	[-1540.8620147] 	[-984.14178689]
3  	20    	[-1191.75033629]	[119.14639492]	[-1407.04692078]	[-984.14178689]
4  	20    	[-1171.93320866]	[141.30952211]	[-1553.08466666]	[-894.12559027]
5  	20    	[-1146.19404723]	[127.30087502]	[-1386.28078178]	[-888.71481912]
6  	20    	[-1149.81831141]	[132.8018604] 	[-1391.90885785]	[-888.71481912]
7  	20    	[-1146.07491674]	[141.56567093]	[-1490.00352059]	[-888.71481912]
8  	20    	[-1155.40543012]	[145.9960781] 	[-1475.23578744]	[-888.71481912]
9  	20    	[-1159.145077]  	[150.3757662] 	[-1463.12435817]	[-888.71481912]
10 	20    	[-1160.91223874]	[150.74257794]	[-1453.40041022]	[-888.71481912]
11 	20    	[-1163.80821741]	[139.0

In [15]:
# Microbial GA, second RNN run.
# NOTE(review): this uses rnn_agent_1 again, the same agent as result_9. The CMA-ES
# section trains rnn_agent_2 as its last experiment, so rnn_agent_2 may have been
# intended here - confirm before comparing results.
m = Microbial(rnn_agent_1,evaluate)
result_10 = m.run(20)

<deap.tools.support.Statistics object at 0x1151a3c18>
gen	nevals	avg             	std           	min             	max            
0  	40    	[-1220.03116684]	[121.68943274]	[-1499.50650668]	[-975.92362235]
1  	20    	[-1205.50387978]	[142.13054976]	[-1542.30818853]	[-898.25102631]
2  	20    	[-1179.4864764] 	[122.51756692]	[-1382.76034236]	[-898.25102631]
3  	20    	[-1153.59252762]	[112.51812131]	[-1370.36444214]	[-898.25102631]
4  	20    	[-1150.13923835]	[129.24489195]	[-1432.68080218]	[-898.25102631]
5  	20    	[-1136.13437876]	[134.40764115]	[-1555.59034709]	[-898.25102631]
6  	20    	[-1144.76204985]	[140.28713684]	[-1457.27931115]	[-898.25102631]
7  	20    	[-1116.00857512]	[116.54385702]	[-1380.34798474]	[-898.25102631]
8  	20    	[-1112.03491019]	[114.96868276]	[-1371.46640422]	[-898.25102631]
9  	20    	[-1138.12919599]	[134.25961945]	[-1465.14012194]	[-898.25102631]
10 	20    	[-1123.97602456]	[137.15349543]	[-1402.97943249]	[-898.25102631]
11 	20    	[-1143.60724697]	[141.9

In [8]:
import numpy as np


array([-1, -2, -3])

In [1]:
# Baseline run using the project's agent_sota module.
import core.agent_sota as sota

# Override the module-level episode count before training (the recorded log shows episodes 0-9).
sota.num_episodes = 10
# NOTE(review): assigning to `_` shadows IPython's "last output" name; a descriptive
# variable name would be safer. The meaning of the False argument is not visible here.
_ = sota.train(False)

[2018-04-24 21:58:28,076] Making new env: Pendulum-v0
[2018-04-24 21:58:28,099] Clearing 8 monitor files from previous run (because force=True was provided)
[2018-04-24 21:58:28,964] Starting new video recorder writing to /tmp/ddpg-agent-results/openaigym.video.0.90727.video000000.mp4
[2018-04-24 21:58:33,277] Starting new video recorder writing to /tmp/ddpg-agent-results/openaigym.video.0.90727.video000001.mp4


Episode  0, Reward: -1673.023, Steps: 200, Final noise scale:   0.400
Episode  1, Reward: -1621.043, Steps: 200, Final noise scale:   0.396
Episode  2, Reward: -1850.460, Steps: 200, Final noise scale:   0.392
Episode  3, Reward: -1541.631, Steps: 200, Final noise scale:   0.388
Episode  4, Reward: -1884.443, Steps: 200, Final noise scale:   0.384
Episode  5, Reward: -1363.741, Steps: 200, Final noise scale:   0.380
Episode  6, Reward: -1161.320, Steps: 200, Final noise scale:   0.377


[2018-04-24 21:58:39,644] Starting new video recorder writing to /tmp/ddpg-agent-results/openaigym.video.0.90727.video000008.mp4


Episode  7, Reward: -1512.119, Steps: 200, Final noise scale:   0.373
Episode  8, Reward: -1649.362, Steps: 200, Final noise scale:   0.369


[2018-04-24 21:58:43,943] Finished writing results. You can upload them to the scoreboard via gym.upload('/tmp/ddpg-agent-results')


Episode  9, Reward: -1457.932, Steps: 200, Final noise scale:   0.365


In [2]:
# Display the value stored in `_` by the training cell. The recorded output is a
# 2-tuple: an array of ten per-episode rewards and a float (presumably elapsed seconds - confirm).
_

(array([-1673.02310291, -1621.043032  , -1850.46016875, -1541.63110343,
        -1884.44331758, -1363.74053841, -1161.32040539, -1512.1188674 ,
        -1649.36235721, -1457.93195269]), 14.980122804641724)

In [1]:
from core.simulator import Simulator

In [6]:
# Wrap the shared gym `env` in the project's Simulator.
# NOTE(review): the meaning of the first argument (5) is not visible here - confirm in core.simulator.
sim_env = Simulator(5, env)

In [18]:
# Step the simulator with a zero action shaped (1, 1), i.e. array([[0]]).
# The recorded output is a 4-tuple whose first two entries are arrays, followed by True and None.
sim_env.step(np.array(0)[None][None])

-0.099939875
0.3233194
0.4184589


(array([[-0.22801816,  0.07112531,  0.3216886 ]], dtype=float32),
 array([[-1.01764931]]),
 True,
 None)

In [11]:
# Scratch check: indexing a 0-d array with None twice yields the 2-D array [[0]].
np.array(0)[None][None]

array([[0]])

In [None]:
# Reset the simulator; no output was recorded for this cell.
sim_env.reset()