# Imports

In [8]:
import gym
import wandb
import numpy as np
import pandas as pd

In [9]:
from simplenn.structures.network.activations import LRelu
from simplenn.structures.network.activations import TanH
from simplenn.structures.network.activations import Sigmoid
from simplenn.evaluation.simulation import Simulation
from simplenn.structures.network import Network
from simplenn.structures.network import Layer
from simplenn.optim.evolution import Genetic

In [10]:
# Gym environment used by the rest of the notebook (config cell and doLearning).
# Alternative environments are kept below; uncomment exactly one line to switch.
#env = gym.make('MountainCar-v0')
#env = gym.make('Acrobot-v1')
#env = gym.make('CartPole-v0')
env = gym.make('CartPole-v1')

In [11]:
# Select matplotlib's Tk GUI backend for this kernel (IPython line magic).
%matplotlib tk

# Performing experiment

In [12]:
# Experiment settings, gathered in a single dict. doLearning reads its
# hyper-parameters from here and passes the whole dict to wandb.init.
config = {
    # --- Genetic algorithm ---
    "popSize": 50,
    "nElitism": 1,
    "generations": 20,
    "tournamentSize": 5,
    "mutationProba": 0.25,
    "mutationScale": 0.05,
    "mutationRelative": False,

    # --- Evaluation ---
    "nTest": 10,
    "actionSelectionMode": "argmax",
    "verboseFreq": 1,
    "recordFreq": 1,

    # --- Recorded for wandb only ---
    "envId": env.env.spec.id,
}

In [13]:
def doLearning(config):
    """Build a new network, optimise it with ``Genetic`` and log the run to wandb.

    Parameters
    ----------
    config : dict
        Experiment settings. The keys read here are "nTest",
        "actionSelectionMode", "popSize", "tournamentSize", "nElitism",
        "mutationProba", "mutationScale", "mutationRelative", "generations",
        "verboseFreq" and "recordFreq". The dict is modified in place: the
        entries "qfunctionClass", "networkTopology" and "algoClass" are
        written before the dict is passed to ``wandb.init``.

    Returns
    -------
    tuple
        ``(Q, algo)`` where ``Q`` is whatever ``algo.run`` returns and
        ``algo`` is the ``Genetic`` instance that was used.
    """
    # Construction order is kept: the simulation first, then the two layers.
    evaluator = Simulation(env, config["nTest"], config["actionSelectionMode"], True)
    hiddenSize = 3
    layers = [
        Layer(env.observation_space.shape[0], hiddenSize, Sigmoid(), "kaiming"),
        Layer(hiddenSize, env.action_space.n, Sigmoid(), "kaiming", True),
    ]
    network = Network(evaluator, layers)

    # Descriptive metadata added to the config so it shows up in the wandb run.
    config["qfunctionClass"] = type(network).__name__
    config["networkTopology"] = network.printTopology()

    optimizer = Genetic()
    config["algoClass"] = type(optimizer).__name__

    wandb.init(project="simple_rl", config=config)

    # Settings forwarded positionally to run(), in this exact order, after the
    # network and two None placeholders; the wandb module goes last.
    forwardedKeys = (
        "popSize",
        "tournamentSize",
        "nElitism",
        "mutationProba",
        "mutationScale",
        "mutationRelative",
        "generations",
        "verboseFreq",
        "recordFreq",
    )
    evolved = optimizer.run(
        network,
        None,
        None,
        *(config[key] for key in forwardedKeys),
        wandb
    )

    return (evolved, optimizer)

In [14]:
# Repeat the experiment nRuns times; Q and algo end up holding the last run's results.
nRuns = 3

for runNumber in range(1, nRuns + 1):
    print(f"{runNumber}/{nRuns}")
    Q, algo = doLearning(config)

1/3


2/3


3/3


# Analyzing the results

### Animation

In [23]:
# Run one episode with render=True and report the score it returns.
episodeScore = Q.loss.performEpisode(Q, render=True)
print(f"Score: {episodeScore}")

Score: 500.0


### Score distribution

In [21]:
# Collect the score of 250 episodes and display their summary statistics.
episodeScores = pd.Series([Q.loss.performEpisode(Q) for _ in range(250)])
episodeScores.describe()

count    250.000000
mean     467.588000
std       78.958038
min      176.000000
25%      500.000000
50%      500.000000
75%      500.000000
max      500.000000
dtype: float64