# Imports

In [1]:
import gym
import wandb
import numpy as np
import pandas as pd

In [2]:
from simplenn.structures.network.activations import LRelu
from simplenn.structures.network.activations import TanH
from simplenn.structures.network.activations import Sigmoid
from simplenn.evaluation.simulation import Simulation
from simplenn.structures.network import Network
from simplenn.structures.network import Layer
from simplenn.optim.evolution import Genetic

In [3]:
# Environment used by every cell below (network sizing, training, analysis).
# The commented-out lines are alternative Gym environments; uncomment exactly
# one `env = ...` line to switch the task.
#env = gym.make('MountainCar-v0')
#env = gym.make('Acrobot-v1')
#env = gym.make('CartPole-v0')
env = gym.make('CartPole-v1')

In [4]:
# Select matplotlib's Tk GUI backend, so figures open in their own window
# rather than inline in the notebook.
%matplotlib tk

# Performing experiment

In [5]:
# Config dict
# Handed to doLearning(), which forwards the values to the network, the
# simulation and Genetic.run(), and to wandb.init() as the run configuration.
config = {
    # Genetic parameters (passed positionally to Genetic.run in doLearning)
    "popSize": 50,
    "nElitism": 1,
    "generations": 20,
    "tournamentSize": 5,
    "mutationProba": 0.1,
    "mutationScale": 0.05,
    "mutationRelative": False,

    # Evaluation related parameters
    "nTest": 10,                      # second argument of Simulation(...)
    "actionSelectionMode": "argmax",  # forwarded to Network(...)
    "verboseFreq": 1,
    "recordFreq": 1,

    # Wandb only parameters
    # Read the id from the spec exposed by the object gym.make() returned
    # instead of reaching into its inner `.env`: the value is the same, but
    # this does not assume the environment is wrapped exactly once.
    "envId": env.spec.id,
}

In [6]:
def doLearning(config):
    """Build a network, optimise it with the genetic algorithm and log to wandb.

    The environment is taken from the notebook-level ``env`` variable.

    Parameters
    ----------
    config : dict
        Experiment settings. Keys read here: ``actionSelectionMode``,
        ``nTest``, ``popSize``, ``tournamentSize``, ``nElitism``,
        ``mutationProba``, ``mutationScale``, ``mutationRelative``,
        ``generations``, ``verboseFreq`` and ``recordFreq``.
        The dictionary itself is left untouched: the derived entries
        (``qfunctionClass``, ``networkTopology``, ``algoClass``) are written
        to a private copy, and that copy is what wandb receives.

    Returns
    -------
    tuple
        ``(Q, algo)`` where ``Q`` is whatever ``Genetic.run`` returned
        (presumably the evolved network -- confirm against simplenn) and
        ``algo`` is the ``Genetic`` instance that produced it.
    """
    # Shallow copy so repeated calls never mutate the caller's dictionary.
    runConfig = dict(config)

    # Components

    # Two-layer network: observation vector -> 3 hidden units -> one output
    # per discrete action.
    # NOTE(review): the trailing ``True`` on the last Layer is not explained
    # by anything in this notebook -- check Layer's signature.
    Q = Network(
        [
            Layer(env.observation_space.shape[0], 3, Sigmoid(), "kaiming"),
            Layer(3, env.action_space.n, Sigmoid(), "kaiming", True)
        ],
        actionSelectionMode=runConfig["actionSelectionMode"]
    )
    runConfig["qfunctionClass"] = Q.__class__.__name__
    # Stores the return value of printTopology(); whether that is a
    # description or None is not visible here -- TODO confirm.
    runConfig["networkTopology"] = Q.printTopology()

    algo = Genetic()
    runConfig["algoClass"] = algo.__class__.__name__

    sim = Simulation(env, runConfig["nTest"], True)

    # Running

    wandb.init(project="simple_rl", config=runConfig)
    # The wandb module itself is handed over as the last argument.
    Q = algo.run(
        Q,
        sim,
        runConfig["popSize"],
        runConfig["tournamentSize"],
        runConfig["nElitism"],
        runConfig["mutationProba"],
        runConfig["mutationScale"],
        runConfig["mutationRelative"],
        runConfig["generations"],
        runConfig["verboseFreq"],
        runConfig["recordFreq"],
        wandb
    )

    # Returning result

    return (Q, algo)

In [None]:
# Number of trainings launched back to back with the same config.
nRuns = 3

for i in range(nRuns):

    # Progress marker of the form "<current>/<total>".
    progress = f"{i+1}/{nRuns}"
    print(progress)

    # Q and algo are rebound on every pass, so once the loop is over they
    # refer to the outcome of the last run only.
    Q, algo = doLearning(config)

# Analyzing the results

In [8]:
# Simulation object used by the analysis cells below.
# NOTE(review): doLearning() builds its Simulation as
# (env, config["nTest"], True); here only two positional arguments are given,
# so True lands in the second position -- confirm against Simulation's
# signature that this is intended.
sim = Simulation(env, True)

### Animation

In [9]:
# Play one rendered episode with the Q left over from the training loop and
# report what performEpisode returns.
episodeScore = sim.performEpisode(Q, render=True)
print(f"Score: {episodeScore}")

Score: 500.0


### Score distribution

In [16]:
# Call performEpisodes with 250 and summarise element [1] of its result
# (presumably the per-episode scores -- confirm against Simulation).
episodesResult = sim.performEpisodes(Q, 250)
scoreSeries = pd.Series(episodesResult[1])
scoreSeries.describe()

count    250.000000
mean     472.088000
std       40.518728
min      332.000000
25%      440.500000
50%      500.000000
75%      500.000000
max      500.000000
dtype: float64