In [1]:
import gym
import numpy as np
import random
from statistics import median, mean
from collections import Counter

In [2]:
class enviroment():
    """Random-play data collector for the gym "CartPole-v0" environment.

    Plays games with uniformly random actions and keeps the
    (observation, action) pairs of the games whose total reward reached
    ``score_requirement``.

    Attributes (all plain instance attributes, meant to be overridden after
    construction):
        LR: not read anywhere in this class; presumably a learning rate for
            a model defined elsewhere -- TODO confirm.
        env: the gym environment being played.
        goal_steps: maximum number of steps taken in one game.
        score_requirement: minimum total reward for a game to be kept.
        initial_games: number of random games to play.
    """

    def __init__(self):
        self.LR = 1e-3
        self.env = gym.make("CartPole-v0")
        self.env.reset()
        self.goal_steps = 500
        self.score_requirement = 25
        self.initial_games = 1000

    @staticmethod
    def _reset_observation(reset_result):
        """Return the observation from ``env.reset()`` for old and new gym APIs."""
        # Newer gym releases return ``(observation, info_dict)``; older ones
        # return the bare observation.
        if (isinstance(reset_result, tuple) and len(reset_result) == 2
                and isinstance(reset_result[1], dict)):
            return reset_result[0]
        return reset_result

    @staticmethod
    def _unpack_step(step_result):
        """Return ``(observation, reward, done)`` for old and new gym APIs."""
        # Newer gym releases return five values and split ``done`` into
        # ``terminated`` and ``truncated``; either one ends the game.
        if len(step_result) == 5:
            observation, reward, terminated, truncated, _info = step_result
            return observation, reward, terminated or truncated
        observation, reward, done, _info = step_result
        return observation, reward, done

    def generate_data(self, save_path='saved.npy'):
        """Play ``initial_games`` random games and collect training samples.

        Each sample pairs the observation the agent saw with the one-hot
        encoding of the random action it then took ([1,0] for action 0,
        [0,1] for action 1). Only games whose total reward is at least
        ``score_requirement`` contribute samples.

        Args:
            save_path: file the samples are written to with ``np.save``.
                Pass ``None`` to skip writing. Defaults to 'saved.npy'.

        Returns:
            list of ``[observation, one_hot_action]`` pairs.
        """
        # [OBS, MOVES]
        training_data = []
        # just the scores that met our threshold:
        accepted_scores = []

        # iterate through however many games we want:
        for _ in range(self.initial_games):
            score = 0
            # moves specifically from this game:
            game_memory = []
            # Start every game from a fresh reset and keep the observation it
            # returns, so the very first action is paired with the state it
            # was taken in instead of being dropped.
            prev_observation = self._reset_observation(self.env.reset())

            # play at most goal_steps frames
            for _ in range(self.goal_steps):
                # choose random action (0 or 1)
                action = random.randrange(0, 2)
                observation, reward, done = self._unpack_step(
                    self.env.step(action))

                # The action was chosen while the environment was in
                # prev_observation; the observation returned by step() is the
                # state that results FROM the action.
                game_memory.append([prev_observation, action])
                prev_observation = observation
                score += reward
                if done:
                    break

            # NOTE the reinforcement methodology here: we only select on the
            # final score, we do not influence HOW that score is reached.
            if score >= self.score_requirement:
                accepted_scores.append(score)
                for seen_observation, taken_action in game_memory:
                    # convert to one-hot (the output layer for a network)
                    output = [0, 1] if taken_action == 1 else [1, 0]
                    training_data.append([seen_observation, output])

        # Leave the environment freshly reset, as the original loop did.
        self.env.reset()

        # just in case you wanted to reference later
        if save_path is not None:
            # Each sample pairs a length-4 observation with a length-2 list,
            # so the array must be an object array; recent NumPy refuses to
            # build ragged arrays implicitly. Loading the file back requires
            # np.load(..., allow_pickle=True).
            np.save(save_path, np.array(training_data, dtype=object))

        # some stats here, to further illustrate the neural network magic!
        if accepted_scores:
            print('Average accepted score:',mean(accepted_scores))
            print('Median score for accepted scores:',median(accepted_scores))
        else:
            # mean()/median() raise StatisticsError on empty input.
            print('No games reached the score requirement of',
                  self.score_requirement)
        print(Counter(accepted_scores))

        return training_data

In [3]:
# Build the collector, then override its defaults for a much larger run:
# 100000 random games, keeping only games that score at least 50.
model = enviroment()
model.initial_games, model.score_requirement = 100000, 50

[2017-06-28 11:30:49,178] Making new env: CartPole-v0


In [4]:
# Play the configured random games; `x` receives the returned list of
# [observation, one-hot action] samples. The call also writes 'saved.npy'
# and prints statistics about the accepted scores.
x = model.generate_data()

Average accepted score: 61.68511198945981
Median score for accepted scores: 58.0
Counter({50.0: 291, 51.0: 284, 52.0: 233, 54.0: 228, 53.0: 214, 55.0: 200, 56.0: 192, 57.0: 168, 58.0: 154, 60.0: 147, 59.0: 132, 62.0: 120, 61.0: 108, 65.0: 102, 63.0: 94, 64.0: 94, 66.0: 86, 68.0: 73, 67.0: 72, 71.0: 68, 69.0: 67, 70.0: 63, 72.0: 49, 73.0: 42, 74.0: 41, 80.0: 38, 75.0: 33, 76.0: 31, 77.0: 29, 78.0: 26, 79.0: 25, 83.0: 23, 86.0: 23, 82.0: 22, 84.0: 20, 89.0: 19, 88.0: 17, 85.0: 14, 81.0: 13, 91.0: 13, 98.0: 13, 87.0: 12, 90.0: 10, 95.0: 9, 94.0: 8, 92.0: 7, 96.0: 7, 93.0: 6, 106.0: 6, 110.0: 5, 97.0: 4, 101.0: 4, 103.0: 4, 104.0: 4, 105.0: 3, 119.0: 3, 102.0: 2, 111.0: 2, 112.0: 2, 120.0: 2, 136.0: 1, 137.0: 1, 138.0: 1, 142.0: 1, 145.0: 1, 100.0: 1, 107.0: 1, 108.0: 1, 113.0: 1, 114.0: 1, 115.0: 1, 118.0: 1, 121.0: 1, 122.0: 1})
