In [1]:
import gym
import numpy as np
import random
from statistics import median, mean
from collections import Counter

In [2]:
class enviroment():
    """Collects random-play training data from the CartPole-v0 gym environment.

    The attributes below are set in ``__init__`` and may be overridden on the
    instance before calling ``generate_data``:

    LR                -- learning-rate constant; not used by this class itself.
    env               -- the gym environment the games are played in.
    goal_steps        -- maximum number of steps played in a single game.
    score_requirement -- minimum total reward for a game to be kept.
    initial_games     -- number of random games to play.
    """

    def __init__(self):
        self.LR = 1e-3
        self.env = gym.make("CartPole-v0")
        self.env.reset()
        self.goal_steps = 500
        self.score_requirement = 25
        self.initial_games = 1000

    def _reset(self):
        """Reset the environment and return only the initial observation.

        Accepts both return conventions of ``env.reset()``: a bare
        observation, or an ``(observation, info)`` tuple.
        """
        result = self.env.reset()
        if isinstance(result, tuple):
            return result[0]
        return result

    def _step(self, action):
        """Apply ``action`` and return ``(observation, reward, done)``.

        Accepts both return conventions of ``env.step()``: the 4-tuple
        ``(obs, reward, done, info)`` and the 5-tuple
        ``(obs, reward, terminated, truncated, info)``.
        """
        result = self.env.step(action)
        if len(result) == 5:
            observation, reward, terminated, truncated = result[:4]
            return observation, reward, bool(terminated or truncated)
        observation, reward, done = result[:3]
        return observation, reward, done

    def generate_data(self, save_path='saved.npy'):
        """Play random games and keep the moves of the games that scored well.

        Plays ``self.initial_games`` games, each for at most
        ``self.goal_steps`` steps, choosing action 0 or 1 uniformly at random.
        For every game whose total reward is at least
        ``self.score_requirement``, each (observation, action) pair of that
        game is added to the training data, with the action one-hot encoded
        (0 -> [1, 0], 1 -> [0, 1]).

        Parameters:
            save_path: file the training data is written to with ``np.save``.

        Returns:
            A list of ``[observation, one_hot_action]`` entries; empty when no
            game reached the score requirement.
        """
        # [OBS, MOVES]
        training_data = []
        # total reward of every game that met the threshold
        accepted_scores = []

        for _ in range(self.initial_games):
            score = 0
            # (observation, action) pairs seen in this game only
            game_memory = []
            # Start every game from a fresh state and keep the initial
            # observation, so the very first action is recorded too.
            prev_observation = self._reset()

            for _step_index in range(self.goal_steps):
                # choose random action (0 or 1)
                action = random.randrange(0, 2)
                observation, reward, done = self._step(action)

                # The action was chosen while looking at the PREVIOUS
                # observation; the one just returned is its consequence.
                game_memory.append([prev_observation, action])
                prev_observation = observation
                score += reward
                if done:
                    break

            # Only the final score decides whether a game is kept; nothing
            # here judges HOW that score was reached.
            if score >= self.score_requirement:
                accepted_scores.append(score)
                for seen, taken in game_memory:
                    # one-hot encoding of the action
                    output = [0, 1] if taken == 1 else [1, 0]
                    training_data.append([seen, output])

        # Each row pairs a 4-element observation with a 2-element one-hot
        # list, so the rows are ragged: an object array is required.
        training_data_save = np.array(training_data, dtype=object)
        np.save(save_path, training_data_save)

        # Summary statistics; mean()/median() raise on empty input, so guard.
        if accepted_scores:
            print('Average accepted score:', mean(accepted_scores))
            print('Median score for accepted scores:', median(accepted_scores))
            print(Counter(accepted_scores))
        else:
            print('No game reached the score requirement of',
                  self.score_requirement)

        return training_data

In [3]:
if __name__ == '__main__':
    # Build the data collector, then override its defaults: play far more
    # games and only keep those whose total reward is at least 50.
    model = enviroment()
    model.initial_games, model.score_requirement = 100000, 50
    # Run the random games; `x` holds the accepted [observation, action] pairs.
    x = model.generate_data()

[2017-06-28 15:32:27,047] Making new env: CartPole-v0


Average accepted score: 61.55036456926816
Median score for accepted scores: 58.0
Counter({51.0: 288, 50.0: 270, 52.0: 261, 54.0: 232, 53.0: 204, 55.0: 196, 56.0: 173, 57.0: 160, 59.0: 152, 61.0: 142, 58.0: 141, 60.0: 132, 63.0: 113, 64.0: 111, 62.0: 102, 65.0: 101, 66.0: 69, 67.0: 65, 72.0: 61, 68.0: 53, 69.0: 53, 71.0: 52, 74.0: 46, 70.0: 43, 73.0: 41, 80.0: 30, 75.0: 29, 78.0: 29, 81.0: 27, 77.0: 25, 79.0: 23, 76.0: 21, 88.0: 18, 82.0: 16, 86.0: 16, 84.0: 15, 87.0: 13, 93.0: 13, 83.0: 12, 85.0: 12, 89.0: 12, 94.0: 11, 90.0: 9, 91.0: 8, 92.0: 8, 95.0: 8, 96.0: 6, 99.0: 6, 102.0: 6, 97.0: 5, 100.0: 5, 103.0: 5, 111.0: 5, 98.0: 4, 101.0: 4, 105.0: 3, 106.0: 3, 107.0: 3, 109.0: 3, 119.0: 3, 104.0: 2, 112.0: 2, 113.0: 2, 114.0: 2, 115.0: 2, 116.0: 2, 117.0: 2, 118.0: 2, 131.0: 1, 134.0: 1, 137.0: 1, 140.0: 1, 108.0: 1, 110.0: 1, 121.0: 1, 124.0: 1, 125.0: 1, 127.0: 1})
