In [87]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import tensorflow as tf
import numpy as np

from tf_agents.environments import py_environment
from tf_agents.environments import tf_environment
from tf_agents.environments import tf_py_environment
from tf_agents.environments import utils
from tf_agents.specs import array_spec
from tf_agents.environments import wrappers
from tf_agents.environments import suite_gym
from tf_agents.trajectories import time_step as ts

In [88]:
class CardGameEnv(py_environment.PyEnvironment):
    """Single-player card game environment, loosely modelled on blackjack.

    The observation is the running total of the cards drawn so far, starting
    at 0. Action 0 draws a card worth 1..10 and adds it to the total; action 1
    stops drawing. An episode terminates when the player stops or the total
    reaches 21 or more. The terminal reward is `total - 21` when the total is
    at most 21 (so 0 is the best possible outcome) and -21 when it exceeds 21.
    Every non-terminal step yields reward 0.
    """

    # Game constants (kept private so they cannot clash with base-class names).
    _TARGET_SUM = 21  # total at which the episode is forced to end
    _MIN_CARD = 1     # smallest card value that can be drawn
    _MAX_CARD = 10    # largest card value that can be drawn

    def __init__(self):
        # Run the base-class initialiser so that state owned by
        # `PyEnvironment` itself is set up before the environment is stepped.
        super(CardGameEnv, self).__init__()
        self._action_spec = array_spec.BoundedArraySpec(
            shape=(), dtype=np.int32, minimum=0, maximum=1, name='action')
        self._observation_spec = array_spec.BoundedArraySpec(
            shape=(1,), dtype=np.int32, minimum=0, name='observation')
        self._state = 0
        self._episode_ended = False

    def action_spec(self):
        """Return the spec of the scalar int32 action (0 = draw, 1 = stop)."""
        return self._action_spec

    def observation_spec(self):
        """Return the spec of the shape-(1,) int32 observation (card total)."""
        return self._observation_spec

    def _observation(self):
        """Return the current card total as a shape-(1,) int32 array."""
        return np.array([self._state], dtype=np.int32)

    def _reset(self):
        """Start a new episode with an empty hand and return its first step."""
        self._state = 0
        self._episode_ended = False
        return ts.restart(self._observation())

    def _step(self, action):
        """Apply `action` and return the resulting time step.

        Args:
            action: 0 to draw another card, 1 to stop.

        Returns:
            A restart step if the previous step ended the episode, a
            termination step if this step ends it, otherwise a transition
            step with reward 0 and discount 1.

        Raises:
            ValueError: if `action` is neither 0 nor 1.
        """
        if self._episode_ended:
            # The last action ended the episode. Ignore the current action and
            # start a new episode.
            return self.reset()

        if action == 1:
            self._episode_ended = True
        elif action == 0:
            new_card = np.random.randint(self._MIN_CARD, self._MAX_CARD + 1)
            self._state += new_card
        else:
            raise ValueError('`action` should be 0 or 1.')

        if self._episode_ended or self._state >= self._TARGET_SUM:
            # Record the end of the episode even when it was caused by the
            # card total rather than by an explicit stop; otherwise the next
            # call would keep playing from a finished game instead of
            # resetting.
            self._episode_ended = True
            if self._state <= self._TARGET_SUM:
                reward = self._state - self._TARGET_SUM
            else:
                reward = -self._TARGET_SUM
            return ts.termination(self._observation(), reward)

        return ts.transition(self._observation(), reward=0.0, discount=1.0)

In [89]:
# Instantiate the card-game environment and run the `tf_agents.environments.utils`
# validation helper on it for 5 episodes.
# NOTE(review): presumably this checks the emitted time steps against the
# declared specs — confirm against the TF-Agents documentation.
environment = CardGameEnv()
utils.validate_py_environment(environment, episodes=5)

## 2. MPG Environment

In [312]:
# Load a graph from a local input file and bind it to `G`; the trailing bare `G`
# makes the notebook echo its repr (a `MeanPayoffGraph` instance).
# NOTE(review): `ignore_header=1` presumably skips the first line of the file —
# confirm against `mpg_from_file`.
from mpg.games import mpg
G=mpg.mpg_from_file("data/test01.in",ignore_header=1)
G

<mpg.games.mpg.MeanPayoffGraph at 0x7f1e062a1fc0>

In [418]:
# Import the project modules and force-reload them, in the same order as
# before, so code edited on disk is picked up without restarting the kernel.
from mpg.games import strategy, mpg
from mpg.rl import model_free, environment as rl_env
import importlib

for _module in (strategy, model_free, rl_env):
    importlib.reload(_module)

# Build an MPG environment on graph `G` and validate it for 5 episodes.
# NOTE(review): the meaning of the positional arguments `0, 0, 10` is not
# visible here — confirm against `MPGEnvironment.__init__`.
environment = rl_env.MPGEnvironment(G, 0, 0, 10, bad_action_penalty=-40)
utils.validate_py_environment(environment, episodes=5)

In [None]:
# Echo the reward spec reported by whichever object `environment` is currently
# bound to.
environment.reward_spec()

In [422]:
# Rebind `environment` to a new MPG environment on `G`, created with
# `max_turns=100` and `bad_action_penalty=-100`.
# NOTE(review): the positional arguments `1, 0` are passed without names; their
# meaning is not visible here — confirm against `MPGEnvironment.__init__`.
environment = rl_env.MPGEnvironment(G,1,0,max_turns=100,bad_action_penalty=-100)

In [423]:
# Greedy strategy built on the environment's graph with `turn` set to player1.
opponent_strategy = strategy.GreedyStrategy(
    environment.graph,
    turn=mpg.MeanPayoffGraph.player1,
)

# Wrap the environment together with that strategy, reset the wrapper, and
# create the learning agent on top of it.
# NOTE(review): presumably the wrapper plays player1's moves with the fixed
# strategy so the agent only controls the other player — confirm.
fixed_env = rl_env.FixedStrategyMPGEnvironment(environment, opponent_strategy)
fixed_env.reset()
agent = model_free.RLearningAgent(fixed_env)

In [424]:
# Train the agent; no arguments are passed, so whatever defaults `train`
# defines are used.
agent.train()

In [426]:
# Query the agent with argument 1 and echo the result.
# NOTE(review): presumably the argument identifies a vertex/state and the
# result is the chosen action — confirm against `RLearningAgent.get_action`.
agent.get_action(1)

4

In [239]:
# Overwrite the environment's private `_vertex` attribute with a 0-d array
# holding 0, bypassing the public API.
# NOTE(review): presumably this forces the current vertex back to 0 for manual
# experimentation — confirm, and prefer a public reset if one exists.
environment._vertex=np.array(0)

In [324]:
# Scratch expression: a bare dict literal that is bound to no name; its only
# effect is to be echoed as the cell output.
{1:2,5:5,3:5,4:5}

{1: 2, 5: 5, 3: 5, 4: 5}

In [366]:
# Two independent length-5 zero vectors, stored under the keys 0 and 1.
A = {key: np.zeros(5) for key in (0, 1)}

In [368]:
# Echo element 1 of the array stored under key 1 of `A`; relies on `A` having
# been defined by an earlier cell.
A[1][1]

0.0

In [427]:
# Build a visualisation object for graph `G` and bind it to `VG`; the trailing
# bare `VG` makes the notebook display it.
from mpg.visualisation import game as vgame
VG=vgame.MPGVisualisation(G)
VG

MPGVisualisation(layout=Layout(height='500px', width='100%'))