In [2]:
from google.colab import drive
drive.mount('/content/drive')
! apt-get install git
!git clone https://github.com/smerrillunc/coingame.git

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Reading package lists... 0%^C
fatal: destination path 'coingame' already exists and is not an empty directory.


In [2]:
import math
import random
import matplotlib
import matplotlib.pyplot as plt
from collections import namedtuple, deque
from itertools import count

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import pandas as pd
import numpy as np
from sklearn.utils import shuffle
import unittest

# set up matplotlib
is_ipython = 'inline' in matplotlib.get_backend()
if is_ipython:
    from IPython import display

plt.ion()

# if GPU is to be used
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

import sys

sys.path.append('/content/drive/MyDrive/evoenv-main')
from evoenv.envs.coin_game import CoinGame

In [3]:
from coingame.players import PPOPlayer, DQNPlayer
from coingame.population import Population
from coingame.networks import MLP, GaussianPolicy
from coingame.memoryBuffers import ReplayBuffer, Buffer
import coingame.coinGameExperiment

DQN Test

In [4]:
# Population count and subpopulatinos
N = 8
d = 2
population_options = {'N':N,
                      'd':d}

# environment settings
n = 3

env = CoinGame
env_options = {'grid_shape':(n,n),
               'n_coins':1,
               'coin_payoffs':np.array([[1, 0], [1, -2]], dtype=np.float_)}

env_description = {'obs_dim':4*n*n,
                   'act_dim':4,
                'act_limit':1}


dqn_models = [{'model':MLP}]

dqn_models_params = {'input_size':4*n**2, \
                'output_size':4,
                'output_limit':1.0,
                'hidden_sizes':(64,64),
                'activation':torch.tanh}

dqn_player_options = {
                      'steps':0,
                      'gamma':0.99,
                      'epsilon':1.0,
                      'epsilon_decay':0.995,
                      'buffer_size':int(1e4),
                      'batch_size':64,
                      'target_update_step':100}

dqn_player_options.update(env_description)

experiment = coingame.coinGameExperiment.CoinGameExperiment(env=env,
                   env_options=env_options,
                   population_options=population_options,
                   player=DQNPlayer,
                   player_options=dqn_player_options,
                   player_models=dqn_models,
                   player_model_params=dqn_models_params,
                   device=device)


# 2 rounds, 500 games per round, 2500 time steps per game, 10 players from
# population migrating to a diff population
rounds = 2
timesteps=25
count = 10
dqn_df, dqn_players, dqn_players_df = experiment.play_multi_rounds(rounds, timesteps, count)

In [11]:
dqn_df

Unnamed: 0,start_timestep,end_timestep,episode_length,red_distance,blue_distance,red_reward,blue_reward,coin_color,red_label,blue_label,blue_population,red_population,blue_player_id,red_player_id
0,0,4,4,4,2,1.0,0.0,r,0,1,0,0,4,2
0,4,7,3,2,2,0.0,1.0,b,1,0,0,0,4,2
0,7,8,1,2,1,1.0,0.0,r,0,1,0,0,4,2
0,8,14,6,2,1,1.0,-2.0,b,-1,0,0,0,4,2
0,14,21,7,2,3,1.0,-2.0,b,-1,0,0,0,4,2
0,0,6,6,3,4,1.0,-2.0,b,-1,0,0,1,6,1
0,6,14,8,2,4,0.0,1.0,b,1,0,0,1,6,1
0,14,19,5,2,1,-2.0,1.0,r,0,-1,0,1,6,1
0,19,22,3,1,3,1.0,0.0,r,0,0,0,1,6,1
0,0,0,0,1,2,1.0,0.0,r,0,0,1,0,5,2


Single Game 2 DQN Agent

In [14]:
env_description = {'obs_dim':4*n*n,
                   'act_dim':4,
                'act_limit':1}

player_params = {'player_id':0,
                'color':'b',
                'population':1}
DQN_Player1 = DQNPlayer(player_params,
                        **env_description,
                        model=MLP,
                        model_params=dqn_models_params)

DQN_Player2 = DQNPlayer(player_params,
                        **env_description,
                        model=MLP,
                        model_params=dqn_models_params)

env, players, df = coingame.coinGameExperiment.CoinGameExperiment.play_game(CoinGame(**env_options), [DQN_Player1, DQN_Player2], 50)

In [15]:
df

Unnamed: 0,start_timestep,end_timestep,episode_length,red_distance,blue_distance,red_reward,blue_reward,coin_color,red_label,blue_label,blue_population,red_population,blue_player_id,red_player_id
0,0,4,4,2,1,1.0,-2.0,b,-1,0,1,1,0,0
0,4,5,1,3,1,0.0,1.0,b,0,0,1,1,0,0
0,5,8,3,1,2,0.0,1.0,b,1,0,1,1,0,0
0,8,26,18,2,1,0.0,1.0,b,0,0,1,1,0,0
0,26,27,1,1,1,-2.0,1.0,r,0,-1,1,1,0,0
0,27,30,3,2,3,0.0,1.0,b,1,0,1,1,0,0
0,30,34,4,2,1,-2.0,1.0,r,0,-1,1,1,0,0
0,34,36,2,3,1,0.0,1.0,b,0,0,1,1,0,0
0,36,37,1,2,1,1.0,0.0,r,0,1,1,1,0,0
0,37,43,6,2,1,0.0,1.0,b,0,0,1,1,0,0


PPO Test

In [5]:
ppo_models = [{'actor_model':GaussianPolicy,
               'critic_model':MLP},

              {'actor_model':GaussianPolicy,
              'critic_model':MLP}]


ppo_model_params = [{'input_size':4*n**2, \
                'output_size':4,
                'output_limit':1.0,
                'hidden_sizes':(64,64),
                'activation':torch.tanh},

                {'input_size':4*n**2,
                'output_size':1,
                'hidden_sizes':(128,64),
              'activation':torch.tanh}]

ppo_player_options = {'steps':0,
                    'gamma':0.99,
                    'lam':0.97,
                    'hidden_sizes':(64,64),
                    'sample_size':2048,
                    'train_policy_iters':80,
                    'train_vf_iters':80,
                    'clip_param':0.2,
                    'target_kl':0.01,
                    'policy_lr':3e-4,
                    'vf_lr':1e-3}

ppo_player_options.update(env_description)
env = CoinGame

experiment = coingame.coinGameExperiment.CoinGameExperiment(env=env,
                   env_options=env_options,
                   population_options=population_options,
                   player=PPOPlayer,
                   player_options=ppo_player_options,
                   player_models=ppo_models,
                   player_model_params=ppo_model_params,
                   device='cpu')

In [7]:
rounds = 1
timesteps=50
count = 10
dqn_df, dqn_players, dqn_players_df = experiment.play_multi_rounds(rounds, timesteps, count)

In [8]:
dqn_df

Unnamed: 0,start_timestep,end_timestep,episode_length,red_distance,blue_distance,red_reward,blue_reward,coin_color,red_label,blue_label,blue_population,red_population,blue_player_id,red_player_id
0,0,15,15,1,2,0.0,1.0,b,1,0,0,1,6,1
0,15,21,6,2,1,0.0,1.0,b,0,0,0,1,6,1
0,21,31,10,3,1,1.0,0.0,r,0,1,0,1,6,1
0,31,38,7,3,2,1.0,0.0,r,0,1,0,1,6,1
0,38,39,1,2,2,1.0,-2.0,b,-1,0,0,1,6,1
0,39,41,2,4,3,1.0,-2.0,b,-1,0,0,1,6,1
0,41,42,1,3,1,0.0,1.0,b,0,0,0,1,6,1
0,0,2,2,1,4,1.0,0.0,r,0,0,0,0,4,0
0,2,23,21,3,1,1.0,0.0,r,0,1,0,0,4,0
0,23,29,6,2,3,-2.0,1.0,r,0,-1,0,0,4,0


Single Game PPO Player

In [12]:
actor_model = GaussianPolicy
actor_model_params = {'input_size':4*n**2,
                    'output_size':4,
                    'output_limit':1.0,
                    'hidden_sizes':(64,64),
                    'activation':torch.tanh}

critic_model = MLP
critic_model_params = {'input_size':4*n**2,
                       'output_size':1,
                      'hidden_sizes':(64,64),
                    'activation':torch.tanh}


player_params = {'player_id':0,
                'color':'b',
                'population':1}

PPO_Player1 = PPOPlayer(player_params,
                        **env_description,
                        actor_model=actor_model,
                        actor_model_params=actor_model_params,
                        critic_model=critic_model,
                        critic_model_params=critic_model_params)

PPO_Player2 = PPOPlayer(player_params,
                        **env_description,
                        actor_model=actor_model,
                        actor_model_params=actor_model_params,
                        critic_model=critic_model,
                        critic_model_params=critic_model_params)

env, players, df = coingame.coinGameExperiment.CoinGameExperiment.play_game(CoinGame(**env_options), [PPO_Player1, PPO_Player2], 3000)
