<a href="https://colab.research.google.com/github/patrxon/GameLearning/blob/main/GameLearning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Program Initialization

Imports


In [None]:
import random
import sys
import tensorflow as tf
import keras.layers as Kl
import keras.models as Km
import numpy as np

Agent


In [None]:
class Agent():
  """Game-playing agent backed by a small Keras value network.

  The network maps one board state to one scalar value. Moves are chosen
  either greedily by predicted value (exploit) or uniformly at random
  (explore), and the network is fitted towards a blended target after
  each turn.

  Args:
    player: Marker identifying this agent's pieces in the game state.
    params: Sizes of the hidden Dense layers, first hidden layer first.
    game: Game object providing get_state, get_moves and get_reward.
    lrnf: Learning factor used when blending old and new value estimates.
    expf: Explore factor; a random move is chosen when a uniform draw in
      [0, 1] does not exceed it.
  """

  def __init__(self, player, params, game, lrnf=0.5, expf=0.8):
    self.game = game
    self.player = player
    self.lrnf = lrnf  # learning factor
    self.expf = expf  # explore factor
    # Keep a private copy: some games mutate their state list in place, and
    # an aliased "previous" state would silently track the current one.
    self.prev_state = self._snapshot(game.get_state())
    self.network = self.init_network(len(game.get_state()), params)

  @staticmethod
  def _snapshot(state):
    """Return a copy of `state` that is independent of the game's own list."""
    return [list(item) if isinstance(item, list) else item for item in state]

  def init_network(self, input, params):
    """Build and compile the value network.

    Args:
      input: Number of input features (length of a board state).
      params: Hidden layer sizes; must contain at least one entry.

    Returns:
      A compiled Sequential model with ReLU hidden layers and a single
      linear output unit.
    """
    network = Km.Sequential()
    network.add(Kl.Dense(params[0], activation='relu', input_dim=input))
    for param in params[1:]:
      network.add(Kl.Dense(param, activation='relu'))
    network.add(Kl.Dense(1, activation='linear'))
    network.compile(optimizer='adam', loss='mean_absolute_error', metrics=['accuracy'])

    return network

  def load_model(self, file_name):
    """Replace the network with a model loaded from `file_name`.

    On any loading error "Wrong file" is printed and the current network
    is kept. Interrupts (e.g. KeyboardInterrupt) are not swallowed.
    """
    try:
      new_model = Km.load_model(file_name)
    except Exception:
      print("Wrong file")
      return
    self.network = new_model

  def get_model(self):
    """Return the underlying network."""
    return self.network

  def change_factors(self, lrnf, expf):
    """Update the learning and/or explore factor; None leaves a factor as is."""
    if lrnf is not None:
      self.lrnf = lrnf
    if expf is not None:
      self.expf = expf

  def get_value(self, move):
    """Return the network's prediction for one state or a list of states.

    A flat state is wrapped into a batch of one before prediction.
    """
    if not isinstance(move[0], list):
      move = [move]
    return self.network.predict(np.array(move))

  def get_target(self):
    """Compute the training target for the previous state.

    The previous value is moved by `lrnf` towards the reward plus the value
    of the current state. The current state's value is only looked up when
    the reward is exactly 0.
    """
    prev_value = self.get_value(self.prev_state)
    curr_value = 0
    reward = self.game.get_reward(self.player)
    if reward == 0:
      curr_value = self.get_value(self.game.get_state())
    return np.array(prev_value + self.lrnf * (reward + curr_value - prev_value))

  def train_network(self):
    """Fit the network on the previous state, then remember the current one.

    Fitting is skipped while this agent's marker does not appear in the
    current game state.
    """
    # Wrap into a batch of one locally instead of rewriting self.prev_state.
    batch = self.prev_state
    if not isinstance(batch[0], list):
      batch = [batch]

    if self.player in self.game.get_state():
      target = self.get_target()
      if target is not None:
        self.network.fit(np.array(batch), target, epochs=10, verbose=0)
    self.prev_state = self._snapshot(self.game.get_state())

  def make_move(self, exploit=False, explore=False):
    """Choose the next board state.

    Args:
      exploit: Force a greedy move (unless `explore` is also set).
      explore: Force a random move; takes precedence over `exploit`.

    Returns:
      The chosen successor state, or the current state when the game
      offers no moves.
    """
    p = random.uniform(0, 1)

    moves = self.game.get_moves(self.game.get_state(), self.player)

    if not moves:
      return self.game.get_state()

    if (p > self.expf or exploit) and not explore:
      return self.exploit(moves)
    return self.explore(moves)

  def explore(self, moves):
    """Return a uniformly random candidate state."""
    return random.choice(moves)

  def exploit(self, moves):
    """Return the candidate state with the highest predicted value.

    All candidates are scored in a single batched predict call rather than
    one call per move. Ties go to the earliest candidate. Returns None
    when `moves` is empty.
    """
    if not moves:
      return None
    values = np.asarray(self.network.predict(np.array(moves)))
    best = int(np.argmax(values.reshape(len(moves), -1)[:, 0]))
    return moves[best]

Training loop

In [None]:
class Trainer():
  """Pairs two agents on one game and runs training, evaluation and demo games.

  Args:
    game: Game object shared by both agents.
    params: Hidden layer sizes passed to each Agent.
    lrnf: Initial learning factor for both agents.
    expf: Initial explore factor for both agents.
  """

  def __init__(self, game, params, lrnf, expf):
    self.game = game
    self.agent_p1 = Agent(1, params, game, lrnf, expf)
    self.agent_p2 = Agent(-1, params, game, lrnf, expf)

    # Index 0 moves on even turns, index 1 on odd turns.
    self.agents = [self.agent_p1, self.agent_p2]

  def _play_game(self, pick_move):
    """Play one game from a fresh board and return the game's winner.

    `pick_move` receives the seat index (0 or 1) whose turn it is and
    returns the board state to play.
    """
    self.game.reset_game()
    turn = 0
    while not self.game.check_win():
      self.game.make_move(pick_move(turn % 2))
      turn += 1
    return self.game.get_winner()

  @staticmethod
  def _rate(wins, loops):
    """Return wins/loops, or 0.0 when no games were played."""
    return wins / loops if loops > 0 else 0.0

  def train_agents(self):
    """Play one self-play game, training the moving agent before each move."""
    self.game.reset_game()
    turn = 0
    while not self.game.check_win():
      agent = self.agents[turn % 2]
      move = agent.make_move()
      agent.train_network()
      self.game.make_move(move)
      turn += 1
    # Two final passes per agent, in the order p1, p2, p1, p2.
    # NOTE(review): presumably the first pass fits the last pre-terminal state
    # and the second fits the terminal state itself - confirm intent.
    for _ in range(2):
      for agent in self.agents:
        agent.train_network()

  def test_agent(self, loops, player):
    """Print the win rate of seat `player` playing greedily vs. a random opponent.

    Args:
      loops: Number of games to play; 0 or less reports a rate of 0.0.
      player: Seat index under test (0 = player 1, anything else = player 2).
    """
    def pick_move(seat):
      if seat == player:
        return self.agents[seat].make_move(exploit=True)
      return self.agents[seat].make_move(explore=True)

    winners = [self._play_game(pick_move) for _ in range(loops)]

    if player == 0:
      print(" , player1 prc: " + str(self._rate(winners.count(1), loops)), end="")
    else:
      print(" , player2 prc: " + str(self._rate(winners.count(-1), loops)), end="")

  def spar_agents(self, loops):
    """Play `loops` games with each agent's normal policy and print both win rates.

    With `loops` of 0 or less no games are played and both rates print as 0.0.
    """
    def pick_move(seat):
      return self.agents[seat].make_move()

    winners = [self._play_game(pick_move) for _ in range(loops)]

    print(" , player1 prc: " + str(self._rate(winners.count(1), loops)), end="")
    print(" , player2 prc: " + str(self._rate(winners.count(-1), loops)))

  def tune_agents(self, exp1, epx2, lrn1, lrn2):
    """Set explore (exp1/epx2) and learning (lrn1/lrn2) factors; None keeps a value."""
    self.agents[0].change_factors(lrn1, exp1)
    self.agents[1].change_factors(lrn2, epx2)

  def save_agent(self, player, path):
    """Save the network of the agent at seat index `player` to `path`."""
    model = self.agents[player].get_model()
    model.save(path)

  def load_agent(self, player, path):
    """Load a network from `path` into the agent at seat index `player`."""
    self.agents[player].load_model(path)

  def watch_game(self):
    """Play one game with the agents' normal policies, printing the board each turn."""
    self.game.reset_game()

    turn = 0
    while not self.game.check_win():
      print("~~~~~~~~~~~~~~~~~~~~~")
      self.game.print_game()
      move = self.agents[turn % 2].make_move()
      self.game.make_move(move)
      turn += 1
    print("~~~~~~~~~~~~~~~~~~~~~")
    self.game.print_game()
    



Manager

In [None]:
class Manager():
  """Notebook-facing facade that selects a game class and owns its Trainer."""

  def __init__(self):
    self.game = None
    self.trainer = None

  def set_game(self, class_name):
    """Instantiate and smoke-test the game class named `class_name`.

    The class is looked up by name in this module. Every method the
    training code relies on is called once; if any call fails the game is
    left unset and a message is printed. Interrupts such as
    KeyboardInterrupt are not swallowed.
    """
    try:
      game_class = getattr(sys.modules[__name__], class_name)
    except (AttributeError, TypeError):
      print("Class not found.")
      return

    try:
      game = game_class()

      # Exercise the full required interface once.
      game.make_move(game.get_state())
      game.check_win()
      game.get_moves(game.get_state(), 1)
      game.get_reward(1)
      game.get_winner()
      game.reset_game()
      game.print_game()
    except Exception:
      print("Class missing required functions.")
      self.game = None
    else:
      self.game = game

  def set_trainer(self, layers, lrnf, expf):
    """Create a Trainer for the current game.

    Args:
      layers: Hidden layer sizes for both agents' networks.
      lrnf: Learning factor.
      expf: Explore factor.

    Prints a message and leaves the trainer unset when no game is selected
    or the Trainer cannot be built from these parameters.
    """
    if self.game is None:
      print("Game is not defined.")
      return
    try:
      self.trainer = Trainer(self.game, layers, lrnf, expf)
    except Exception:
      print("Incorrect parameters.")
      self.trainer = None

  def train_agents(self, loops):
    """Run `loops` self-play training games; no-op with a message if no trainer."""
    if self.trainer is None:
      print("Trainer is not defined.")
      return
    for _ in range(loops):
      self.trainer.train_agents()

  def test_agents(self, loops):
    """Evaluate each seat over `loops` games; no-op with a message if no trainer."""
    if self.trainer is None:
      print("Trainer is not defined.")
      return
    self.trainer.test_agent(loops, 0)
    self.trainer.test_agent(loops, 1)

  def get_trainer(self):
    """Return the current Trainer, or None when none has been set."""
    return self.trainer


# Games Initialization

Prisoner's Dilemma


In [None]:
#SCORES:
#    | 1 |-1 |
#  1 |3,3|0,5|
# -1 |5,0|1,1|

class PriDilemma():
  """Seven-round iterated prisoner's dilemma played alongside a noisy mirror game.

  The state is a flat list of 14 cells: cells 0-6 hold player 1's choices,
  cells 7-13 hold player -1's choices (1 = cooperate, -1 = defect,
  0 = not played yet). A second "mirror" board records, per round, the
  opposite of each player's choice, replaced by a random choice when a
  uniform draw exceeds 0.8.
  """

  def __init__(self):
    self.reset_game()

  def reset_game(self): #mandatory: reset board
    """Restore every field to its start-of-game value."""
    self.state = [0] * 14
    self.mirrorGame = [0] * 14
    self.tempMoves = [0, 0]
    self.winner = 0
    self.player = 1
    self.turn = 0
    self.playerScores = [0, 0]
    self.mirrorScores = [0, 0]

  def make_move(self, new_state): #mandatory: in board state after move
    """Record the moving player's choice; commit the round once both have chosen."""
    slot = {1: 0, -1: 1}[self.player]
    offset = 7 if self.player == -1 else 0

    self.tempMoves[slot] = new_state[self.turn + offset]
    self.player = -self.player

    # The round is only complete when play returns to player 1.
    if self.player != 1:
      return

    first_choice, second_choice = self.tempMoves
    self.state[self.turn] = first_choice
    self.state[self.turn + 7] = second_choice
    self.make_mirror_move()
    self.find_scores()
    self.turn += 1

  def make_mirror_move(self):
    """Fill this round of the mirror board (player 1's cell first, then player -1's)."""
    for slot, offset in ((0, 0), (1, 7)):
      cell = self.turn + offset
      if random.uniform(0, 1) > 0.8:
        self.mirrorGame[cell] = random.choice([-1, 1])
      else:
        self.mirrorGame[cell] = self.tempMoves[slot] * -1

  def check_win(self): #mandatory: return if game is finished
    """Return True once no cell is empty, updating the winner at that point."""
    finished = 0 not in self.state
    if finished:
      self.find_winner()
    return finished

  def get_moves(self, state, player): #mandatory: return all possible board states after 'player' moves, in 'state'
    """Return the two successor states (defect first, then cooperate)."""
    # Unwrap a state that arrives nested inside one or more lists.
    while isinstance(state[0], list):
      state = state[0]

    cell = self.turn + (7 if player == -1 else 0)
    return [state[:cell] + [choice] + state[cell + 1:] for choice in (-1, 1)]

  def get_state(self): #mandatory: return board state
    """Return the live state list."""
    return self.state

  def get_reward(self, player): #mandatory: return rewards for current board state for 'player'
    """Score `player` against the mirror game and against the opponent."""
    spots = {1: 0, -1: 1}
    own = self.playerScores[spots[player]]
    mirror = self.mirrorScores[spots[player]]
    rival = self.playerScores[spots[player * -1]]

    reward = 0
    # Not beating the mirror game costs 1; beating it earns 0.5.
    reward += -1 if mirror >= own else 0.5
    # Trailing the opponent costs 0.5; otherwise earn 1.
    reward += -0.5 if rival > own else 1
    return reward

  def get_winner(self): #mandatory: return winner
    """Return the recorded winner."""
    return self.winner

  def find_winner(self):
    """Set the winner to the higher-scoring player; equal scores change nothing."""
    first, second = self.playerScores
    if first > second:
      self.winner = 1
    elif first < second:
      self.winner = -1

  def find_scores(self):
    """Add this round's payoffs to both the real and the mirror score boards."""
    payoffs = {(1, 1): (3, 3), (1, -1): (0, 5), (-1, 1): (5, 0)}
    rnd = self.turn

    boards = ((self.state, self.playerScores), (self.mirrorGame, self.mirrorScores))
    for board, scores in boards:
      # Any other pairing (including mutual defection) scores 1 each.
      gain_first, gain_second = payoffs.get((board[rnd], board[rnd + 7]), (1, 1))
      scores[0] += gain_first
      scores[1] += gain_second

  def _print_board(self, cells):
    """Print 14 cells as two rows of seven followed by a rule."""
    syms = ['C' if x == 1 else 'D' if x == -1 else ' ' for x in cells]
    print("".join(sym + "|" for sym in syms[:7]))
    print("".join(sym + "|" for sym in syms[7:]), end="")
    print("\n--------------")

  def print_game(self): #mandatory: print game board
    """Print the real game board."""
    self._print_board(self.state)

  def print_mirror(self):
    """Print the mirror game board."""
    self._print_board(self.mirrorGame)

TicTacToe

In [None]:
class TicTacToe():
  """Tic-tac-toe on a flat 9-cell board (1 = X, -1 = O, 0 = empty).

  `winner` is None while the game is in progress, 0 after a tie and
  1 / -1 after a win. It is only updated by check_win and its helpers.
  """

  def __init__(self):
    self.reset_game()

  def reset_game(self): #mandatory: reset board
    """Clear the board and mark the game as in progress."""
    self.state = [0,0,0,
                  0,0,0,
                  0,0,0]
    # None (not 0) so an unfinished game is distinguishable from a tie.
    self.winner = None
    self.player = 1
    self.turn = 0

  def make_move(self, new_state): #mandatory: in board state after move
    """Adopt `new_state` as the board and pass the turn to the other player."""
    self.state = new_state
    self.turn += 1
    self.player *= -1

  def check_win(self): #mandatory: return if game is finished
    """Return True when the game is won or tied, recording the winner."""
    return self.check_win_cond() or self.check_tie()

  def get_moves(self, state, player): #mandatory: return all possible board states after 'player' moves, in 'state'
    """Return one successor state per empty cell, with `player` placed there."""
    # Unwrap a state that arrives nested inside one or more lists.
    while isinstance(state[0], list):
      state = state[0]

    return [state[:i] + [player] + state[i+1:] for i in range(9) if state[i] == 0]

  def get_state(self): #mandatory: return board state
    """Return the current board list."""
    return self.state

  def get_reward(self, player): #mandatory: return rewards for current board state for 'player'
    """Return 1 for a win, -1 for a loss, 0.5 for a tie and 0 while unfinished."""
    if self.winner is None:
      return 0
    if self.winner == player:
      return 1
    if self.winner == 0:
      return 0.5
    return -1

  def get_winner(self): #mandatory: return winner
    """Return 1 or -1 for a win, 0 for a tie.

    An unfinished game also reports 0, matching the value callers saw
    before the in-progress state was tracked separately.
    """
    return 0 if self.winner is None else self.winner

  def check_win_cond(self):
    """Return True and record the winner if any row, column or diagonal is complete."""
    win_pos = [[0,1,2],[3,4,5],[6,7,8],[0,3,6],[1,4,7],[2,5,8],[0,4,8],[2,4,6]]

    for pos in win_pos:
      conf = [self.state[pos[0]], self.state[pos[1]], self.state[pos[2]]]
      if conf == [1,1,1]:
        self.winner = 1
        return True
      elif conf == [-1,-1,-1]:
        self.winner = -1
        return True

    return False

  def check_tie(self):
    """Return True and record a tie once nine moves have been made."""
    if self.turn >= 9:
      self.winner = 0
      return True
    return False

  def print_game(self): #mandatory: print game board
    """Print the board as a 3x3 grid of X, O and blanks."""
    syms = ['X' if x==1 else 'O' if x==-1 else ' ' for x in self.state]
    print("\n"+syms[0]+"|"+syms[1]+"|"+syms[2])
    print("-----")
    print(syms[3]+"|"+syms[4]+"|"+syms[5])
    print("-----")
    print(syms[6]+"|"+syms[7]+"|"+syms[8])


# Usage

In [None]:
#@title Initialize Game. Type in games class name. Example class_name: "TicTacToe"
# Create the notebook-wide Manager and bind it to the game class named in the
# form field. set_game reports problems by printing a message rather than
# raising, so check the cell output before continuing.
class_name = "TicTacToe" #@param {type:"string"}
manager = Manager()
manager.set_game(class_name)

In [None]:
#@title Initialize Trainer. Example layers: "18,9"
# Parse the comma-separated hidden layer sizes and build the trainer.
# The trainer is only (re)created when every layer size parses as an integer;
# on bad input the message is printed and any existing trainer is left as is.
layers = "18,9" #@param {type:"string"}
exploration_factor = 0.8 #@param {type:"slider", min:0, max:1, step:0.01}
learning_factor = 0.5 #@param {type:"slider", min:0, max:1, step:0.01}

try:
  layers = [int(x) for x in layers.split(",")]
except ValueError:
  print("Invalid layers input.")
else:
  manager.set_trainer(layers,learning_factor,exploration_factor)

In [None]:
#@title Load model from file. 0 - first player, 1 - second player.
# Replace one agent's network with a model stored at `file_name`.
# Requires a trainer: get_trainer() returns None until set_trainer has succeeded.
file_name = "player1" #@param {type:"string"}
player_number =  0#@param {type:"integer"}

manager.get_trainer().load_agent(player_number, file_name)

In [None]:
#@title Change exploration and learning factors of agents.
# tune_agents takes both exploration factors first, then both learning factors.
# Requires a trainer: get_trainer() returns None until set_trainer has succeeded.
player1_exp_factor = 0.8 #@param {type:"slider", min:0, max:1, step:0.01}
player1_lrn_factor = 0.5 #@param {type:"slider", min:0, max:1, step:0.01}
player2_exp_factor = 0.8 #@param {type:"slider", min:0, max:1, step:0.01}
player2_lrn_factor = 0.5 #@param {type:"slider", min:0, max:1, step:0.01}

manager.get_trainer().tune_agents(player1_exp_factor, player2_exp_factor, player1_lrn_factor, player2_lrn_factor)

In [None]:
#@title Train agents.
# Each outer loop plays `training_loops` self-play training games and then,
# unless testing is disabled (testing_loops == 0), prints each agent's win rate
# over `testing_loops` games of greedy play against a randomly moving opponent.
global_loops = 20 #@param {type:"slider", min:1, max:100, step:1}
training_loops = 50 #@param {type:"slider", min:0, max:500, step:5}
testing_loops = 200 #@param {type:"slider", min:0, max:1000, step:10}

for i in range(global_loops):
  # The progress label ends with ": " so the test results continue on the same line.
  print("\nLoop ", i+1,end = ": ")
  manager.train_agents(training_loops)
  if testing_loops>0:
    manager.test_agents(testing_loops)

In [None]:
#@title Spar agents.
# Play `loops` games between the two agents using their current explore
# factors and print both win rates.
# Requires a trainer: get_trainer() returns None until set_trainer has succeeded.
loops = 1000 #@param {type:"integer"}

manager.get_trainer().spar_agents(loops)

In [None]:
#@title Save model to file. 0 - first player, 1 - second player.
# Write the selected agent's network to `file_name` via the model's save method.
# Requires a trainer: get_trainer() returns None until set_trainer has succeeded.
file_name = "player1" #@param {type:"string"}
player_number =  0#@param {type:"integer"}
manager.get_trainer().save_agent(player_number, file_name)

In [None]:
#@title Watch one game between agents. 
# Play a single game with the agents' current settings, printing the board
# before every move and once more at the end.
# Requires a trainer: get_trainer() returns None until set_trainer has succeeded.
manager.get_trainer().watch_game()