# Игра в крестики-нолики

Для того чтобы запустить игру нужно загрузить все классы и код с игрой (выполнить все ячейки с кодом)

## Core классы игры

In [1]:
class Game:
    
    def __init__(self, field=None):
        self.field = None
        
        if field:
            self.field = field
        else:
            self.start()
    
    def start(self):
        self.field = [' ']*9

    def printField(self):
        row = ''
        for i in range(len(self.field)):
            cell = self.field[i]
            row+='['
            if cell != ' ':
                row+=cell
            else:
                row+=str(i+1)
            row+=']'
            if (i % 3 == 2): 
                print(row)
                row = ''
    
    def set(self, position, side):
        pos = int(position)-1
        self.field[pos] = side

    def getFree(self):
        free = []
        for i in range(len(self.field)):
            cell = self.field[i]
            if (cell == ' '):
                free.append( (i+1) )
        return free
    
    def isDraw(self):
        free = self.getFree();
        return len(free)==0;

    def isWin(self, side):
        for i in range(3):
            isW = True 
            for j in range(3):
                if self.field[i*3+j]!=side:
                    isW = False
                    break
            if isW:
                return isW
            
        for i in range(3):
            isW = True 
            for j in range(3):
                if self.field[j*3+i]!=side:
                    isW = False
                    break
            if isW:
                return isW
            
        isW = True;
        for i in range(3):
            if self.field[i*3+i]!=side:
                isW = False
                break
        if isW:
            return isW
        
        isWi = True;
        for i in range(3):
            if self.field[(i*3+2-i)]!=side:
                isW = False
                break
        if isW:
            return isW

        return False

    def getState(self, side):
        if side == 'x':
            return self.field

        newField = ''
        for i in range(len(self.field)):
            if self.field[i]=='x':
                newField += 'o'
            elif self.field[i]=='o':
                newField += 'x'
            else:
                newField += self.field[i] 

        return newField

In [2]:
import os
import json

class AI:
    def __init__(self):
        self.table = {}
        if os.path.isfile('./rewards.json'):
            with open('rewards.json') as json_file:
                self.table = json.load(json_file)
                print("loaded AI from rewards.json")

    def getReward(self, state):
        game = Game(state)
        
        #если победитель - мы, то оценка состояния игры "1"
        if game.isWin('x'):
            return 1

        #если победиль - соперник, то оценка состояния игры "0"
        if game.isWin('o'):
            return 0

        #смотрим ценность по таблице
        strstate = ''.join(state)
        if strstate in self.table.keys():
            return self.table[strstate]

        #если в таблице нет, то считаем начальной ценностью "0.5"
        return 0.5

    def correct(self, state, newReward):
        oldReward = self.getReward(state)
        strstate = ''.join(state)
        self.table[strstate] = oldReward + 0.1 * (newReward - oldReward)

    def save(self):
        with open('rewards.json', 'w') as outfile:
            json.dump(self.table, outfile)

In [3]:
import random 
import copy

class AIPlayer:
    
    def __init__(self, side, ai, isGreedy=True):
        self.side = side
        self.ai = ai
        self.oldState = None
        self.isGreedy = isGreedy

    def getSide(self):
        return self.side

    def makeStep(self, game):
        #получаем список доступных ходов
        free = game.getFree()
        
        #решаем, является ли текущий ход 
        #зондирующим (случайным) или жадным (максимально выгодным)
        
        if not self.isGreedy:
            #случайный ход
            print('Random step')
            step = random.choice(free)
            game.set(step, self.side)
            self.oldState = game.getState(self.side)
            return step

        #жадный ход
        rewards = {}
        for step in free:
            # для каждого доступного хода оцениваем состояние игры после него
            newGame = copy.deepcopy(game)
            newGame.set(step, self.side)
            rewards[step] = self.ai.getReward(newGame.getState(self.side))

        #выясняем, какое вознаграждение оказалось максимальным
        maxReward = 0
        for reward in rewards.values():
            if reward > maxReward:
                maxReward = reward

        #находим все шаги с максимальным вознаграждением
        steps = []
        
        for step in rewards:
            reward = rewards[step]
            if (maxReward > (reward - 0.01)) and (maxReward < (reward + 0.01)):
                steps.append(step)

        #корректируем оценку прошлого состояния
        #с учетом ценности нового состояния
        if (self.oldState):
            self.ai.correct(self.oldState, maxReward)

        #выбираем ход из ходов с максимальный вознаграждением
        step = random.choice(steps)
        game.set(step, self.side)

        #сохраняем текущее состояние для того, 
        #чтобы откорректировать её ценность на следующем ходе
        self.oldState = game.getState(self.side)
        return step

    def loose(self):
        #корректируем ценность предыдущего состояния при проигрыше
        if self.oldState:
            self.ai.correct(self.oldState, 0)

    def win(self):
        #корректируем ценность предыдущего состояния при выигрыше
        if self.oldState:
            self.ai.correct(self.oldState, 1)
    
    def draw(self):
        #корректируем ценность предыдущего состояния при ничьей
        if self.oldState:
            self.ai.correct(self.oldState, 0.5)

In [4]:
class UserPlayer:

    def __init__(self, side):
        self.side = side

    def getSide(self):
        return self.side

    def makeStep(self, game):
        game.printField()
        
        free = game.getFree()

        inp = None
        while (True):
            inp = input()
            if int(inp) in free:
                break
        
        game.set(inp, self.side)

    def loose(self):
        print('you loose')

    def win(self):
        print('you win')

    def draw(self):
        print('draw')

## Игра

In [5]:
print('Choose your side:')
print('X) x')
print('O) o')
print('Any other symbol if you would like to run AI vs AI')
side = input();

ai = AI()
gameCount = 1

playerX = None
playerO = None

if (side=='X') or (side=='x'):
    playerX = UserPlayer('x')
    playerO = AIPlayer('o', ai, True)
elif (side=='O') or (side=='o'):
    playerX = AIPlayer('x', ai, True)
    playerO = UserPlayer('o')
else:
    playerX = AIPlayer('x', ai, True)
    playerO = AIPlayer('o', ai, True)

    print('Enter games count:')
    gameCount = int(input())

    if (gameCount <= 0):
        gameCount = 1

game = Game()
for i in range(gameCount):
    print('New game', i+1)
    game.start()
    while (True):
        if game.isDraw():
            playerX.draw()
            playerO.draw()
            break

        playerX.makeStep(game)
        if game.isWin(playerX.getSide()):
            playerX.win()
            playerO.loose()
            break

        if game.isDraw():
            playerX.draw()
            playerO.draw()
            break

        field = playerO.makeStep(game)
        if game.isWin(playerO.getSide()):
            playerO.win()
            playerX.loose()
            break
    game.printField()

ai.save()

Choose your side:
X) x
O) o
Any other symbol if you would like to run AI vs AI

loaded AI from rewards.json
Enter games count:



ValueError: invalid literal for int() with base 10: ''

## Жадный Агент

При запуске жадного агента нужно чтобы файл rewards.json находился в том же каталоге что и запускаемый код

In [6]:
def state_to_gmstate(state):
    """
    Переводим состояние игры из формата массива в формат строки,
    который используется игрой
    """
    gmstate = []
    for row in state:
        for ch in row:
            if ch == -1:
                gmstate.append(' ')
            elif ch == 1:
                gmstate.append('x')
            else:
                gmstate.append('o')
    return gmstate

In [7]:
def gmstep_to_step(step):
    """
    Переводим обозначение хода из принятого в игре 
    в формат который ожидается на выходе агента
    """
    steps = {
        1:(1,1),
        2:(1,2),
        3:(1,3),
        4:(2,1),
        5:(2,2),
        6:(2,3),
        7:(3,1),
        8:(3,2),
        9:(3,3),
    }
    return (steps[step][0]-1,steps[step][1]-1)

In [8]:
class AgentTicTacGreedy:
  def __init__(self, is_zero):
    #is_zero == True если нолик
    self.player = None
    ai = AI()
    if is_zero:
        self.player = AIPlayer('o', ai, True)
    else:
        self.player = AIPlayer('x', ai, True)

  def get_action(self, state):
    # пример state = [[1,0,1],[0,1,0],[0,1,0]] 1 - это крестик, 0 - это нолик, -1 - пусто
    gmstate = state_to_gmstate(state)
    game = Game(field=gmstate)
    move = self.player.makeStep(game)
    return gmstep_to_step(move) # возвращаем координаты хода

  def is_done(self, state, reward):
    # reward - вознаграждение 1 если выиграли , вызывается когда игра закончена
    gmstate = state_to_gmstate(state)
    game = Game(field=gmstate)
    return game.isWin(self.player.getSide())
    

## Рандомный Агент

In [9]:
class AgentTicTacRandom:
  def __init__(self, is_zero):
    #is_zero == True если нолик
    self.player = None
    ai = AI()
    if is_zero:
        self.player = AIPlayer('o', ai, False)
    else:
        self.player = AIPlayer('x', ai, False)

  def get_action(self, state):
    # пример state = [[1,0,1],[0,1,0],[0,1,0]] 1 - это крестик, 0 - это нолик, -1 - пусто
    gmstate = state_to_gmstate(state)
    game = Game(field=gmstate)
    move = self.player.makeStep(game)
    return gmstep_to_step(move) # возвращаем координаты хода

  def is_done(self, state, reward):
    # reward - вознаграждение 1 если выиграли , вызывается когда игра закончена
    gmstate = state_to_gmstate(state)
    game = Game(field=gmstate)
    return game.isWin(self.player.getSide())

### Проверяем

In [10]:
greedy, rand = AgentTicTacGreedy(False), AgentTicTacRandom(True)

loaded AI from rewards.json
loaded AI from rewards.json


In [11]:
greedy.is_done([[1,0,1],[0,1,0],[0,1,0]], None)

False

In [12]:
rand.is_done([[0,0,1],[0,1,0],[0,1,1]], None)

True

In [13]:
greedy.get_action([[1,-1,1],[0,0,0],[0,1,0]])

(0, 1)

In [14]:
rand.get_action([[1,-1,1],[0,-1,0],[0,1,0]])

Random step


(0, 1)

# Игра в крестики-нолики - эволюционный агент

In [161]:
def invert_state(state):
    result = []
    for ch in state:
        newch = ch
        if ch == 'x':
            newch = 'o'
        if ch == 'o':
            newch = 'x'
        result.append(newch)
    return ''.join(result)

In [162]:
import os
import json
import copy
import random

FILENAME = 'strategies.json'
MUTATE_FRAC = 0.25

class AI_EV:
    def __init__(self):
        if os.path.isfile(f'./{FILENAME}'):
            with open(FILENAME) as json_file:
                self.table = json.load(json_file)
                self.temp_table = copy.deepcopy(self.table)
                print(f"loaded AI from {FILENAME}")
        else:
            raise FileNotFoundError('Error reading strategies file')
    
    def mutate(self):
        cnt = round(MUTATE_FRAC*len(self.temp_table))
        self.temp_table = copy.deepcopy(self.table)
        for state in random.choices(list(self.temp_table.keys()), k=cnt):
            indx = random.choice([i for i, ltr in enumerate(state) if ltr == ' '])
            new_strat = state[:indx] + 'x' + state[indx + 1:]
            self.temp_table[state] = new_strat

    def apply_selection(self):
        self.table = self.temp_table
        
    def get_step(self, state):
        if state not in self.temp_table.keys():
            return invert_state(self.temp_table[invert_state(state)])
        return self.temp_table[state]
        
    def add_step(self, step, act_step):
        self.temp_table[step] = act_step

    def save(self):
        with open(FILENAME, 'w') as outfile:
            json.dump(self.table, outfile)

In [163]:
class AIEVPlayer:
    
    def __init__(self, side, ai):
        self.side = side
        self.ai = ai
        self.ai.mutate()

    def getSide(self):
        return self.side

    def makeStep(self, game):
        curState = ''.join(game.getState(self.side))
        free = game.getFree()
        step = None
        
        if (not curState in self.ai.temp_table) and (not invert_state(curState) in self.ai.temp_table):
            #случайный ход
            print('Random step')
            step = random.choice(free)
            game.set(step, self.side)
            i = step-1
            self.ai.add_step(curState, curState[:i] + self.side + curState[i+1:])
            return step
        
        new_state = self.ai.get_step(curState)
        for i,ch in enumerate(new_state):
            if ch != curState[i]:
                step = i+1
                
        return step

    def loose(self):
        self.ai.mutate()

    def win(self):
        self.ai.apply_selection()
        self.ai.mutate()
    
    def draw(self):
        self.ai.mutate()

In [164]:
table = None
with open('rewards.json') as json_file:
    table = json.load(json_file)
    print(f"loaded rewards.json")

loaded rewards.json


In [165]:
def almost_equal(set_of_words,comp):
    ln = len(comp)
    for word in set_of_words:
        count = 0
        if len(word) == ln:
            for a, b in zip(word, comp):
                count += a != b
                if count == 2:
                    break
            else:
                yield word

In [166]:
strat_table = {}
for k in table:
    l = list(almost_equal(list(table.keys()), k))
    l.remove(k)
    if not l:
        continue
    vals = [table[k] for k in l]
    new_strat = l[vals.index(max(vals))]
    for i,ch in enumerate(k):
        if ch != new_strat[i]:
            if ch==' ':
                new_strat = new_strat[:i] + 'x' + new_strat[i+1:]
                if new_strat != k:
                    strat_table[k] = new_strat
                    break


In [167]:
a = AI_EV()

loaded AI from strategies.json


In [168]:
side = 'x'

ai =  AI_EV()
ai2 = AI()
gameCount = 5000
playerX = AIEVPlayer('x', ai)
playerO = AIPlayer('o', ai2, True)

game = Game()
for i in range(gameCount):
    print('New game', i+1)
    game.start()
    while (True):
        if game.isDraw():
            playerX.draw()
            playerO.draw()
            break

        playerX.makeStep(game)
        if game.isWin(playerX.getSide()):
            playerX.win()
            playerO.loose()
            break

        if game.isDraw():
            playerX.draw()
            playerO.draw()
            break

        field = playerO.makeStep(game)
        if game.isWin(playerO.getSide()):
            playerO.win()
            playerX.loose()
            break
    game.printField()

ai.save()

loaded AI from strategies.json
loaded AI from rewards.json
New game 1
Random step
Random step
Random step
[x][x][3]
[o][5][x]
[o][o][o]
New game 2
Random step
Random step
Random step
[x][x][o]
[o][x][o]
[7][o][o]
New game 3
Random step
Random step
[o][x][o]
[4][5][o]
[7][x][o]
New game 4
Random step
Random step
[1][x][o]
[4][5][o]
[o][x][o]
New game 5
Random step
[x][2][3]
[4][5][6]
[o][o][o]
New game 6
Random step
Random step
[1][o][3]
[x][o][6]
[o][o][x]
New game 7
Random step
Random step
Random step
[x][2][o]
[o][x][x]
[o][o][o]
New game 8
Random step
[o][2][3]
[o][x][6]
[o][8][9]
New game 9
Random step
[o][2][3]
[o][5][6]
[o][8][x]
New game 10
Random step
Random step
Random step
[x][o][x]
[4][o][6]
[o][o][x]
New game 11
Random step
Random step
Random step
Random step
[x][o][o]
[o][x][x]
[x][o][o]
New game 12
Random step
[o][2][3]
[o][x][6]
[o][8][9]
New game 13
Random step
Random step
[x][2][3]
[o][5][x]
[o][o][o]
New game 14
Random step
[o][2][3]
[o][5][6]
[o][8][x]
New game 15
Ra

In [169]:
class AgentTicTacEvo:
  def __init__(self, is_zero):
    #is_zero == True если нолик
    self.player = None
    ai = AI_EV()
    if is_zero:
        self.player = AIPlayer('o', ai, False)
    else:
        self.player = AIPlayer('x', ai, False)

  def get_action(self, state):
    # пример state = [[1,0,1],[0,1,0],[0,1,0]] 1 - это крестик, 0 - это нолик, -1 - пусто
    gmstate = state_to_gmstate(state)
    game = Game(field=gmstate)
    move = self.player.makeStep(game)
    return gmstep_to_step(move) # возвращаем координаты хода

  def is_done(self, state, reward):
    # reward - вознаграждение 1 если выиграли , вызывается когда игра закончена
    gmstate = state_to_gmstate(state)
    game = Game(field=gmstate)
    return game.isWin(self.player.getSide())

In [170]:
evo = AgentTicTacEvo(True)

loaded AI from strategies.json


In [171]:
rand.is_done([[0,0,1],[0,1,0],[0,1,1]], None)

True

In [172]:
rand.get_action([[1,-1,1],[0,-1,0],[0,1,0]])

Random step


(1, 1)