In [1]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense
from keras.optimizers import Adam
from IPython.display import clear_output
import random

Using TensorFlow backend.


In [2]:
class bingo_agent:
    def __init__(self,state_size,action_size,epsilon,epsilon_decay):
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = 0.1
        self.Epsilon = epsilon
        self.Gamma = 0.9
        self.Epsilon_decay = epsilon_decay
        self.Epsilon_min = 0.5
        self.model = self.buildModel()
        self.grid = np.random.choice(range(1,26),25,replace = False)
        
    def create_grid(self):
        self.grid = np.random.choice(range(1,26),25,replace = False)   
    
    def show_grid(self,state):
        temp_grid = np.copy(self.grid)
        temp_grid[[i for i in range(25) if state[0,i] == 1]] = 0
        sq_grid = np.reshape(temp_grid,[5,5])
        print (sq_grid)
        
    def perform_move(self,state,action):
        state[0,action] = 1
        lines = self.find_done(state)
        if (lines >= 5):
            done = True
        else:
            done = False
        reward = lines*5
        value = self.grid[action]
        return state,reward,done,value
    
    def find_done(self,state):
        lines = 0
        
        for i in range(5):
            line = True
            for j in range(5):
                if (state[0,i*5 + j] != 1):
                    line = False
            if (line):
                lines += 1
                
        for i in range(5):
            line = True
            for j in range(5):
                if (state[0,i + j*5] != 1):
                    line = False
            if (line):
                lines += 1
        
        line = True
        for i in range(5):
            if (state[0,i*6] != 1): 
                line = False
        if (line):
            lines += 1
        
        line = True
        for i in range(5):
            if (state[0,4 + i*4] != 1):
                line = False
        if (line):
            lines += 1
        if lines > 5:
            lines = 5
        return lines
    
    def convert_action(self,value):
        return np.where (self.grid == value)[0][0]
    
    def buildModel(self):
        model = Sequential()
        model.add(Dense(30,input_dim = self.state_size,activation = 'relu'))
        model.add(Dense(30,activation = 'relu'))
        model.add(Dense(self.action_size,activation = 'linear'))
        model.compile(loss = 'mse', optimizer = Adam(lr = self.learning_rate))
        return model
    
    def chooseAction(self,state,lvl = 1):
        if (np.random.uniform() <= self.Epsilon):
            action = random.randrange(self.action_size)
        action = np.argmax(self.model.predict(state))
        if (state[0,action] == 1):
            lvl +=1
            if (lvl>20):
                action = random.choice([i for i in range(self.state_size) if state[0,i] == 0])
            else:
                action = self.chooseAction(state, lvl)
        return action
    
    def train(self,state,action,reward,next_state,done):
        if done:
            target = reward
            if (self.Epsilon > self.Epsilon_min):
                self.Epsilon *= self.Epsilon_decay
        else:
            target = reward + self.Gamma * np.amax(self.model.predict(next_state))
        current = self.model.predict(state)
        current[0][action] = target
        self.model.fit(state,current,epochs=1,verbose=0)
    
    def change_epsilon(self, epsilon,epsilon_min):
        if (epsilon != -1):
            self.Epsilon = epsilon
        if (self.Epsilon_min >= 0.1):
            self.Epsilon_min -= epsilon_min
        
    def load(self, name):
        self.model.load_weights(name)

    def save(self, name):
        self.model.save_weights(name)

In [3]:
class bingo_player:
    def __init__(self):
        self.grid = np.random.choice(range(1,26),25,replace = False)
        
    def create_grid(self):
        self.grid = np.random.choice(range(1,26),25,replace = False)   
    
    def show_grid(self,state):
        temp_grid = np.copy(self.grid)
        temp_grid[[i for i in range(25) if state[0,i] == 1]] = 0
        sq_grid = np.reshape(temp_grid,[5,5])
        print (sq_grid)
        
    def perform_move(self,state,action):
        state[0,action] = 1
        lines = self.find_done(state)
        if (lines >= 5):
            done = True
        else:
            done = False
        reward = lines*5
        value = self.grid[action]
        return state,reward,done,value
    
    def find_done(self,state):
        lines = 0
        
        for i in range(5):
            line = True
            for j in range(5):
                if (state[0,i*5 + j] != 1):
                    line = False
            if (line):
                lines += 1
                
        for i in range(5):
            line = True
            for j in range(5):
                if (state[0,i + j*5] != 1):
                    line = False
            if (line):
                lines += 1
        
        line = True
        for i in range(5):
            if (state[0,i*6] != 1): 
                line = False
        if (line):
            lines += 1
        
        line = True
        for i in range(5):
            if (state[0,4 + i*4] != 1):
                line = False
        if (line):
            lines += 1
        if lines > 5:
            lines = 5
        return lines
    
    def convert_action(self,value):
        return np.where (self.grid == value)[0][0]

In [4]:
np.random.seed(2)
Episodes = 1
state_size = 25
action_size = 25
agent1 = bingo_agent(state_size,action_size,1.0,0.999)
agent2 = bingo_agent(state_size,action_size,1.0,0.999)
human = bingo_player()

In [5]:
agent1.load("Bingo-P1.h5")
agent2.load("Bingo-P2.h5")

In [7]:
done = False
for e in range(Episodes):
    state1 = np.zeros(25)
    state1 = np.reshape(state1, [1,state_size])
    state2 = np.zeros(25)
    state2 = np.reshape(state2, [1,state_size])
    state3 = np.zeros(25)
    state3 = np.reshape(state3, [1,state_size])
    
    agent1.create_grid()
    agent1.create_grid()
    human.create_grid()
    
    steps = 0
    
    done1 = False
    done2 = False
    done3 = False
    
    if (e >= 0):
        agent1.change_epsilon(0.0,0)
        agent2.change_epsilon(0.0,0)
    elif (e % 5000 == 0):
        agent1.change_epsilon(-1,0.1)
        agent2.change_epsilon(-1,0.1)
    
    if (e % 5000 == 0):
        input("Continue")
        print ('Player 1 :')
        agent1.show_grid(state1)
        print ('Player 2 :')
        agent2.show_grid(state2)
        print ('Human :')
        human.show_grid(state3)
    elif (e % 1000 == 0):
        print ('Player 1 :')
        agent1.show_grid(state1)
        print ('Player 2 :')
        agent2.show_grid(state2)
        
    while done1 == False and done2 == False and done3 == False:
        steps += 1
        action1 = agent1.chooseAction(state1)
        next_state1,reward1,done1,value = agent1.perform_move(state1,action1)
        action2 = agent2.convert_action(value)
        state2,reward2,done2,_ = agent2.perform_move(state2,action2)
        agent1.train(state1,action1,reward1,next_state1,done1)
        state1 = next_state1
        if (e%5000 == 0):
            action3 = human.convert_action(value)
            state3,reward3,done3,value = human.perform_move(state3,action3)
            
        print (action1)
            
        if (done1 == True or done2 == True or done3 == True):
            break
            
        action2 = agent2.chooseAction(state2)
        next_state2,reward2,done2,value = agent2.perform_move(state2,action2)
        action1 = agent1.convert_action(value)
        state1,reward1,done1,_ = agent1.perform_move(state1,action1)
        agent2.train(state2,action2,reward2,next_state2,done2)
        state2 = next_state2
        if (e%5000 == 0):
            action3 = human.convert_action(value)
            state3,reward3,done3,value = human.perform_move(state3,action3)
            
        print (action2)
        
        if (done1 == True or done2 == True or done3 == True):
            break
        
        if (e % 5000 == 0):
            print ('Player 1 :')
            agent1.show_grid(state1)
            print ('Player 2 :')
            agent2.show_grid(state2)
            print ('Human :')
            human.show_grid(state3)
            value = input("Your choice ")
            value = int (value)
            
            action3 = human.convert_action(value)
            state3,reward3,done3,_ = human.perform_move(state3,action3)
            action2 = agent2.convert_action(value)
            state2,reward2,done2,_ = agent2.perform_move(state2,action2)
            action1 = agent1.convert_action(value)
            state1,reward1,done1,value = agent1.perform_move(state1,action1)
        elif (e % 1000 == 0):
            print ('Player 1 :')
            agent1.show_grid(state1)
            print ('Player 2 :')
            agent2.show_grid(state2)

    if (e % 5000 == 0):
        print ('Player 1 :')
        agent1.show_grid(state1)
        print ('Player 2 :')
        agent2.show_grid(state2)
        print ('Human :')
        human.show_grid(state3)
        print("Episode: {}/{}, e: {:.2}, Steps: {}, Winner: Player 1 : {}, Player 2 : {}, Human : {}"
          .format(e, Episodes, agent1.Epsilon,steps,done1,done2,done3))            
    elif (e % 1000 == 0):
        print ('Player 1 :')
        agent1.show_grid(state1)
        print ('Player 2 :')
        agent2.show_grid(state2)
        print("Episode: {}/{}, e: {:.2}, Steps: {}, Winner: Player 1 : {}, Player 2 : {}, Human : {}"
          .format(e, Episodes, agent1.Epsilon,steps,done1,done2,done3))            
    
    print("Episode: {}/{}, e: {:.2}, Steps: {}, Winner: Player 1 : {}, Player 2 : {}, Human : {}"
          .format(e, Episodes, agent1.Epsilon,steps,done1,done2,done3),end = '\r')
    e = -1

Continue
Player 1 :
[[ 6 22 18 11  7]
 [21 19 25  1 14]
 [24  2  4 23  8]
 [15  3 13 16  5]
 [17 20 10 12  9]]
Player 2 :
[[ 6 20  4 12 22]
 [ 1  7  2 25 14]
 [10 11  3  9 23]
 [24  8 18 16  5]
 [19 13 15 21 17]]
Human :
[[ 8  1 11 10  9]
 [ 5 20  6 24 23]
 [12 19 17 14 21]
 [22  3  2  7 13]
 [18 16 25  4 15]]
24
4
Player 1 :
[[ 6  0 18 11  7]
 [21 19 25  1 14]
 [24  2  4 23  8]
 [15  3 13 16  5]
 [17 20 10 12  0]]
Player 2 :
[[ 6 20  4 12  0]
 [ 1  7  2 25 14]
 [10 11  3  0 23]
 [24  8 18 16  5]
 [19 13 15 21 17]]
Human :
[[ 8  1 11 10  0]
 [ 5 20  6 24 23]
 [12 19 17 14 21]
 [ 0  3  2  7 13]
 [18 16 25  4 15]]
Your choice 2
10
14
Player 1 :
[[ 6  0 18 11  7]
 [21 19 25  1 14]
 [ 0  0  4  0  8]
 [15  3 13 16  5]
 [17 20 10 12  0]]
Player 2 :
[[ 6 20  4 12  0]
 [ 1  7  0 25 14]
 [10 11  3  0  0]
 [ 0  8 18 16  5]
 [19 13 15 21 17]]
Human :
[[ 8  1 11 10  0]
 [ 5 20  6  0  0]
 [12 19 17 14 21]
 [ 0  3  0  7 13]
 [18 16 25  4 15]]
Your choice 314


IndexError: index 0 is out of bounds for axis 0 with size 0

In [53]:
while (action3 != -1):
    action3 = input("Your choice ")
    print (action3)

Your choice w
w
Your choice 21
21
Your choice 21
21
Your choice 2
2
Your choice 3
3
Your choice 21
21
Your choice 21
21
Your choice 2
2
Your choice 312
312
Your choice 312
312
Your choice 312
312
Your choice 3
3
Your choice 1232
1232
Your choice 13
13
Your choice 12
12
Your choice 31
31
Your choice -1
-1


KeyboardInterrupt: 

In [61]:
agent1.save("Bingo-P1.h5")
agent2.save("Bingo-P2.h5")