In [None]:
import os
import random
from collections import deque

import matplotlib.pyplot as plt
import numpy as np
from IPython.display import clear_output
from keras.layers import Dense
from keras.models import Sequential
from keras.optimizers import Adam

In [None]:
# Interactive backend: the training cell below creates a figure once and
# refreshes it in place with fig.canvas.draw() while the loop is running.
%matplotlib notebook

In [None]:
class bingo_agent:
    """Bingo player that chooses which number to call with a small Q-network.

    The board is a 5x5 grid holding a permutation of 1..25, stored flat in
    ``self.grid``.  A game state is a ``(1, 25)`` array whose entry ``i`` is 1
    once grid cell ``i`` has been marked and 0 otherwise.  Actions are flat
    cell indices (0..24).
    """

    def __init__(self,state_size,action_size,epsilon,epsilon_decay):
        """Set hyper-parameters, build the network and draw a first grid.

        Args:
            state_size: length of the flat state vector (network input size).
            action_size: number of selectable cells (network output size).
            epsilon: initial probability of taking a random action.
            epsilon_decay: factor applied to epsilon after every ``train`` call
                while epsilon is above ``Epsilon_min``.
        """
        self.state_size = state_size
        self.action_size = action_size
        self.learning_rate = 0.1
        self.Epsilon = epsilon
        self.Gamma = 0.9            # discount applied to the best next-state Q-value
        self.Epsilon_decay = epsilon_decay
        self.Epsilon_min = 0.5      # decay floor; lowered over time via change_epsilon
        self.batch_size = 5
        self.memory = deque(maxlen = 30)   # replay buffer, oldest transitions drop out
        self.model = self.buildModel()
        self.create_grid()

    def create_grid(self):
        """Replace the grid with a fresh random permutation of 1..25."""
        self.grid = np.random.choice(range(1,26),25,replace = False)

    def show_grid(self,state):
        """Print the grid as 5x5, showing 0 in every cell already marked."""
        temp_grid = np.copy(self.grid)
        temp_grid[np.asarray(state)[0, :25] == 1] = 0
        print (np.reshape(temp_grid,[5,5]))

    def perform_move(self,state,action):
        """Mark cell ``action`` and score the resulting board.

        The input array is left untouched; a marked copy is returned.  This
        keeps ``state`` and the returned next state distinct objects, so a
        (state, next_state) pair handed to ``store`` really describes a
        before/after transition.

        Returns:
            (next_state, reward, done, value) where reward is 5 per completed
            line, done is True once five lines are complete, and value is the
            number written in the marked cell.
        """
        next_state = np.array(state)
        next_state[0,action] = 1
        lines = self.find_done(next_state)
        done = lines >= 5
        reward = lines*5
        value = self.grid[action]
        return next_state,reward,done,value

    def find_done(self,state):
        """Count completed rows, columns and diagonals, capped at 5."""
        marked = (np.asarray(state)[0, :25] == 1).reshape(5, 5)
        lines = (
            int(marked.all(axis = 1).sum())                # rows
            + int(marked.all(axis = 0).sum())              # columns
            + int(marked.diagonal().all())                 # cells 0, 6, 12, 18, 24
            + int(np.fliplr(marked).diagonal().all())      # cells 4, 8, 12, 16, 20
        )
        return min(lines, 5)

    def convert_action(self,value):
        """Return the flat grid index that holds the number ``value``.

        Raises IndexError when ``value`` is not on the grid.
        """
        return np.where (self.grid == value)[0][0]

    def buildModel(self):
        """Build and compile the Q-network (two hidden ReLU layers of 30 units)."""
        model = Sequential()
        model.add(Dense(30,input_dim = self.state_size,activation = 'relu'))
        model.add(Dense(30,activation = 'relu'))
        model.add(Dense(self.action_size,activation = 'linear'))
        # The optimizer's step-size keyword is spelled `learning_rate` in
        # current Keras and `lr` in old releases; try the current name first
        # and fall back so the notebook runs on either.
        try:
            optimizer = Adam(learning_rate = self.learning_rate)
        except (TypeError, ValueError):
            optimizer = Adam(lr = self.learning_rate)
        model.compile(loss = 'mse', optimizer = optimizer)
        return model

    def chooseAction(self,state,lvl = 1):
        """Pick a cell to mark using an epsilon-greedy policy.

        With probability ``Epsilon`` a random cell is drawn, otherwise the cell
        with the highest predicted Q-value.  If the candidate is already
        marked, a random unmarked cell is returned instead.  ``lvl`` is
        accepted for compatibility and not used.
        """
        if (np.random.uniform() <= self.Epsilon):
            action = random.randrange(self.action_size)
        else:
            # Previously this ran unconditionally and overwrote the random
            # choice above, so the agent never explored.
            action = np.argmax(self.model.predict(state, verbose = 0))

        if (state[0,action] == 1):
            action = random.choice([i for i in range(self.state_size) if state[0,i] == 0])

        return action

    def train(self):
        """Fit the network on one random mini-batch from the replay buffer.

        Each sampled transition is fitted separately towards the target
        ``reward + Gamma * max Q(next_state)`` (just ``reward`` for terminal
        transitions).  Epsilon is decayed afterwards while above its floor.

        Returns:
            The summed loss over the batch, or 0 without training when the
            buffer does not yet hold ``batch_size`` transitions.
        """
        if len(self.memory) < self.batch_size:
            return 0

        batch = random.sample(list(self.memory),self.batch_size)
        loss = 0
        for state,action,reward,next_state,done in batch:
            if done:
                target = reward
            else:
                target = reward + self.Gamma * np.amax(self.model.predict(next_state, verbose = 0))
            current = self.model.predict(state, verbose = 0)
            current[0][action] = target   # only the taken action's Q-value is corrected
            loss += self.model.fit(state,current,epochs=1,verbose=0).history['loss'][0]
        if (self.Epsilon > self.Epsilon_min):
            self.Epsilon *= self.Epsilon_decay
        return loss

    def store(self,state,action,reward,next_state,done):
        """Append one transition to the replay buffer."""
        self.memory.append((state,action,reward,next_state,done))

    def change_epsilon(self, epsilon,epsilon_min):
        """Override epsilon and/or lower its floor.

        Args:
            epsilon: new epsilon, or -1 to leave the current value unchanged.
            epsilon_min: amount subtracted from ``Epsilon_min``; only applied
                while the floor is still at least 0.1.
        """
        if (epsilon != -1):
            self.Epsilon = epsilon
        if (self.Epsilon_min >= 0.1):
            self.Epsilon_min -= epsilon_min

    def load(self, name):
        """Load network weights from the file ``name``."""
        self.model.load_weights(name)

    def save(self, name):
        """Save network weights to the file ``name``."""
        self.model.save_weights(name)

In [None]:
class bingo_player:
    """Board bookkeeping for a human-controlled Bingo player.

    The board is a 5x5 grid holding a permutation of 1..25, stored flat in
    ``self.grid``.  A game state is a ``(1, 25)`` array whose entry ``i`` is 1
    once grid cell ``i`` has been marked and 0 otherwise.
    """

    def __init__(self):
        """Start with a freshly shuffled grid."""
        self.create_grid()

    def create_grid(self):
        """Replace the grid with a fresh random permutation of 1..25."""
        self.grid = np.random.choice(range(1,26),25,replace = False)

    def show_grid(self,state):
        """Print the grid as 5x5, showing 0 in every cell already marked."""
        temp_grid = np.copy(self.grid)
        temp_grid[np.asarray(state)[0, :25] == 1] = 0
        print (np.reshape(temp_grid,[5,5]))

    def perform_move(self,state,action):
        """Mark cell ``action`` and score the resulting board.

        The input array is left untouched; a marked copy is returned, so the
        caller must use the returned state.

        Returns:
            (next_state, reward, done, value) where reward is 5 per completed
            line, done is True once five lines are complete, and value is the
            number written in the marked cell.
        """
        next_state = np.array(state)
        next_state[0,action] = 1
        lines = self.find_done(next_state)
        done = lines >= 5
        reward = lines*5
        value = self.grid[action]
        return next_state,reward,done,value

    def find_done(self,state):
        """Count completed rows, columns and diagonals, capped at 5."""
        marked = (np.asarray(state)[0, :25] == 1).reshape(5, 5)
        lines = (
            int(marked.all(axis = 1).sum())                # rows
            + int(marked.all(axis = 0).sum())              # columns
            + int(marked.diagonal().all())                 # cells 0, 6, 12, 18, 24
            + int(np.fliplr(marked).diagonal().all())      # cells 4, 8, 12, 16, 20
        )
        return min(lines, 5)

    def convert_action(self,value):
        """Return the flat grid index that holds the number ``value``.

        Raises IndexError when ``value`` is not on the grid.
        """
        return np.where (self.grid == value)[0][0]

In [None]:
# Seed every RNG the notebook draws from: NumPy shuffles the grids and gates
# exploration, while the stdlib `random` module picks exploratory moves and
# samples replay batches.
SEED = 2
np.random.seed(SEED)
random.seed(SEED)

Episodes = 100000          # number of games played by each game loop
state_size = 25            # one flag per cell of the 5x5 board
action_size = 25           # one action per cell
human_tpa = 200000         # a human joins every human_tpa-th episode; agents act greedily from this episode on
show_game_a = 10000        # boards are printed (and the epsilon floor lowered) every show_game_a episodes
graph_update_freq = 1000   # the loss plot is redrawn every graph_update_freq episodes

In [None]:
# Two identically configured learners for self-play (fully exploratory at the
# start, epsilon multiplied by 0.9999 after each training call) plus the board
# used by the human player.
agent1, agent2 = (
    bingo_agent(state_size, action_size, epsilon = 1.0, epsilon_decay = 0.9999)
    for _ in range(2)
)
human = bingo_player()

In [None]:
# Resume from previously saved weights when they are present.  On a first run
# the files do not exist yet, so the agents keep their fresh initial weights
# instead of aborting the notebook.
for agent, weights_path in ((agent1, "Bingo-P1.h5"), (agent2, "Bingo-P2.h5")):
    if os.path.isfile(weights_path):
        agent.load(weights_path)
    else:
        print("No saved weights at {}; starting from scratch.".format(weights_path))

In [None]:
# Summed training loss of each agent, one entry per finished episode.
# Plain lists: appending is O(1), unlike np.append which copies the whole
# array on every episode.
loss1 = []
loss2 = []

fig = plt.figure()
ax1 = fig.add_subplot(211)
ax2 = fig.add_subplot(212)
ax1.title.set_text('Agent 1 loss')
ax2.title.set_text('Agent 2 loss')
# One line per agent, updated in place on every refresh.  Calling ax.plot()
# at each refresh would stack a new line on top of all previous ones.
loss_line1, = ax1.plot([], [])
loss_line2, = ax2.plot([], [])
plt.ion()
fig.show()
fig.canvas.draw()

SUMMARY = "Episode: {}/{}, e: {:.2}, Steps: {}, Winner: Player 1 : {}, Player 2 : {}, Human : {}"


def show_boards(agent_states, human_state = None):
    """Print both agents' boards, plus the human's board when a state is given."""
    print ('Player 1 :')
    agent1.show_grid(agent_states[0])
    print ('Player 2 :')
    agent2.show_grid(agent_states[1])
    if human_state is not None:
        print ('Human :')
        human.show_grid(human_state)


for e in range(Episodes):
    state1 = np.zeros((1, state_size))
    state2 = np.zeros((1, state_size))
    state3 = np.zeros((1, state_size))

    agent1.create_grid()
    agent2.create_grid()
    human.create_grid()

    steps = 0

    done1 = False
    done2 = False
    done3 = False

    human_round = (e % human_tpa == 0)     # the human plays along in this episode
    show_round = (e % show_game_a == 0)    # boards are printed in this episode

    if (e >= human_tpa):
        agent1.change_epsilon(0.0,0)
        agent2.change_epsilon(0.0,0)
    elif show_round:
        agent1.change_epsilon(-1,0.1)
        agent2.change_epsilon(-1,0.1)

    if human_round:
        input("Continue")
        show_boards((state1, state2), state3)
    elif show_round:
        show_boards((state1, state2))

    while not (done1 or done2 or done3):
        steps += 1

        # --- Agent 1 calls a number; every other player marks it too. ---
        # Snapshot the board before and after the move: the live state arrays
        # keep changing as later numbers are marked, so storing them directly
        # would leave identical, ever-changing state/next_state pairs in the
        # replay buffer.
        state_before = state1.copy()
        action1 = agent1.chooseAction(state1)
        state1,reward1,done1,value = agent1.perform_move(state1,action1)
        state2,_,done2,_ = agent2.perform_move(state2,agent2.convert_action(value))
        agent1.store(state_before,action1,reward1,state1.copy(),done1)
        if human_round:
            state3,_,done3,_ = human.perform_move(state3,human.convert_action(value))

        if (done1 or done2 or done3):
            break

        # --- Agent 2 calls a number; every other player marks it too. ---
        state_before = state2.copy()
        action2 = agent2.chooseAction(state2)
        state2,reward2,done2,value = agent2.perform_move(state2,action2)
        state1,_,done1,_ = agent1.perform_move(state1,agent1.convert_action(value))
        agent2.store(state_before,action2,reward2,state2.copy(),done2)
        if human_round:
            state3,_,done3,_ = human.perform_move(state3,human.convert_action(value))

        if (done1 or done2 or done3):
            break

        if human_round:
            # --- The human calls a number. ---
            show_boards((state1, state2), state3)
            # Every grid is a permutation of 1..25, so any unmarked number in
            # that range is a legal call; anything else is asked for again
            # instead of crashing the training run.
            while True:
                try:
                    value = int(input("Your choice "))
                except ValueError:
                    print("Please enter a whole number.")
                    continue
                if 1 <= value <= 25 and state3[0,human.convert_action(value)] == 0:
                    break
                print("Pick a number from 1 to 25 that is still unmarked on your grid.")

            state3,_,done3,_ = human.perform_move(state3,human.convert_action(value))
            state2,_,done2,_ = agent2.perform_move(state2,agent2.convert_action(value))
            state1,_,done1,_ = agent1.perform_move(state1,agent1.convert_action(value))
        elif show_round:
            show_boards((state1, state2))

    loss1.append(agent1.train())
    loss2.append(agent2.train())
    if (e % graph_update_freq == 0):
        episode_axis = np.arange(len(loss1))
        loss_line1.set_data(episode_axis, loss1)
        loss_line2.set_data(episode_axis, loss2)
        for ax in (ax1, ax2):
            ax.relim()            # recompute data limits from the updated line
            ax.autoscale_view()   # and rescale the axes to them
        fig.canvas.draw()

    if human_round or show_round:
        show_boards((state1, state2), state3 if human_round else None)
        print(SUMMARY.format(e, Episodes, agent1.Epsilon,steps,done1,done2,done3))

    # Single-line progress indicator, overwritten in place on each episode.
    print(SUMMARY.format(e, Episodes, agent2.Epsilon,steps,done1,done2,done3),end = '\r')

## Computer vs Human: play against trained agent 1

In [None]:
# Switch exploration off so the agent plays its learned policy.  Only agent1
# takes part in these games; agent2 is set as well, as before, so both agents
# are left in the same state.
agent1.change_epsilon(0.0,0)
agent2.change_epsilon(0.0,0)

for e in range(Episodes):
    state1 = np.zeros((1, state_size))
    state3 = np.zeros((1, state_size))

    agent1.create_grid()
    human.create_grid()

    steps = 0

    done1 = False
    done3 = False

    input("Continue")
    print ('Player 1 :')
    agent1.show_grid(state1)
    print ('Human :')
    human.show_grid(state3)

    while not (done1 or done3):
        steps += 1

        # --- The agent calls a number; the human marks it too. ---
        action1 = agent1.chooseAction(state1)
        state1,_,done1,value = agent1.perform_move(state1,action1)
        state3,_,done3,_ = human.perform_move(state3,human.convert_action(value))

        if (done1 or done3):
            break

        # --- The human calls a number; the agent marks it too. ---
        print ('Player 1 :')
        agent1.show_grid(state1)
        print ('Human :')
        human.show_grid(state3)
        # Every grid is a permutation of 1..25, so any unmarked number in that
        # range is a legal call; anything else is asked for again instead of
        # raising.
        while True:
            try:
                value = int(input("Your choice "))
            except ValueError:
                print("Please enter a whole number.")
                continue
            if 1 <= value <= 25 and state3[0,human.convert_action(value)] == 0:
                break
            print("Pick a number from 1 to 25 that is still unmarked on your grid.")

        state3,_,done3,_ = human.perform_move(state3,human.convert_action(value))
        state1,_,done1,_ = agent1.perform_move(state1,agent1.convert_action(value))

    print ('Player 1 :')
    agent1.show_grid(state1)
    print ('Human :')
    human.show_grid(state3)

    # Report only the two players of this game.  The earlier summary also
    # printed `done2`, which this cell never sets (stale from the training
    # cell, or undefined on a fresh kernel), and agent2's epsilon.
    print("Episode: {}/{}, e: {:.2}, Steps: {}, Winner: Player 1 : {}, Human : {}"
          .format(e, Episodes, agent1.Epsilon,steps,done1,done3))

In [None]:
# Scratch check: print 100 draws from NumPy's uniform generator.
# Note that running this cell advances the global NumPy random state.
for _ in range(100):
    draw = np.random.uniform()
    print (draw)

In [None]:
# Persist both networks' weights so a later session can load them again.
for agent, weights_path in ((agent1, "Bingo-P1.h5"), (agent2, "Bingo-P2.h5")):
    agent.save(weights_path)

In [None]:

# Scratch cell: checks that a figure can be refreshed live from inside a loop.
x = np.array([1])

fig = plt.figure()
ax1 = fig.add_subplot(211)
ax2 = fig.add_subplot(212)
ax1.title.set_text('First Plot')
ax2.title.set_text('Second Plot')
# Create each line once and update its data below; calling ax.plot() inside
# the loop would add a new line artist to the axes on every iteration.
line1, = ax1.plot([], [])
line2, = ax2.plot([], [])
plt.ion()

fig.show()
fig.canvas.draw()

for i in range(0,100):
    x = np.append(x,i%4)
    positions = np.arange(x.size)
    line1.set_data(positions, x)
    line2.set_data(positions, x%8)
    for ax in (ax1, ax2):
        ax.relim()            # recompute data limits from the updated line
        ax.autoscale_view()   # and rescale the axes to them
    fig.canvas.draw()