In [56]:
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np
import copy
from keras.optimizers import Adam,RMSprop
from keras.models import Sequential
from keras.layers import *
from keras.optimizers import *
import pandas as pd
import seaborn as sns
import random
import math
%run AIPlayer.ipynb
%run SmallGame.ipynb
%run Game.ipynb
%run DDQNAgent.ipynb
%run SmallAgent.ipynb

In [57]:
#Training phase for 8x8 Agent
def initialize_game(agent):
    print("Starting random agent:")
    #creating memory for PER with random matches
    i=0
    while i < agent.memory_cap:
        b=Board(verbose=False)
        pl=AI(0)
        opp=AI(0)
        d=b.get_game_over()
        while d==False and i< agent.memory_cap:
            state_init1 = agent.get_state(b)
            action=pl.move(b)
            reward=agent.set_reward(b,action)
            b.coord_move(action)
            d=b.get_game_over()
            if(d==False):
                b.coord_move(opp.move(b))
                next_state= agent.get_state(b)
                d=b.get_game_over()
            else:
                next_state=np.zeros([64])
            next_state= agent.get_state(b)
            d=b.get_game_over()
            sample=(state_init1, action,reward, next_state, d)
            #I use the reward as error to remember in the memory
            error=abs(sample[2])
            i+=1
            agent.memory.add(error,sample)
    print("Ending of Random creating memory")
    
    
def plot_seaborn_score(array_counter, array_score):
    sns.set(color_codes=True)
    ax = sns.regplot(np.array([array_counter])[0], np.array([array_score])[0], color="b", x_jitter=.1, line_kws={'color':'green'})
    ax.set(xlabel='games', ylabel='score')
    plt.show()
    
def plot_seaborn_win(array_counter, array_win):
    sns.set(color_codes=True)
    ax = sns.regplot(np.array([array_counter])[0], np.array([array_win])[0], color="b", x_jitter=.1, line_kws={'color':'green'})
    ax.set(xlabel='games', ylabel='wins')
    plt.show()
    
def plot_seaborn_predicted(array_counter, array_pred):
    sns.set(color_codes=True)
    ax = sns.regplot(np.array([array_counter])[0], np.array([array_pred])[0], color="b", x_jitter=.1, line_kws={'color':'green'})
    ax.set(xlabel='games', ylabel='number of predicted moves')
    plt.show()
    
def plot_seaborn_wrong(array_counter, array_wrong):
    sns.set(color_codes=True)
    ax = sns.regplot(np.array([array_counter])[0], np.array([array_wrong])[0], color="b", x_jitter=.1, line_kws={'color':'green'})
    ax.set(xlabel='games', ylabel='times where i did a not valid move')
    plt.show()
    
def get_record(score, record):
        if score >= record:
            return score
        else:
            return record 
    
def run():

    agent = DQNAgent()
    print(agent.model.summary())
    counter_games = 0
    counter_move = 0
    counter_notfin = 0
    score_plot = []
    win_plot = []
    wrong_plot = []
    counter_plot =[]
    counter_predicted=[]
    record = 0
    #filling the memory
    initialize_game(agent)
        
    while counter_games < 8000:
        # Initialize classes
        board=Board(verbose=False)
        opponent = AI(0)
        counter_pred = 0        
        wrong_move=False
        
        done=board.get_game_over()

        while not done:
            
            #get old state
            state_old = agent.get_state(board)
            counter_move +=1
            
            #predict the move of the agent
            if random.random() < agent.epsilon:
                if(agent.epsilon < 0.01):
                    print("Possible move:")
                    state_old = agent.get_state(board)
                    possible_moves = board.get_valid_moves()  
                    possible_moves=list(zip(*possible_moves))
                    weights = np.zeros(len(possible_moves[0]))
                    p = np.exp(weights)
                    p /= np.sum(p)
                    choice = np.random.choice(range(len(weights)), p=p)
                    action = possible_moves[0][choice]
                else:
                    print("Random move:")
                    #giving out also wrong possibilities to get also their values for the q-function
                    mov=random.randint(0,63)
                    row=mov//8
                    column=mov%8
                    action=(row,column)
            else:
                print("Predicted move:")
                counter_pred +=1
                state_old = agent.get_state(board)
                pred=agent.predict(state_old.reshape((1,64)))[0]
                mov=np.argmax(pred)
                row=mov//8            
                column=mov%8
                action=(row,column)
    
            #get the reward of the move predicted
            reward= agent.set_reward(board,action)
                
            #perform new move and get new state
            #I also get if the game is ended or not
            print(action)
            try:
                board.coord_move(action)
                done=board.get_game_over()
            except:
                print("Player did a not valid move")
                #ending manually the loop
                wrong_move=True
                done=True
                
            #opponent make move
            #the next state is the state after the move of the opponent so it's again the move of the agent
            if(done==False):
                board.coord_move(opponent.move(board))
                next_state= agent.get_state(board)
                #inspecting if the game ended after the opponent move
                done=board.get_game_over()
            else:
                next_state=np.zeros([64])
            
            # store the new data into a long term memory for the all game
            agent.remember(state_old, action,reward, next_state, done)
            
            #train with replay new
            agent.replay()
            
            #save score for final plot
            record = get_record(board.get_black_score(), record)           
            
        counter_games += 1
        print('Game', counter_games, '      Score:', board.get_black_score())
        score_plot.append(board.get_black_score())
        if board.get_black_score()>=board.get_white_score():
            if wrong_move==True:
                #I did a non valid move and just for a case I had more points but i lost
                win_plot.append(0)
                wrong_plot.append(1)
            else:
                win_plot.append(1)
                wrong_plot.append(0)
        else:
            win_plot.append(0)
            wrong_plot.append(0)
        counter_plot.append(counter_games)
        counter_predicted.append(counter_pred)
    agent.model.save_weights('8x8_weights_new.hdf5')
    plot_seaborn_score(counter_plot, score_plot)
    plot_seaborn_win(counter_plot,win_plot)
    plot_seaborn_predicted(counter_plot,counter_predicted)
    plot_seaborn_wrong(counter_plot,wrong_plot)

  super().__init__(name, **kwargs)


FileNotFoundError: [Errno 2] Unable to open file (unable to open file: name = '8x8_weights_new.hdf5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)

In [None]:
#Training phase for 4x4 Agent
def initialize_gameS(agent):
    print("Starting random agent:")
    #creating memory for PER with random matches
    i=0
    while i < agent.memory_cap:
        b=SmallBoard(verbose=False)
        #b=Board(verbose=False)
        pl=AI(0)
        opp=AI(0)
        d=b.get_game_over()
        while d==False and i< agent.memory_cap:
            state_init1 = agent.get_state(b)
            action=pl.move(b)
            reward=agent.set_reward(b,action)
            b.coord_move(action)
            d=b.get_game_over()
            if(d==False):
                b.coord_move(opp.move(b))
                next_state= agent.get_state(b)
                d=b.get_game_over()
            else:
                next_state=np.zeros([16])
            next_state= agent.get_state(b)
            d=b.get_game_over()
            sample=(state_init1, action,reward, next_state, d)
            #I use the reward as error to remember in the memory
            error=abs(sample[2])
            i+=1
            agent.memory.add(error,sample)
    print("Ending of Random creating memory")
    
    
def plot_seaborn_scoreS(array_counter, array_score):
    sns.set(color_codes=True)
    ax = sns.regplot(np.array([array_counter])[0], np.array([array_score])[0], color="b", x_jitter=.1, line_kws={'color':'green'})
    ax.set(xlabel='games', ylabel='score')
    plt.show()
    
def plot_seaborn_winS(array_counter, array_win):
    sns.set(color_codes=True)
    ax = sns.regplot(np.array([array_counter])[0], np.array([array_win])[0], color="b", x_jitter=.1, line_kws={'color':'green'})
    ax.set(xlabel='games', ylabel='wins')
    plt.show()
    
def plot_seaborn_predictedS(array_counter, array_pred):
    sns.set(color_codes=True)
    ax = sns.regplot(np.array([array_counter])[0], np.array([array_pred])[0], color="b", x_jitter=.1, line_kws={'color':'green'})
    ax.set(xlabel='games', ylabel='number of predicted moves')
    plt.show()
    
def plot_seaborn_wrongS(array_counter, array_wrong):
    sns.set(color_codes=True)
    ax = sns.regplot(np.array([array_counter])[0], np.array([array_wrong])[0], color="b", x_jitter=.1, line_kws={'color':'green'})
    ax.set(xlabel='games', ylabel='times where i did a not valid move')
    plt.show()
    
def get_recordS(score, record):
        if score >= record:
            return score
        else:
            return record 
    
def runS():

    agent = DQNSmallAgent()
    print(agent.model.summary())
    counter_games = 0
    counter_move = 0
    counter_notfin = 0
    score_plot = []
    win_plot = []
    wrong_plot = []
    counter_plot =[]
    counter_predicted=[]
    record = 0
    #filling the memory
    initialize_gameS(agent)
        
    while counter_games < 7000:
        # Initialize classes
        board = SmallBoard(verbose=False)
        #b=Board(verbose=False)
        opponent = AI(0)
        counter_pred = 0        
        wrong_move=False
        
        done=board.get_game_over()

        while not done:
            
            #get old state
            state_old = agent.get_state(board)
            counter_move +=1
            
            #predict the move of the agent
            #action= agent.makemove(board)
            if random.random() < agent.epsilon:
                if(agent.epsilon < 0.01):
                    print("Possible move:")
                    state_old = agent.get_state(board)
                    possible_moves = board.get_valid_moves()  
                    possible_moves=list(zip(*possible_moves))
                    weights = np.zeros(len(possible_moves[0]))
                    p = np.exp(weights)
                    p /= np.sum(p)
                    choice = np.random.choice(range(len(weights)), p=p)
                    action = possible_moves[0][choice]
                else:
                    print("Random move:")
                    #giving out also wrong possibilities to get also their values for the q-function
                    mov=random.randint(0,15)
                    row=mov//4
                    column=mov%4
                    action=(row,column)
            else:
                print("Predicted move:")
                counter_pred +=1
                state_old = agent.get_state(board)
                pred=agent.predict(state_old.reshape((1,16)))[0]
                mov=np.argmax(pred)
                row=mov//4
                column=mov%4
                action=(row,column)
    
            #get the reward of the move predicted
            reward= agent.set_reward(board,action)
                
            #perform new move and get new state
            #I also get if the game is ended or not
            print(action)
            try:
                board.coord_move(action)
                done=board.get_game_over()
            except:
                print("Player did a not valid move")
                #ending manually the loop
                wrong_move=True
                done=True
                
            #opponent make move
            #the next state is the state after the move of the opponent so it's again the move of the agent
            if(done==False):
                board.coord_move(opponent.move(board))
                next_state= agent.get_state(board)
                #inspecting if the game ended after the opponent move
                done=board.get_game_over()
            else:
                next_state=np.zeros([16])
            
            # store the new data into a long term memory for the all game
            agent.remember(state_old, action,reward, next_state, done)
            
            #train with replay new
            agent.replay()
            
            #save score for final plot
            record = get_recordS(board.get_black_score(), record)           
            
        counter_games += 1
        print('Game', counter_games, '      Score:', board.get_black_score())
        score_plot.append(board.get_black_score())
        if board.get_black_score()>=board.get_white_score():
            if wrong_move==True:
                #I did a non valid move and just for a case I had more points but i lost
                win_plot.append(0)
                wrong_plot.append(1)
            else:
                win_plot.append(1)
                wrong_plot.append(0)
        else:
            win_plot.append(0)
            wrong_plot.append(0)
        counter_plot.append(counter_games)
        counter_predicted.append(counter_pred)
    agent.model.save_weights('4x4_weights_new.hdf5')
    plot_seaborn_scoreS(counter_plot, score_plot)
    plot_seaborn_winS(counter_plot,win_plot)
    plot_seaborn_predictedS(counter_plot,counter_predicted)
    plot_seaborn_wrongS(counter_plot,wrong_plot)

In [None]:
def runGames():
    b = SmallBoard()
    agent = DQNSmallAgent("4x4_weights_new.hdf5")
    opp = AI(0)
    counter_games = 0
    win_plot = []
    counter_plot = []
    counter_wrong = 0
    counter_pred = 0
    
    while counter_games < 1000:
        wrong_move = False
        i = 0
        b = SmallBoard()
        d = b.get_game_over()
        
        while not d:
            if i == 0:
                try:
                    move = agent.makemove(b)
                    print(move)
                    if not b.is_valid_move(move):
                        print("Invalid move by the agent")
                        wrong_move = True
                        counter_wrong += 1
                        break
                    b.coord_move(move)
                    counter_pred += 1
                except Exception as e:
                    print("Error:", e)
                    wrong_move = True
                    counter_wrong += 1
                    break
                i = 1
            else:
                opp_move = opp.move(b)
                if not b.is_valid_move(opp_move):
                    print("Invalid move by the opponent")
                    wrong_move = True
                    counter_wrong += 1
                    break
                b.coord_move(opp_move)
                i = 0
            
            d = b.get_game_over()
        
        if not wrong_move:
            if b.get_black_score() >= b.get_white_score():
                win_plot.append(1)
            else:
                win_plot.append(0)
        
        counter_games += 1
        counter_plot.append(counter_games)
    
    plot_seaborn_win(counter_plot, win_plot)
    
    if counter_pred > 0:
        print("The percentage of wrong moves is {:.4f}%".format(counter_wrong / counter_pred * 100))
    else:
        print("No predictions made, so percentage of wrong moves is not applicable.")
    
    print("The number of wrong moves is {}".format(counter_wrong))


In [None]:
#Plotting the Q-function
def plotQ():
    b=SmallBoard()
    agent = DQNSmallAgent("4x4_randomweights_3layers_good.hdf5")
    agentBad =DQNSmallAgent("4x4_weights_smalltrain.hdf5")
    b.human_move("B1")
    b.human_move("A3")
    state_old = agent.get_state(b)
    pred=agent.model.predict(state_old.reshape((1,16)))[0]
    predBad=agentBad.model.predict(state_old.reshape((1,16)))[0]
    mov=np.argmax(pred)
    row=mov//4
    column=mov%4
    action=(row,column)
    b.coord_move(action)
    print("\nQ-function of trained agent:")
    print(pred[0],"\t",pred[1],"\t" ,pred[2],"\t",pred[3])
    print(pred[4],"\t", pred[5],"\t", pred[6],"\t", pred[7])
    print(pred[8],"\t", pred[9],"\t", pred[10],"\t", pred[11])
    print(pred[12],"\t", pred[13],"\t", pred[14],"\t", pred[15])
    print("\nQ-function of short trained agent:")
    print(predBad[0],"\t",predBad[1],"\t" ,predBad[2],"\t",predBad[3])
    print(predBad[4],"\t", predBad[5],"\t", predBad[6],"\t", predBad[7])
    print(predBad[8],"\t", predBad[9],"\t", predBad[10],"\t", predBad[11])
    print(predBad[12],"\t", predBad[13],"\t", predBad[14],"\t", predBad[15])

In [None]:
runGames()

1 ◦ ◦ ◦ ◦
2 ◦ ○ ● ◦
3 ◦ ● ○ ◦
4 ◦ ◦ ◦ ◦
  A B C D


  super().__init__(name, **kwargs)


FileNotFoundError: [Errno 2] Unable to open file (unable to open file: name = '4x4_weights_new.hdf5', errno = 2, error message = 'No such file or directory', flags = 0, o_flags = 0)