# Chapter 13: Deep Learning in Connect Four

New Skills in This Chapter

• Creating a Connect Four game environment

• Coding in complicated Connect Four game rules

• Simulating game data to train a deep neural network

• Designing deep learning game strategies for different players

***

***
*Predicting better than pure guesswork, even if not accurately, delivers real value. A
hazy view of what’s to come outperforms complete darkness by a landslide.*


–Eric Siegel, Predictive Analytics: The Power to Predict Who Will Click, Buy, Lie, or Die


***

In [1]:
import os

# Create the folder that holds this chapter's output files (simulated
# games, trained model, images); exist_ok=True makes re-running harmless.
os.makedirs("files/ch13", exist_ok=True)

# 13.1. Create A Connect Four Game Environment

## 13.1.2. Verify the Connect Four Game Environment

In [2]:
from utils.conn_env import conn

# Instantiate the Connect Four environment, put it in its starting
# position, and draw the board.
env = conn()
env.reset()
env.render()

You should see a separate turtle window, with a game board as follows: 
<img src="https://gattonweb.uky.edu/faculty/lium/ml/conn_start.png" />

If you want to close the game board window, use the *close()* method, like so:

In [3]:
# Close the game board window.
env.close()

In [4]:
# Build a fresh environment and inspect its action and observation spaces.
env = conn()

# how many different actions a player can choose from
number_actions = env.action_space.n
print("the number of possible actions is", number_actions)

# dimensions of the board array the environment reports as its state
observation_shape = env.observation_space.shape
print("the shape of the observation space is", observation_shape)

the number of possible actions is 7
the shape of the observation space is (7, 6)


The meanings of the actions in this game are as follows:
* 1: Placing a game piece in column 1
* 2: Placing a game piece in column 2
* ...
* 7: Placing a game piece in column 7


The state space is a matrix with 7 columns and 6 rows: 
* 0 means the cell is empty; 
* -1 means the cell is occupied by the yellow player; 
* 1 means the cell is occupied by the red player.

## 13.1.3. Play A Connect Four Game

In [5]:
import time
import random

def _ask_move(prompt, valid_moves):
    """Keep prompting until the user types one of ``valid_moves``.

    Non-numeric input and moves that are not currently allowed are
    rejected with a message instead of crashing or reaching env.step().
    """
    while True:
        raw = input(prompt)
        try:
            move = int(raw)
        except ValueError:
            print(f"'{raw}' is not a number; please try again")
            continue
        if move in valid_moves:
            return move
        print(f"{move} is not a valid move; choose from {list(valid_moves)}")


# Human plays red (moves first) against a yellow player that picks
# uniformly at random among the currently valid moves.
env=conn()
state=env.reset()
env.render()
# state.T[::-1] prints the board in the same orientation as on screen
print(f"the current state is \n{state.T[::-1]}")
print('enter a number between 1 and 7')
try:
    while True:
        # red: ask the human for a legal move
        action=_ask_move("Player red, what's your move?", env.validinputs)
        time.sleep(1)
        print(f"Player red has chosen action {action}")
        state,reward,done,info=env.step(action)
        print(f"the current state is \n{state.T[::-1]}")
        env.render()
        if done:
            # the game ended on red's move: reward 1 means red won
            if reward==1:
                print("Player red has won!")
            else:
                print("It's a tie!")
            break
        # yellow: random legal move
        action=random.choice(env.validinputs)
        time.sleep(1)
        print(f"Player yellow has chosen action {action}")
        state,reward,done,info=env.step(action)
        env.render()
        print(f"the current state is \n{state.T[::-1]}")
        if done:
            # the game ended on yellow's move: reward -1 means yellow won
            if reward==-1:
                print("Player yellow has won!")
            else:
                print("It's a tie!")
            break
finally:
    # always close the board window, even if the game is interrupted
    env.close()

the current state is 
[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]]
enter a number between 1 and 7
Player red, what's your move?4
Player red has chosen action 4
the current state is 
[[0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 0 0 0 0]
 [0 0 0 1 0 0 0]]
Player yellow has chosen action 6
the current state is 
[[ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  1  0 -1  0]]
Player red, what's your move?3
Player red has chosen action 3
the current state is 
[[ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  1  1  0 -1  0]]
Player yellow has chosen action 4
the current state is 
[[ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0  0  0  0  0]
 [ 0  0  0 -1  0  0  0]
 [ 0  0  1  1  0 -1  0]]
Player red, what's your mo

# 13.2. Train A Deep Neural Network

## 13.2.2. Simulate Connect Four Games

In [6]:
import numpy as np
from pprint import pprint

env=conn()


def one_game():
    """Play one game in which both players move at random.

    Returns a tuple ``(boards, reward)``: the list of board arrays
    recorded after every move, and the reward the environment reported
    for the final move.
    """
    boards=[]
    env.reset()
    done=False
    while not done:
        move=random.choice(env.validinputs)
        state,reward,done,info=env.step(move)
        # keep a (7, 6) copy of the board as it stands after this move
        boards.append(np.array(state).reshape(7,6))
    return boards, reward


# Simulate one game and print out results
history,outcome=one_game()
pprint(history)
pprint(outcome)

[array([[0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [1, 0, 0, 0, 0, 0]]),
 array([[ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [-1,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 1,  0,  0,  0,  0,  0]]),
 array([[ 1,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [-1,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 1,  0,  0,  0,  0,  0]]),
 array([[ 1,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [-1,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 1, -1,  0,  0,  0,  0]]),
 array([[ 1,  1,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
      

In [7]:
# Simulate 100000 games
results=[]
for _ in range(100000):
    history,outcome=one_game()
    # Label every board of this game with the game's final outcome
    results.extend((outcome,board) for board in history)

In [8]:
import pickle
# save the simulation data on your computer
games_file='files/ch13/games_conn100K.p'
with open(games_file, 'wb') as fp:
    pickle.dump(results,fp)
# read the data and print out the first 10 observations
with open(games_file, 'rb') as fp:
    games=pickle.load(fp)
pprint(games[:10])

[(-1,
  array([[1, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0]])),
 (-1,
  array([[ 1,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [-1,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0]])),
 (-1,
  array([[ 1,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 1,  0,  0,  0,  0,  0],
       [-1,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0]])),
 (-1,
  array([[ 1,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 1,  0,  0,  0,  0,  0],
       [-1, -1,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0]])),
 (-1,
  array([[ 1,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],


## 13.2.3. Train the Connect Four Game Strategy

In [9]:
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import Dense, Conv2D, Flatten
from tensorflow.keras.models import Sequential

# Convolutional network: a 7x6 single-channel board goes in, a softmax
# over the three game outcomes comes out.
model=Sequential([
    Conv2D(filters=128,kernel_size=(4, 4),padding="same",
           activation="relu", input_shape=(7,6,1)),
    Flatten(),
    Dense(units=64, activation="relu"),
    Dense(units=64, activation="relu"),
    Dense(3, activation='softmax'),
])
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [10]:
with open('files/ch13/games_conn100K.p', 'rb') as fp:
    games=pickle.load(fp)

# every record is (outcome, board); split them into two parallel lists
outcomes = [record[0] for record in games]
boards = [record[1] for record in games]

# add a trailing channel axis so the boards match the Conv2D input shape
X = np.array(boards).reshape((-1, 7, 6, 1))
# one_hot encoder, three outcomes: -1, 0, and 1
y = to_categorical(outcomes, num_classes=3)

# Train the model for 100 epochs
model.fit(X, y, epochs=100, verbose=1)
model.save('files/ch13/trained_conn.h5')

# 13.3. Use the Trained Model to Play Connect Four

## 13.3.1. Best Moves

In [11]:
from copy import deepcopy


def best_move_red(env, model=None):
    """Pick the move that maximizes red's predicted chance of winning.

    Every valid move is tried on a deep copy of ``env`` so the real game
    is left untouched; the resulting board is scored by the model and the
    move with the highest red-win probability is returned.

    Args:
        env: game environment exposing ``validinputs`` and ``step(move)``.
        model: object with a Keras-style ``predict(x, verbose=0)`` method.
            Defaults to the module-level ``reload`` model.

    Returns:
        The chosen move, or ``None`` if ``env.validinputs`` is empty.
    """
    # if there is only one valid move, take it
    if len(env.validinputs)==1:
        return env.validinputs[0]
    if model is None:
        # fall back to the trained model stored in the global `reload`
        model=reload
    # start below any possible probability so the first move always wins
    bestoutcome=-2
    bestmove=None
    # go through all possible moves hypothetically
    for move in env.validinputs:
        env_copy=deepcopy(env)
        state,reward,done,info=env_copy.step(move)
        # add batch and channel axes: the network input is (n, 7, 6, 1)
        state=state.reshape(-1,7,6,1)
        prediction=model.predict(state,verbose=0)
        # Prob of red wins (column 1 of the model output)
        p_win_red=prediction[0][1]
        if p_win_red>bestoutcome:
            # Update the bestoutcome
            bestoutcome=p_win_red
            # Update the best move
            bestmove=move
    return bestmove

In [12]:
from copy import deepcopy


def best_move_yellow(env, model=None):
    """Pick the move that maximizes yellow's predicted chance of winning.

    Every valid move is tried on a deep copy of ``env`` so the real game
    is left untouched; the resulting board is scored by the model and the
    move with the highest yellow-win probability is returned.

    Args:
        env: game environment exposing ``validinputs`` and ``step(move)``.
        model: object with a Keras-style ``predict(x, verbose=0)`` method.
            Defaults to the module-level ``reload`` model.

    Returns:
        The chosen move, or ``None`` if ``env.validinputs`` is empty.
    """
    # if there is only one valid move, take it
    if len(env.validinputs)==1:
        return env.validinputs[0]
    if model is None:
        # fall back to the trained model stored in the global `reload`
        model=reload
    # start below any possible probability so the first move always wins
    bestoutcome=-2
    bestmove=None
    # go through all possible moves hypothetically
    for move in env.validinputs:
        env_copy=deepcopy(env)
        state,reward,done,info=env_copy.step(move)
        # add batch and channel axes: the network input is (n, 7, 6, 1)
        state=state.reshape(-1,7,6,1)
        prediction=model.predict(state,verbose=0)
        # Prob of yellow wins (column 2 of the model output)
        # NOTE(review): presumably label -1 lands in the last column of the
        # one-hot encoding used for training; confirm against to_categorical
        p_win_yellow=prediction[0][2]
        if p_win_yellow>bestoutcome:
            # Update the bestoutcome
            bestoutcome=p_win_yellow
            # Update the best move
            bestmove=move
    return bestmove

## 13.3.2. Test Connect Four Deep Learning Game Strategies

In [13]:
from tensorflow.keras.models import load_model

reload=load_model('files/ch13/trained_conn.h5')

# Play 100 games between the deep learning agent and a random player.
env=conn()
results=[]
for i in range(100):
    state=env.reset()
    # in even-numbered games the random player makes the opening move
    if i%2==0:
        action=random.choice(env.validinputs)
        state, reward, done, info=env.step(action)
    while True:
        # deep learning agent moves, using the strategy that matches
        # whichever color is to play
        pick_move=best_move_red if env.turn=="red" else best_move_yellow
        action=pick_move(env)
        state, reward, done, info=env.step(action)
        if done:
            # result is 1 if the deep learning agent wins
            results.append(1 if reward!=0 else 0)
            break
        # random opponent moves
        action=random.choice(env.validinputs)
        state, reward, done, info=env.step(action)
        if done:
            # result is -1 if the deep learning agent loses
            results.append(-1 if reward!=0 else 0)
            break

In [14]:
# count how many times the deep learning agent won
wins=results.count(1)
print(f"the deep learning agent has won {wins} games")
# count how many times the deep learning agent lost
losses=results.count(-1)
print(f"the deep learning agent has lost {losses} games")
# count how many times the game ties (kept in its own variable so the
# loss count above is not overwritten)
ties=results.count(0)
print(f"the game has tied {ties} times")

the deep learning agent has won 100 games
the deep learning agent has lost 0 games
the game has tied 0 times


# 13.4. Animate Deep Learning in Connect Four

## 13.4.1. Print Out Probabilities of Winning for Each Next Move

In [15]:
from utils.ch13util import record_conn

# Record a game with the helper from utils/ch13util. The code below reads
# history[stage][1] as a dict of candidate move -> win probability.
# NOTE(review): the rest of each history entry is not visible here; see
# utils/ch13util for the full structure.
history=record_conn()

In [16]:
# win probability recorded for each candidate move at stage 0
p_wins_step0=history[0][1]
for move, p_win in p_wins_step0.items():
    print(f"If red chooses {move},"
          f" the probability of winning is {p_win:.4f}.")

If the red player chooses action 1, the probability of winning is 0.3281.
If the red player chooses action 2, the probability of winning is 0.3718.
If the red player chooses action 3, the probability of winning is 0.4599.
If the red player chooses action 4, the probability of winning is 0.4793.
If the red player chooses action 5, the probability of winning is 0.4599.
If the red player chooses action 6, the probability of winning is 0.4495.
If the red player chooses action 7, the probability of winning is 0.4096.


In [17]:
# save the game history on your computer so the recorded game can be
# loaded again later without replaying it
with open('files/ch13/conn_game_history.p','wb') as fp:
    pickle.dump(history,fp)

## 13.4.2. Animate A Complete Connect Four Game

In [18]:
import imageio
from PIL import Image

# load the board picture saved for each of the 8 steps as an image array
frames=[np.asarray(Image.open(f"files/ch13/conn_step{i}.ps"))
        for i in range(8)]
# write the pictures out as one animated gif
imageio.mimsave("files/ch13/conn_steps.gif",
                frames,duration=1000)

If you open the file conn_steps.gif, you'll see the following: 
<img src="https://gattonweb.uky.edu/faculty/lium/ml/conn_steps.gif"/>

The animation shows the game board at each stage of the game. 

## 13.4.3. Animate the Decision-Making Process

In [19]:
from utils.ch13util import stage_pics

# Generate the per-stage pictures (conn_stage{N}step{M}.png) that the
# animation code further down reads from files/ch13.
stage_pics()

The above script highlights the decision-making process of the red player. For example, if you open the file conn_stage0step3.png, you'll see the following picture.
<img src="https://gattonweb.uky.edu/faculty/lium/ml/conn_stage0step3.png" /> It shows the probabilities of the red player winning the game with each hypothetical move. In particular, the probability is 47.93% if the red player chooses Column 4. The choice is highlighted in blue, and that is also the move made by the red player as a result. 

In [20]:
frames=[]

# each recorded stage has three pictures (steps 1 to 3); the stage number
# in the file name is twice the index into history
for stage_idx in range(len(history)):
    stage_no=stage_idx*2
    for step in (1,2,3):
        file=f"conn_stage{stage_no}step{step}.png"
        picture=Image.open("files/ch13/"+file)
        frames.append(np.asarray(picture))
imageio.mimsave('files/ch13/conn_DL_probs.gif',
                frames,duration=500)

If you open the file conn_DL_probs.gif, you'll see the animation as follows.
<img src="https://gattonweb.uky.edu/faculty/lium/ml/conn_DL_probs.gif" /> Note that your results will likely be different from mine due to the random nature of the game.

## 13.4.4. Combine Board Positions and Decision Making

In [21]:
from utils.ch13util import DL_steps

# DL_steps() returns the animation frames; they are indexed and passed to
# plt.imshow further down to build the subplots.
frames=DL_steps()

If you open the gif file, you'll see the following animation:
<img src="https://gattonweb.uky.edu/faculty/lium/ml/conn_DL_steps.gif"/>

## 13.4.5 Create Subplots of Deep Learning

In [22]:
# keep every third frame, starting with the third one (index 2)
frames_subplots=frames[2::3]

In [23]:
from matplotlib import pyplot as plt

plt.figure(figsize=(20,28),dpi=200)
# stack the first four selected frames vertically, one per row
for row in range(4):
    ax=plt.subplot(4,1,row+1)
    ax.imshow(frames_subplots[row])
    ax.axis('off')
plt.subplots_adjust(bottom=0.001,right=0.999,top=0.999,
        left=0.001, hspace=-0.1,wspace=-0.22)
plt.savefig("files/ch13/subplots_conn.png")

# 13.6 Exercises

In [24]:
# answer to question 13.2
import time
import random


def _ask_move(prompt, valid_moves):
    """Keep prompting until the user types one of ``valid_moves``.

    Non-numeric input and moves that are not currently allowed are
    rejected with a message instead of crashing or reaching env.step().
    """
    while True:
        raw = input(prompt)
        try:
            move = int(raw)
        except ValueError:
            print(f"'{raw}' is not a number; please try again")
            continue
        if move in valid_moves:
            return move
        print(f"{move} is not a valid move; choose from {list(valid_moves)}")


# Red picks uniformly at random among the valid moves and goes first;
# the human plays yellow.
env=conn()
state=env.reset()
env.render()
# state.T[::-1] prints the board in the same orientation as on screen
print(f"the current state is \n{state.T[::-1]}")
print('enter a number between 1 and 7')
try:
    while True:
        # red: random legal move
        action=random.choice(env.validinputs)
        time.sleep(1)
        print(f"Player red has chosen action {action}")
        state,reward,done,info=env.step(action)
        print(f"the current state is \n{state.T[::-1]}")
        env.render()
        if done:
            # the game ended on red's move: reward 1 means red won
            if reward==1:
                print("Player red has won!")
            else:
                print("It's a tie!")
            break
        # yellow: ask the human (the prompt now names the right player)
        action=_ask_move("Player yellow, what's your move?", env.validinputs)
        time.sleep(1)
        print(f"Player yellow has chosen action {action}")
        state,reward,done,info=env.step(action)
        env.render()
        print(f"the current state is \n{state.T[::-1]}")
        if done:
            # the game ended on yellow's move: reward -1 means yellow won
            if reward==-1:
                print("Player yellow has won!")
            else:
                print("It's a tie!")
            break
finally:
    # always close the board window, even if the game is interrupted
    env.close()

In [25]:
# answer to question 13.3
# history[1], history[2] and history[3] hold stages 2, 3 and 4; print the
# win probability recorded for each candidate move at every one of them
for stage in (1, 2, 3):
    p_wins=history[stage][1]
    for key, value in p_wins.items():
        print(f"If red chooses {key},"
              f" the probability of winning is {value:.4f}.")