In [1]:
import tensorflow as tf
import numpy as np
from random import shuffle
from tensorflow.contrib import rnn
tf.logging.set_verbosity(tf.logging.INFO)
tf.reset_default_graph()
from IPython.core.display import Image, display
from collections import deque
import gym
import random
import time

# 1. Q Learning based on Q-table

### initialize

In [None]:
# Register a FrozenLake variant under a new id, using the 4x4 map with
# is_slippery=False, capped at 100 steps per episode.
from gym.envs.registration import register
register(
    id='FrozenLakeNotSlippery-v0',
    entry_point='gym.envs.toy_text:FrozenLakeEnv',
    kwargs={'map_name' : '4x4', 'is_slippery': False}, # for testing purposes, make the environment deterministic
    max_episode_steps=100,
    reward_threshold=0.78, # optimum = .8196
)

In [None]:
# Instantiate the environment by id and read the sizes (`n`) of its
# action and observation spaces; they give the Q-table its shape.
env = gym.make('FrozenLakeNotSlippery-v0')
action_size = env.action_space.n
state_size = env.observation_space.n

In [None]:
# Report the two dimensions of the problem (number of actions, number of states).
for label, size in (('Action', action_size), ('State', state_size)):
    print('{} size is: {}'.format(label, size))

In [None]:
# Q-table with one row per state and one column per action, initialised to zero.
qtable = np.zeros((state_size, action_size))
print(qtable)

### params

In [None]:
# Hyperparameters read by the tabular Q-learning loop in the next code cell.
total_episodes = 50000        # Total episodes
learning_rate = 0.8           # Learning rate (step size of the Q-table update)
max_steps = 99                # Max steps per episode
gamma = 0.95                  # Discounting rate

# Exploration parameters (epsilon-greedy; epsilon decays exponentially with the episode count)
epsilon = 1.0                 # Exploration rate (current value, updated after every episode)
max_epsilon = 1.0             # Exploration probability at start
min_epsilon = 0.01            # Minimum exploration probability 
decay_rate = 0.01             # Exponential decay rate for exploration prob

### Use Q Table Algorithm

In [None]:
# Train the Q-table with epsilon-greedy tabular Q-learning.
rewards = []  # total reward collected in each episode

for episode in range(total_episodes):

    # Reset the environment at the start of every episode.
    state = env.reset()  # set state --> 0, state from {0,1,2,...,15}
    step = 0
    done = False
    rewards_episode = 0

    for step in range(max_steps):

        # Epsilon-greedy choice: explore with probability epsilon,
        # otherwise take the action with the largest Q-value for this state.
        random_0_1 = random.uniform(0, 1)
        if random_0_1 <= epsilon:  # exploration
            action = env.action_space.sample()  # action sampled from {0,1,2,3}
            #LEFT = 0, DOWN = 1, RIGHT = 2, UP = 3
        else:  # exploitation
            action = np.argmax(qtable[state, :])

        # Apply the action to the environment.
        new_state, reward, done, info = env.step(action)

        # Move Q(state, action) towards the one-step TD target by learning_rate.
        td_target = reward + gamma * np.max(qtable[new_state, :])
        qtable[state, action] = qtable[state, action] + \
            learning_rate * (td_target - qtable[state, action])

        # Book-keeping before the next step.
        rewards_episode += reward
        state = new_state

        # The episode is over as soon as the environment says so.
        if done:
            break

    # Use a 1-based episode count for the decay below; the for-loop rebinds
    # `episode` on its next iteration, so this does not skip episodes.
    episode += 1

    # Reduce epsilon (less and less exploration is needed as training progresses).
    epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * episode)
    rewards.append(rewards_episode)

print(qtable)

In [None]:
# Formula: show the Q-learning algorithm as an image loaded from a remote URL
display(Image('https://randomant.net/images/algorithm-behind-curtain-2/q_learning_algorithm.jpg', width=500, unconfined=True))

### Run the game

In [None]:
# Replay five episodes acting greedily on the learned Q-table,
# rendering the environment before every step.
env.reset()
rewards = []

for episode in range(5):

    state = env.reset()
    step = 0
    done = False
    print("****************************************************")
    print("EPISODE ", episode)

    for step in range(max_steps):

        env.render()
        # Pure exploitation: pick the action with the highest Q-value.
        action = qtable[state, :].argmax()
        new_state, reward, done, info = env.step(action)

        # Stop at a terminal state; otherwise continue from the new state.
        if done:
            break
        state = new_state

env.close()

# 2. Q Learning based on deep learning NN
https://leonardoaraujosantos.gitbooks.io/artificial-inteligence/content/deep_reinforcement_learning.html

### initialize env

In [None]:
# Create the Pong environment, reset it, and show the shape of the raw observation.
env = gym.make('Pong-v0')
observation = env.reset()
observation.shape

In [None]:
#env.render()
#env.close()

### preprocess to reduce space
Copied from https://github.com/dhruvp/atari-pong/blob/master/me_pong.py

In [2]:
def downsample(image):
    """Return every second row and every second column of a 3-D ``image``.

    The third axis is kept whole, so the first two dimensions are roughly
    halved. Only basic slicing is used, so for a NumPy array the result is
    a view of ``image`` rather than a copy.
    """
    every_other = slice(None, None, 2)
    return image[every_other, every_other, :]

def remove_color(image):
    """Drop the colour axis by keeping only index 0 of the third dimension.

    An (H, W, C) image becomes an (H, W) image.
    """
    kept_channel = 0
    return image[:, :, kept_channel]

def remove_background(image):
    """Set every pixel equal to 144 or 109 to 0 and return the image.

    The array is modified in place; the returned object is the same array
    that was passed in.
    """
    # presumably 144 and 109 are the background colour values of the Pong frame -- TODO confirm
    is_background = (image == 144) | (image == 109)
    image[is_background] = 0
    return image

def preprocess_state(input_observation):
    """Convert a raw frame into a flat binary float vector, leaving the frame untouched.

    Steps: keep rows 35..194, take every second row/column, keep channel 0,
    zero the background values (144 and 109), and set every remaining
    non-zero pixel to 1. For a 210x160x3 frame this yields an 80x80 image,
    returned flattened as a vector of 6400 float64 values.

    Args:
        input_observation: NumPy array holding the raw frame (rows, columns,
            channels). It is not modified.

    Returns:
        1-D float64 NumPy array containing only 0.0 and 1.0.
    """
    processed_observation = input_observation[35:195] # crop
    processed_observation = downsample(processed_observation)
    processed_observation = remove_color(processed_observation)

    # Everything so far is basic slicing, i.e. a view into the caller's frame.
    # Copy before the in-place edits below so the raw observation is not overwritten.
    processed_observation = np.array(processed_observation, copy=True)

    processed_observation = remove_background(processed_observation)
    processed_observation[processed_observation != 0] = 1 # everything else (paddles, ball) just set to 1

    # Flatten the 80 x 80 matrix into a vector of 6400 floats
    processed_observation = processed_observation.astype(np.float64).ravel()
    return processed_observation

In [None]:
# Sanity check: shape of the flattened, preprocessed observation.
preprocess_state(observation).shape

### hyperparams

In [None]:
# Basic
total_episodes = 5            # Total episodes (the value was missing, a SyntaxError; 5 matches the policy-gradient section -- increase for real training)
learning_rate = 0.8           # Learning rate (NOTE: the Q-network's optimizer hard-codes 0.001 and does not read this)
max_steps = 2000              # Max steps per episode
gamma = 0.95                  # Discounting rate

# Exploration parameters for epsilon greedy strategy
explore_start = 1.0            # exploration probability at start
explore_stop = 0.01            # minimum exploration probability 
decay_rate = 0.0001            # exponential decay rate for exploration prob

# Memory
memory_size = 50000            # capacity of the replay memory
batch_size = 10000             # number of experiences sampled for each training call
pretrain_length = batch_size   # random-play steps used to pre-fill the memory
possible_actions = [2,3]       # the two environment action ids the agent chooses between

### define memory
https://medium.freecodecamp.org/an-introduction-to-deep-q-learning-lets-play-doom-54d02d8017d8

In [None]:
class Memory():
    """Fixed-capacity experience replay buffer.

    Experiences are kept in a ``deque`` with ``maxlen = max_size``, so once
    the buffer is full each new experience evicts the oldest one.
    """
    def __init__(self, max_size):
        self.buffer = deque(maxlen = max_size)
    
    def add(self, experience):
        """Append one experience (dropping the oldest if the buffer is full)."""
        self.buffer.append(experience)
    
    def sample(self, batch_size):
        """Return ``batch_size`` distinct experiences drawn uniformly at random.

        Raises:
            ValueError: raised by ``np.random.choice`` when ``batch_size`` is
                larger than the number of stored experiences.
        """
        # Snapshot the deque into a list once: indexing into the middle of a
        # deque is O(n) per lookup, whereas list indexing is O(1).
        experiences = list(self.buffer)
        index = np.random.choice(np.arange(len(experiences)),
                                size = batch_size,
                                replace = False)
        
        return [experiences[i] for i in index]

    def all_records(self):
        """Return the underlying deque itself (not a copy), oldest experience first."""
        return self.buffer

In [None]:
# Reset the environment to obtain the first raw frame
init_state = env.reset()

# Instantiate memory
memory = Memory(max_size = memory_size)

# Pre-fill the memory with transitions generated by uniformly random actions.
state = init_state
for i in range(pretrain_length):
    #env.render()

    # Random action
    action = random.choice(possible_actions)
    next_state, reward, done, info = env.step(action)

    # A finished episode is stored with an all-zero next frame.
    if done:
        next_state = np.zeros(state.shape)

    memory.add((preprocess_state(state), action, reward, preprocess_state(next_state), done))

    # Start a new episode after a terminal step, otherwise continue from the new frame.
    state = env.reset() if done else next_state

#env.close()

In [None]:
memory.sample(1) # one stored experience: (state, action, reward, next state, done)

Take a look at how random actions perform

In [None]:
# Count the stored transitions whose reward (tuple index 2) is negative / positive.
num_loses = sum(1 for record in memory.all_records() if record[2] < 0)
num_wins = sum(1 for record in memory.all_records() if record[2] > 0)
print('Num of loses: ', num_loses, '; Num of wins:', num_wins)

### define NN (for now, simplest 1-hidden layer NN)

In [None]:
def Q_learning_NN(features, labels, mode):
    """Model function of the Q-network estimator.

    Builds a network with one 200-unit ReLU hidden layer followed by a
    2-unit linear layer on top of ``features["states"]``, then hands the
    result to ``GenerateEstimatorSpec_Q``.

    Args:
        features: dict with key "states"; in TRAIN mode also "actions".
        labels: training targets, forwarded unchanged.
        mode: a ``tf.estimator.ModeKeys`` value.

    Returns:
        Whatever ``GenerateEstimatorSpec_Q`` returns for this mode.
    """
    # The taken actions are only read while training.
    training = mode == tf.estimator.ModeKeys.TRAIN
    actions = features['actions'] if training else None

    # hidden layer on top of the input states
    num_hidden_units = 200
    hidden = tf.layers.dense(inputs = features["states"],
                             units = num_hidden_units,
                             activation = tf.nn.relu)

    # output layer: one value per action (move up or down)
    logits = tf.layers.dense(inputs = hidden, units = 2)

    return GenerateEstimatorSpec_Q(logits, labels, mode, actions)

In [None]:
def GenerateEstimatorSpec_Q(logits, targets, mode, actions):
    """Build the EstimatorSpec of the Q-network for PREDICT or TRAIN mode.

    Args:
        logits: network output, one value per action for every row.
        targets: target Q-values compared against the selected outputs.
        mode: a ``tf.estimator.ModeKeys`` value.
        actions: per-row weights multiplied with ``logits`` (the training
            loop passes one-hot rows) to select the value of the taken action.

    Returns:
        An EstimatorSpec with predictions (PREDICT) or with loss and train op
        (TRAIN). For any other mode no spec is built and None is returned.
    """
    # The argmax op is always created under the name "V_1".
    best_action_index = tf.argmax(input = logits, axis=1, name = "V_1")
    predictions = {
      "best_action_index": best_action_index,
      "predicted_Q": logits
    }

    # Prediction only needs the outputs.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode = mode,
                                          predictions = predictions)

    # Mean squared error between the targets and the value of the taken action,
    # e.g. [q1, q2] * [1.0, 0.0] keeps q1 for the 1st action.
    chosen_Qs = tf.reduce_sum(tf.multiply(logits, actions), axis=1)
    squared_error = tf.square(targets - chosen_Qs)
    loss = tf.reduce_mean(squared_error)

    if mode != tf.estimator.ModeKeys.TRAIN:
        return None

    # One plain gradient-descent step on the loss.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.001)
    train_op = optimizer.minimize(loss = loss,
                                  global_step = tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode = mode,
                                      loss = loss,
                                      train_op = train_op)

![image](https://cdn-images-1.medium.com/max/1760/1*ZqML2CCqo455qkxlxJTT2Q.png)

In [None]:
# Set up logging for predictions: every 50 iterations, log the tensor named
# "V_1" (the argmax op of the model) under the label "best_action_index".
tensors_to_log = {"best_action_index": "V_1"}
logging_hook = tf.train.LoggingTensorHook(
      tensors = tensors_to_log, every_n_iter=50)

In [None]:
# Estimator wrapping the Q-network model function; it uses ./model_files_3 as its model directory.
Q_classifier = tf.estimator.Estimator(
    model_fn = Q_learning_NN, # model function type, 
    model_dir = "./model_files_3",
    params = None)

In [None]:
def get_prediction(states, Q_classifier):
    """Run the Q-network on ``states`` and collect the per-row results.

    Args:
        states: array of input states, fed to the estimator under the
            "states" feature key, in order (no shuffling).
        Q_classifier: estimator whose ``predict`` yields dicts containing
            'best_action_index' and 'predicted_Q'.

    Returns:
        dict with two lists, one entry per prediction:
        'best_action_index' (index of the largest output) and
        'predicted_maxQ' (the largest output value).
    """
    pred_input_fn = tf.estimator.inputs.numpy_input_fn(
        x = {'states': states}, #features
        y = None,
        shuffle = False)

    collected = {'best_action_index': [], 'predicted_maxQ': []}
    for pred in Q_classifier.predict(input_fn = pred_input_fn):
        collected['best_action_index'].append(pred['best_action_index'])
        collected['predicted_maxQ'].append(np.max(pred['predicted_Q']))

    return collected

In [None]:
decay_step = 0 # global step counter driving the exploration decay (explore/exploit trade-off); never reset between episodes

for episode in range(total_episodes):
    
    # initialize game
    state = preprocess_state(env.reset())
    step = 0
    done = False
    reward = 0
    rewards_episode = 0
        
    print("****************************************************")
    print("EPISODE ", episode)

    # Play until the environment reports done or max_steps steps have been taken
    while step < max_steps:
        
        # render, then advance the global decay counter and the per-episode step counter
        env.render()
        decay_step +=1
        step += 1
        
        # decide action (epsilon-greedy; the exploration probability decays exponentially with decay_step)
        random_0_1 = random.uniform(0, 1)
        explore_probability = explore_stop + (explore_start - explore_stop) * np.exp(-decay_rate * decay_step)
        
        if random_0_1 <= explore_probability or (episode == 0): # exploration (always during the first episode)
            action = random.choice(possible_actions) # action sampled from {2,3}
            
        else: # exploitation: take the action with the highest predicted Q-value
            best_action_index = get_prediction(np.array([state], np.float64), Q_classifier)['best_action_index'][0]
            action = possible_actions[best_action_index]
            
        # apply the chosen action
        next_state, reward, done, info = env.step(action) # done from {True, False}
        
        # accumulate the episode reward
        rewards_episode += reward
        
        if done:
            # terminal transition: store an all-zero next state, reset the env,
            # and set step to max_steps so the while-loop exits
            next_state = np.zeros(state.shape)
            memory.add((state, action, reward, next_state, done))
            state = preprocess_state(env.reset())
            step = max_steps
            
        else:
            # non-terminal transition: store it and continue from the preprocessed next frame
            next_state = preprocess_state(next_state)
            memory.add((state, action, reward, next_state, done))
            state = next_state          
            
    ### train model at every episode            

    # Obtain random mini-batch from memory

    batch = memory.sample(batch_size)
    states = np.array([each[0] for each in batch]) # preprocessed (flattened) states
    actions = np.array([each[1] for each in batch])
    rewards = np.array([each[2] for each in batch]) 
    next_states = np.array([each[3] for each in batch])
    dones = np.array([each[4] for each in batch])

    target_Qs_batch = []

    # Get maxQ values for next_state 
    # (in the first episode the network is not queried and 0 is used for every next state)
    if episode>=1:
        next_state_maxQs = get_prediction(next_states, Q_classifier)['predicted_maxQ']
    else:
        next_state_maxQs = [0] * batch_size

    # Calculate Target Q values for each state in batch:
    # reward only for terminal transitions, reward + gamma * maxQ(next state) otherwise
    for i in range(0, len(batch)):
        done = dones[i]
        if done:
            target_Qs_batch.append(rewards[i])
        else:
            target = rewards[i] + gamma * np.max(next_state_maxQs[i])
            target_Qs_batch.append(target)

    target_Qs = np.array([each for each in target_Qs_batch])

    # define input: states plus a one-hot encoding of the taken action
    # ([1, 0] for action 2, [0, 1] otherwise), with the target Q-values as labels
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        x = {'states': states,
             'actions': np.array([np.array([1.0,0.0], dtype = np.float64) \
                                  if action == 2 else \
                                  np.array([0.0,1.0], dtype = np.float64) for action in actions])},
        y = target_Qs, # targets/labels
        batch_size = batch_size, 
        num_epochs = None,
        shuffle = True)

    # train model: a single optimizer step per episode
    Q_classifier.train(
        input_fn = train_input_fn,
        steps = 1,
        hooks=[logging_hook])

# 3. Policy gradient

In [3]:
def policy_gradient_NN(features, labels, mode):
    """Model function of the policy-gradient estimator.

    Builds a network with one 200-unit ReLU hidden layer followed by a
    2-unit linear layer on top of ``features["states"]``, then hands the
    result to ``GenerateEstimatorSpec_PG``.

    Args:
        features: dict with key "states"; in TRAIN mode also "rewards".
        labels: forwarded unchanged as the ``actions`` argument.
        mode: a ``tf.estimator.ModeKeys`` value.

    Returns:
        Whatever ``GenerateEstimatorSpec_PG`` returns for this mode.
    """
    # The (discounted) rewards are only read while training.
    training = mode == tf.estimator.ModeKeys.TRAIN
    rewards = features['rewards'] if training else None

    # hidden layer on top of the input states
    num_hidden_units = 200
    hidden = tf.layers.dense(inputs = features["states"],
                             units = num_hidden_units,
                             activation = tf.nn.relu)

    # output layer: one logit per action (move up or down)
    logits = tf.layers.dense(inputs = hidden, units = 2)

    return GenerateEstimatorSpec_PG(logits, labels, mode, rewards)

In [4]:
def GenerateEstimatorSpec_PG(logits, actions, mode, rewards):
    """Build the EstimatorSpec of the policy network for PREDICT or TRAIN mode.

    Args:
        logits: network output, one logit per action for every row.
        actions: integer index (column of ``logits``) of the action taken in
            each row.
        mode: a ``tf.estimator.ModeKeys`` value.
        rewards: per-row weight applied to the log-probability of the taken
            action.

    Returns:
        An EstimatorSpec with predictions (PREDICT) or with loss and train op
        (TRAIN). For any other mode no spec is built and None is returned.
    """
    # Generate Predictions (the ops named "V_1" and "V_2" are always created)
    outputs_softmax = tf.nn.softmax(logits,name = "V_2")
    predictions = {
      "best_action_index":  tf.argmax(input = logits, axis = 1,name = "V_1"),
      "predicted_softmax": outputs_softmax
    }
    
    # If during PREDICTION mode, just return the predictions
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode = mode, 
                                          predictions = predictions)
    
    # Log-probabilities, shape (batch_size, 2). log_softmax is used instead of
    # log(softmax(...)) because the latter yields -inf (and NaN gradients)
    # once a probability underflows to 0.
    log_prob = tf.nn.log_softmax(logits)

    # Pick the log-probability of the taken action in every row by indexing
    # into the flattened (row-major) log_prob tensor.
    indices = tf.range(0, tf.shape(log_prob)[0]) * \
              tf.shape(log_prob)[1] + actions
    act_prob = tf.gather(tf.reshape(log_prob, [-1]), indices)

    # Loss: negative reward-weighted sum of the taken actions' log-probabilities
    loss = -tf.reduce_sum(tf.multiply(act_prob, rewards))

    # If during TRAIN, update gradients
    if mode == tf.estimator.ModeKeys.TRAIN:
        optimizer = tf.train.GradientDescentOptimizer(learning_rate = 0.001)
        train_op = optimizer.minimize(
            loss = loss,
            global_step = tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode = mode, 
                                          loss = loss, 
                                          train_op = train_op)

    # Other modes (e.g. EVAL) are not supported: no spec is built.
    return None

In [5]:
def discount_and_norm(episode_rewards, gamma):
    """Return the discounted returns of an episode, standardised to zero mean and unit std.

    The return at step t is ``r_t + gamma * return_{t+1}``, accumulated from
    the last step backwards.

    Args:
        episode_rewards: sequence of per-step rewards (list or NumPy array,
            integer or float). It is not modified.
        gamma: discount factor.

    Returns:
        Floating-point NumPy array with one entry per step. When the standard
        deviation of the discounted returns is 0 (e.g. all rewards are 0) the
        division is skipped, so the centred values (all zeros) are returned
        instead of NaNs. An empty input gives an empty array.
    """
    episode_rewards = np.asarray(episode_rewards)
    # Integer rewards would truncate the discounted values and make the
    # in-place float arithmetic below fail, so compute in floating point.
    if not np.issubdtype(episode_rewards.dtype, np.floating):
        episode_rewards = episode_rewards.astype(np.float64)

    discounted_episode_rewards = np.zeros_like(episode_rewards)
    cumulative = 0
    for t in reversed(range(len(episode_rewards))):
        cumulative = cumulative * gamma + episode_rewards[t]
        discounted_episode_rewards[t] = cumulative

    if discounted_episode_rewards.size == 0:
        return discounted_episode_rewards

    discounted_episode_rewards -= np.mean(discounted_episode_rewards)
    spread = np.std(discounted_episode_rewards)
    if spread > 0:  # avoid 0/0 -> NaN when every return is identical
        discounted_episode_rewards /= spread
    return discounted_episode_rewards

In [6]:
# Estimator wrapping the policy-gradient model function; it uses ./model_files_4 as its model directory.
PG_classifier = tf.estimator.Estimator(
    model_fn = policy_gradient_NN, # model function type, 
    model_dir = "./model_files_4",
    params = None)

INFO:tensorflow:Using default config.
INFO:tensorflow:Using config: {'_model_dir': './model_files_4', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': None, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1c20a57b70>, '_task_type': 'worker', '_task_id': 0, '_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [7]:
def get_prediction(states, PG_classifier):
    """Sample one action index (0 or 1) per state from the policy's softmax output.

    NOTE: this function has the same name as the Q-learning helper defined
    earlier in the notebook but returns a plain list instead of a dict;
    running this cell rebinds the name.

    Args:
        states: array of input states, fed to the estimator under the
            "states" feature key, in order (no shuffling).
        PG_classifier: estimator whose ``predict`` yields dicts containing
            'predicted_softmax' (two probabilities).

    Returns:
        list with one sampled index per prediction: 0 with probability p_0,
        otherwise 1.
    """
    pred_input_fn = tf.estimator.inputs.numpy_input_fn(
        x = {'states': states}, #features
        y = None,
        shuffle = False)

    sampled_indices = []
    for pred in PG_classifier.predict(input_fn = pred_input_fn):
        p_0, _p_1 = pred['predicted_softmax']
        # One uniform draw per prediction decides between the two actions.
        sampled_indices.append(1 if np.random.uniform() > p_0 else 0)
    return sampled_indices

![image](https://leimao.github.io/images/articles/2017-05-04-REINFORCE-Policy-Gradient/Sutton_REINFORCE.png)

### hyperparams

In [8]:
# Basic
total_episodes = 5       # Total episodes
learning_rate = 0.8           # Learning rate (NOTE: the policy network's optimizer hard-codes 0.001 and does not read this)
max_steps = 2000            # Max steps per episode
gamma = 0.95                  # Discounting rate

# Memory
# NOTE(review): memory_size and pretrain_length do not appear to be read by the
# policy-gradient cells below; batch_size is passed to the training input_fn -- confirm.
memory_size = 50000
batch_size = 10000
pretrain_length = batch_size
possible_actions = [2,3]      # the two environment action ids the agent chooses between

### Start training

In [9]:
class records:
    """Per-episode log of states, actions and rewards, stored as parallel lists."""

    def __init__(self):
        # One list per attribute, all kept at the same length by add().
        self.records = {key: [] for key in ('states', 'actions', 'rewards')}

    def add(self, state, action, reward):
        """Append one step (state, action, reward) to the log."""
        step = (('states', state), ('actions', action), ('rewards', reward))
        for key, value in step:
            self.records[key].append(value)

    def get(self, attribute):
        """Return everything logged under ``attribute`` as a float64 NumPy array."""
        logged = self.records[attribute]
        return np.array(logged, np.float64)

In [10]:
# Set up logging for predictions: every 50 iterations, log the tensors named
# "V_1" (argmax op) and "V_2" (softmax op) created by the policy model.
tensors_to_log = {"best_action_index": "V_1", "output_soft_max":"V_2"}
logging_hook = tf.train.LoggingTensorHook(
      tensors = tensors_to_log, every_n_iter=50)

In [None]:
# Policy-gradient training: play one episode, then do one training step on it.
env = gym.make('Pong-v0')

for episode in range(total_episodes):
    
    # initialize game
    state = preprocess_state(env.reset())
    records_episode = records()
    step = 0
    done = False

    print("****************************************************")
    print("EPISODE ", episode)

    # Play until the environment reports done or max_steps steps have been taken
    while step < max_steps:
        
        # render, then increase step
        env.render()
        step += 1
        
        # decide action
        if (episode == 0): # exploration: the first episode is played with uniformly random actions
            action = random.choice(possible_actions) # action sampled from {2,3}
        else:
            # sample an action index from the policy network for this single state
            action = possible_actions[get_prediction(np.array([state], np.float64), #(1,6400)
                                                     PG_classifier)[0]]
            
        # apply the chosen action and log (state, action, reward) for this step
        next_state, reward, done, info = env.step(action)
        records_episode.add(state, action, reward)
        
        if done:
            # episode finished: reset the env and set step to max_steps so the while-loop exits
            state = preprocess_state(env.reset())
            step = max_steps
            
        else:
            next_state = preprocess_state(next_state)
            state = next_state 
            
    ### train model at every episode            
    # discounted, standardised returns used as per-step weights in the loss
    discount_and_norm_rewards = discount_and_norm(records_episode.get('rewards'), gamma)
    
    # define input
    train_input_fn = tf.estimator.inputs.numpy_input_fn(
        
        x = {'states': records_episode.get('states'),
             'rewards': discount_and_norm_rewards},
        
        # labels: index of the taken action (0 for action 2, 1 otherwise)
        y = np.array([0 if action == 2 else 1 for action in records_episode.get('actions')], 
                     np.int32), 
        batch_size = batch_size, 
        num_epochs = None,
        shuffle = True)

    # train model: a single optimizer step per episode
    PG_classifier.train(
        input_fn = train_input_fn,
        steps = 1,
        hooks=[logging_hook])

****************************************************
EPISODE  0
INFO:tensorflow:Create CheckpointSaverHook.
INFO:tensorflow:Saving checkpoints for 1 into ./model_files_4/model.ckpt.
INFO:tensorflow:best_action_index = [1 1 1 ..., 1 1 1], output_soft_max = [[ 0.47960894  0.52039106]
 [ 0.45714969  0.54285031]
 [ 0.44637114  0.55362886]
 ..., 
 [ 0.47181032  0.52818968]
 [ 0.44939609  0.55060391]
 [ 0.46275506  0.53724494]]
INFO:tensorflow:loss = -16.515899556, step = 1
INFO:tensorflow:Loss for final step: -16.515899556.
****************************************************
EPISODE  1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_

INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:t

INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:t

INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:t

INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:t

INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:t

INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:t

INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:t

INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-1
INFO:t

INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:t

INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:t

INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:t

INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:tensorflow:Restoring parameters from ./model_files_4/model.ckpt-2
INFO:t

KeyboardInterrupt: 

In [12]:
# Close the environment once training is finished or interrupted.
env.close()