# 1. Test Random Environment with OpenAI Gym

In [1]:
from gym import Env
from gym.spaces import Discrete, Box
import numpy as np
import random
from tqdm import tqdm
import json

In [2]:
history_len = 5
batch_size = 20
steps = 80000
random_batches = False
specifyer = "trace_gcc"
checkpoint_filename = "checkpoint.json"
checkpoint_processed_data = True

In [3]:
import numpy
import json
import numpy as np
from copy import deepcopy


def setup_data(checkpoint_filename, specifyer, save_file=False):
    data = []
    addresses = []
    
    with open(checkpoint_filename, 'r') as f:
        raw_traces = json.load(f)
        
    traces = raw_traces[specifyer]


    for trace in tqdm(traces):

        address = trace["branch_address"]
        branch = trace["branch"]

        if address not in addresses:
            addresses.append(address)
            stack_pointer = 0
            history = []

            for i in range(history_len):
                history.append(False)

            tmp = [address, np.array(history), branch, stack_pointer]

            data.append(tmp)

        else:
            for el in data[::-1]:
                if el[0] == address:
                    last_data = el[1]
                    last_branch = el[2]
                    stack_pointer = el[3]
                    break


            history = np.array(deepcopy(last_data))

            history[stack_pointer] = last_branch

            if stack_pointer < history_len-1:
                stack_pointer = stack_pointer + 1

            tmp = [address, history, branch, stack_pointer]

            data.append(tmp)

            
    if save_file:
        save_filename = specifyer.split(".")[0] + ".npy"
        np.save(save_filename, data)
            
    return data

if checkpoint_processed_data:
    with open(specifyer.split(".")[0] + ".npy", 'rb') as f:
        data = np.load(f, allow_pickle=True)
else:
    data = setup_data(checkpoint_filename, specifyer, save_file=True)

In [4]:
global start
start = 0

In [5]:
import random
def get_batch(data):
    batch = []
    
    global start
    
    if random_batches:
        start = random.randint(0, len(data)-batch_size)
    
    for i in range(batch_size):
        index = i + start
        
        if index > len(data)-1:
            start = 0
            index = 0
            
        batch.append(data[index])
    
    if not random_batches:
        start = index+1
            
    return batch


In [6]:
class ShowerEnv(Env):
    def __init__(self):
        # Actions we can take --> not branch->0 , branch->1
        self.action_space = Discrete(2)
        # Parameter shape for the AI
        self.observation_shape = (history_len)
        self.observation_space = Box(low = np.zeros(self.observation_shape), 
                                            high = np.ones(self.observation_shape), dtype=np.ubyte)
        
        #get end
        self.length = len(data)
        
        #Set start value
        self.batch = get_batch(data)
        self.index = 0
        
        self.label = self.batch[self.index][2]
        self.state = self.batch[self.index][1]        

        
    def step(self, action):
        self.index += 1 
        
        # Calculate reward
        if action == self.label: 
            reward =1 
        else: 
            reward = -1 
        
        # Check if shower is done
        if self.index == batch_size-1: 
            done = True
        else:
            done = False
        
        # Apply temperature noise
        #self.state += random.randint(-1,1)
        # Set placeholder for info
        info = {}
                
        self.label = self.batch[self.index][2]
        self.state = self.batch[self.index][1]
        
        # Return step information
        return self.state, reward, done, info

    def render(self):
        # Implement viz
        pass
    
    def reset(self):
        self.batch = get_batch(data)
        self.index = 0
        
        self.label = self.batch[self.index][2]
        self.state = self.batch[self.index][1]
        
        return self.state


In [7]:
env = ShowerEnv()

In [8]:
#env.observation_space.sample()

In [9]:
def emulate():
    episodes = 10
    for episode in range(1, episodes+1):
        state = env.reset()
        done = False
        score = 0 

        while not done:
            #env.render()
            action = env.action_space.sample()
            n_state, reward, done, info = env.step(action)
            score+=reward
        print('Episode:{} Score:{}'.format(episode, score))

# 2. Create a Deep Learning Model with Keras

In [10]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten, LeakyReLU
from tensorflow.keras.optimizers import Adam

In [11]:
states = env.observation_space.shape
actions = env.action_space.n

In [12]:
def build_model(states, actions):
    model = Sequential()
    model.add(Flatten(input_shape=(1,history_len)))
    model.add(Dense(24, activation="relu"))
    model.add(Dense(24, activation="relu"))
    model.add(Dense(24, activation="relu"))
    model.add(Dense(actions, activation='linear'))
    return model

In [13]:
model = build_model(states, actions)

In [14]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 5)                 0         
_________________________________________________________________
dense (Dense)                (None, 24)                144       
_________________________________________________________________
dense_1 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_2 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_3 (Dense)              (None, 2)                 50        
Total params: 1,394
Trainable params: 1,394
Non-trainable params: 0
_________________________________________________________________


In [15]:
del model 

# 3. Build Agent with Keras-RL

In [16]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [17]:
model = build_model(states, actions)

In [18]:
def build_agent(model, actions):
    policy = BoltzmannQPolicy()
    memory = SequentialMemory(limit=steps, window_length=1)
    dqn = DQNAgent(model=model, memory=memory, policy=policy, 
                  nb_actions=actions, nb_steps_warmup=10, target_model_update=1e-2)
    return dqn

In [19]:
dqn = build_agent(model, actions)
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
dqn.fit(env, nb_steps=steps, visualize=False, verbose=1)

Training for 80000 steps ...
Interval 1 (0 steps performed)
    1/10000 [..............................] - ETA: 5:20 - reward: 1.0000

  batch_idxs = np.random.random_integers(low, high - 1, size=size)


   54/10000 [..............................] - ETA: 1:21 - reward: -0.2593

  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=size)
  batch_idxs = np.random.random_integers(low, high - 1, size=s

526 episodes - episode_reward: 6.722 [-11.000, 19.000] - loss: 2.526 - mae: 4.597 - mean_q: 8.999

Interval 2 (10000 steps performed)
526 episodes - episode_reward: 8.920 [-13.000, 19.000] - loss: 3.219 - mae: 5.552 - mean_q: 10.845

Interval 3 (20000 steps performed)
526 episodes - episode_reward: 9.863 [-3.000, 19.000] - loss: 3.522 - mae: 5.831 - mean_q: 11.480

Interval 4 (30000 steps performed)
527 episodes - episode_reward: 12.465 [-3.000, 19.000] - loss: 3.884 - mae: 6.178 - mean_q: 12.218

Interval 5 (40000 steps performed)
526 episodes - episode_reward: 8.719 [-5.000, 17.000] - loss: 3.957 - mae: 6.212 - mean_q: 12.282

Interval 6 (50000 steps performed)
526 episodes - episode_reward: 10.084 [-5.000, 19.000] - loss: 3.973 - mae: 6.258 - mean_q: 12.393

Interval 7 (60000 steps performed)
527 episodes - episode_reward: 11.137 [-1.000, 19.000] - loss: 4.147 - mae: 6.377 - mean_q: 12.636

Interval 8 (70000 steps performed)
done, took 328.634 seconds


<tensorflow.python.keras.callbacks.History at 0x2410889a288>

In [20]:
#scores = dqn.test(env, nb_episodes=100, visualize=False)
#print(np.mean(scores.history['episode_reward']))

In [21]:
#dqn.load_weights('./models/wrong_percentage_13_15.h5f')

In [22]:
random_batches = False
specifyer = "trace"
checkpoint_filename = "checkpoint.json"
checkpoint_processed_data = True
fraction = False

if checkpoint_processed_data:
    with open(specifyer.split(".")[0] + ".npy", 'rb') as f:
        data = np.load(f, allow_pickle=True)
else:
    data = setup_data(checkpoint_filename, specifyer, save_file=True)
    
if fraction != False:
    data = data[:len(data)//fraction]

# 4. Reloading Agent from Memory

In [23]:
def test_model(input_ar):
    np_ar = np.array([[input_ar]])
    prediction = model.predict(np_ar)[0]
    index = numpy.where(prediction == numpy.amax(prediction))[0][0]
    
    if index == 1:
        return True
    elif index == 0:
        return False

In [24]:
global start
start = 0

def test_ai(data):
    right_count = 0
    wrong_count = 0
    whole_count = 0

    test_len = len(data)
    for i in tqdm(range(int(test_len/batch_size))):
        tmp_batch = get_batch(data)
        for batch_part in tmp_batch:
            whole_count += 1
            tmp_adress = batch_part[0]
            tmp_branch_history = batch_part[1]
            tmp_branch = batch_part[2]

            prediction = test_model(tmp_branch_history)

            if prediction == tmp_branch:
                right_count += 1

            else:
                wrong_count += 1
            
    results = {"right":right_count, "wrong":wrong_count, "whole": whole_count, "wrong_percentage":round((wrong_count/whole_count)*100,2)}
    return results

results = test_ai(data)
print(results)

100%|██████████| 27/27 [00:00<00:00, 87.45it/s]

{'right': 394, 'wrong': 146, 'whole': 540, 'wrong_percentage': 27.04}





In [25]:
dqn.save_weights('wrong_percentage_13.15.h5f', overwrite=True)

In [26]:
del model
del dqn
del env

In [27]:
env = gym.make('CartPole-v0')
actions = env.action_space.n
states = env.observation_space.shape[0]
model = build_model(states, actions)
dqn = build_agent(model, actions)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

NameError: name 'gym' is not defined

In [None]:
dqn.load_weights('dqn_weights.h5f')

In [None]:
_ = dqn.test(env, nb_episodes=5, visualize=True)