In [1]:
from PyCkt import PyCkt
import math
import numpy as np

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
def bin2dec(bin_nb):
    """converts binary no string to uint"""
    
    return int(bin_nb, 2)
    

In [4]:
def sigmoid(x):
    """returns sigmoid of a vector x"""
    
    return list(map(lambda z: 1.0/(1.0 + np.exp(-z)), x))

In [5]:
def relu(vector):
    """returns relu of a vector"""
    
    vector[vector < 0] = 0
    return vector

In [6]:
def choose_action(probability):
    """returns an action given the probability of 1"""
    
    random_value = np.random.uniform()
    if random_value < probability:
        return 1
    else:
        return 0

In [7]:
def discount_rewards(rewards, gamma):
    """ In a sequential system the actions you take 20 steps before the end result are more important to the 
    overall result than an action you took one step ago. Note that gamma gets multiplied the most with the 
    latest action and least with the first action"""
    
    discounted_rewards = np.zeros_like(rewards)
    
    # running_add is the accumulators of the discounted rewards
    running_add = 0
    for t in reversed(range(0, rewards.size)):
        running_add = running_add * gamma + rewards[t]
        discounted_rewards[t] = running_add
    return discounted_rewards

In [8]:
def discount_with_rewards(gradient_log_p, episode_rewards, gamma):
    """ discount the gradient with the normalized rewards """
    
    discounted_episode_rewards = discount_rewards(episode_rewards, gamma)
    # standardize the rewards to be unit normal (helps control the gradient estimator variance)
    discounted_episode_rewards -= np.mean(discounted_episode_rewards)
    discounted_episode_rewards /= np.std(discounted_episode_rewards)
    return gradient_log_p * discounted_episode_rewards

In [9]:
def preprocess_observation(observations):
    """converts observation from string to list"""
    
    ar_ = []
    for observation in observations:
        for c_ in observation:
            if c_ == '1':
                ar_.append(1)
            else:
                ar_.append(0)
    return ar_

In [10]:
def encode_vec(vlist):
    """converts vector from list to string"""
    
    s = ''
    for e in vlist:
        if e==1:
            s = s + '1'
        else:
            s = s + '0'
    return s.encode()

In [11]:
class TestGen_Rl:
    """a reinforcement learning based test generator for sequential circuits"""
    
    # circuit configuration
    coeff = []
    intc = 0
    sel_indices = []
    max_obs = 0
    num_p = 1
    partition_size = 1
    obs_values = []
    obs_value_counts = []
    ckt = None
    init_seq = []
    
    
    # nn hyperparameters
    batch_size = 1
    gamma = 0.9
    decay_rate = 0.99
    learning_rate = 1e-4
    num_hidden_layers = 1
    num_hidden_layer_neurons = {}
    weights = {}
    veclen = 0
    input_dimensions = 0
    output_dimensions = 0
    reward_sum = 0
    prev_reward_sum = 0
    observations = []
    max_vecs = 0 
    

    def __init__(self,
                cktname,
                coef,
                intc,
                feature_indices,
                init_sequence,
                nn_configuration,
                vpp = 1,
                partition_size = 2,
                batch_size = 1,
                gamma = 0.99,
                decay_rate = 0.99,
                learning_rate = 1e-4,
                max_vecs = 0):
        """constructor for TestGen_Rl"""
        
        self.coeff = coef
        self.intrc = intc
        self.sel_indices = list(map(lambda x: x - 1, feature_indices))
        self.max_obs = 2**partition_size
        self.ckt = PyCkt(cktname)
        self.vpp = vpp
        self.input_dimensions = self.vpp * (self.ckt.getNumPo() + self.ckt.getNumState())
        self.veclen = self.ckt.getNumPi()
        self.output_dimensions = self.vpp * self.veclen
        self.partition_size = partition_size
        self.num_p = math.ceil((self.ckt.getNumPo() + self.ckt.getNumState()) / self.partition_size)
        self.init_seq = init_sequence
        
        # lists of all values observed at all windows (or partitions)
        self.obs_values = [[] for w in range(self.num_p)]
        
        # lists of counts of all values observed at all windows
        self.obs_value_counts = [[0 for i in range(self.max_obs)] for w in range(self.num_p)]
        
        self.batch_size = batch_size
        self.gamma = gamma
        self.decay_rate = decay_rate
        self.learning_rate = learning_rate
        self.max_vecs = max_vecs
        
        self.num_hidden_layers = len(nn_configuration)
        
        for layer_nb, layer_size in enumerate(nn_configuration):
            self.num_hidden_layer_neurons[layer_nb] = layer_size
        
        for layer_nb in range(self.num_hidden_layers + 1):
            if layer_nb == 0:
                self.weights[layer_nb] = np.random.randn(self.num_hidden_layer_neurons[layer_nb], self.input_dimensions)
            elif layer_nb ==  self.num_hidden_layers:
                self.weights[layer_nb] = np.random.randn(self.output_dimensions, self.num_hidden_layer_neurons[layer_nb-1])
            else:
                self.weights[layer_nb] = np.random.randn(self.num_hidden_layer_neurons[layer_nb],
                                                        self.num_hidden_layer_neurons[layer_nb-1])
        
            
        
        
    def learn(self):
        """executes the reinforcement learning algorithm"""
        
        pred_fc = []
        expectation_g_squared = {}
        g_dict = {}
        episode_number = 0
        episode_hidden_layer_values = {}
        for wt_ind in self.weights.keys():
            expectation_g_squared[wt_ind] = np.zeros_like(self.weights[wt_ind])
            g_dict[wt_ind] = np.zeros_like(self.weights[wt_ind])
            episode_hidden_layer_values[wt_ind] = []

        episode_observations = []
        episode_gradient_log_ps = []
        episode_rewards = []
        num_applied_vecs = 0
        
        # initialize ckt
        self.observations = []
        self.ckt.reset()
        for vec in self.init_seq:
            resp, state = self.ckt.lsim_s(vec.encode())
        self.observations.append((resp + state).decode())
        
        for vec_ind in range(self.vpp - 1):
            #print ('appending %d random vecs to init sequence' % (self.vpp-1))
            vec = [np.random.randint(0,1) for _ in range(self.veclen)]
            resp, state = self.ckt.lsim_s(encode_vec(vec))
            self.observations.append((resp + state).decode())
            num_applied_vecs += 1
        
        print (num_applied_vecs)
        self.prev_reward_sum = 0
        self.reward_sum = self.get_est_fcov(num_applied_vecs)
        
        while True:

            processed_observations = preprocess_observation(self.observations)
            hidden_layer_values, up_probability = self.apply_neural_nets(processed_observations)

            episode_observations.append(processed_observations)
            for layer_nb in range(self.num_hidden_layers):
                episode_hidden_layer_values[layer_nb].append(hidden_layer_values[layer_nb])
            
            # action
            action = list(map(lambda x: choose_action(x), up_probability))
            self.observations = []
            # carry out the chosen action
            for vec_ind in range(self.vpp):
                a = vec_ind * self.veclen
                b = (vec_ind + 1) * self.veclen
                vec = action[a:b]
                resp, state = self.ckt.lsim_s(encode_vec(vec))
                self.observations.append((resp+state).decode())
                num_applied_vecs += 1
            #print num_applied_vecs

            self.prev_reward_sum = self.reward_sum
            self.reward_sum = self.get_est_fcov(num_applied_vecs)
            self.reward = self.reward_sum - self.prev_reward_sum
            episode_rewards.append(self.reward)

            # see here: http://cs231n.github.io/neural-networks-2/#losses
            # we cheat for all steps in the episode, for all episodes :st
            fake_labels = action
            loss_function_gradient = np.subtract(fake_labels, up_probability)
            episode_gradient_log_ps.append(loss_function_gradient)

            if num_applied_vecs >= self.max_vecs: # an episode finished
                episode_number += 1

                # convert the lists to numpy arrays for easier processing
                for layer_nb in range(self.num_hidden_layers):
                    episode_hidden_layer_values[layer_nb] = np.vstack(episode_hidden_layer_values[layer_nb])
                episode_observations = np.vstack(episode_observations)
                episode_gradient_log_ps = np.vstack(episode_gradient_log_ps)
                episode_rewards = np.vstack(episode_rewards)

                # Tweak the gradient of the log_ps based on the discounted rewards
                # we have rewards recorded for each step in the episode
                episode_gradient_log_ps_discounted = discount_with_rewards(episode_gradient_log_ps, episode_rewards, self.gamma)

                gradient = self.compute_gradient(
                  episode_gradient_log_ps_discounted,
                  episode_hidden_layer_values,
                  episode_observations
                )

                # Sum the gradient for use when we hit the batch size
                for layer_name in gradient:
                    g_dict[layer_name] += gradient[layer_name]

                if episode_number % self.batch_size == 0:
                    self.update_weights(expectation_g_squared, g_dict)

                print ('resetting env. episode reward total was %f' % self.reward_sum)
                pred_fc.append(self.reward_sum)

                episode_hidden_layer_values = {}
                for wt_ind in self.weights.keys():
                    expectation_g_squared[wt_ind] = np.zeros_like(self.weights[wt_ind])
                    g_dict[wt_ind] = np.zeros_like(self.weights[wt_ind])
                
                for layer_nb in range(self.num_hidden_layers):
                    episode_hidden_layer_values[layer_nb] = []

                episode_observations = []
                episode_gradient_log_ps = []
                episode_rewards = []
                num_applied_vecs = 0

                self.obs_values = [[] for w in range(self.num_p)]
                self.obs_value_counts = [[0 for i in range(self.max_obs)] for w in range(self.num_p)]
                self.ckt.reset()
                
                self.observations = []
                for vec in self.init_seq:
                    resp, state = self.ckt.lsim_s(vec.encode())
                self.observations.append((resp + state).decode())
                
                for vec_ind in range(self.vpp - 1):
                    vec = [np.random.randint(0,1) for _ in range(self.veclen)]
                    resp, state = self.ckt.lsim_s(encode_vec(vec))
                    self.observations.append((resp+state).decode())
                    num_applied_vecs += 1
            
                self.prev_reward_sum = 0
                self.reward_sum = self.get_est_fcov(num_applied_vecs)
        
    def compute_gradient(self, gradient_log_p, hidden_layer_values, observation_values):
        """ See here: http://neuralnetworksanddeeplearning.com/chap2.html"""
        delta_L = gradient_log_p
        dC_dw = {}
        delta_l = {}
        for layer_nb in reversed(range(self.num_hidden_layers + 1)):
            if layer_nb == self.num_hidden_layers:
                dC_dw[layer_nb] = np.dot(delta_L.T, hidden_layer_values[layer_nb - 1])
                delta_l[layer_nb] = relu(np.dot(delta_L, self.weights[layer_nb]))
            elif layer_nb == 0:
                dC_dw[layer_nb] = np.dot(delta_l[layer_nb + 1].T, observation_values)
            else:
                dC_dw[layer_nb] = np.dot(delta_l[layer_nb + 1].T, hidden_layer_values[layer_nb - 1])
                delta_l[layer_nb] = relu(np.dot(delta_l[layer_nb + 1], self.weights[layer_nb]))
        return dC_dw
    
    def apply_neural_nets(self, observation_matrix):
        """ Based on the observation_matrix and weights, compute the new hidden layer values and the new output layer values"""
        hidden_layer_values = {}
        
        for layer_nb in range(self.num_hidden_layers + 1):
            if layer_nb == 0:
                #print ('weights dim: %s' % str(self.weights[layer_nb].shape))
                #print ('obs dim: %s' % str(len(observation_matrix)))
                hidden_layer_values[layer_nb] = relu(np.dot(self.weights[layer_nb], observation_matrix))
            elif layer_nb == self.num_hidden_layers:
                output_layer_values = sigmoid(np.dot(self.weights[layer_nb], hidden_layer_values[layer_nb-1]))
            else:
                hidden_layer_values[layer_nb] = relu(np.dot(self.weights[layer_nb], hidden_layer_values[layer_nb-1]))
        return hidden_layer_values, output_layer_values
    
    def update_weights(self, expectation_g_squared, g_dict):
        """ See here: http://sebastianruder.com/optimizing-gradient-descent/index.html#rmsprop"""
        epsilon = 1e-5
        for layer_name in self.weights.keys():
            g = g_dict[layer_name]
            expectation_g_squared[layer_name] = self.decay_rate * expectation_g_squared[layer_name] + \
                (1 - self.decay_rate) * g**2
            self.weights[layer_name] += (self.learning_rate * g)/(np.sqrt(expectation_g_squared[layer_name] + epsilon))
            g_dict[layer_name] = np.zeros_like(self.weights[layer_name]) # reset batch gradient buffer
            
    def update_part_counts(self):
        """updates number of observed counts for all partititions in the response"""
    
        for observation in self.observations:
            for i in range(self.num_p):
                #extract window substring
                substr = observation[ i*self.partition_size : min(len(observation),(i+1)*self.partition_size) ]

                #get count index from the observed number in the window (i.e. substring)
                c_ind = bin2dec(substr)

                #increment observation count of the observed number
                self.obs_value_counts[i][c_ind]+=1

                #add to observed numbers if not observed before
                if c_ind not in self.obs_values[i]:
                    self.obs_values[i].append(c_ind)
    
    def get_updated_metrics(self, resp_ind):
        """updates the metrics according to the response and reports them"""
    
        #tracks count of numbers observed so far in each window
        num_obs = [0 for _ in range(self.num_p)] 

        #tracks cumulative entropy observed so far
        cum_ent = [0. for _ in range(self.num_p)]
        
        self.update_part_counts()

        for i in range(self.num_p):
            num_obs[i] = len(self.obs_values[i])
            ent = 0.
            for j in range(self.max_obs):
                pj = self.obs_value_counts[i][j] * 1.0 / (resp_ind + 1)
                if pj != 0:
                     ent = ent + pj * math.log(pj,2)
            cum_ent[i] = ent

        # concatenate all features together
        metrics = num_obs + cum_ent
        metrics = list(map(lambda x: abs(x), metrics))

        #return selected metrics only
        sel_mets = []
        for j in range(len(metrics)):
            if j in self.sel_indices:
                sel_mets.append(metrics[j])

        return sel_mets
    
    def get_est_fcov(self, resp_ind):
        """returns new estimated fault coverage from circuit outputs"""
    
        #get values of selected features
        sel_feat = self.get_updated_metrics(resp_ind)
        return np.dot(sel_feat, self.coeff) + self.intrc

In [12]:
# define globals 
cktname = 's820'
coeff = [0.02586818,  0.01588013, -0.16383635, -0.09241966, 0.08055655, 0.0037864, 0.23622088, 0.12172977]
intrc = 0.05043711182
feature_indices = [11, 12, 15, 18, 19, 20, 23, 24]
nn_conf = [200]
init_seq = ['010100011011011001','101011010110110110']


tg = TestGen_Rl(cktname.encode(),
                coeff,
                intrc,
                feature_indices,
                init_seq,
                nn_conf,
                vpp = 1,
                partition_size = 2,
                batch_size = 1,
                gamma = 0.9,
                decay_rate = 0.99,
                learning_rate = 5e-4,
                max_vecs = 700)

In [13]:
tg.weights[0].shape

(200, 24)

In [14]:
tg.input_dimensions

24

In [15]:
tg.output_dimensions

18

In [16]:
tg.veclen

18

In [17]:
len(tg.observations)

0

In [18]:
tg.learn()

0
resetting env. episode reward total was 0.198915
resetting env. episode reward total was 0.212549
resetting env. episode reward total was 0.263701
resetting env. episode reward total was 0.187696
resetting env. episode reward total was 0.270523
resetting env. episode reward total was 0.276156
resetting env. episode reward total was 0.288939
resetting env. episode reward total was 0.312179
resetting env. episode reward total was 0.325990
resetting env. episode reward total was 0.327430
resetting env. episode reward total was 0.329888
resetting env. episode reward total was 0.382526
resetting env. episode reward total was 0.426519
resetting env. episode reward total was 0.456716
resetting env. episode reward total was 0.511935
resetting env. episode reward total was 0.556194
resetting env. episode reward total was 0.561713
resetting env. episode reward total was 0.569094
resetting env. episode reward total was 0.598067
resetting env. episode reward total was 0.596348
resetting env. epi

KeyboardInterrupt: 

In [None]:
tg.observations

In [None]:
tg.weights[1].shape

In [None]:
tg