In [1]:
import numpy as np
import scipy
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
import pickle as pkl

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import to_categorical
from tensorflow.keras import datasets, layers, models
from sklearn.model_selection import StratifiedShuffleSplit


In [2]:
class Dataset:
    """The base class for all datasets.

    Every dataset class should inherit from Dataset and load its data in
    ``regenerate``. Dataset itself only declares the attributes and offers
    two helpers for post-processing the loaded arrays.

    Attributes:
        train_data: A numpy array with data that can be labelled.
        train_labels: A numpy array with labels of train_data.
        test_data: A numpy array with data that will be used for testing.
        test_labels: A numpy array with labels of test_data.
        state_data: A numpy array with the datapoints split off from
            train_data by ``_keep_state_data``.
        state_labels: A numpy array with labels of state_data.
        n_state_estimation: An integer indicating #datapoints reserved for state representation estimation.

    Note: pairwise ``distances`` between training points are not populated by
    this class.
    """

    def __init__(self, n_state_estimation):
        """Inits the Dataset object with empty arrays, then calls ``regenerate``.

        Args:
            n_state_estimation: number of training datapoints that
                ``_keep_state_data`` moves into the state-estimation split.
        """
        self.train_data = np.array([[]])
        self.train_labels = np.array([[]])
        self.test_data = np.array([[]])
        self.test_labels = np.array([[]])
        # Declared up front so the attributes exist even when a subclass
        # never calls _keep_state_data.
        self.state_data = np.array([[]])
        self.state_labels = np.array([[]])
        self.n_state_estimation = n_state_estimation
        # Must stay last: subclasses read the attributes set above.
        self.regenerate()

    def regenerate(self):
        """The function for generating a dataset with new parameters.

        The base implementation does nothing; subclasses override it.
        """
        pass

    def _scale_data(self):
        """Divides train and test data by 255.

        Presumably the inputs are 8-bit pixel intensities, which this maps to
        the [0, 1] range. No mean/variance standardisation is applied.
        """
        self.train_data = self.train_data/255.0
        self.test_data = self.test_data/255.0

    def _keep_state_data(self):
        """Reserves self.n_state_estimation training samples for estimating the state.

        The reserved samples are removed from train_data/train_labels and
        stored in state_data/state_labels. The split is random and unseeded.
        """
        self.train_data, self.state_data, self.train_labels, self.state_labels = train_test_split(
            self.train_data, self.train_labels, test_size=self.n_state_estimation)
        
    #def _compute_distances(self):
    #    print("computing distance...")
    #    """Computes the pairwise distances between all training datapoints"""
    #    self.distances = scipy.spatial.distance.pdist(self.train_data, metric='cosine')
    #    self.distances = scipy.spatial.distance.squareform(self.distances)

In [3]:
class MNIST_train(Dataset):
    """Dataset built from the first (training) tuple of Keras' MNIST loader.

    Added by Seungbo

    Attributes:
        subset: An integer indicating what subset of data to use. 0: even, 1: odd, -1: all datapoints.
        size: An integer indicating the size of training dataset to sample, if -1 use all data.
    """

    def __init__(self, n_state_estimation, subset, size=-1):
        """Inits a few attributes and the attributes of Dataset object."""
        # These must be set before Dataset.__init__, which calls regenerate().
        self.subset = subset
        self.size = size
        Dataset.__init__(self, n_state_estimation)

    def regenerate(self):
        """Loads the data and splits it into train, test and state subsets.

        Raises:
            ValueError: if ``self.subset`` is not one of 0, 1 or -1.
        """
        # load data
        (X, y), _ = tf.keras.datasets.mnist.load_data()
        X = X.reshape((len(X), 28, 28, 1))
        dtst_size = len(y)

        if self.subset == 0:
            # even datapoints subset
            valid_indeces = np.arange(0, dtst_size, 2)
        elif self.subset == 1:
            # odd datapoints subset
            valid_indeces = np.arange(1, dtst_size, 2)
        elif self.subset == -1:
            # all datapoints
            valid_indeces = np.arange(dtst_size)
        else:
            # Fail with a clear error instead of printing and then crashing
            # on an undefined variable.
            raise ValueError(
                'Incorrect subset attribute value! Expected 0, 1 or -1, got {!r}'.format(self.subset))

        # get a part of dataset according to subset (even, odd or all)
        train_test_data = X[valid_indeces]
        train_test_labels = y[valid_indeces]

        # Random, stratified half/half split so every class appears on both
        # sides of the split.
        sss = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=0)
        train_index, test_index = next(sss.split(train_test_data, train_test_labels))
        # The indices are positions within train_test_data, so they must be
        # applied to the subset arrays and not to the full X / y.
        self.train_data = train_test_data[train_index]
        self.test_data = train_test_data[test_index]
        self.train_labels = train_test_labels[train_index]
        self.test_labels = train_test_labels[test_index]

        self._scale_data()
        self._keep_state_data()

        # keep only a part of data for training
        self.train_data = self.train_data.astype(np.float32)
        if self.size != -1:
            # -1 means "use everything"; slicing with [:-1] would silently
            # drop the last sample.
            self.train_data = self.train_data[:self.size]
            self.train_labels = self.train_labels[:self.size]
        # Only the training labels are one-hot encoded; test and state labels
        # stay as integer class ids.
        self.train_labels = to_categorical(self.train_labels, 10)


In [4]:
class MNIST_test(Dataset):
    """Dataset built from the second (test) tuple of Keras' MNIST loader.

    Added by Seungbo

    Attributes:
        subset: An integer indicating what subset of data to use. 0: even, 1: odd, -1: all datapoints.
        size: An integer indicating the size of training dataset to sample, if -1 use all data.
    """

    def __init__(self, n_state_estimation, subset, size=-1):
        """Inits a few attributes and the attributes of Dataset object."""
        # These must be set before Dataset.__init__, which calls regenerate().
        self.subset = subset
        self.size = size
        Dataset.__init__(self, n_state_estimation)

    def regenerate(self):
        """Loads the data and splits it into train, test and state subsets.

        Raises:
            ValueError: if ``self.subset`` is not one of 0, 1 or -1.
        """
        # load data
        _, (X, y) = tf.keras.datasets.mnist.load_data()
        X = X.reshape((len(X), 28, 28, 1))
        dtst_size = len(y)

        if self.subset == 0:
            # even datapoints subset
            valid_indeces = np.arange(0, dtst_size, 2)
        elif self.subset == 1:
            # odd datapoints subset
            valid_indeces = np.arange(1, dtst_size, 2)
        elif self.subset == -1:
            # all datapoints
            valid_indeces = np.arange(dtst_size)
        else:
            # Fail with a clear error instead of printing and then crashing
            # on an undefined variable.
            raise ValueError(
                'Incorrect subset attribute value! Expected 0, 1 or -1, got {!r}'.format(self.subset))

        # get a part of dataset according to subset (even, odd or all)
        train_test_data = X[valid_indeces]
        train_test_labels = y[valid_indeces]

        # Random, stratified half/half split so every class appears on both
        # sides of the split.
        sss = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=0)
        train_index, test_index = next(sss.split(train_test_data, train_test_labels))
        # The indices are positions within train_test_data, so they must be
        # applied to the subset arrays and not to the full X / y.
        self.train_data = train_test_data[train_index]
        self.test_data = train_test_data[test_index]
        self.train_labels = train_test_labels[train_index]
        self.test_labels = train_test_labels[test_index]

        self._scale_data()
        self._keep_state_data()

        # keep only a part of data for training
        self.train_data = self.train_data.astype(np.float32)
        if self.size != -1:
            # -1 means "use everything"; slicing with [:-1] would silently
            # drop the last sample.
            self.train_data = self.train_data[:self.size]
            self.train_labels = self.train_labels[:self.size]
        # Only the training labels are one-hot encoded; test and state labels
        # stay as integer class ids.
        self.train_labels = to_categorical(self.train_labels, 10)


In [5]:
#%matplotlib inline

import numpy as np
from sklearn import metrics
from sklearn.linear_model import LogisticRegression
# depending on the classification model use, we might need to import other packages
# from sklearn import svm
# from sklearn.ensemble import RandomForestClassifier

#from datasets import DatasetUCI

from envs import LalEnvTargetAccuracy

from helpers import Minibatch, ReplayBuffer
from dqn import DQN
from Test_AL import policy_rl

SyntaxError: invalid syntax (envs.py, line 209)

# Setup and initialisation



#### Parameters for dataset and model

In [8]:
# Number of training datapoints reserved for state estimation; passed to the
# dataset constructors and, further down, to the DQN agent as observation_length.
N_STATE_ESTIMATION = 30
# Number of training datapoints kept by each dataset (-1 keeps all of them).
SIZE = 100
# If we want to train and test RL on the same dataset, use even and odd datapoints for training and testing respectively.
SUBSET = -1 # -1 for using all datapoints, 0 for even, 1 for odd
N_JOBS = 1 # can set more if we want to parallelise

# The quality is measured according to a given quality measure `quality_method`.
QUALITY_METHOD = metrics.accuracy_score
# The `tolerance_level` is the proportion of max quality that needs to be achieved in order to terminate an episode.
TOLERANCE_LEVEL = 0.98

Initialise a dataset that will contain a sample of datapoints from one of the indicated classes.

In [9]:
# Dataset the RL agent is trained on; uses the SUBSET setting from the parameters cell.
dataset = MNIST_train(n_state_estimation=N_STATE_ESTIMATION, subset=SUBSET, size=SIZE)
# Dataset used for test episodes.
# NOTE(review): subset is hard-coded to 1 (odd datapoints) here rather than derived from SUBSET — confirm this is intended.
dataset_test = MNIST_test(n_state_estimation=N_STATE_ESTIMATION, subset=1, size=SIZE)

Initialise a model that would be used for training a classifier. <br>
It can be, for example, Logistic regression: <br>
`LogisticRegression(n_jobs=N_JOBS)` <br>
SVM: <br>
`svm.SVC(probability=True)`

In [6]:
class MNISTModel(tf.keras.Model):
    """Subclassed Keras CNN classifier with ten softmax outputs.

    Architecture: three Conv2D (32, 64, 128 filters, 3x3, ReLU) + MaxPool2D
    stages, Flatten, Dense(256, ReLU) with Dropout(0.4), Dense(10, softmax).
    """

    def __init__(self):
        """Creates the layers; they are wired together in ``call``."""
        # call the parent constructor (tf.keras.Model)
        super(MNISTModel, self).__init__()
        # initialize the layers
        self.conv1 = keras.layers.Conv2D(filters=32, kernel_size=[3, 3], padding='SAME', activation=tf.nn.relu)
        self.pool1 = keras.layers.MaxPool2D(padding='SAME')
        self.conv2 = keras.layers.Conv2D(filters=64, kernel_size=[3, 3], padding='SAME', activation=tf.nn.relu)
        self.pool2 = keras.layers.MaxPool2D(padding='SAME')
        self.conv3 = keras.layers.Conv2D(filters=128, kernel_size=[3, 3], padding='SAME', activation=tf.nn.relu)
        self.pool3 = keras.layers.MaxPool2D(padding='SAME')
        self.pool3_flat = keras.layers.Flatten()
        self.dense4 = keras.layers.Dense(units=256, activation=tf.nn.relu)
        self.drop4 = keras.layers.Dropout(rate=0.4)
        self.dense5 = keras.layers.Dense(units=10, activation=tf.nn.softmax)

    def call(self, inputs, training=False):
        """Runs the forward pass using the layers created in ``__init__``.

        Args:
            inputs: batch of input images.
            training: whether the model runs in training mode. It is forwarded
                to the dropout layer, the only layer here that behaves
                differently between training and inference.

        Returns:
            The softmax output of the final dense layer.
        """
        net = self.conv1(inputs)
        net = self.pool1(net)
        net = self.conv2(net)
        net = self.pool2(net)
        net = self.conv3(net)
        net = self.pool3(net)
        net = self.pool3_flat(net)
        net = self.dense4(net)
        # Forward the flag explicitly so dropout follows the caller's mode.
        net = self.drop4(net, training=training)
        net = self.dense5(net)
        return net

In [15]:
# Small CNN classifier for 28x28x1 inputs with ten softmax outputs.
model = keras.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dense(10, activation='softmax')
])

# The datasets one-hot encode train_labels with to_categorical, so the loss
# must be the non-sparse categorical cross-entropy; the sparse variant
# expects integer class ids.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [10]:
# Show the label of the first training datapoint; train_labels were one-hot encoded by the dataset class.
dataset.train_labels[0, :]

array([0., 0., 0., 0., 1., 0., 0., 0., 0., 0.], dtype=float32)

In [16]:
# Forward pass on the training data. Only the inputs are passed: a second
# positional argument to a Keras model call is treated as the `training`
# flag, not as labels.
logits = model(dataset.train_data)

In [26]:
# predict() requires the input samples; run it on the training data.
output = model.predict(dataset.train_data)

TensorShape([Dimension(100), Dimension(10)])

In [None]:
# Forward pass. Only the inputs are passed: a second positional argument to a
# Keras model call is treated as the `training` flag, not as labels.
logits = model(dataset.train_data)
# `model` already ends in a softmax layer, so its output is the class
# probability prediction; a second softmax would distort it.
prediction = logits

# Define loss and optimizer
# Adam's default step size; the original cell referenced an undefined name.
learning_rate = 1e-3
# The loss works on probabilities and one-hot labels, matching the model's
# softmax output and the to_categorical-encoded train_labels.
loss_op = tf.reduce_mean(keras.losses.categorical_crossentropy(
    tf.constant(dataset.train_labels), prediction))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
train_op = optimizer.minimize(loss_op)

In [None]:
# train_labels are one-hot vectors (to_categorical), so use the non-sparse
# loss and accuracy; the sparse variants expect integer class ids.
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.01),
    loss=tf.keras.losses.CategoricalCrossentropy(),
    metrics=[tf.keras.metrics.CategoricalAccuracy()],
)

In [None]:
# Fit the classifier on the labelled training split.
model.fit(
    dataset.train_data,
    dataset.train_labels,
    epochs=5,
    batch_size=32,
    verbose=1,
)

Initialise the environment

In [None]:
# Environment built on the training dataset.
env = LalEnvTargetAccuracy(
    dataset,
    model,
    quality_method=QUALITY_METHOD,
    tolerance_level=TOLERANCE_LEVEL,
)
# Environment built on the test dataset; it receives the same `model` object.
env_test = LalEnvTargetAccuracy(
    dataset_test,
    model,
    quality_method=QUALITY_METHOD,
    tolerance_level=TOLERANCE_LEVEL,
)
#tf.reset_default_graph()

#### Parameters for training RL

In [None]:
DIRNAME = './agents/cnn_mnist_jupyter/' # The resulting agent of this experiment will be written to this directory

# Replay buffer parameters.
# NOTE(review): 1e4 is a float literal — confirm ReplayBuffer accepts a non-integer size.
REPLAY_BUFFER_SIZE = 1e4
PRIOROTIZED_REPLAY_EXPONENT = 3

# Agent parameters.
BATCH_SIZE = 32
LEARNING_RATE = 1e-3
TARGET_COPY_FACTOR = 0.01
BIAS_INITIALIZATION = 0 # default 0 # will be set to minus half of the average episode duration after the warm-start episodes

# Warm start parameters.
WARM_START_EPISODES = 128 # reduce for test
NN_UPDATES_PER_WARM_START = 100

# Episode simulation parameters.
# Epsilon decreases linearly from EPSILON_START to EPSILON_END over EPSILON_STEPS training iterations.
EPSILON_START = 1
EPSILON_END = 0.1
EPSILON_STEPS = 1000

# Training parameters
TRAINING_ITERATIONS = 1000 # reduce for test
TRAINING_EPISODES_PER_ITERATION = 10 # at each training iteration x episodes are simulated
NN_UPDATES_PER_ITERATION = 60 # at each training iteration x gradient steps are made

# Validation and test parameters
N_VALIDATION = 500 # reduce for test
N_TEST = 500 # reduce for test
VALIDATION_TEST_FREQUENCY = 100 # every x iterations val and test are performed

Initialise replay buffer

In [None]:
# Buffer that stores the transitions collected during episodes.
replay_buffer = ReplayBuffer(
    prior_exp=PRIOROTIZED_REPLAY_EXPONENT,
    buffer_size=REPLAY_BUFFER_SIZE,
)

# Warm start

Warm-start the replay buffer with random episodes. 

In [None]:
def reset_weights(model):
    """Re-initialises the kernels and biases of the layers of a Keras model.

    For every layer that declares a ``kernel_initializer`` and/or
    ``bias_initializer``, the initializer op of the corresponding variable is
    run in the current Keras backend session. Layers without such weights, and
    weights that are missing or None, are skipped.

    Args:
        model: object exposing a ``layers`` iterable (e.g. a Keras model).
    """
    session = keras.backend.get_session()
    for layer in model.layers:
        # Reset biases as well as kernels; otherwise the model keeps its
        # trained offsets after a "reset".
        for weight_name in ('kernel', 'bias'):
            if not hasattr(layer, weight_name + '_initializer'):
                continue
            variable = getattr(layer, weight_name, None)
            if variable is not None:
                variable.initializer.run(session=session)

Collect episodes

In [None]:
# Keep track of episode duration to compute average
episode_durations = []
for _ in range(WARM_START_EPISODES):
    print('.', end='')
    #reset_weights(model)
    
    # Reset the environment to start a new episode
    # classifier_state contains vector representation of state of the environment (depends on classifier)
    # next_action_state contains vector representations of all actions available to be taken at the next step
    classifier_state, next_action_state = env.reset(n_start=10)
    terminal = False
    episode_duration = 0
    # before we reach a terminal state, make steps
    while not terminal:
        # Choose a random action
        action = np.random.randint(0, env.n_actions)
        # taken_action_state is a vector corresponding to a taken action
        taken_action_state = next_action_state[:,action]
        next_classifier_state, next_action_state, reward, terminal = env.step(action)
        # Store the transition in the replay buffer
        replay_buffer.store_transition(classifier_state, 
                                       taken_action_state, 
                                       reward, next_classifier_state, 
                                       next_action_state, terminal)
        # Get ready for next step
        classifier_state = next_classifier_state
        episode_duration += 1 
    episode_durations.append(episode_duration)
# compute the average episode duration of episodes generated during the warm start procedure
av_episode_duration = np.mean(episode_durations)
print('Average episode duration = ', av_episode_duration)

BIAS_INITIALIZATION = -av_episode_duration/2

Initialize the DQN agent

In [None]:
# DQN agent configured from the constants defined above; the bias value comes
# from the warm-start episodes.
agent = DQN(
    experiment_dir=DIRNAME,
    observation_length=N_STATE_ESTIMATION,
    batch_size=BATCH_SIZE,
    learning_rate=LEARNING_RATE,
    target_copy_factor=TARGET_COPY_FACTOR,
    bias_average=BIAS_INITIALIZATION,
)

Do updates of the network based on warm start episodes

In [None]:
for _update in range(NN_UPDATES_PER_WARM_START):
    print('.', end='')
    # Draw a minibatch of stored transitions from the replay buffer.
    minibatch = replay_buffer.sample_minibatch(BATCH_SIZE)
    # One training step of the agent; it returns the temporal-difference errors.
    td_error = agent.train(minibatch)
    # Feed the errors back to the buffer for the sampled transitions.
    replay_buffer.update_td_errors(td_error, minibatch.indeces)

# Train RL

Run multiple training iterations. Each iteration consists of:
- generating episodes following agent's actions with exploration
- validation and test episodes for evaluating performance
- Q-network updates


In [None]:
# Bookkeeping for the training loop: collected rewards and a running episode counter.
train_episode_rewards, i_episode = [], 0

In [None]:
def _mean_episode_length(environment, rl_agent, n_episodes):
    """Runs n_episodes episodes with policy_rl and returns their mean length.

    The length of an episode is taken as len(environment.episode_qualities)
    once the environment reports that the episode is done.
    """
    durations = []
    for _ in range(n_episodes):
        done = False
        state, action_state = environment.reset()
        while not done:
            action = policy_rl(rl_agent, state, action_state)
            state, action_state, _reward, done = environment.step(action)
        durations.append(len(environment.episode_qualities))
    return np.mean(durations)


for iteration in range(TRAINING_ITERATIONS):
    # GENERATE NEW EPISODES
    # Compute epsilon value according to the schedule.
    epsilon = max(EPSILON_END, EPSILON_START-iteration*(EPSILON_START-EPSILON_END)/EPSILON_STEPS)
    print(iteration, end=': ')
    # Simulate training episodes.
    for _ in range(TRAINING_EPISODES_PER_ITERATION):
        # Reset the environment to start a new episode.
        classifier_state, next_action_state = env.reset()
        print(".", end='')
        terminal = False
        # Keep track of stats of episode to analyse it in tensorboard.
        episode_reward = 0
        episode_duration = 0
        episode_summary = tf.Summary()
        # Run an episode.
        while not terminal:
            # Let an agent choose an action.
            action = agent.get_action(classifier_state, next_action_state)
            # Get a prob of a datapoint corresponding to an action chosen by an agent.
            # It is needed just for the tensorboard analysis.
            rlchosen_action_state = next_action_state[0,action]

            # With epsilon probability, take a random action.
            # (random_sample is the canonical name of the legacy alias ranf.)
            if np.random.random_sample() < epsilon:
                action = np.random.randint(0, env.n_actions)
            # taken_action_state is a vector that corresponds to a taken action
            taken_action_state = next_action_state[:,action]
            # Make another step.
            next_classifier_state, next_action_state, reward, terminal = env.step(action)
            # Store a step in replay buffer
            replay_buffer.store_transition(classifier_state,
                                           taken_action_state,
                                           reward,
                                           next_classifier_state,
                                           next_action_state,
                                           terminal)
            # Change a state of environment.
            classifier_state = next_classifier_state
            # Keep track of stats and add summaries to tensorboard.
            episode_reward += reward
            episode_duration += 1
            episode_summary.value.add(simple_value=rlchosen_action_state,
                                      tag="episode/rlchosen_action_state")
            episode_summary.value.add(simple_value=taken_action_state[0],
                                      tag="episode/taken_action_state")
        # Add summaries to tensorboard
        episode_summary.value.add(simple_value=episode_reward,
                                  tag="episode/episode_reward")
        episode_summary.value.add(simple_value=episode_duration,
                                  tag="episode/episode_duration")
        # Record the reward; the list was initialised but never filled before.
        train_episode_rewards.append(episode_reward)
        i_episode += 1
        agent.summary_writer.add_summary(episode_summary, i_episode)
        agent.summary_writer.flush()

    # VALIDATION AND TEST EPISODES
    episode_summary = tf.Summary()
    if iteration%VALIDATION_TEST_FREQUENCY == 0:
        # Validation episodes are run. Use env for it.
        episode_summary.value.add(simple_value=_mean_episode_length(env, agent, N_VALIDATION),
                                  tag="episode/train_duration")
        # Test episodes are run. Use env_test for it.
        episode_summary.value.add(simple_value=_mean_episode_length(env_test, agent, N_TEST),
                                  tag="episode/test_duration")

    episode_summary.value.add(simple_value=epsilon,
                              tag="episode/epsilon")
    # Per-iteration summaries use the iteration number as step, whereas the
    # per-episode summaries above use i_episode.
    agent.summary_writer.add_summary(episode_summary, iteration)
    agent.summary_writer.flush()

    # NEURAL NETWORK UPDATES
    for _ in range(NN_UPDATES_PER_ITERATION):
        minibatch = replay_buffer.sample_minibatch(BATCH_SIZE)
        td_error = agent.train(minibatch)
        replay_buffer.update_td_errors(td_error, minibatch.indeces)

#### To see the results in tensorboard

on the server:
tensorboard --logdir=./

on the computer:
ssh -N -f -L localhost:6006:localhost:6006 konyushk@iccvlabsrv20.iccluster.epfl.ch && open http://localhost:6006