In [1]:
# -*- coding: utf-8 -*-
"""
Created on Fri Jan 12 19:27:34 2018

@author: Jason
"""
import warnings
warnings.simplefilter("ignore")
import tensorflow as tf
import os
os.environ['TF_CPP_MIN_LOG_LEVEL']='3'
import numpy as np
from evaluate import evaluate
from policy_gradient import Controller
import argparse
import datetime
import time
import pickle
import pdb

def dump_best_params(params, task_id):
    """Pickle ``params`` to ``param_task<task_id>.pkl``.

    The file name is relative, so the file is created in the current
    working directory; an existing file of the same name is overwritten.

    Args:
        params: Any picklable object (the caller passes the list of network
            variables kept after a task).
        task_id: Identifier embedded in the file name via ``str(task_id)``.
    """
    # The context manager closes the handle even if pickling fails part-way.
    with open('param_task' + str(task_id) + '.pkl', "wb") as f:
        pickle.dump(params, f)

class RCL:
    """Driver that trains a sequence of tasks one after another.

    Task 0 trains a fixed 98-60-30-17 fully connected network. For every
    later task a ``Controller`` proposes actions, ``evaluate.evaluate_action``
    scores them, and the variables of the best-rewarded trial are carried
    forward to the next task.

    Constructing an instance loads the task data and immediately runs the
    whole procedure, because ``train`` is called from ``__init__``.
    """

    def __init__(self, args):
        """Copy the settings from ``args``, load the tasks and start training.

        Args:
            args: Namespace providing ``n_tasks``, ``n_epochs``,
                ``batch_size``, ``lr``, ``data_path``, ``max_trials``,
                ``penalty`` and ``optimizer``; it is also handed unchanged to
                ``evaluate`` and ``Controller``.
        """
        self.args = args
        self.num_tasks = args.n_tasks
        self.epochs = args.n_epochs
        self.batch_size = args.batch_size
        self.lr = args.lr
        self.data_path = args.data_path
        self.max_trials = args.max_trials
        self.penalty = args.penalty
        self.task_list = self.create_mnist_task()
        self.evaluates = evaluate(task_list=self.task_list, args=args)
        self.train()

    def create_session(self):
        """Return a new ``tf.Session`` with soft placement and GPU memory growth."""
        config = tf.ConfigProto(allow_soft_placement=True)
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        return sess

    def create_mnist_task(self):
        """Load and return the pickled task data stored at ``self.data_path``.

        The rest of the class indexes the result as
        ``task_list[task_id][k]``.
        """
        # NOTE(review): pickle.load executes arbitrary code embedded in the
        # file, so data_path must only ever point at a trusted file.
        with open(self.data_path, "rb") as f:
            return pickle.load(f)

    @staticmethod
    def _iter_batches(xs, ys, batch_size):
        """Yield consecutive ``(xs, ys)`` slices of at most ``batch_size`` items.

        The number of samples is taken from ``len(ys)``. No empty trailing
        batch is produced when that length is an exact multiple of
        ``batch_size``, and nothing is yielded for empty data.
        """
        for start in range(0, len(ys), batch_size):
            stop = start + batch_size
            yield xs[start:stop], ys[start:stop]

    def train(self):
        """Train every task in order and record the best parameters of each.

        Populates ``self.best_params`` (task id -> ``[test accuracy,
        variables]``), ``self.result_process`` (one list of validation
        accuracies per controller-searched task) and ``self.vars`` (the
        variables carried into the next task). After each task the current
        ``self.vars`` are written to disk with ``dump_best_params``.
        """
        self.best_params = {}
        self.result_process = []
        for task_id in range(self.num_tasks):
            # Placeholder entry, replaced once the task has been trained.
            self.best_params[task_id] = [0, 0]
            if task_id == 0:
                self._train_first_task(task_id)
            else:
                self._search_task(task_id)
            dump_best_params(self.vars, task_id)

    def _train_first_task(self, task_id):
        """Train the fixed base network on the first task and keep its weights.

        Sets ``self.vars`` to ``[w1, b1, w2, b2, w3, b3]`` as evaluated arrays
        and stores ``[test accuracy, self.vars]`` in ``self.best_params``.

        Raises:
            Exception: If ``args.optimizer`` is not "adam", "rmsprop" or "sgd".
        """
        with tf.Graph().as_default():
            with tf.name_scope("before"):
                inputs = tf.placeholder(shape=(None, 98), dtype=tf.float32)
                y = tf.placeholder(shape=(None, 17), dtype=tf.float32)
                w1 = tf.Variable(tf.truncated_normal(shape=(98, 60), stddev=0.01))
                b1 = tf.Variable(tf.constant(0.1, shape=(60,)))
                w2 = tf.Variable(tf.truncated_normal(shape=(60, 30), stddev=0.01))
                b2 = tf.Variable(tf.constant(0.1, shape=(30,)))
                w3 = tf.Variable(tf.truncated_normal(shape=(30, 17), stddev=0.01))
                b3 = tf.Variable(tf.constant(0.1, shape=(17,)))
                output1 = tf.nn.relu(tf.nn.xw_plus_b(inputs, w1, b1, name="output1"))
                output2 = tf.nn.relu(tf.nn.xw_plus_b(output1, w2, b2, name="output2"))
                output3 = tf.nn.xw_plus_b(output2, w3, b3, name="output3")
                # Softmax cross-entropy plus a small L2 penalty on the weight
                # matrices (biases are not regularised).
                loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=output3)) + \
                       0.0001*(tf.nn.l2_loss(w1) + tf.nn.l2_loss(w2) + tf.nn.l2_loss(w3))
                if self.args.optimizer == "adam":
                    optimizer = tf.train.AdamOptimizer(learning_rate=self.lr)
                elif self.args.optimizer == "rmsprop":
                    optimizer = tf.train.RMSPropOptimizer(learning_rate=self.lr)
                elif self.args.optimizer == "sgd":
                    optimizer = tf.train.GradientDescentOptimizer(learning_rate=self.lr)
                else:
                    raise Exception("please choose one optimizer")
                train_step = optimizer.minimize(loss)
                accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(y, axis=1), tf.argmax(output3, axis=1)), tf.float32))

                # Each task entry is indexed positionally: 0 and 1 are the
                # training inputs/labels and 4 and 5 the test inputs/labels,
                # as used below. Indices 2 and 3 are presumably the
                # validation split - TODO confirm against the data builder.
                task = self.task_list[task_id]
                sess = self.create_session()
                try:
                    sess.run(tf.global_variables_initializer())
                    for _ in range(self.epochs):
                        for batch_xs, batch_ys in self._iter_batches(task[0], task[1], self.batch_size):
                            sess.run(train_step, feed_dict={inputs: batch_xs, y: batch_ys})
                    accuracy_test = sess.run(accuracy, feed_dict={inputs: task[4], y: task[5]})
                    print("Task {}. Test accuracy: {} ".format(task_id, accuracy_test))
                    self.vars = sess.run([w1, b1, w2, b2, w3, b3])
                finally:
                    # Release the session's resources even if training fails.
                    sess.close()
        self.best_params[task_id] = [accuracy_test, self.vars]

    def _search_task(self, task_id):
        """Run the controller-driven trials for one task after the first.

        Each trial asks the controller for actions, evaluates them starting
        from ``self.vars``, and computes
        ``reward = validation accuracy - penalty * sum(actions)``. The trial
        with the highest reward supplies ``self.best_params[task_id]`` and
        the new ``self.vars``.
        """
        tf.reset_default_graph()
        controller = Controller(self.args)
        results = []
        # Start below every possible reward so the first trial is always
        # accepted, even when its reward is zero or negative.
        best_reward = float("-inf")
        found_best = False
        try:
            for trial in range(self.max_trials):
                actions = controller.get_actions()
                print("***actions***: ", actions)
                accuracy_val, accuracy_test = self.evaluates.evaluate_action(
                    var_list=self.vars, actions=actions, task_id=task_id)

                results.append(accuracy_val)
                print("Task {}, Trial {}, Test accuracy: {} ".format(task_id, trial, accuracy_test))
                reward = accuracy_val - self.penalty*sum(actions)
                print("reward: ", reward)
                if reward > best_reward:
                    best_reward = reward
                    found_best = True
                    self.best_params[task_id] = (accuracy_test, self.evaluates.var_list)
                controller.train_controller(reward)
        finally:
            controller.close_session()
        if not found_best:
            # No trial produced a usable reward (e.g. max_trials == 0): carry
            # the previous task's variables forward instead of the placeholder.
            self.best_params[task_id] = [0, self.vars]
        print('Best architecture {}. Reward {}. Test accuracy {}'.format(
            [arch.shape for arch in self.best_params[task_id][1]], best_reward, self.best_params[task_id][0]))
        print('-' * 150)
        self.result_process.append(results)
        self.vars = self.best_params[task_id][1]
        
if __name__ == "__main__":
    # Script entry point: build the settings, run the whole RCL procedure,
    # then write a text summary and a pickle of the per-trial results.
    parser = argparse.ArgumentParser(description='Reinforced Continual learning')

    # model parameters
    parser.add_argument('--n_tasks', type=int, default=5,
                        help='number of tasks')
    parser.add_argument('--n_hiddens', type=str, default='60,30',
                        help='number of hidden neurons at each layer')
    parser.add_argument('--n_layers', type=int, default=2,
                        help='number of hidden layers')

    # optimizer parameters
    parser.add_argument('--n_epochs', type=int, default=10,
                        help='Number of epochs per task')
    parser.add_argument('--batch_size', type=int, default=64,
                        help='batch size')
    parser.add_argument('--lr', type=float, default=1e-3,
                        help='SGD learning rate')
    parser.add_argument('--max_trials', type=int, default=1,
                        help='max_trials')

    # experiment parameters
    parser.add_argument('--seed', type=int, default=0,
                        help='random seed')

    parser.add_argument('--save_path', type=str, default='./results/',
                        help='save models at the end of training')

    # data parameters
    parser.add_argument('--data_path', default='./data/awid_tasks.pkl',
                        help='path where data is located')
    parser.add_argument('--state_space', type=int, default=150, help="the state space for search")
    parser.add_argument('--actions_num', type=int, default=2, help="how many actions to dscide")
    parser.add_argument('--hidden_size', type=int, default=100, help="the hidden size of RNN")
    parser.add_argument('--num_layers', type=int, default=2, help="the layer of a RNN cell")
    # NOTE(review): type=bool turns any non-empty string (including "False")
    # into True; harmless while the command line is ignored below.
    parser.add_argument('--cuda', type=bool, default=True, help="use GPU or not")
    parser.add_argument('--bendmark', type=str, default='critic', help="the type of bendmark")
    parser.add_argument('--penalty', type=float, default=0.0001,
                        help="penalty coefficient applied to the sum of actions in the reward")#0.0001
    parser.add_argument('--optimizer', type=str, default="adam", help="the type of optimizer")#
    parser.add_argument('--method', type=str, default='policy', help="method for generate actions")

    # An empty argument list is parsed, so every option takes its default and
    # the real command line is ignored (presumably because this was run from
    # a notebook cell - confirm before using it as a CLI script).
    args = parser.parse_args("")

    # Constructing RCL runs the full training; time it for the file name.
    start = time.time()
    jason = RCL(args)
    end = time.time()
    params = jason.best_params

    os.makedirs(args.save_path, exist_ok=True)

    # The output name encodes the data file, a timestamp, the main
    # hyper-parameters and the elapsed seconds, joined by underscores.
    name_parts = [
        "RCL_FC_" + args.data_path.split('/')[-1],
        datetime.datetime.now().strftime("%Y-%m-%d_%H:%M:%S"),
        str(args.lr),
        str(args.n_epochs),
        str(args.max_trials),
        str(args.batch_size),
        args.bendmark,
        str(args.penalty),
        args.optimizer,
        str(args.state_space),
        str(end - start),
        args.method,
    ]
    fname = os.path.join(args.save_path, "_".join(name_parts))

    # Text summary: one line of variable shapes per task, then the list of
    # recorded accuracies (the first element of each best_params entry).
    accuracy = []
    with open(fname + '.txt', 'w') as f:
        for value in params.values():
            print([var.shape for var in value[1]], file=f)
            accuracy.append(value[0])
        print(accuracy, file=f)
    print(fname)

    # Pickle of the per-task lists collected in RCL.result_process.
    with open(fname + '.pkl', 'wb') as f:
        pickle.dump(jason.result_process, f)


Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See `tf.nn.softmax_cross_entropy_with_logits_v2`.

Task 0. Test accuracy: 0.9892033934593201 



Instructions for updating:
This class is equivalent as tf.keras.layers.LSTMCell, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
This class is equivalent as tf.keras.layers.StackedRNNCells, and will be replaced by that in Tensorflow 2.0.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor





The TensorFlow contrib module will not be included in TensorFlow 2.0.
For more information, please see:
  * https://github.com/tensorflow/community/blob/master/rfcs/20180907-contrib-sunset.md
  * https://github.com/tensorflow/addons
  * https://github.com/tensorflow/io (for I/O related ops)
If you depend on functionality not listed there, please file an issue.

Instructions for updating:
Please switch to tf.train.get_global_step

***actions***:  [131, 136]
task:1, epoch:0, test accuracy:0.93955094
task:1, epoch:4, test accuracy:0.99913645
task:1, epoch:8, test accuracy:0.99913645
task:1, epoch:9, test accuracy:0.99913645
Task 1, Trial 0, Test accuracy: 0.9991364479064941 
reward:  0.9724364479064942
Best architecture [(98, 191), (191,), (191, 166), (166,), (166, 17), (17,)]. Reward 0.9724364479064942. Test accuracy 0.9991364479064941
------------------------------------------------------------------------------------------------------------------------------------------------------


***actions***:  [146, 12]
task:2, epoch:0, test accuracy:0.83607227
task:2, epoch:4, test accuracy:0.9130789
task:2, epoch:8, test accuracy:0.9739237
task:2, epoch:9, test accuracy:0.97473854
Task 2, Trial 0, Test accuracy: 0.9747385382652283 
reward:  0.9589385382652282
Best architecture [(98, 337), (337,), (337, 178), (178,), (178, 17), (17,)]. Reward 0.9589385382652282. Test accuracy 0.9747385382652283
------------------------------------------------------------------------------------------------------------------------------------------------------


***actions***:  [33, 103]
task:3, epoch:0, test accuracy:0.9738815
task:3, epoch:4, test accuracy:0.9744861
task:3, epoch:8, test accuracy:0.97484887
task:3, epoch:9, test accuracy:0.97484887
Task 3, Trial 0, Test accuracy: 0.9748488664627075 
reward:  0.9612488664627075
Best architecture [(98, 370), (370,), (370, 281), (281,), (281, 17), (17,)]. Reward 0.9612488664627075. Test accuracy 0.9748488664627075
------------------------------------------------------------------------------------------------------------------------------------------------------


***actions***:  [12, 1]
task:4, epoch:0, test accuracy:0.22587842
task:4, epoch:4, test accuracy:0.99442273
task:4, epoch:8, test accuracy:1.0
task:4, epoch:9, test accuracy:1.0
Task 4, Trial 0, Test accuracy: 1.0 
reward:  0.9987
Best architecture [(98, 382), (382,), (382, 282), (282,), (282, 17), (17,)]. Reward 0.9987. Test accuracy 1.0
------------------------------------------------------------------------------------------------------------------------------------------------------
./results/RCL_FC_awid_tasks.pkl_2020-11-03_11:00:40_0.001_10_1_64_critic_0.0001_adam_150_22.366694927215576_policy
