In [1]:
%tensorflow_version 1.x

TensorFlow 1.x selected.


In [2]:
!pip install numpy==1.16.1



In [0]:
import tensorflow as tf
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.ERROR)

In [4]:
# Seed value
# Apparently you may use different seed values at each stage
seed_value= 0

# 1. Set the `PYTHONHASHSEED` environment variable at a fixed value
import os
os.environ['PYTHONHASHSEED']=str(seed_value)

# 2. Set the `python` built-in pseudo-random generator at a fixed value
import random
random.seed(seed_value)

# 3. Set the `numpy` pseudo-random generator at a fixed value
import numpy as np
np.random.seed(seed_value)

# 4. Set the `tensorflow` pseudo-random generator at a fixed value
import tensorflow as tf
# tf.random.set_seed(seed_value)
# for later versions: 
tf.compat.v1.set_random_seed(seed_value)

# 5. Configure a new global `tensorflow` session
from keras import backend as K
session_conf = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
sess = tf.Session(graph=tf.get_default_graph(), config=session_conf)
K.set_session(sess)
# for later versions:
# session_conf = tf.compat.v1.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1)
# sess = tf.compat.v1.Session(graph=tf.compat.v1.get_default_graph(), config=session_conf)
# tf.compat.v1.keras.backend.set_session(sess)

Using TensorFlow backend.


In [0]:
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.utils import shuffle
from sklearn.metrics import confusion_matrix,f1_score,accuracy_score

from keras.models import Sequential
from keras.layers import Dense, Activation
from keras.optimizers import SGD, Adadelta, Adam
from keras.initializers import RandomUniform

from time import sleep,time
import numpy as np
import pandas as pd
import random
import gym
import sys
from tqdm import tqdm
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings('ignore')


In [0]:
class Environment:
    def __init__(self , dataset_name = "ant-1.3.csv" , minority_class = [1,2,3] , lambd = 0.9 , test_size = 0.2):
        self.X,self.y ,self.X_test,self.y_test = self.load_data(dataset_name , test_size)
        self.observation_space = self.X.shape[1]
        self.action_space = 4
        self.terminal = False
        self.minority_class = minority_class
        self.lambd = lambd
        self.number_of_examples = self.X.shape[0]

    def load_data(self , dataset_name , test_size):
        data = pd.read_csv(dataset_name)
        X = MinMaxScaler().fit_transform(data.values[:,3:-1])
        y = data.values[:,-1]
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=test_size, random_state=1)
        return X_train,y_train,X_test,y_test

    def reset(self):
        self.X, self.y = shuffle(self.X, self.y)
        self.terminal = False

    def step(self , action ,label):
        reward = 0
        if label in self.minority_class:
            if action == label:
                reward = 1
            else:
                reward = -1
                self.terminal = True
        else:
            if action == label:
                reward = self.lambd
            else:
                reward = -self.lambd
        return reward , self.terminal

In [7]:
# Environment
env = Environment()
inputCount = env.observation_space
actionsCount = env.action_space

# Neural Network
model = Sequential()
model.add(Dense(24, input_dim=inputCount, activation='relu'))
model.add(Dense(24, activation='relu'))
model.add(Dense(actionsCount, activation='linear'))

model.compile(loss='mse', optimizer=Adam(), metrics=['mae'])
model.summary()

#Initialize Memory And Epsilon
memory = ([],[],[],[],[])
epsilon = 1.0

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_1 (Dense)              (None, 24)                504       
_________________________________________________________________
dense_2 (Dense)              (None, 24)                600       
_________________________________________________________________
dense_3 (Dense)              (None, 4)                 100       
Total params: 1,204
Trainable params: 1,204
Non-trainable params: 0
_________________________________________________________________


In [0]:
def score(env , model):
    X_train = env.X
    y_train = env.y
    X_test = env.X_test
    y_test = env.y_test
    train_pred = model.predict(X_train.reshape((X_train.shape[0],X_train.shape[1])))
    test_pred = model.predict(X_test.reshape((X_test.shape[0],X_train.shape[1])))
    train_pred = np.argmax(train_pred , axis = 1)
    test_pred = np.argmax(test_pred , axis = 1)
    print("On Training Data " , end = "\t")
    print("F1 Score =" , f1_score(train_pred.astype(int) , y_train.astype(int) , labels = [0,1,2,3],average = "micro") , end = "\t")
    print("Accuracy =" , accuracy_score(train_pred.astype(int) , y_train.astype(int)))
    print("On Testing Data " , end = "\t")
    print("F1 Score =" , f1_score(test_pred.astype(int) , y_test.astype(int) , labels = [0,1,2,3], average = "micro") , end = "\t")
    print("Accuracy =" , accuracy_score(test_pred.astype(int) , y_test.astype(int)))

In [0]:
def replay_memory(model , memory , batch_size = 64 , gamma = 1.0 , actionsCount = 2 , epochs = 1 , verbose = 0):
    rand_nums = np.random.randint(0, len(memory), size=batch_size)
    states = memory[0][rand_nums]
    action = memory[1][rand_nums]
    rewards = memory[2][rand_nums]
    next_states = memory[3][rand_nums]
    done = memory[4][rand_nums].astype(int)

    target = rewards + (gamma * np.multiply((done+1)%2 , np.max(model.predict(next_states) , axis = 1)))
    target_f = []
    if gamma == 0:
        target_f = np.zeros((batch_size , actionsCount))
    if gamma != 0:
        target_f = model.predict(states)
    for i in range(actionsCount):
        ind = np.where(action == i)
        target_f[ind,i] = target[ind]
    targets = target_f

    model.fit(states, targets, epochs=epochs, verbose=verbose)
    return model


In [0]:
def dqn_train(env , model , episodes = 5000 , gamma = 1.0 ,epsilon = 1.0 , epsilonMin = 0.01 , epsilonDecay = 0.999 , memory = ([],[],[],[],[]) ,batch_size = 64 , memoryMax = 500000 , resume_training = False , model_filename = None , save_model_filename = "weights.h5"):
    if resume_training:
        model.load_weights(model_filename)
    inputCount = env.observation_space
    actionsCount = env.action_space
    
    # Training
    all_scores = []
    l_states = memory[0]
    l_actions = memory[1]
    l_rewards = memory[2]
    l_next_states = memory[3]
    l_done = memory[4]

    for e in tqdm(range(episodes)):
        env.reset()
        s = np.expand_dims(env.X[0],axis = 0)
        l = env.y[0]
        done = False
        for ind in range(1 , env.number_of_examples - 1):
            # Act greedy sometimes
            if np.random.rand() <= epsilon:
                a = random.randrange(actionsCount)
            else:
                a = np.argmax(model.predict(s))

            r, done = env.step(a , l)
            
            newS = np.expand_dims(env.X[ind] , axis = 0)

            if len(l_states) == 0:
                l_states = s
                l_actions = [a]
                l_rewards = [r]
                l_next_states = newS
                l_done = [done]
            else:
                l_states = np.append(l_states , s , axis = 0)
                l_actions = np.append(l_actions , [a] , axis = 0)
                l_rewards = np.append(l_rewards , [r] , axis = 0)
                l_next_states = np.append(l_next_states , newS , axis = 0)
                l_done = np.append(l_done , [done] , axis = 0)

            # free first items in memory
            if len(l_states)>=memoryMax:
                l_states = l_states[5000:]
                l_actions = l_actions[5000:]
                l_rewards = l_rewards[5000:]
                l_next_states = l_next_states[5000:]
                l_done = l_done[5000:]

            if done:
                # print("\repisode: {}/{}, score: {}".format(e, episodes, ind) , end = "")
                # sys.stdout.flush()
                all_scores.append(ind)
                break

            # State
            s = np.expand_dims(env.X[ind] , axis = 0)
            l = env.y[ind]

        memory = (l_states , l_actions , l_rewards , l_next_states , l_done)

        if epsilon > epsilonMin:
            epsilon *= epsilonDecay

        # Replay memory
        if len(l_states) > batch_size:
            model = replay_memory(model , memory , batch_size = batch_size , gamma = gamma , actionsCount = actionsCount , epochs = 1 , verbose = 0)

        if (e+1)%200 == 0:
            print("\n")
            print("-"*40 , e+1 , "-"*40)
            score(env , model)
            print("-"*42 , "-"*42)

    print()
    print(np.average(all_scores))

    # Save weights
    model.save_weights(save_model_filename)

    return model,memory,epsilon


In [11]:
model , memory , epsilon = dqn_train(env = env , model = model , memory = memory , epsilon = epsilon , memoryMax = 50000 , batch_size = 1024, episodes = 1000)

 20%|██        | 203/1000 [00:04<00:40, 19.61it/s]



---------------------------------------- 200 ----------------------------------------
On Training Data 	F1 Score = 0.75	Accuracy = 0.75
On Testing Data 	F1 Score = 0.68	Accuracy = 0.68
------------------------------------------ ------------------------------------------


 40%|████      | 403/1000 [00:15<00:31, 18.95it/s]



---------------------------------------- 400 ----------------------------------------
On Training Data 	F1 Score = 0.75	Accuracy = 0.75
On Testing Data 	F1 Score = 0.68	Accuracy = 0.68
------------------------------------------ ------------------------------------------


 60%|██████    | 603/1000 [00:26<00:21, 18.09it/s]



---------------------------------------- 600 ----------------------------------------
On Training Data 	F1 Score = 0.74	Accuracy = 0.74
On Testing Data 	F1 Score = 0.68	Accuracy = 0.68
------------------------------------------ ------------------------------------------


 80%|████████  | 801/1000 [00:37<00:13, 14.92it/s]



---------------------------------------- 800 ----------------------------------------
On Training Data 	F1 Score = 0.75	Accuracy = 0.75
On Testing Data 	F1 Score = 0.68	Accuracy = 0.68
------------------------------------------ ------------------------------------------


100%|██████████| 1000/1000 [00:49<00:00, 20.26it/s]




---------------------------------------- 1000 ----------------------------------------
On Training Data 	F1 Score = 0.7299999999999999	Accuracy = 0.73
On Testing Data 	F1 Score = 0.64	Accuracy = 0.64
------------------------------------------ ------------------------------------------

8.26
