In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import torch
import random
import copy
import math
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
np.random.seed(42)
random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

#### The data

In [3]:
df = pd.read_csv('data/balanced_dataset.csv')
df.head()

Unnamed: 0,length,width,height,label
0,25,2,77,A
1,26,12,24,B
2,5,4,16,B
3,10,2,56,B
4,5,6,87,B


In [4]:
class_dict = {'A':0, 'B':1, 'C':2}
df['label'] = df['label'].replace(class_dict)
df.head()

Unnamed: 0,length,width,height,label
0,25,2,77,0
1,26,12,24,1
2,5,4,16,1
3,10,2,56,1
4,5,6,87,1


In [5]:
df.label.value_counts()

2    1783
1    1783
0    1783
Name: label, dtype: int64

In [6]:
X = df.iloc[:, 0:-1]
y = df.iloc[:, -1]

X_trainval, X_test, y_trainval, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, test_size=0.1, stratify=y_trainval, random_state=42)

In [7]:
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
X_train, y_train = np.array(X_train), np.array(y_train)
X_val, y_val = np.array(X_val), np.array(y_val)
X_test, y_test = np.array(X_test), np.array(y_test)

#### The Environment class

In [8]:
class Env:
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
        self.x = np.zeros((1, 3), dtype=np.float32)
        self.y = -1
        self.num_classes = 3
        self.sample_num = len(X)
        self.state = np.zeros((1, 3), dtype=np.float32)
        self.total_reward = 0
        self.trajectory = []
        self.episode_length = 0
        self.available_actions = np.zeros((1, 6), dtype=np.float32)
        
    def reset(self, i): #I am going to go through the data sequentially
        #print(f'Current epsiode completed. Resetting to index {i}')
        if i < self.sample_num:
            self.trajectory = []
            self.total_reward = 0
            self.episode_length = 0
            self.state = np.zeros((1, 3), dtype=np.float32)
            self.x, self.y = self.X[i], self.Y[i]
            self.available_actions = np.zeros((1, 6), dtype=np.float32)
            #return self.state, self.available_actions
            return self.state
        else:
            pass
        
    def get_next_state(self, action):
        self.available_actions[0, action] =1
        if action < 3: #the classes
            next_state = None
        elif (action >=3) & (action <=5):
            feature_idx = action - 3
            self.x = self.x.reshape(-1, 3)
            x_value = self.x[0, feature_idx]
            next_state = copy.deepcopy(self.state)
            next_state[0, feature_idx] = x_value
        return next_state
    
    def step(self, action):
        ep_length = 1
        reward = 0
        next_state = self.get_next_state(action)
        if action < 3:
            if action == self.y:
                reward += 1
            else:
                reward -= 1
            y_actual = self.y 
            y_pred = action
            done = True
        else:
            reward += 0
            y_actual = np.nan
            y_pred = np.nan
            done=False
            
        self.total_reward+=reward
        self.episode_length+= ep_length
        total_reward_metric = self.total_reward 
        total_length_metric = self.episode_length
        
        info = {'episode_length':total_length_metric, 'total_reward': total_reward_metric, 'y_actual':y_actual, 
                   'y_pred': y_pred}
        #print(f'The metrics: {metrics}')
        return next_state, self.available_actions, reward, done, info

#### The Agent

In [9]:
class KugezesaAgent:
    def __init__(self, state_size, is_eval=False, model_name=''):
        self.state_size = state_size #the features are also 3
        self.action_size=6 #length, width, height and the classes 0, 1, 2
        self.memory = deque(maxlen=10000)
        self.inventory = []
        self.model_name = model_name
        self.is_eval = is_eval
        self.gamma = 0.95
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        self.model = load_model(model_name) if is_eval else self._model()
    
    def _model(self):
        model = Sequential()
        model.add(Dense(units=64, input_dim=self.state_size, activation = 'relu'))
        model.add(Dense(units=32, activation='relu'))
        model.add(Dense(units=8, activation = 'relu'))
        model.add(Dense(self.action_size, activation = 'linear'))
        model.compile(loss='mse', optimizer = Adam(learning_rate=0.001))
        return model
    
    def act(self, state):
        if not self.is_eval and random.random() <= self.epsilon:
            print('Acting randomly')
            return random.randrange(self.action_size)
        options = self.model.predict(state)
        print('Acting using q values')
        return np.argmax(options[0])
    
    def expReplay(self, batch_size):
        mini_batch = []
        l = len(self.memory)
        for i in range(1-batch_size+1, 1):
            mini_batch.append(self.memory[i])
        for state, action, reward, next_state, done in mini_batch:
            target = reward
            if not done:
                target = reward +self.gamma*np.amax(self.model.predict(next_state[0]))
            target_f = self.model.predict(state)
            target_f[0][action] = target
            self.model.fit(state, target_f, epochs=1, verbose=0)
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay

In [10]:
class TestingEnv(Env):
    def __init__(self, X, y):
        super().__init__(X, y)
        self.x, self.y = self.X[0], self.Y[0]
        self.idx = 0
        self.episode_length =len(self.trajectory)
        
    def step(self, action, name): 
        self.trajectory.append(name)
        reward = 0
        if action < 3:
            if action == self.y:
                reward += 1
            else:
                reward -= 1
            done = True
            y_actual = self.y 
            y_pred = action
            self.idx+=1
        
        else:
            reward += 0
            done = False
            y_actual = np.nan
            y_pred = np.nan 
        
        next_state = self.get_next_state(action)
        episode_number = self.idx
        pathway = self.trajectory
        self.total_reward += reward
        total_reward_metric = self.total_reward 
        info = {'episode_length':len(pathway), 'total_reward': total_reward_metric, 'y_actual':y_actual, 'y_pred': y_pred, 
                'pathway': pathway, 'done':done} 
        return next_state, info

#### The Memory Class

In [11]:
from collections import namedtuple, deque
Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward', 'done', 'info'))
class Memory():
    def __init__(self, size):
        self.total_size = size
        self.memory = deque([], maxlen = size)
        self.keys = ['state', 'action', 'next_state', 'reward', 'done', 'info']
        
    def insert(self, *args):
        transition = Transition(*args)
        self.memory.append(transition)
        
    def sample(self, batch_size):
        transition_samples = random.sample(self.memory, batch_size)
        return transition_samples
    
    def get_last_n_samples(self, size): #compare output from sample and get_last_n samples
        n_samples = []
        for i in range(-1*size, 1):
            n_samples.append(self.memory[i])
        return n_samples
    
    def current_transitions(self):
        '''Returns the number of current transitions in memory'''
        return len(self.memory)
        
    def get_latest_transitions(self, transition_number): #I need to delete this i think but later
        '''Get the last inserted transitions'''
        transition_samples = []
        current = self.current_transitions()
        for i in range(current):
            if i >= (current- transition_number):
                transition_samples.append(self.memory[i])
        return transition_samples

    def reset(self):
        ''' Resets the memory'''
        self.memory = deque([], maxlen=self.total_size)

#### The NN

In [12]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

class NN(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(NN, self).__init__()
        self.policy_fn = torch.nn.Sequential( 
            torch.nn.Linear(input_size, hidden_size, bias=True),
            torch.nn.Dropout(0.5),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_size,hidden_size),
            torch.nn.Dropout(0.5),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_size, output_size, bias=True)
        )
        self.input_size = input_size
        self.to(device)
    
    def forward(self, batch):
        x = batch.view(-1, self.input_size) 
        x.to(device)
        q_values = self.policy_fn(x)
        return q_values

In [13]:
class NN2(torch.nn.Module):
    def __init__(self, feature_num, action_num):
        super(NN2, self).__init__()
        
        self.feature_num = feature_num
        
        self.layer_1 = torch.nn.Linear(feature_num, 512)
        self.layer_2 = torch.nn.Linear(512, 128)
        self.layer_3 = torch.nn.Linear(128, 64)
        self.layer_out = torch.nn.Linear(64, action_num) 
        
        self.relu = torch.nn.ReLU()
        self.dropout = torch.nn.Dropout(p=0.2)
        #self.batchnorm1 = torch.nn.BatchNorm1d(512)
        #self.batchnorm2 = torch.nn.BatchNorm1d(128)
        #self.batchnorm3 = torch.nn.BatchNorm1d(64)
        
    def forward(self, x):
        x = x.view(-1, self.feature_num)
        x = self.layer_1(x)
        #x = self.batchnorm1(x)
        x = self.relu(x)
        
        x = self.layer_2(x)
        #x = self.batchnorm2(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.layer_3(x)
        #x = self.batchnorm3(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.layer_out(x)
        
        return x

#### The DQN

In [14]:
import torch.optim as optim
import torch.nn.functional as F
class DQN():
    def __init__(self, memory, input_size, hidden_size, output_size):
        self.memory = memory
        self.gamma=0.95
        #self.policy_network = NN(input_size, hidden_size, output_size) #model
        #self.target_network = NN(input_size, hidden_size, output_size) #_model
        self.policy_network = NN2(3, 6)
        self.target_network = NN2(3, 6)
        self.target_network.load_state_dict(self.policy_network.state_dict())
        self.target_network.eval()
        self.optimizer = optim.RMSprop(self.policy_network.parameters(), lr=0.00001) 
        
        
    def save_networks(self, filename): # I might need to delete this
        torch.save(self.policy_network.state_dict(), f'{filename}_policy')
        torch.save(self.target_network.state_dict(), f'{filename}_target')

    def load_networks(self, filename):# I might need to delete this
        if not os.path.exists(f'{filename}_policy'):
            print('Folder from which to reload networks does not exist')
        self.policy_network.load_state_dict(torch.load(f'{filename}_policy'))
        self.target_network.load_state_dict(torch.load(f'{filename}_target'))
        
    def predict_q(self, state, target=False): #Target = true or false.. predict_np in classification
        state = torch.from_numpy(state).to(device)
        if target:
            q = self.target_network(state)
            q_values = q.detach().cpu().numpy() 
        else:
            q = self.policy_network(state)
            q_values = q.detach().cpu().numpy() 
        return q_values
    
    def update_target_network(self):
        self.target_network.load_state_dict(self.policy_network.state_dict())
        
    
    #to transfer
    def train(self, terminal_state, step):
        if len(self.replay_memory) < MIN_REPLAY_MEMORY_SIZE:
            return
        transitions_batch = self.memory.get_latest_transitions(batch_size)
        batch = Transition(*zip(*transitions_batch))
        non_final_mask = np.arrray(list(batch.done), dtype='bool')
        non_final_next_states = np.array([batch.next_state[i] for i in range(batch_size) if batch.done[i] is False])
        batch_states = np.array(batch.state)  
        batch_actions = batch.action
        batch_rewards = batch.reward
        #state_action_values = self.policy_network(batch_states).gather(1, batch_actions.type(torch.int64).view(1, -1)) 
        state_action_values = self.policy_network.pedict(batch_states)
        
        next_state_values = np.zeros(batch_size, device=device)
        next_state_values[~non_final_mask] = self.target_network(non_final_next_states).max(1)[0].detach()
        
        expected_state_action_values = (self.gamma*next_state_values)+batch_rewards #the bellman equation
        

        # Get current states from minibatch, then query NN model for Q values
        batch_states = np.array([transition[0] for transition in minibatch])#batch_states
        state_action_values = self.model.predict(batch_states)

        # Get future states from minibatch, then query NN model for Q values
        # When using target network, query it, otherwise main network should be queried
        transition_arr = [transition[3] for transition in minibatch]
        print(transition_arr)
        new_current_sta = np.array([transition[3] for transition in minibatch]).astype('float32')
        future_qs_list = self.target_model.predict(new_current_states)

        X = []
        y = []
        
    ###### END HERE!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    
        
    def train_dqn(self, batch_size):
        #print('Train dqn function')
        #transitions_batch = self.memory.sample(batch_size) 
        transitions_batch = self.memory.get_latest_transitions(batch_size)
        batch = Transition(*zip(*transitions_batch))
        #print(f'Zipped transitions batch: {batch}')
        non_final_mask = torch.tensor(list(batch.done), dtype=torch.bool).to(device=device) #e.g. [0,1,0] for 3 transitions
        #print(f'Non final mask: {non_final_mask}')
        non_final_next_states = torch.Tensor(np.array([batch.next_state[i] for i in range(batch_size) if batch.done[i] is False])).to(device=device)
        #print(f'non final next states: {non_final_next_states}')
        batch_states = torch.Tensor(np.array(batch.state)).to(device=device)
        #print(f'batch states: {batch_states}')
        batch_actions = torch.Tensor(batch.action).to(device=device)
        #print(f'batch actions: {batch_actions}')
        batch_rewards = torch.Tensor(batch.reward).to(device=device)
        #print(f'batch rewards: {batch_rewards}')
               
        

        state_action_values = self.policy_network(batch_states).gather(1, batch_actions.type(torch.int64).view(1, -1))  
        #print(f'state_action_values shape: {state_action_values.shape}')
        #print(f'state_action_values: {state_action_values}')
        
        next_state_values = torch.zeros(batch_size, device=device)
        next_state_values[~non_final_mask] = self.target_network(non_final_next_states).max(1)[0].detach()
        #print(f'next_state_values shape: {next_state_values.shape}')
        #print(f'next state values: {next_state_values}')
        
        expected_state_action_values = (self.gamma*next_state_values)+batch_rewards #the bellman equation
        #print(f'expected state action values shape: {expected_state_action_values.shape}')
        #print(f'expected state action values: {expected_state_action_values}')
        criterion = torch.nn.MSELoss()
        
        #mse_loss = mse(state_action_values, expected_state_action_values.unsqueeze(1)) - original comment
        loss = criterion(state_action_values, expected_state_action_values.view(1, -1))
        #self.entropy_loss = torch.nn.CrossEntropyLoss() #original comment
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step() 
        #print(f'MSE loss: {loss.item()}')
        #return {'loss': {loss.item()}} #will probaly change this return statement
        return loss.item()

In [None]:
class DQN_Keras():
    
    def create_model(self):
        model = Sequential()
        model.add(Dropout(0.5, input_shape=(1, 3)))
        model.add(Dense(64, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(32, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(8, activation='relu'))
        model.add(Dropout(0.5))
        model.add(Dense(6, activation='linear'))
        model.compile(loss='mse', optimizer=Adam(lr=0.001), metrics=['accuracy'])
        return model
    
     def __init__(self, memory, input_size, hidden_size, output_size):
        self.memory = memory
        self.gamma=0.95
        #self.policy_network = NN(input_size, hidden_size, output_size) #model
        #self.target_network = NN(input_size, hidden_size, output_size) #_model
        self.policy_network = create_model()
        self.target_network = create_model()
        self.target_model.set_weights(self.policy_network.get_weights())
        #self.target_network.load_state_dict(self.policy_network.state_dict())
        #self.target_network.eval()
        #self.optimizer = optim.RMSprop(self.policy_network.parameters(), lr=0.00001) 
        
        
    def save_networks(self, filename): # I might need to delete this
        torch.save(self.policy_network.state_dict(), f'{filename}_policy')
        torch.save(self.target_network.state_dict(), f'{filename}_target')

    def load_networks(self, filename):# I might need to delete this
        if not os.path.exists(f'{filename}_policy'):
            print('Folder from which to reload networks does not exist')
        self.policy_network.load_state_dict(torch.load(f'{filename}_policy'))
        self.target_network.load_state_dict(torch.load(f'{filename}_target'))
        
        

#### The Agent

In [91]:
class Agent():
    def __init__(self, env, memory, dqn):
        self.env = env
        self.dqn = dqn
        self.memory = self.dqn.memory
        
        self.action_space = ['A', 'B', 'C', 'length', 'width', 'height']
        self.n_actions = len(self.action_space)
        #self.epsilon = 0.1 #slot_set of dialogue system will come later
        #self.epsilon = 1.0
        #self.epsilon_start = 1.0   #for epsilon-greedy - chosen arbitrarily -- this is for epsilon start
        #self.epsilon_end = 0.1
        #self.epsilon_decay = 200
        self.available_actions = self.env.available_actions
        
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        
    def get_action(self, state, available_actions): #what action is taken next by agent in this state
        sample = random.random() 
        #print(f'Epsilon: {self.epsilon}')
        if sample > self.epsilon:
            #print('Using q values')
            with torch.no_grad():
                q_values = self.dqn.predict_q(state) #get q values for all actions shape(1, 31)
                available_q_values = q_values[available_actions==0] #q-values for actions not yet selected e.g. shape(20,)
                action_q = np.max(available_q_values)
                #action_q = np.max(q_values)
                action_index = np.where(q_values[0]==action_q)[0][0] #index of action with max q-value
        else:
            #print('Choosing randomly')
            available_indices = np.where(available_actions==0)[1]
            action_index = random.choice(available_indices)  
            #action_index = random.randrange(self.n_actions)
        action_name = self.action_space[action_index]
        return action_index, action_name
    
    def update_epsilon(self, epoch_number):
        self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * math.exp(-1. * epoch_number / self.epsilon_decay)
        
    def update_epsilon2(self):
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
            
    def step(self):
        #print('Step by agent')
        current_state = self.env.state
        #print(f'The available actions used to get action: {self.env.available_actions}')
        action_index = self.get_action(current_state, self.env.available_actions)[0]
        next_state, self.available_actions, reward, done, info = self.env.step(action_index)
        #print('inserting into memory')
        #print(f'Current state: {current_state}')
        #print(f'action_index: {action_index}')
        #print(f'Next state: {next_state}')
        #print(f'Reward: {reward}')
        #print(f'Done: {done}')
        #print(f'Info: {info}')
        self.memory.insert(current_state, action_index, next_state, reward, done, info)
        if next_state is None:
            i = random.randint(0, self.env.sample_num-1)
            self.env.reset(i)
        else:
            self.env.state = next_state
            
    def test(self):
        test_df = pd.DataFrame()
        while self.env.idx < self.env.sample_num:
            #print(f'INDEX: {self.env.idx}')
            action_index, action_name, current_state, next_state, info = self.step()
            #print(f'Current state: {current_state}')
            #print(f'action index: {action_index}, action name: {action_name}')
            #print(f'Next state: {next_state}')
            #print(f'Info: {info}')
            if info['done']==True:
                test_df = test_df.append(info, ignore_index=True)
        return test_df

#### The Testing Agent

In [92]:
class TestingAgent(Agent):
    def __init__(self, env, memory, dqn):
        super().__init__(env, memory, dqn)
    
    def get_action(self, state, available_actions): 
        q_values = self.dqn.predict_q(state) 
        available_q_values = q_values[available_actions==0]
        action_q = np.max(available_q_values) 
        action_index = np.where(q_values[0]==action_q)[0][0]  
        #action_index = np.argmax(q_values[0]) 
        action_name = self.action_space[action_index]
        return action_index, action_name

    def load_networks(self, filename):
        self.dqn.load_networks(filename)

    def step(self): #state   ------ not finished where is done?
        current_state = self.env.state
        action_index, action_name = self.get_action(current_state, self.env.available_actions)
        next_state, info = self.env.step(action_index, action_name)
        if next_state is None:
            self.env.reset(self.env.idx)
        else:
            self.env.state = next_state
        return action_index, action_name, current_state, next_state, info
    
     

#### The Main function

In [93]:
from sklearn.preprocessing import label_binarize, LabelBinarizer
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix, classification_report, roc_curve, auc

In [94]:
def multiclass(actual_class, pred_class, average = "macro"):

    #creating a set of all the unique classes using the actual class list
    unique_class = set(actual_class)
    roc_auc_dict = {}
    for per_class in unique_class:
        #creating a list of all the classes except the current class 
        other_class = [x for x in unique_class if x != per_class]

        #marking the current class as 1 and all other classes as 0
        new_actual_class = [0 if x in other_class else 1 for x in actual_class]
        new_pred_class = [0 if x in other_class else 1 for x in pred_class]

        #using the sklearn metrics method to calculate the roc_auc_score
        roc_auc = roc_auc_score(new_actual_class, new_pred_class, average = average)
        roc_auc_dict[per_class] = roc_auc
    #print(f'Roc auc dict: {roc_auc_dict}')
    avg = sum(roc_auc_dict.values()) / len(roc_auc_dict)
    
    #return roc_auc_dict
    return avg

In [95]:
def test(ytest, ypred):
    acc = accuracy_score(ytest, ypred)
    f1 = f1_score(ytest, ypred, average ='macro', labels=np.unique(ytest))
    try:
        roc_auc = multiclass(ytest, ypred)
    except:
        roc_auc = None
    return acc, f1, roc_auc

In [96]:
def get_performance(X_test, y_test, dqn, isTest=False):
    testing_env = TestingEnv(X_test, y_test)
    testing_agent = TestingAgent(testing_env, None, dqn)
    if isTest:
        testing_agent.dqn.load_networks('models/model')
    test_df = testing_agent.test()
    acc, f1, roc_auc = test(test_df.y_actual, test_df.y_pred)
    return acc, f1, roc_auc, test_df

In [97]:
def main(X_train, y_train, X_val, y_val, X_test, y_test, epochs, epoch_steps, batch_size):
    memory = Memory(1000)
    env = Env(X_train, y_train)
    dqn = DQN(memory, 3, 64, 6)
    agent = Agent(env, memory, dqn)
    
    best_val_perf= {'accuracy':0, 'f1':0, 'roc_auc_score':0}

    print('INITIALIZING MEMORY .....') #maybe skip this and go straight to the 
    i = random.randint(0, len(X_train)-1)  #try starting wit zero too and moving in a sequential manner
    env.reset(i)
    while memory.current_transitions() < memory.total_size:
    #while memory.current_transitions() < 5:
        agent.step()
        
    print('TRAINING')
    for epoch in range(epochs):
        loss = dqn.train_dqn(batch_size)
        #print(f'loss: {loss}')
        if epoch%5 ==0:
            dqn.update_target_network()
        #if epoch%1 == 0:
            #agent.update_epsilon(epoch)
        agent.update_epsilon2()
        for i in range(epoch_steps):
            agent.step()
        #if epoch%(epochs/100) ==0:            
        validation_perf = get_performance(X_val, y_val, dqn)
        if validation_perf[0] > best_val_perf['accuracy']:
            dqn.save_networks('models/model')
            best_val_perf['accuracy'] = validation_perf[0]
            best_val_perf['f1'] = validation_perf[1]
            best_val_perf['roc_auc_score'] = validation_perf[2]

            print(f'********Validation Performance at epoch {epoch}********')
            print(f'Accuracy: {validation_perf[0]}, F1 score: {validation_perf[1]}, ROC-AUC Score: {validation_perf[2]}')
            print(f'Unique predicted classes: {validation_perf[3].y_pred.unique()}')
            
            test_perf = get_performance(X_test, y_test, dqn, isTest=True)
            
            #dqn.memory.reset

    
    print('TESTING')
    test_perf = get_performance(X_test, y_test, dqn, isTest=True)
    print('**********Test Performance**********')
    print(f'Accuracy: {test_perf[0]}, F1 score: {test_perf[1]}, ROC-AUC Score: {test_perf[2]}')
    print(f'Unique predicted classes: {test_perf[3].y_pred.unique()}')
    
    return test_perf

#### delete from here

#### All together

In [98]:
test_perf = main(X_train, y_train, X_val, y_val, X_test, y_test, 1000, 5, 32)
test_df = test_perf[3]
test_df.head()

INITIALIZING MEMORY .....
TRAINING
********Validation Performance at epoch 0********
Accuracy: 0.32710280373831774, F1 score: 0.18071211307926283, ROC-AUC Score: 0.4947491105385842
Unique predicted classes: [0. 1.]
********Validation Performance at epoch 1********
Accuracy: 0.35046728971962615, F1 score: 0.22154607297464443, ROC-AUC Score: 0.5122520754099702
Unique predicted classes: [0. 1.]
********Validation Performance at epoch 25********
Accuracy: 0.36682242990654207, F1 score: 0.26314878534650893, ROC-AUC Score: 0.524512358574627
Unique predicted classes: [0. 1. 2.]
********Validation Performance at epoch 101********
Accuracy: 0.3691588785046729, F1 score: 0.26432634242902603, ROC-AUC Score: 0.5262544473070788
Unique predicted classes: [0. 1.]
********Validation Performance at epoch 104********
Accuracy: 0.3855140186915888, F1 score: 0.27459311329195474, ROC-AUC Score: 0.5385188199495093
Unique predicted classes: [0. 1. 2.]
********Validation Performance at epoch 115********
Accur

Unnamed: 0,done,episode_length,pathway,total_reward,y_actual,y_pred
0,1.0,4.0,"[length, height, width, B]",-1.0,0.0,1.0
1,1.0,4.0,"[length, height, width, A]",-1.0,2.0,0.0
2,1.0,4.0,"[length, width, height, A]",-1.0,1.0,0.0
3,1.0,4.0,"[height, length, width, B]",-1.0,2.0,1.0
4,1.0,4.0,"[height, length, width, A]",1.0,0.0,0.0


In [99]:
test_df.y_actual.value_counts()

1.0    357
2.0    357
0.0    356
Name: y_actual, dtype: int64

In [100]:
test_df.y_pred.value_counts()

0.0    541
1.0    405
2.0    124
Name: y_pred, dtype: int64

#### Performance Evaluation

In [101]:
np.mean(test_df.episode_length)

3.9710280373831774