In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import torch
import random
import copy
import math
import os
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
np.random.seed(42)
random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

#### The data

In [3]:
df = pd.read_csv('data/dataset_10000.csv')
df.head()

Unnamed: 0,length,width,height,label
0,7,7,49,B
1,20,13,75,B
2,29,2,81,A
3,15,6,39,A
4,11,9,71,B


In [4]:
class_dict = {'A':0, 'B':1, 'C':2}
df['label'] = df['label'].replace(class_dict)
df.head()

Unnamed: 0,length,width,height,label
0,7,7,49,1
1,20,13,75,1
2,29,2,81,0
3,15,6,39,0
4,11,9,71,1


In [5]:
df.label.value_counts()

1    4676
0    3541
2    1783
Name: label, dtype: int64

In [6]:
X = df.iloc[:, 0:-1]
y = df.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
#X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, test_size=0.1, stratify=y_trainval, random_state=42)

In [7]:
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
#X_val = scaler.transform(X_val)
X_test = scaler.transform(X_test)
X_train, y_train = np.array(X_train), np.array(y_train)
#X_val, y_val = np.array(X_val), np.array(y_val)
X_test, y_test = np.array(X_test), np.array(y_test)

#### The Environment class

In [8]:
class Env:
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
        self.x = np.zeros((1, 3), dtype=np.float32)
        self.y = -1
        self.num_classes = 3
        self.actions = ['A', 'B', 'C', 'length', 'width', 'height']
        self.sample_num = len(X)
        self.state = np.zeros((1, 3), dtype=np.float32)
        self.total_reward = 0
        self.trajectory = []
        self.episode_length = 0
        self.available_actions = np.zeros((1, 6), dtype=np.float32)
        self.idx = -1
        
    def step(self, action):
        self.episode_length +=1
        self.trajectory.append(self.actions[action])
        reward = 0
        self.state = self.get_next_state(action)
        if action < self.num_classes:
            if action == self.y:
                reward += 1
            else:
                reward -= 1
            y_actual = self.y 
            y_pred = action
            done = True
        else:
            reward += 0
            y_actual = np.nan
            y_pred = np.nan
            done = False
            
        self.total_reward+=reward
        total_reward_metric = self.total_reward 
        total_length_metric = self.episode_length
        
        info = {'episode_length':total_length_metric, 'total_reward': total_reward_metric, 'y_actual':y_actual, 
                'y_pred': y_pred, 'done':done}
        #print(f'The metrics: {metrics}')
        return self.state, reward, done, info
    
    def render(self):
        print(f'STEP {self.episode_length} for index {self.idx}')
        print(f'x: {self.x}')
        print(f'y: {self.y}')
        print(f'Current state: {self.state}')
        print(f'Total reward: {self.total_reward}')
        print(f'Trajectory: {self.trajectory}')
    
#     def reset(self, i): #I am going to go through the data sequentially
#         #print(f'Current epsiode completed. Resetting to index {i}')
#         if i < self.sample_num:
#             self.trajectory = []
#             self.total_reward = 0
#             self.episode_length = 0
#             self.state = np.zeros((1, 3), dtype=np.float32)
#             self.x, self.y = self.X[i], self.Y[i]
#             self.available_actions = np.zeros((1, 6), dtype=np.float32)
#             #return self.state, self.available_actions
#             return self.state
#         else:
#             pass
        
    def reset(self):
        print('Resetting')
        self.idx = random.randint(0, self.sample_num-1)
        #print(f'Index: {self.idx}')
        self.x, self.y = self.X[self.idx], self.Y[self.idx]
        self.state = np.zeros((1, 3), dtype=np.float32)
        self.trajectory = []
        self.available_actions = np.zeros((1, 6), dtype=np.float32)
        self.episode_length = 0
        self.total_reward = 0
        return self.state
        
    def get_next_state(self, action):
        self.available_actions[0, action] =1
        self.state = self.state.reshape(-1, 3)
        next_state = copy.deepcopy(self.state)
        if action>=self.num_classes:
            feature_idx = action - self.num_classes
            self.x = self.x.reshape(-1, 3)
            x_value = self.x[0, feature_idx]
            next_state[0, feature_idx] = x_value
        return next_state

In [9]:
class TestingEnv():
    def __init__(self, X, Y):        
        self.X = X
        self.Y = Y
        self.x = self.y[0]
        self.y = self.x[0]
        self.num_classes = 3
        self.actions = ['A', 'B', 'C', 'length', 'width', 'height']
        self.sample_num = len(X)
        self.state = np.zeros((1, 3), dtype=np.float32)
        self.total_reward = 0
        self.trajectory = []
        #self.episode_length = 0
        self.episode_length = len(self.trajectory)
        self.available_actions = np.zeros((1, 6), dtype=np.float32)
        self.idx = 0
    
    def reset(self, i):
        #print(f'i: {i}, sample_num: {self.sample_num}')
        if i < self.sample_num:
            self.trajectory = []
            self.total_reward = 0
            self.state = np.zeros((1, 3), dtype=np.float32)
            self.x, self.y = self.X[i], self.Y[i]
            self.available_actions = np.zeros((1, 6), dtype=np.float32)
            return self.state
        else:
            pass

    def get_next_state(self, action):
        self.available_actions[0, action] =1
        self.state = self.state.reshape(-1, 3)
        next_state = copy.deepcopy(self.state)
        if action>=self.num_classes:
            feature_idx = action - self.num_classes
            self.x = self.x.reshape(-1, 3)
            x_value = self.x[0, feature_idx]
            next_state[0, feature_idx] = x_value
        return next_state

    def step(self, action):
        self.episode_length +=1
        self.trajectory.append(self.actions[action])
        reward = 0
        self.state = self.get_next_state(action)
        if action < self.num_classes:
            if action == self.y:
                reward += 1
            else:
                reward -= 1
            y_actual = self.y 
            y_pred = action
            done = True
            self.idx+=1
        else:
            reward += 0
            y_actual = np.nan
            y_pred = np.nan
            done = False
            
        self.total_reward+=reward
        total_reward_metric = self.total_reward 
        total_length_metric = self.episode_length
        
        info = {'episode_length':total_length_metric, 'total_reward': total_reward_metric, 'y_actual':y_actual, 
                'y_pred': y_pred, 'done':done}
        #print(f'The metrics: {metrics}')
        return self.state, reward, done, info
        
        metrics = {'episode_number': episode_number, 'done':done, 'correct_diagnosis': correct_diag, 'episode_length':len(pathway),
        'terminated':terminated, 'total_reward': total_reward_metric, 'y_actual':y_actual, 'y_pred': y_pred, 'pathway': pathway} 
        #return next_state, self.available_actions, reward, done, metrics
        return next_state, metrics

#### The Memory Class

In [10]:
from collections import namedtuple, deque
Transition = namedtuple('Transition',
                        ('state', 'action', 'next_state', 'reward', 'done', 'info'))
class Memory():
    def __init__(self, size):
        self.total_size = size
        self.memory = deque([], maxlen = size)
        self.keys = ['state', 'action', 'next_state', 'reward', 'done', 'info']
        
    def insert(self, *args):
        transition = Transition(*args)
        self.memory.append(transition)
        
    def sample(self, batch_size):
        transition_samples = random.sample(self.memory, batch_size)
        return transition_samples
    
    def get_last_n_samples(self, size): #compare output from sample and get_last_n samples
        n_samples = []
        for i in range(-1*size, 1):
            n_samples.append(self.memory[i])
        return n_samples
    
    def current_transitions(self):
        '''Returns the number of current transitions in memory'''
        return len(self.memory)
        
    def get_latest_transitions(self, transition_number): #I need to delete this i think but later
        '''Get the last inserted transitions'''
        transition_samples = []
        current = self.current_transitions()
        for i in range(current):
            if i >= (current- transition_number):
                transition_samples.append(self.memory[i])
        return transition_samples

    def reset(self):
        ''' Resets the memory'''
        self.memory = deque([], maxlen=self.total_size)

#### The NN

In [11]:
device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')

class NN(torch.nn.Module):
    def __init__(self, input_size, hidden_size, output_size):
        super(NN, self).__init__()
        self.policy_fn = torch.nn.Sequential( 
            torch.nn.Linear(input_size, hidden_size, bias=True),
            torch.nn.Dropout(0.5),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_size,hidden_size),
            torch.nn.Dropout(0.5),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_size, output_size, bias=True)
        )
        self.input_size = input_size
        self.to(device)
    
    def forward(self, batch):
        x = batch.view(-1, self.input_size) 
        x.to(device)
        q_values = self.policy_fn(x)
        return q_values

In [12]:
class NN2(torch.nn.Module):
    def __init__(self, feature_num, action_num):
        super(NN2, self).__init__()
        
        self.feature_num = feature_num
        
        self.layer_1 = torch.nn.Linear(feature_num, 512)
        self.layer_2 = torch.nn.Linear(512, 128)
        self.layer_3 = torch.nn.Linear(128, 64)
        self.layer_out = torch.nn.Linear(64, action_num) 
        
        self.relu = torch.nn.ReLU()
        self.dropout = torch.nn.Dropout(p=0.2)
        #self.batchnorm1 = torch.nn.BatchNorm1d(512)
        #self.batchnorm2 = torch.nn.BatchNorm1d(128)
        #self.batchnorm3 = torch.nn.BatchNorm1d(64)
        
    def forward(self, x):
        x = x.view(-1, self.feature_num)
        x = self.layer_1(x)
        #x = self.batchnorm1(x)
        x = self.relu(x)
        
        x = self.layer_2(x)
        #x = self.batchnorm2(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.layer_3(x)
        #x = self.batchnorm3(x)
        x = self.relu(x)
        x = self.dropout(x)
        
        x = self.layer_out(x)
        
        return x

#### The DQN

In [13]:
import torch.optim as optim
import torch.nn.functional as F
class DQN():
    def __init__(self, memory, input_size, hidden_size, output_size):
        self.memory = memory
        self.gamma=0.95
        self.policy_network = NN(input_size, hidden_size, output_size) #model
        self.target_network = NN(input_size, hidden_size, output_size) #_model
        #self.policy_network = NN2(3, 6)
        #self.target_network = NN2(3, 6)
        self.target_network.load_state_dict(self.policy_network.state_dict())
        self.target_network.eval()
        self.optimizer = optim.RMSprop(self.policy_network.parameters(), lr=0.00001) 
        
        
    def save_networks(self, filename): # I might need to delete this
        torch.save(self.policy_network.state_dict(), f'{filename}_policy')
        torch.save(self.target_network.state_dict(), f'{filename}_target')

    def load_networks(self, filename):# I might need to delete this
        if not os.path.exists(f'{filename}_policy'):
            print('Folder from which to reload networks does not exist')
        self.policy_network.load_state_dict(torch.load(f'{filename}_policy'))
        self.target_network.load_state_dict(torch.load(f'{filename}_target'))
        
    def predict_q(self, state, target=False): #Target = true or false.. predict_np in classification
        state = torch.from_numpy(state).to(device)
        if target:
            q = self.target_network(state)
            q_values = q.detach().cpu().numpy() 
        else:
            q = self.policy_network(state)
            q_values = q.detach().cpu().numpy() 
        return q_values
    
    def update_target_network(self):
        self.target_network.load_state_dict(self.policy_network.state_dict())
        
        
    def train_dqn(self, batch_size):
        #print('Train dqn function')
        #transitions_batch = self.memory.sample(batch_size) 
        transitions_batch = self.memory.get_latest_transitions(batch_size)
        batch = Transition(*zip(*transitions_batch))
        #print(f'Zipped transitions batch: {batch}')
        non_final_mask = torch.tensor(list(batch.done), dtype=torch.bool).to(device=device) #e.g. [0,1,0] for 3 transitions
        #print(f'Non final mask shape: {non_final_mask.shape}')
        #print(f'Non final mask: {non_final_mask}')
        non_final_next_states = torch.Tensor(np.array([batch.next_state[i] for i in range(batch_size) if batch.done[i] is False])).to(device=device)
        #print(f'non final next states shape: {non_final_next_states.shape}')
        #print(f'non final next states: {non_final_next_states}')
        
        batch_states = torch.Tensor(np.array(batch.state)).to(device=device)
        #print(f'batch states: {batch_states}')
        batch_actions = torch.Tensor(batch.action).to(device=device)
        #print(f'batch actions: {batch_actions}')
        batch_rewards = torch.Tensor(batch.reward).to(device=device)
        #print(f'batch rewards: {batch_rewards}')
               
        

        state_action_values = self.policy_network(batch_states).gather(1, batch_actions.type(torch.int64).view(1, -1))  
        #print(f'state_action_values shape: {state_action_values.shape}')
        #print(f'state_action_values: {state_action_values}')
        
        next_state_values = torch.zeros(batch_size, device=device)
        next_state_values[~non_final_mask] = self.target_network(non_final_next_states).max(1)[0].detach()
        #print(f'next_state_values shape: {next_state_values.shape}')
        #print(f'next state values: {next_state_values}')
        
        expected_state_action_values = (self.gamma*next_state_values)+batch_rewards #the bellman equation
        #print(f'expected state action values shape: {expected_state_action_values.shape}')
        #print(f'expected state action values: {expected_state_action_values}')
        criterion = torch.nn.MSELoss()
        
        #mse_loss = mse(state_action_values, expected_state_action_values.unsqueeze(1)) - original comment
        loss = criterion(state_action_values, expected_state_action_values.view(1, -1))
        #self.entropy_loss = torch.nn.CrossEntropyLoss() #original comment
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step() 
        #print(f'MSE loss: {loss.item()}')
        #return {'loss': {loss.item()}} #will probaly change this return statement
        return loss.item()

#### The Agent

In [14]:
class Agent():
    def __init__(self, env, memory, dqn):
        self.env = env
        self.dqn = dqn
        self.memory = self.dqn.memory
        
        self.action_space = ['A', 'B', 'C', 'length', 'width', 'height']
        self.n_actions = len(self.action_space)
        #self.epsilon = 0.1 #slot_set of dialogue system will come later
        #self.epsilon = 1.0
        #self.epsilon_start = 1.0   #for epsilon-greedy - chosen arbitrarily -- this is for epsilon start
        #self.epsilon_end = 0.1
        #self.epsilon_decay = 200
        self.available_actions = self.env.available_actions
        
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995
        
    def get_action(self, state, available_actions): #what action is taken next by agent in this state
        sample = random.random() 
        #print(f'available actions: {available_actions}')
        #print(f'Epsilon: {self.epsilon}')
        if sample > self.epsilon:
            #print('Using q values')
            with torch.no_grad():
                q_values = self.dqn.predict_q(state) #get q values for all actions shape(1, 31)
                available_q_values = q_values[available_actions==0] #q-values for actions not yet selected e.g. shape(20,)
                action_q = np.max(available_q_values)
                #action_q = np.max(q_values)
                action_index = np.where(q_values[0]==action_q)[0][0] #index of action with max q-value
        else:
            #print('Choosing randomly')
            available_indices = np.where(available_actions==0)[1]
            action_index = random.choice(available_indices)  
            #action_index = random.randrange(self.n_actions)
        action_name = self.action_space[action_index]
        return action_index, action_name
    
    def update_epsilon(self, epoch_number):
        self.epsilon = self.epsilon_end + (self.epsilon_start - self.epsilon_end) * math.exp(-1. * epoch_number / self.epsilon_decay)
        
    def update_epsilon2(self):
        if self.epsilon > self.epsilon_min:
            self.epsilon *= self.epsilon_decay
            
    def step(self):
        #print('Step by agent')
        current_state = self.env.state
        #print(f'The available actions used to get action: {self.env.available_actions}')
        action_index = self.get_action(current_state, self.env.available_actions)[0]
        next_state, reward, done, info = self.env.step(action_index)
#         print('inserting into memory')
#         print(f'Current state: {current_state}')
#         print(f'action_index: {action_index}')
#         print(f'Next state: {next_state}')
#         print(f'Reward: {reward}')
#         print(f'Done: {done}')
#         print(f'Info: {info}')
        self.memory.insert(current_state, action_index, next_state, reward, done, info)
        if done == True:
            #i = random.randint(0, self.env.sample_num-1)
            #self.env.reset(i)
            self.env.reset()
            #print('reset inside training agent')
        else:
            self.env.state = next_state
        return current_state, action_index, next_state, reward, done, info

#### The Testing Agent

In [15]:
class TestingAgent():
    def __init__(self, dqn, env):        
        self.env = env
        self.dqn = dqn
        self.memory = self.dqn.memory
        self.action_space = ['A', 'B', 'C', 'length', 'width', 'height']
        self.n_actions = len(self.action_space)
        self.available_actions = self.env.available_actions
        self.epsilon = 1.0
        self.epsilon_min = 0.01
        self.epsilon_decay = 0.995

    def get_action(self, state, available_actions): 
        q_values = self.dqn.predict_q(state) 
        available_q_values = q_values[available_actions==0]
        action_q = np.max(available_q_values) 
        action_index = np.where(q_values[0]==action_q)[0][0]         
        action_name = self.action_space[action_index]
        return action_index, action_name

    def load_networks(self, filename):
        self.dqn.load_networks(filename)

    def step(self): #state   ------ not finished where is done?
        #print('STEP BY AGENT')
        current_state = self.env.feature_state
        #print(f'The current state used to acquire action: {current_state}')
        #print(f'The available actions used to acquire action: {self.env.available_actions}')
        action_index, action_name = self.get_action(current_state, self.env.available_actions)
        #print(f'The action acquired: {action_index}')
        #next_state, self.available_actions, reward, done, info = self.env.step(action_index, action_name)
        next_state, info = self.env.step(action_index, action_name)
        #print(f'The next state: {next_state}')
        #print(f'The info: {info}')
        #print(f'action_index:{action_index}')
        #print(f'current state:{current_state}')
        #print(f'next state:{next_state}')
        #self.memory.insert(current_state, action_index, next_state, reward,  done, info)
        if info['done'] ==True:
            #print('RESETTING ENVIRONMENT .....')
            self.env.reset(self.env.idx)
            #self.env.feature_state = next_state
        else:
            self.env.feature_state = next_state
        #print('WHAT IS BEING RETURNED')
        #print(f'action index: {action_index}')
        #print(f'action_name: {action_name}')
        #print(f'current state: {current_state}')
        #print(f'next state: {next_state}')
        #print(f'info: {info}')
        return action_index, action_name, current_state, next_state, info

    def test(self):
        test_df = pd.DataFrame()
        while self.env.idx < self.env.sample_num:
            #print(f'the index: {self.env.idx}')
            #print(f'the x: {self.env.x}')
            #print(f'the y: {self.env.y}')
            #print('does it print this successfully 1?')
            action_index, action_name, current_state, next_state, info = self.step()
            print(f'the current state: {current_state}')
            print(f'the action index: {action_index}, the action name: {action_name}')
            print(f'the next state: {next_state}')
            print(f'the info: {info}')
            #print('Does it print this successfully2?')
            if info['done']==True:
                print('Appending this info to the test_df......')
                #print(f'Info being appended: {info}')
                test_df = test_df.append(info, ignore_index=True)
        return test_df
    

#### Where the magic happens

In [16]:
from sklearn.preprocessing import label_binarize, LabelBinarizer
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, confusion_matrix, classification_report, roc_curve, auc

In [17]:
def multiclass(actual_class, pred_class, average = "macro"):

    #creating a set of all the unique classes using the actual class list
    unique_class = set(actual_class)
    roc_auc_dict = {}
    for per_class in unique_class:
        #creating a list of all the classes except the current class 
        other_class = [x for x in unique_class if x != per_class]

        #marking the current class as 1 and all other classes as 0
        new_actual_class = [0 if x in other_class else 1 for x in actual_class]
        new_pred_class = [0 if x in other_class else 1 for x in pred_class]

        #using the sklearn metrics method to calculate the roc_auc_score
        roc_auc = roc_auc_score(new_actual_class, new_pred_class, average = average)
        roc_auc_dict[per_class] = roc_auc
    #print(f'Roc auc dict: {roc_auc_dict}')
    avg = sum(roc_auc_dict.values()) / len(roc_auc_dict)
    
    #return roc_auc_dict
    return avg

In [18]:
def get_metrics(ytest, ypred):
    acc = accuracy_score(ytest, ypred)
    f1 = f1_score(ytest, ypred, average ='macro', labels=np.unique(ytest))
    try:
        roc_auc = multiclass(ytest, ypred)
    except:
        roc_auc = None
    return acc, f1, roc_auc

In [19]:
def get_performance(X, Y, dqn, filename=False, isLoad=False, isRandom=False):
    env = TestingEnv(X, Y)
    if isLoad:
        dqn.load_networks(filename)
    if isRandom:
        agent = RandomAgent(data, env)
    else:
        agent = TestingAgent(dqn, env)
    env.reset(0)
    df = agent.test()
    unique_pred_classes = list(df.y_pred.unique())
    print(f'unique predicted classes: {unique_pred_classes}')
    acc, f1, roc_auc = get_metrics(df['y_actual'], df['y_pred'])
    return acc, f1, roc_auc

In [20]:
def training(X_train, y_train, epochs, epoch_steps, batch_size):
    memory = Memory(10000)
    env = Env(X_train, y_train)
    dqn = DQN(memory, 3, 64, 6)
    agent = Agent(env, memory, dqn)
    print('INITIALIZING MEMORY .....') 
    #i = random.randint(0, len(X_train)-1) 
    env.reset()
    #while memory.current_transitions() < memory.total_size:
    while memory.current_transitions() < memory.total_size:
        agent.step()
        
    print('TRAINING')
    for epoch in range(epochs):
        loss = dqn.train_dqn(batch_size)
        if epoch%100 ==0:
            dqn.update_target_network()
        agent.update_epsilon2()
        for i in range(epoch_steps):
            agent.step()
    return dqn

In [21]:
X_train[1143], y_train[1143]

(array([0.10714286, 0.69230769, 0.94897959]), 2)

In [22]:
x_small = X_test[:5]
y_small = y_test[:5]
x_small[0], y_small[0]

(array([0.60714286, 0.84615385, 0.29591837]), 1)

In [23]:
mydqn = training(X_train, y_train, 10, 30, 32)

INITIALIZING MEMORY .....
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Rese

Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting
Resetting


In [24]:
#acc, f1, roc_auc, test_df = testing(X_test[:5], y_test[:5], mydqn)
acc, f1, roc_auc = get_performance(X_test[:5], y_test[:5], mydqn)
acc, f1, roc_auc

NameError: name 'constants' is not defined

In [None]:
np.mean(test_df.episode_length)