In [1]:
import pandas as pd
import numpy as np
import random
import os
import tensorflow
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tensorflow.set_random_seed(SEED)
#tensorflow.random.set_seed(SEED)
os.environ['PYTHONHASHSEED']=str(SEED)

#### The Data

In [3]:
df = pd.read_csv('data/dataset_10000.csv')
class_dict = {'A':0, 'B':1, 'C':2}
df['label'] = df['label'].replace(class_dict)
X = df.iloc[:, 0:-1]
y = df.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=42)
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
X_train, y_train = np.array(X_train), np.array(y_train)
X_test, y_test = np.array(X_test), np.array(y_test)

#### The Environment

In [4]:
import copy
from gym import Env
from gym.spaces import Discrete, Box

In [5]:
class SyntheticEnv(Env):
    def __init__(self, X, Y, random=True):
        super(SyntheticEnv, self).__init__()
        self.action_space = Discrete(6)
        self.observation_space = Box(0, 1.5, (3,))
        self.actions = ['A', 'B', 'C', 'length', 'width', 'height']
        self.max_steps = 7
        self.X = X
        self.Y = Y
        self.sample_num = len(X)
        self.idx = -1
        self.x = np.zeros((3,), dtype=np.float32)
        self.y = np.nan
        self.state = np.zeros((3,), dtype=np.float32)
        self.num_classes = 3
        self.episode_length = 0
        self.trajectory = []
        self.total_reward = 0
        self.random = random
        
    
    def step(self, action):
        #print('A step in the environment')
        #print(f'action: {action}')
        self.episode_length += 1
        reward = 0
        if self.episode_length == self.max_steps: # episode too long
            #print('Reached max steps')
            reward -=1
            self.total_reward -=1
            terminated = True
            done = True
            y_actual = self.y
            y_pred = np.nan
        elif action < self.num_classes: #diagnosis (terminal action)
            #print('Terminal action')
            if action == self.y:
                reward +=1
                self.total_reward += 1
            else:
                reward -= 1
                self.total_reward -= 1
            terminated = False
            done = True
            y_actual = self.y
            y_pred = action
        elif self.actions[action] in self.trajectory: #action already picked 
            #print('Repeated action')
            terminated = False
            reward -= 1
            self.total_reward -= 1
            done = False
            y_actual = np.nan
            y_pred = np.nan
        else: #new feature being acquired
            #print('Acquiring new feature')
            terminated = False
            reward += 1
            self.total_reward += 1
            done = False
            self.state = self.get_next_state(action-self.num_classes)
            y_actual = np.nan
            y_pred = np.nan
        self.trajectory.append(self.actions[action])
        info = {'index': self.idx, 'episode_length':self.episode_length, 'reward': self.total_reward, 'y_pred': y_pred, 
                'y_actual': y_actual, 'trajectory':self.trajectory, 'terminated':terminated}
        #self.render()
        return self.state, reward, done, info
            
    
    def render(self):
        print(f'STEP {self.episode_length} for index {self.idx}')
        print(f'x: {self.x}')
        print(f'y: {self.y}')
        print(f'Current state: {self.state}')
        print(f'Total reward: {self.total_reward}')
        print(f'Trajectory: {self.trajectory}')
        
            
    
    def reset(self):
        #print('RESETTING THE ENVIRONMENT')
        if self.random:
            self.idx = random.randint(0, self.sample_num-1)
        else:
            self.idx += 1
            if self.idx == len(self.X):
                raise StopIteration()
        #print(f'New idx: {self.idx}')
        self.x, self.y = self.X[self.idx], self.Y[self.idx]
        #print(f'New x: {self.x}')
        #print(f'New y: {self.y}')
        self.state = np.zeros((3,), dtype=np.float32)
        #print(f'New state: {self.state}')
        self.trajectory = []
        #print(f'New trajectory: {self.trajectory}')
        self.episode_length = 0
        #print(f'New episode length: {self.episode_length}')
        self.total_reward = 0
        #print(f'New total reward: {self.total_reward}')
        return self.state
        
    
    def get_next_state(self, feature_idx):
        self.x = self.x.reshape(-1, 3)
        x_value = self.x[0, feature_idx]
        next_state = copy.deepcopy(self.state)
        next_state[feature_idx] = x_value
        return next_state

#### The Agent

In [6]:
from stable_baselines.common.env_checker import check_env
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2
#from stable_baselines import DQN
from stable_baselines import deepq
import tensorflow as tf
from stable_baselines import bench, logger

  "stable-baselines is in maintenance mode, please use [Stable-Baselines3 (SB3)](https://github.com/DLR-RM/stable-baselines3) for an up-to-date version. You can find a [migration guide](https://stable-baselines3.readthedocs.io/en/master/guide/migration.html) in SB3 documentation."


In [10]:
def stable_dqn():
    env = SyntheticEnv(X_train, y_train)
    env = bench.Monitor(env, logger.get_dir())
    model = deepq.DQN.learn(MlpPolicy, env)
    model.save()
    env.close()
    return model
def stable_dqn():
    env = SyntheticEnv(X_train, y_train)
    env = bench.Monitor(env, logger.get_dir())
    model = DQN('MlpPolicy', training_env, verbose=1)
    model.learn(total_timesteps=int(1.2e5), log_interval=10000)
    model.save('models/synthetic_stable_double.pkl')
    env.close()
    return model

dqn_model = stable_dqn()

AttributeError: type object 'MlpPolicy' has no attribute '_init_num_timesteps'

#### Performance Evaluation

In [None]:
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, auc, roc_curve

In [None]:
def multiclass(actual_class, pred_class, average = 'macro'):

    unique_class = set(actual_class)
    roc_auc_dict = {}
    for per_class in unique_class:
        other_class = [x for x in unique_class if x != per_class]
        new_actual_class = [0 if x in other_class else 1 for x in actual_class]
        new_pred_class = [0 if x in other_class else 1 for x in pred_class]
        roc_auc = roc_auc_score(new_actual_class, new_pred_class, average = average)
        roc_auc_dict[per_class] = roc_auc
    avg = sum(roc_auc_dict.values()) / len(roc_auc_dict)
    return avg

In [None]:
def test(ytest, ypred):
    acc = accuracy_score(ytest, ypred)
    f1 = f1_score(ytest, ypred, average ='macro', labels=np.unique(ytest))
    try:
        roc_auc = multiclass(ytest, ypred)
    except:
        roc_auc = None
    return acc, f1, roc_auc

In [None]:
def get_avg_length_reward(df):
    length = np.mean(df.episode_length)
    reward = np.mean(df.reward)
    return length, reward

In [None]:
def synthetic_dqn_eval(dqn_model):
    attempts, correct = 0,0
    test_df = pd.DataFrame()

    env = SyntheticEnv(X_test, y_test, random=False)

    try:
        while True:
            obs, done = env.reset(), False
            while not done:
                obs, rew, done,info = env.step(dqn_model(obs[None])[0])
                #if (done==True) & (np.isfinite(info['y_pred'])):
                if done == True:
                    test_df = test_df.append(info, ignore_index=True)
                #print('....................TEST DF ....................')
                #if len(test_df) != 0:
                #    print(test_df.head())

    except StopIteration:
        print('Testing done.....')
    return test_df

test_df = synthetic_dqn_eval(dqn_model)

In [None]:
len(X_test), len(test_df)

In [None]:
y_pred_df = test_df[test_df['y_pred'].notna()]
success_df = y_pred_df[y_pred_df['y_pred']== y_pred_df['y_actual']]
len(success_df)

In [None]:
success_rate = len(success_df)/len(test_df)*100
success_rate

In [None]:
#avg length and return 
avg_length, avg_return = get_avg_length_reward(test_df)
avg_length, avg_return

In [None]:
acc, f1, roc_auc = test(y_pred_df['y_actual'], y_pred_df['y_pred'])
acc, f1, roc_auc