In [1]:
import pandas as pd
import numpy as np
import random
import os
import tensorflow
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


#### Data with random zeros

In [2]:
nn_df = pd.read_csv('data/dataset_10000.csv')

In [3]:
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tensorflow.set_random_seed(SEED)
#tensorflow.random.set_seed(SEED)
os.environ['PYTHONHASHSEED']=str(SEED)

In [65]:
import tensorflow as tf
a = np.array([-1, 0.2, -0.35, 4.8, 3.6, -4.6], dtype=np.float32)
b = np.array([[0,1,0,0,0,0], [0,0,1,0,0,0], [0,0,0,1,0,0], [0,0,0,0,1,0], [0,0,1,0,0,0],[0,1,0,0,0,0], [0,0,1,0,0,0]], 
             dtype=np.float32)
c= a*b
c

array([[-0.  ,  0.2 , -0.  ,  0.  ,  0.  , -0.  ],
       [-0.  ,  0.  , -0.35,  0.  ,  0.  , -0.  ],
       [-0.  ,  0.  , -0.  ,  4.8 ,  0.  , -0.  ],
       [-0.  ,  0.  , -0.  ,  0.  ,  3.6 , -0.  ],
       [-0.  ,  0.  , -0.35,  0.  ,  0.  , -0.  ],
       [-0.  ,  0.2 , -0.  ,  0.  ,  0.  , -0.  ],
       [-0.  ,  0.  , -0.35,  0.  ,  0.  , -0.  ]], dtype=float32)

In [66]:
np.sum(c, axis=1)

array([ 0.2 , -0.35,  4.8 ,  3.6 , -0.35,  0.2 , -0.35], dtype=float32)

#### The Data

In [4]:
# #randomly replacing some values in dataset with 0s
# def insert_zeros(arr):
#     df = pd.DataFrame(arr)
# #     df['width'] = df['width'].sample(frac=0.7)
# #     df['height'] = df['height'].sample(frac=0.35)
# #     for col in df.columns:
# #         df[col] = df[col].sample(frac=0.6)
#     print(df.isna().sum())
#     df.fillna(0, inplace=True)
#     #print(df.isna().sum())
#     return np.array(df)

In [45]:
df.height.value_counts()[0]

18

In [5]:
df = pd.read_csv('data/dataset_10000_missing.csv')
class_dict = {'A':0, 'B':1, 'C':2}
df['label'] = df['label'].replace(class_dict)
X = df.iloc[:, 0:-1]
y = df.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=SEED)
#X_train = insert_zeros(X_train)
#X_test = insert_zeros(X_test)
X_train, y_train = np.array(X_train), np.array(y_train)
X_test, y_test = np.array(X_test), np.array(y_test)
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((6941, 3), (2976, 3), (6941,), (2976,))

In [6]:
X_train[:10]

array([[27,  8, 54],
       [15,  0, 43],
       [17,  2, 83],
       [15,  8, 19],
       [ 7, 10, 15],
       [ 9,  7, 85],
       [17,  1, 58],
       [20, 13, 80],
       [15,  8, 24],
       [ 2,  9, 82]], dtype=int64)

#### The Environment

In [7]:
import copy
from gym import Env
from gym.spaces import Discrete, Box

In [8]:
class SyntheticEnv(Env):
    def __init__(self, X, Y, random=True):
        super(SyntheticEnv, self).__init__()
        self.action_space = Discrete(6)
        self.observation_space = Box(0, 1.5, (3,))
        self.actions = ['A', 'B', 'C', 'length', 'width', 'height']
        self.max_steps = 7
        self.X = X
        self.Y = Y
        self.sample_num = len(X)
        self.idx = -1
        self.x = np.zeros((3,), dtype=np.float32)
        self.y = np.nan
        self.state = np.zeros((3,), dtype=np.float32)
        self.num_classes = 3
        self.episode_length = 0
        self.trajectory = []
        self.total_reward = 0
        self.random = random
        
    
    def step(self, action):
        #print('A step in the environment')
        #print(f'action: {action}')
        self.episode_length += 1
        reward = 0
        if self.episode_length == self.max_steps: # episode too long
            #print('Reached max steps')
            reward -=1
            self.total_reward -=1
            terminated = True
            done = True
            y_actual = self.y
            y_pred = np.nan
            is_success = False
        elif action < self.num_classes: #diagnosis (terminal action)
            #print('Terminal action')
            if action == self.y:
                reward +=1
                self.total_reward += 1
                is_success = True
            else:
                reward -= 1
                self.total_reward -= 1
                is_success = False
            terminated = False
            done = True
            y_actual = self.y
            y_pred = action
        elif self.actions[action] in self.trajectory: #action already picked 
            #print('Repeated action')
            terminated = False
            reward -= 1
            self.total_reward -= 1
            done = False
            y_actual = np.nan
            y_pred = np.nan
            is_success = None
        else: #new feature being acquired
            #print('Acquiring new feature')
            terminated = False
            reward += 1
            self.total_reward += 1
            done = False
            self.state = self.get_next_state(action-self.num_classes)
            y_actual = np.nan
            y_pred = np.nan
            is_success = None
        self.trajectory.append(self.actions[action])
        info = {'index': self.idx, 'episode_length':self.episode_length, 'reward': self.total_reward, 'y_pred': y_pred, 
                'y_actual': y_actual, 'trajectory':self.trajectory, 'terminated':terminated, 'is_success': is_success}
        #self.render()
        return self.state, reward, done, info
            
    
    def render(self):
        print(f'STEP {self.episode_length} for index {self.idx}')
        print(f'x: {self.x}')
        print(f'y: {self.y}')
        print(f'Current state: {self.state}')
        print(f'Total reward: {self.total_reward}')
        print(f'Trajectory: {self.trajectory}')
        
            
    
    def reset(self):
        #print('RESETTING THE ENVIRONMENT')
        if self.random:
            self.idx = random.randint(0, self.sample_num-1)
        else:
            self.idx += 1
            if self.idx == len(self.X):
                raise StopIteration()
        #print(f'New idx: {self.idx}')
        self.x, self.y = self.X[self.idx], self.Y[self.idx]
        #print(f'New x: {self.x}')
        #print(f'New y: {self.y}')
        self.state = np.zeros((3,), dtype=np.float32)
        #print(f'New state: {self.state}')
        self.trajectory = []
        #print(f'New trajectory: {self.trajectory}')
        self.episode_length = 0
        #print(f'New episode length: {self.episode_length}')
        self.total_reward = 0
        #print(f'New total reward: {self.total_reward}')
        return self.state
        
    
    def get_next_state(self, feature_idx):
        self.x = self.x.reshape(-1, 3)
        x_value = self.x[0, feature_idx]
        next_state = copy.deepcopy(self.state)
        next_state[feature_idx] = x_value
        return next_state

In [9]:
training_env = SyntheticEnv(X_train, y_train)

#### The Agent

In [10]:
from stable_baselines.common.env_checker import check_env
from stable_baselines.common.policies import MlpPolicy
from stable_baselines.common.vec_env import DummyVecEnv
from stable_baselines import PPO2
from stable_baselines import DQN
from stable_baselines import bench, logger

  "stable-baselines is in maintenance mode, please use [Stable-Baselines3 (SB3)](https://github.com/DLR-RM/stable-baselines3) for an up-to-date version. You can find a [migration guide](https://stable-baselines3.readthedocs.io/en/master/guide/migration.html) in SB3 documentation."


In [11]:
def stable_dqn():
    env = SyntheticEnv(X_train, y_train)
    env = bench.Monitor(env, logger.get_dir())
    model = DQN('MlpPolicy', training_env, verbose=1, seed=SEED, n_cpu_tf_sess=1)
    model.learn(total_timesteps=int(1.2e5), log_interval=10000)
    #model.save('models/synthetic_stable_dqn.pkl')
    env.close()
    return model

dqn_model = stable_dqn()

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use keras.layers.flatten instead.
Instructions for updating:
Use tf.cast instead.
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 10000    |
| mean 100 episode reward | 3.4      |
| steps                   | 36067    |
| success rate            | 0.91     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 20000    |
| mean 100 episode reward | 3.5      |
| steps                   | 77795    |
| success rate            | 0.92     |
--------------------------------------
--------------------------------------
| % time spent exploring  | 2        |
| episodes                | 30000    |
| mean 100 episode reward | 3.5      |
| steps                   | 119454   |
| success rate            | 0.89     |
--------------------------------------


#### Performance Evaluation

In [12]:
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, auc, roc_curve

In [13]:
def multiclass(actual_class, pred_class, average = 'macro'):

    unique_class = set(actual_class)
    roc_auc_dict = {}
    for per_class in unique_class:
        other_class = [x for x in unique_class if x != per_class]
        new_actual_class = [0 if x in other_class else 1 for x in actual_class]
        new_pred_class = [0 if x in other_class else 1 for x in pred_class]
        roc_auc = roc_auc_score(new_actual_class, new_pred_class, average = average)
        roc_auc_dict[per_class] = roc_auc
    avg = sum(roc_auc_dict.values()) / len(roc_auc_dict)
    return avg

In [14]:
def test(ytest, ypred):
    acc = accuracy_score(ytest, ypred)
    f1 = f1_score(ytest, ypred, average ='macro', labels=np.unique(ytest))
    try:
        roc_auc = multiclass(ytest, ypred)
    except:
        roc_auc = None
    return acc, f1, roc_auc

In [15]:
def get_avg_length_reward(df):
    length = np.mean(df.episode_length)
    reward = np.mean(df.reward)
    return length, reward

In [16]:
def synthetic_dqn_eval(dqn_model):
    attempts, correct = 0,0
    test_df = pd.DataFrame()

    env = SyntheticEnv(X_test, y_test, random=False)

    try:
        while True:
            obs, done = env.reset(), False
            while not done:
                action, _states = dqn_model.predict(obs, deterministic=True)
                obs, rew, done,info = env.step(action)
                #if (done==True) & (np.isfinite(info['y_pred'])):
                if done == True:
                    test_df = test_df.append(info, ignore_index=True)
                #print('....................TEST DF ....................')
                #if len(test_df) != 0:
                #    print(test_df.head())

    except StopIteration:
        print('Testing done.....')
    return test_df

test_df = synthetic_dqn_eval(dqn_model)

Testing done.....


In [17]:
len(X_test), len(test_df)

(2976, 2976)

In [18]:
y_pred_df = test_df[test_df['y_pred'].notna()]
success_df = y_pred_df[y_pred_df['y_pred']== y_pred_df['y_actual']]
len(success_df)

2741

In [19]:
success_rate = len(success_df)/len(test_df)*100
success_rate

92.10349462365592

In [20]:
#avg length and return 
avg_length, avg_return = get_avg_length_reward(test_df)
avg_length, avg_return

(4.203629032258065, 3.579301075268817)

In [21]:
acc, f1, roc_auc = test(y_pred_df['y_actual'], y_pred_df['y_pred'])
acc, f1, roc_auc

(0.9881038211968277, 0.9221194854972999, 0.9341224232641402)

#### Failing episodes

In [22]:
test_df.y_pred.value_counts()

1.0    2248
2.0     503
0.0      23
Name: y_pred, dtype: int64

In [23]:
X_test[:20]

array([[16, 10,  8],
       [28,  5, 23],
       [23, 14, 97],
       [26, 13, 30],
       [ 9,  5, 16],
       [24, 14, 87],
       [23,  2, 98],
       [ 5,  8, 21],
       [13,  1, 79],
       [12,  3, 72],
       [24,  7, 61],
       [ 2, 12, 78],
       [14,  2, 46],
       [16, 11,  7],
       [15, 12, 94],
       [18,  1, 15],
       [ 6, 10, 47],
       [11, 13, 99],
       [ 5, 12, 81],
       [15,  9,  7]], dtype=int64)

In [24]:
test_df.head(20)

Unnamed: 0,episode_length,index,is_success,reward,terminated,trajectory,y_actual,y_pred
0,4.0,0.0,1.0,4.0,0.0,"[height, width, length, B]",1.0,1.0
1,4.0,1.0,1.0,4.0,0.0,"[height, length, width, B]",1.0,1.0
2,4.0,2.0,1.0,4.0,0.0,"[height, width, length, B]",1.0,1.0
3,4.0,3.0,1.0,4.0,0.0,"[height, length, width, B]",1.0,1.0
4,4.0,4.0,1.0,4.0,0.0,"[height, width, length, B]",1.0,1.0
5,4.0,5.0,1.0,4.0,0.0,"[height, width, length, B]",1.0,1.0
6,4.0,6.0,1.0,4.0,0.0,"[height, width, length, B]",1.0,1.0
7,4.0,7.0,1.0,4.0,0.0,"[height, length, width, B]",1.0,1.0
8,4.0,8.0,1.0,4.0,0.0,"[height, width, length, B]",1.0,1.0
9,4.0,9.0,1.0,4.0,0.0,"[height, length, width, B]",1.0,1.0


In [25]:
#episodes that reach max_steps and are terminated forcefully
na_df = test_df[test_df['y_pred'].isna()]
na_df

Unnamed: 0,episode_length,index,is_success,reward,terminated,trajectory,y_actual,y_pred
28,7.0,28.0,0.0,-1.0,1.0,"[height, length, width, width, width, width, w...",2.0,
49,7.0,49.0,0.0,-3.0,1.0,"[height, width, width, width, width, width, wi...",1.0,
51,7.0,51.0,0.0,-1.0,1.0,"[height, length, width, width, width, width, w...",1.0,
52,7.0,52.0,0.0,-5.0,1.0,"[height, height, height, height, height, heigh...",2.0,
79,7.0,79.0,0.0,-3.0,1.0,"[height, width, width, width, width, width, wi...",1.0,
...,...,...,...,...,...,...,...,...
2882,7.0,2882.0,0.0,-3.0,1.0,"[height, width, width, width, width, width, wi...",2.0,
2894,7.0,2894.0,0.0,-3.0,1.0,"[height, width, width, width, width, width, wi...",2.0,
2906,7.0,2906.0,0.0,-3.0,1.0,"[height, width, width, width, width, width, wi...",2.0,
2925,7.0,2925.0,0.0,-1.0,1.0,"[height, length, width, width, width, width, w...",2.0,


In [26]:
na_df.index

Int64Index([  28,   49,   51,   52,   79,  102,  119,  142,  157,  163,
            ...
            2794, 2819, 2832, 2871, 2874, 2882, 2894, 2906, 2925, 2954],
           dtype='int64', length=202)

In [27]:
y_test[2734]

2

In [28]:
len(y_pred_df)

2774

In [29]:
#episodes that are misdiagnosed
non_success_df = y_pred_df[y_pred_df['y_pred']!= y_pred_df['y_actual']]
non_success_df

Unnamed: 0,episode_length,index,is_success,reward,terminated,trajectory,y_actual,y_pred
181,4.0,181.0,0.0,2.0,0.0,"[height, width, length, B]",0.0,1.0
312,4.0,312.0,0.0,2.0,0.0,"[height, length, width, B]",2.0,1.0
327,4.0,327.0,0.0,2.0,0.0,"[height, width, length, B]",2.0,1.0
372,4.0,372.0,0.0,2.0,0.0,"[height, width, length, C]",1.0,2.0
373,4.0,373.0,0.0,2.0,0.0,"[height, width, length, C]",0.0,2.0
380,4.0,380.0,0.0,2.0,0.0,"[height, length, width, B]",2.0,1.0
548,4.0,548.0,0.0,2.0,0.0,"[height, width, length, C]",0.0,2.0
565,4.0,565.0,0.0,2.0,0.0,"[height, length, width, B]",2.0,1.0
577,4.0,577.0,0.0,2.0,0.0,"[height, width, length, B]",2.0,1.0
686,4.0,686.0,0.0,2.0,0.0,"[height, width, length, B]",2.0,1.0


In [30]:
non_success_df.index

Int64Index([ 181,  312,  327,  372,  373,  380,  548,  565,  577,  686,  781,
             843,  884,  987, 1019, 1129, 1133, 1356, 1588, 1605, 1611, 1623,
            1828, 1895, 1967, 2080, 2136, 2304, 2372, 2391, 2659, 2896, 2932],
           dtype='int64')

In [31]:
b_non_succ_df = non_success_df[non_success_df.y_pred==1]
b_non_succ_df

Unnamed: 0,episode_length,index,is_success,reward,terminated,trajectory,y_actual,y_pred
181,4.0,181.0,0.0,2.0,0.0,"[height, width, length, B]",0.0,1.0
312,4.0,312.0,0.0,2.0,0.0,"[height, length, width, B]",2.0,1.0
327,4.0,327.0,0.0,2.0,0.0,"[height, width, length, B]",2.0,1.0
380,4.0,380.0,0.0,2.0,0.0,"[height, length, width, B]",2.0,1.0
565,4.0,565.0,0.0,2.0,0.0,"[height, length, width, B]",2.0,1.0
577,4.0,577.0,0.0,2.0,0.0,"[height, width, length, B]",2.0,1.0
686,4.0,686.0,0.0,2.0,0.0,"[height, width, length, B]",2.0,1.0
843,4.0,843.0,0.0,2.0,0.0,"[height, length, width, B]",2.0,1.0
884,4.0,884.0,0.0,2.0,0.0,"[height, width, length, B]",2.0,1.0
987,4.0,987.0,0.0,2.0,0.0,"[height, width, length, B]",2.0,1.0


In [32]:
c_non_succ_df = non_success_df[non_success_df.y_pred==2]
c_non_succ_df

Unnamed: 0,episode_length,index,is_success,reward,terminated,trajectory,y_actual,y_pred
372,4.0,372.0,0.0,2.0,0.0,"[height, width, length, C]",1.0,2.0
373,4.0,373.0,0.0,2.0,0.0,"[height, width, length, C]",0.0,2.0
548,4.0,548.0,0.0,2.0,0.0,"[height, width, length, C]",0.0,2.0
781,4.0,781.0,0.0,2.0,0.0,"[height, width, length, C]",0.0,2.0
1019,4.0,1019.0,0.0,2.0,0.0,"[height, width, length, C]",0.0,2.0
1605,4.0,1605.0,0.0,2.0,0.0,"[height, width, length, C]",0.0,2.0
1828,4.0,1828.0,0.0,2.0,0.0,"[height, length, width, C]",0.0,2.0
1895,4.0,1895.0,0.0,2.0,0.0,"[height, width, length, C]",1.0,2.0
2304,4.0,2304.0,0.0,2.0,0.0,"[height, width, length, C]",1.0,2.0
2932,4.0,2932.0,0.0,2.0,0.0,"[height, width, length, C]",1.0,2.0
