In [20]:
import pandas as pd
import numpy as np
import random
import os
import tensorflow
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

In [21]:
# Single seed shared by every random number generator used in this notebook.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
# TensorFlow 1.x exposes `set_random_seed` at the top level; TensorFlow 2.x
# removed it in favour of `tensorflow.random.set_seed`. Support both so the
# cell does not fail on a newer installation.
if hasattr(tensorflow, 'set_random_seed'):
    tensorflow.set_random_seed(SEED)
else:
    tensorflow.random.set_seed(SEED)
# NOTE: Python reads PYTHONHASHSEED at interpreter start-up, so setting it here
# does not change hashing in the already-running kernel; it is only inherited
# by child processes started afterwards.
os.environ['PYTHONHASHSEED']=str(SEED)

#### The Data

In [22]:
# Load the dataset and encode the string class labels as integers.
df = pd.read_csv('data/dataset_10000.csv')
class_dict = {'A':0, 'B':1, 'C':2}
df['label'] = df['label'].replace(class_dict)
# Every column except the last is treated as a feature and the last column as
# the target (assumes 'label' is the final column of the CSV -- TODO confirm).
X = df.iloc[:, 0:-1]
y = df.iloc[:, -1]

# Stratified 70/30 split. The split is seeded with the notebook-wide SEED so
# that changing the seed in one place changes it everywhere.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, stratify=y, random_state=SEED)
# Fit the scaler on the training portion only and reuse it for the test
# portion, so no test-set statistics leak into training.
scaler = MinMaxScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
# The environment indexes samples by position, so make sure everything is a
# plain NumPy array. `asarray` avoids copying data that already is one.
X_train, y_train = np.asarray(X_train), np.asarray(y_train)
X_test, y_test = np.asarray(X_test), np.asarray(y_test)

In [23]:
# Display the distinct encoded labels present in the held-out split.
np.unique(y_test)

array([0, 1, 2], dtype=int64)

#### The Environment

In [24]:
import copy
from gym import Env
from gym.spaces import Discrete, Box

In [25]:
class SyntheticEnv(Env):
    """Sequential feature-acquisition environment over a labelled dataset.

    Every episode draws one random sample ``(x, y)`` from the data passed to
    the constructor. The observation starts as all zeros; at each step the
    agent either requests one of the three features (its value is then
    written into the observation) or commits to a class prediction, which
    ends the episode.

    Actions (indices into ``self.actions``):
        0-2  predict class 'A', 'B' or 'C' (terminal)
        3-5  acquire 'length', 'width' or 'height', i.e. column 0, 1 or 2 of
             the sample (assumes the columns of ``X`` are in that order)

    Rewards:
        +1  correct prediction, or first acquisition of a feature
        -1  wrong prediction, repeated feature request, or reaching
            ``max_steps`` (which also ends the episode)
    """

    def __init__(self, X, Y):
        """Store the dataset and initialise the per-episode bookkeeping.

        Args:
            X: array-like of shape (n_samples, 3) holding the feature values.
            Y: array-like of shape (n_samples,) holding the integer labels.
        """
        super(SyntheticEnv, self).__init__()
        self.action_space = Discrete(6)
        self.observation_space = Box(0, 1.5, (3,))
        self.actions = ['A', 'B', 'C', 'length', 'width', 'height']
        self.max_steps = 7
        # `reset` indexes samples by position, so coerce DataFrame/Series
        # inputs to plain arrays up front.
        self.X = np.asarray(X)
        self.Y = np.asarray(Y)
        self.sample_num = len(X)
        self.idx = 0
        self.x = np.zeros((3,), dtype=np.float32)
        self.y = np.nan
        self.state = np.zeros((3,), dtype=np.float32)
        self.num_classes = 3
        self.episode_length = 0
        self.trajectory = []
        self.total_reward = 0

    def step(self, action):
        """Apply one action and return ``(state, reward, done, info)``.

        The checks are evaluated in this order: step limit reached, class
        prediction, repeated feature request, new feature request. On the
        step that reaches ``max_steps`` the action itself is therefore not
        evaluated, even if it is a prediction.

        ``info`` carries the sample index, the episode length, the
        accumulated reward, the predicted and actual label (``nan`` unless
        the action was a prediction) and a snapshot of the trajectory.
        """
        self.episode_length += 1
        y_actual = np.nan
        y_pred = np.nan

        if self.episode_length == self.max_steps:  # episode too long
            reward = -1
            done = True
        elif action < self.num_classes:  # class prediction (terminal action)
            reward = 1 if action == self.y else -1
            done = True
            y_actual = self.y
            y_pred = action
        elif self.actions[action] in self.trajectory:  # feature already requested
            reward = -1
            done = False
        else:  # new feature being acquired
            reward = 1
            done = False
            self.state = self.get_next_state(action - self.num_classes)

        self.total_reward += reward
        self.trajectory.append(self.actions[action])
        info = {
            'index': self.idx,
            'episode_length': self.episode_length,
            'reward': self.total_reward,
            'y_pred': y_pred,
            'y_actual': y_actual,
            # Snapshot, so info dicts from earlier steps are not altered when
            # later steps append to the live trajectory.
            'trajectory': list(self.trajectory),
        }
        return self.state, reward, done, info

    def render(self, mode='human'):
        """Print the current episode status.

        ``mode`` is accepted so that callers using the gym-style
        ``render(mode=...)`` call work; the output is always printed text.
        """
        print(f'STEP {self.episode_length} for index {self.idx}')
        print(f'x: {self.x}')
        print(f'y: {self.y}')
        print(f'Current state: {self.state}')
        print(f'Total reward: {self.total_reward}')
        print(f'Trajectory: {self.trajectory}')

    def reset(self):
        """Start a new episode on a randomly drawn sample.

        Returns:
            The initial all-zero observation.
        """
        self.idx = random.randint(0, self.sample_num-1)
        self.x, self.y = self.X[self.idx], self.Y[self.idx]
        self.state = np.zeros((3,), dtype=np.float32)
        self.trajectory = []
        self.episode_length = 0
        self.total_reward = 0
        return self.state

    def get_next_state(self, feature_idx):
        """Return a copy of the current state with one feature value revealed.

        Args:
            feature_idx: position (0-2) of the feature to copy from the
                current sample into the observation.

        Neither ``self.state`` nor ``self.x`` is modified.
        """
        # Read through a flattened view instead of reshaping `self.x` in
        # place, so the stored sample keeps its original shape.
        x_value = np.asarray(self.x).reshape(-1)[feature_idx]
        next_state = self.state.copy()
        next_state[feature_idx] = x_value
        return next_state

In [26]:
# Build the environment on the scaled training split only.
env = SyntheticEnv(X_train, y_train)



In [27]:
# Smoke-test the environment: play a few episodes with uniformly random
# actions and report the summed reward of each one.
episodes = 10
for episode in range(1, episodes + 1):
    state = env.reset()
    done, score = False, 0

    # Keep sampling actions until the environment signals the episode is over.
    while not done:
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score += reward

    print(f'Episode {episode}, Score: {score}')

Episode 1, Score: -1
Episode 2, Score: -1
Episode 3, Score: 1
Episode 4, Score: 1
Episode 5, Score: 0
Episode 6, Score: -2
Episode 7, Score: -1
Episode 8, Score: 1
Episode 9, Score: -1
Episode 10, Score: -1


#### The Deep Learning Model

In [28]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Input, Flatten
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import Model

Using TensorFlow backend.


In [29]:
# Observation shape and number of discrete actions, read from the
# environment's spaces; these are passed to the model builders below.
states = env.observation_space.shape
actions = env.action_space.n
states, actions

((3,), 6)

In [30]:
# Display the action count on its own (same value as assigned from
# env.action_space.n above).
actions

6

In [31]:
def build_model(states, actions):
    """Build a small fully connected network with one linear output per action.

    Args:
        states: input shape tuple passed to the first layer as ``input_shape``.
        actions: number of output units.

    Returns:
        The uncompiled ``Sequential`` model. Its summary is printed as a
        side effect.
    """
    model = Sequential()
    model.add(Dense(24, activation='relu', input_shape=states))
    model.add(Dense(24, activation='relu'))
    model.add(Dense(actions, activation='linear'))
    # `summary()` prints the table itself and returns None, so it must not be
    # wrapped in print() (that emitted a stray "None" line).
    model.summary()
    return model

def create_model(states, actions):
    """Build a dropout-regularised fully connected network.

    Dropout with rate 0.5 is applied to the input and after each of the three
    hidden layers (64, 32 and 8 units); the output layer has one linear unit
    per action.

    NOTE(review): 50% dropout directly on the input is aggressive for a
    low-dimensional observation -- confirm this is intended.

    Args:
        states: input shape tuple passed to the first layer as ``input_shape``.
        actions: number of output units.

    Returns:
        The uncompiled ``Sequential`` model. Its summary is printed as a
        side effect.
    """
    model = Sequential()
    model.add(Dropout(0.5, input_shape=states))
    model.add(Dense(64, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(32, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(8, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(actions, activation='linear'))
    # `summary()` prints the table itself and returns None, so it must not be
    # wrapped in print() (that emitted a stray "None" line).
    model.summary()
    # The model is deliberately returned uncompiled.
    return model
    
def build_model2(state_size, num_actions):
    """Build a functional-API network that takes a (1, state_size) input.

    The input carries a leading axis of length 1, which is flattened before
    three 16-unit ReLU layers (presumably this axis is the keras-rl memory
    window; ``build_agent`` uses ``window_length=1`` -- confirm).

    Args:
        state_size: number of values in a single observation.
        num_actions: number of linear output units.

    Returns:
        The uncompiled ``Model``. Its summary is printed as a side effect.
    """
    # Named `inputs` rather than `input` to avoid shadowing the builtin.
    inputs = Input(shape=(1, state_size))
    x = Flatten()(inputs)
    x = Dense(16, activation='relu')(x)
    x = Dense(16, activation='relu')(x)
    x = Dense(16, activation='relu')(x)
    output = Dense(num_actions, activation='linear')(x)
    model = Model(inputs=inputs, outputs=output)
    # `summary()` prints the table itself and returns None, so it must not be
    # wrapped in print() (that emitted a stray "None" line).
    model.summary()
    return model

In [32]:
# Instantiate the three candidate architectures.
model1 = build_model(states, actions)
model2 = create_model(states, actions)
# Take the observation size from the environment instead of hard-coding it,
# so the model stays in sync if the observation space changes.
model3 = build_model2(states[0], actions)

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_11 (Dense)             (None, 24)                96        
_________________________________________________________________
dense_12 (Dense)             (None, 24)                600       
_________________________________________________________________
dense_13 (Dense)             (None, 6)                 150       
Total params: 846
Trainable params: 846
Non-trainable params: 0
_________________________________________________________________
None
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dropout_4 (Dropout)          (None, 3)                 0         
_________________________________________________________________
dense_14 (Dense)             (None, 64)                256       
_________________________________________________________________
dropout

#### Keras-rl Agent

In [33]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [34]:
def build_agent(model, actions):
    """Wrap ``model`` in a keras-rl DQN agent.

    The agent samples actions with a Boltzmann (softmax) policy and keeps a
    replay memory of up to 50,000 entries with a window length of 1. An
    epsilon-greedy policy with linear annealing was considered as an
    alternative policy.

    Args:
        model: Keras model producing one value per action.
        actions: number of discrete actions.

    Returns:
        The (not yet compiled) ``DQNAgent``.
    """
    return DQNAgent(
        model=model,
        policy=BoltzmannQPolicy(),
        memory=SequentialMemory(limit=50000, window_length=1),
        nb_actions=actions,
        # keras-rl presumably treats values below 1 as a soft-update rate for
        # the target network -- confirm against the installed version.
        target_model_update=1e-2,
    )

In [35]:
# Build the agent around model3 (the only builder whose input has the extra
# leading axis of length 1) and compile it with Adam.
# NOTE(review): the recorded output of this cell is
# "AttributeError: 'Adam' object has no attribute '_name'". Presumably the
# optimizer class passed here is not the kind the installed `rl` package
# expects -- confirm that the keras / tensorflow / keras-rl versions match
# before relying on the cells below.
mydqn = build_agent(model3, actions)
mydqn.compile(Adam(lr=1e-3), metrics=['mae'])

AttributeError: 'Adam' object has no attribute '_name'

In [None]:
# Run the agent for 100 evaluation episodes on the training environment and
# print the mean episode reward.
# NOTE(review): no training call (e.g. `mydqn.fit`) is visible before this
# cell, so this appears to evaluate an untrained agent -- confirm.
scores = mydqn.test(env, nb_episodes=100, visualize=False)
print(np.mean(scores.history['episode_reward']))