In [37]:
import numpy as np
import pandas as pd
import gym
import random
import torch

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import label_binarize, LabelBinarizer
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, roc_curve, auc

In [38]:
np.random.seed(42)
random.seed(42)
torch.manual_seed(42)
torch.cuda.manual_seed(42)

#### The data

In [39]:
df = pd.read_csv('data/balanced_q_dataset.csv')
X = df.iloc[:, 0:-1]
y = df.iloc[:, -1]

X_trainval, X_test, y_trainval, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)
X_train, X_val, y_train, y_val = train_test_split(X_trainval, y_trainval, test_size=0.1, stratify=y_trainval, random_state=42)

X_train, y_train = np.array(X_train), np.array(y_train)
X_val, y_val = np.array(X_val), np.array(y_val)
X_test, y_test = np.array(X_test), np.array(y_test)

#### The environment class

In [40]:
class Env:
    def __init__(self, X, Y):
        self.X = X
        self.Y = Y
        self.x = np.zeros((1, 3), dtype=np.float32)
        self.y = -1
        self.num_classes = 3
        self.sample_num = len(X)
        self.state = np.zeros((1, 3), dtype=np.float32)
        self.total_reward = 0
        self.trajectory = []
        self.episode_length = 0
        self.available_actions = np.zeros((1, 6), dtype=np.float32)
        
    def reset(self): #I am going to go through the data sequentially
        #print(f'Current epsiode completed. Resetting to index {i}')
        #if i < self.sample_num:
        i = random.randint(0, self.env.sample_num-1)
        self.trajectory = []
        self.total_reward = 0
        self.episode_length = 0
        self.state = np.zeros((1, 3), dtype=np.float32)
        self.x, self.y = self.X[i], self.Y[i]
        self.available_actions = np.zeros((1, 6), dtype=np.float32)
        return self.state, self.available_actions
       # else:
       #     pass
        
    def get_next_state(self, action):
        self.available_actions[0, action] =1
        if action < 3: #the classes
            next_state = None
        elif (action >=3) & (action <=5):
            feature_idx = action - 3
            self.x = self.x.reshape(-1, 3)
            x_value = self.x[0, feature_idx]
            next_state = copy.deepcopy(self.state)
            next_state[0, feature_idx] = x_value
        return next_state
    
    def step(self, action):
        ep_length = 1
        reward = 0
        next_state = self.get_next_state(action)
        if action < 3:
            if action == self.y:
                reward += 1
            else:
                reward -= 1
            y_actual = self.y 
            y_pred = action
            done = True
        else:
            reward += 0
            y_actual = np.nan
            y_pred = np.nan
            done=False
            
        self.total_reward+=reward
        self.episode_length+= ep_length
        total_reward_metric = self.total_reward 
        total_length_metric = self.episode_length
        
        info = {'episode_length':total_length_metric, 'total_reward': total_reward_metric, 'y_actual':y_actual, 
                   'y_pred': y_pred}
        #print(f'The metrics: {metrics}')
        return next_state, self.available_actions, reward, done, info

#### The neural network

In [41]:
nb_actions = 6
nb_features = 3

In [42]:
model = Sequential()
model.add(Flatten(input_shape=(1, nb_features)))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
print(model.summary())

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 flatten_2 (Flatten)         (None, 3)                 0         
                                                                 
 dense_4 (Dense)             (None, 16)                64        
                                                                 
 activation_4 (Activation)   (None, 16)                0         
                                                                 
 dense_5 (Dense)             (None, 6)                 102       
                                                                 
 activation_5 (Activation)   (None, 6)                 0         
                                                                 
Total params: 166
Trainable params: 166
Non-trainable params: 0
_________________________________________________________________
None


In [43]:
policy = EpsGreedyQPolicy()
env = Env(X_train, y_train)
memory = SequentialMemory(limit=50000, window_length=1)
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10, target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

  super(Adam, self).__init__(name, **kwargs)


In [44]:
dqn.fit(env, nb_steps=5000, visualize=False, verbose=0)

AttributeError: 'Env' object has no attribute 'env'

In [None]:
dqn.test(env, nb_episodes=5, visualize=True)