# Reinforcement Learning based Virtual Screening
Protein-ligand pose prediction using RL

## Spatial block arrangement using RL CNN-DQN
__Input__
- Sandbox containing the block and the surface it is placed on

__Output__
- <x, y, $\theta$> of the block with respect to the sandbox

In [None]:
from block_world.env import Env
from agents.ddpg_agent import DDPGAgent
from matplotlib import pyplot as plt
import numpy as np

### Generating the sandbox
The block world generates a block and places it on the surface by randomizing <x, y, $\theta$>. The block rotated by $\theta$ is stored in the _block_ property of the Block class.

Both the block and the surface are combined together into a single sandbox image. (More here)

In [None]:
# Bounds handed to Env: first row [0, 0, -180], second row [36, 36, 180].
# NOTE(review): presumably the lower / upper limits for <x, y, theta> — confirm against Env.
action_bounds = [[0, 0, -180], [36, 36, 180]]
env = Env(action_bounds)
# Show env.block.sandbox (left) next to env.block.original_sandbox (right).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 20))
ax1.imshow(env.block.sandbox)
ax2.imshow(env.block.original_sandbox)
plt.show()

In [None]:
# Print the block's rotate_angle, shift_x and shift_y attributes.
print(env.block.rotate_angle, env.block.shift_x, env.block.shift_y)

In [None]:
# Draw one sample from the environment's action space and apply it.
xx = env.action_space.sample()
# The result of step() is unpacked into exactly three values.
# NOTE(review): `t` is presumably a terminal/done flag — confirm against Env.step.
state, reward, t = env.step(xx)
# Display the sandbox as it looks after the step.
plt.imshow(env.block.sandbox)
plt.show()
# Report the reward, the sampled action, and the shape of the action bounds.
print(reward, xx, env.action_space.action_bounds.shape)

### Converting sandbox image to tensors for CNN consumption
The sandbox image needs to be converted into a tensor that the CNN can consume.

In [None]:
# Build the DDPG agent around the existing environment instance.
agent = DDPGAgent(env)

In [None]:
# Run the agent's play() with argument 10 and keep whatever it returns.
# NOTE(review): 10 is presumably a step or episode count — confirm against DDPGAgent.play.
actions =  agent.play(10)

In [None]:
# Rebind `env` to the environment held by the agent, then plot its state.
env = agent.env
# Show env.block.sandbox (left) next to env.block.original_sandbox (right).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 20))
ax1.imshow(env.block.sandbox)
ax2.imshow(env.block.original_sandbox)
plt.show()

### Creating the model
The model is a 2D convolutional network with four convolution stages (each stage is two Conv2D layers followed by max-pooling), followed by 3 Dense layers.

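The last layer of the NN has one output unit per pose component, which suits the multi-output, continuous problem. Note that the code below uses a `tanh` activation on this layer, so each output is bounded to [-1, 1] rather than being strictly linear.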

In [None]:
def create_model(input_shape, output_count):
    """Build and compile the CNN that maps a sandbox image to a pose vector.

    The network has four convolution stages with 32, 64, 128 and 256 filters.
    Each stage is two 3x3, 'same'-padded ReLU convolutions followed by 2x2
    max-pooling. The flattened features go through two ReLU dense layers
    (256 and 128 units, each followed by 50% dropout) and a final ``tanh``
    dense layer with ``output_count`` units.

    Args:
        input_shape: Shape of a single input sample, given to the first
            convolution layer.
        output_count: Number of units in the output layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    model = Sequential()

    for stage, filters in enumerate((32, 64, 128, 256)):
        # Only the very first layer of the network needs the input shape.
        first_layer_kwargs = {'input_shape': input_shape} if stage == 0 else {}
        # Keras 2 call form: kernel size as a tuple and ``padding=``. The
        # Keras 1 form ``Conv2D(n, 3, 3, border_mode='same')`` would pass the
        # second 3 as ``strides`` and ``border_mode`` is not a valid argument.
        model.add(Conv2D(filters, (3, 3), padding='same', activation='relu',
                         **first_layer_kwargs))
        model.add(Conv2D(filters, (3, 3), padding='same', activation='relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))

    model.add(Flatten())
    for units in (256, 128):
        model.add(Dense(units, activation='relu'))
        model.add(Dropout(0.5))

    # tanh keeps every output in [-1, 1].
    model.add(Dense(output_count, activation='tanh'))

    # The outputs are continuous values in [-1, 1], so this is a regression
    # problem: binary cross-entropy expects probabilities in [0, 1] and
    # 'accuracy' is not meaningful here. Use mean squared error and report
    # mean absolute error instead.
    model.compile(loss='mse',
                  optimizer=RMSprop(lr=0.0001),
                  metrics=['mae'])

    return model

### Pose Selection
`get_pose` returns the <x, y, $\theta$> pose predicted by the neural network.

In [None]:
def get_pose(block, input_shape, model):
    """Predict a pose from the block's current sandbox image.

    The sandbox is reshaped to ``input_shape``, given a leading batch axis of
    size one and passed to ``model.predict``. The prediction is then
    multiplied element-wise by ``[*block.sandbox.shape, 180]``.

    Args:
        block: Object exposing the sandbox image as the array ``block.sandbox``.
        input_shape: Shape the sandbox is reshaped to before prediction.
        model: Object with a ``predict`` method accepting the batched state.

    Returns:
        Array of shape ``(3, *prediction.shape)``: three identical copies of
        the scaled prediction stacked along a new leading axis.
    """
    state = np.expand_dims(block.sandbox.reshape(input_shape), axis=0)
    # The three stacked entries are identical by construction, so the model is
    # evaluated once and the result replicated instead of predicting 3 times.
    prediction = np.asarray(model.predict(state))
    scale = np.array([*block.sandbox.shape, 180])
    # NOTE(review): the triple stacking looks unintended ("correct pose
    # maybe?"); a single <x, y, theta> vector is presumably what callers
    # want — confirm before changing the returned shape.
    return np.stack([prediction] * 3) * scale

In [None]:
# Build a model with three outputs and ask it for a pose.
# NOTE(review): `input_shape` and `block` are not assigned in the cells visible
# here; they must be defined before this cell runs (`block` is presumably
# `env.block` — confirm).
model = create_model(input_shape, 3)
new_pose = get_pose(block, input_shape, model)

In [None]:
# Print the raw pose, then evaluate its absolute value rounded to integers
# (the last expression is what the notebook cell displays).
print(new_pose)
np.round(np.abs(new_pose)).astype(int)

In [None]:
# Unpack the pose along its first axis into update_sandbox's positional args.
# NOTE(review): assumes update_sandbox takes one argument per entry of
# new_pose's first axis — confirm against the Block class.
block.update_sandbox(*new_pose)
# Show block.sandbox (left) next to block._bkp_surface (right).
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 20))
ax1.imshow(block.sandbox)
ax2.imshow(block._bkp_surface)
plt.show()

In [None]:
import random

# Draw one uniform random value per row of the transposed action bounds,
# i.e. one value per column of agent.action_bounds.
# NOTE(review): assumes each transposed row unpacks to a (low, high) pair — confirm.
[random.uniform(*bounds) for bounds in agent.action_bounds.transpose()]


In [None]:
# View `x` with shape (1, 3, 3); this requires `x` to hold exactly 9 elements.
# NOTE(review): `x` is not assigned in the cells visible here.
x.reshape(1, 3, 3)

In [None]:
# View `x` with shape (3, 3, 1); this requires `x` to hold exactly 9 elements.
# NOTE(review): `x` is not assigned in the cells visible here.
x.reshape(3, 3, 1)