# Reinforcement Learning based Virtual Screening
Protein-Ligand pose prediction using RL

## Spatial block arrangement using RL CNN-DQN
__Input__
- Sandbox with the block and the surface placement

__Output__
- $\langle x, y, \theta \rangle$ for the block with respect to the sandbox

### Generating the sandbox
The block world generates a block and places it on the surface by randomizing $\langle x, y, \theta \rangle$. The block rotated by $\theta$ is stored in the _block_ property of the Block class.

Both the block and the surface are combined together into a single sandbox image. (More here)

In [None]:
from rlvs.block_world.env import Env
import numpy as np
from matplotlib import pyplot as plt
# Build a fresh environment and show its two sandbox images side by side:
# the current sandbox on the left, the original sandbox on the right.
env = Env()
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 20))
panels = ((ax1, env.block.sandbox), (ax2, env.block.original_sandbox))
for axis, image in panels:
    axis.imshow(image)
plt.show()

### Update sandbox
The sandbox is updated by applying the offsets $\delta x$, $\delta y$ and $\delta\theta$.

In [None]:
# Apply a (delta-x, delta-y, delta-theta) update to the block, then redraw the
# current sandbox next to the original one.
block = env.block
block.update_sandbox(-10, -10, -180)
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 20))
ax1.imshow(block.sandbox)
ax2.imshow(block.original_sandbox)
plt.show()

### Perform env step
The sandbox can also be updated by passing an array consisting of $\delta x$, $\delta y$ and $\delta\theta$ to `env.step`, which returns the next state, the reward, and whether the terminal state has been reached.

In [None]:
# Draw an action from the environment's action space and apply it.
xx = env.action_space.sample()
# step() hands back the next state, the reward, and the terminal flag.
# (The former bare `env.block.block_x` expression was dropped: it was not the
# last expression of the cell, so it was neither displayed nor used.)
state, reward, t = env.step(xx)
plt.imshow(env.block.sandbox)
plt.show()
print(reward, xx, env.action_space.action_bounds.shape)

### Absolute Fit
An absolute fit is when the block is placed squarely on top of the slot, with distance $d \leq 0.1$.


In [None]:
env = Env()
block = env.block
print(block._max_dist)


def block_positions():
    """Return [shift_x, block_x, shift_y, block_y] read from ``block``."""
    return [block.shift_x, block.block_x, block.shift_y, block.block_y]


def draw_sandboxes():
    """Plot the environment's current and original sandbox side by side."""
    figure, (left, right) = plt.subplots(1, 2, figsize=(10, 20))
    left.imshow(env.block.sandbox)
    right.imshow(env.block.original_sandbox)
    return figure, (left, right)


# First move: per-axis offset of (shift - block - 10) with no rotation.
# NOTE(review): presumably this leaves the block 10 units away from the slot
# on each axis -- confirm against Env.step.
print(block_positions())
xx = [
    block.shift_x - block.block_x - 10,
    block.shift_y - block.block_y - 10,
    0,
]
state, reward, t = env.step(xx)

print(block_positions())
print(reward, xx, block.distance(), block.prev_dist)

fig, (ax1, ax2) = draw_sandboxes()

# Second move: per-axis offset of (shift - block - 0.5) and a rotation delta
# of (-rotate_angle + 0.1).
print(block_positions())

xx = [
    block.shift_x - block.block_x - 0.5,
    block.shift_y - block.block_y - 0.5,
    -block.rotate_angle + 0.1,
]
state, reward, t = env.step(xx)

print(block_positions())
print(reward, xx, block.distance(), block.prev_dist)

fig, (ax1, ax2) = draw_sandboxes()


### Agent Training

In [None]:
import numpy as np

from rlvs.block_world.env import Env
from rlvs.agents.ddpg_agent import DDPGAgent

# Create a block-world environment, hand it to a DDPG agent and run the
# agent's play loop, keeping whatever play() returns in `actions`.
# NOTE(review): the unit of the 10000 argument (episodes vs. steps) is not
# visible here -- confirm against DDPGAgent.play.
env = Env()
agent = DDPGAgent(env)
actions = agent.play(10000)

(60, 60, 1)
(60, 60, 1)
(60, 60, 1)
(60, 60, 1)
Action: [-0.        -1.         0.0757173] Reward: 0 E_i: 1 Block state: [29, 32, -26.08, 30, 23] Dist: 9.0812


  actor_weights = np.array(self.actor_target.get_weights())
  target_weights = np.array(self.actor.get_weights())
  model_weights = np.array(self.critic.get_weights())
  target_weights = np.array(self.critic_target.get_weights())


Action: [-0.         -3.          2.15984845] Reward: 0.5638 E_i: 2 Block state: [29, 29, -28.24, 30, 23] Dist: 6.113
Action: [-0.         -2.          2.05474806] Reward: 0.6261 E_i: 3 Block state: [29, 27, -30.29, 30, 23] Dist: 4.1578
Action: [ 1.         -2.          1.42396927] Reward: 0.7189 E_i: 4 Block state: [30, 25, -31.71, 30, 23] Dist: 2.0381
Action: [ 1.         -2.         -0.12744871] Reward: 0.7854 E_i: 5 Block state: [31, 23, -31.59, 30, 23] Dist: 1.0378
Action: [ 1.         -2.          0.04845924] Reward: 0 E_i: 6 Block state: [32, 21, -31.64, 30, 23] Dist: 2.8663
Action: [ 0.         -1.         -0.89593828] Reward: 0 E_i: 7 Block state: [32, 20, -30.74, 30, 23] Dist: 3.6413
Action: [-1.         -1.         -0.00758835] Reward: 0 E_i: 8 Block state: [31, 19, -30.73, 30, 23] Dist: 4.1589


In [None]:
# Inspect the environment held by the agent after play(): call
# update_sandbox() with no offsets, plot the current sandbox next to the
# original one, then print the block_x/block_y and shift_x/shift_y values.
env = agent.env
current = env.block
current.update_sandbox()
fig, (ax1, ax2) = plt.subplots(nrows=1, ncols=2, figsize=(10, 20))
ax1.imshow(current.sandbox)
ax2.imshow(current.original_sandbox)
plt.show()
print(current.block_x, current.block_y, current.shift_x, current.shift_y)