# Reinforcement Learning based Virtual Screening
Protein-ligand pose prediction using RL

## Spatial block arrangement using RL CNN-DQN
__Input__
- Sandbox with the block and the surface placement

__Output__
- <x, y, $\theta$> for the block with respect to the sandbox

In [None]:
from block_world.env import Env
from agents.ddpg_agent import DDPGAgent
from matplotlib import pyplot as plt
import numpy as np

### Generating the sandbox
The block world generates a block and places it on the surface by randomizing <x, y, $\theta$>. The $\theta$-rotated block is stored in the _block_ property of the Block class.

The block and the surface are combined into a single sandbox image. (More here)

In [None]:
# Build a fresh block-world environment.
env = Env()

# Show the block's current sandbox (left) next to its original sandbox (right).
fig, axes = plt.subplots(nrows=1, ncols=2, figsize=(10, 20))
ax1, ax2 = axes
for axis, image in zip(axes, (env.block.sandbox, env.block.original_sandbox)):
    axis.imshow(image)
plt.show()

In [None]:
# Call update_sandbox with (shift - block position - 1) on each axis and a
# third argument of 0, then plot the result beside the original sandbox.
# NOTE(review): the arguments are presumably an x/y displacement and a
# rotation — confirm against Block.update_sandbox.
block = env.block
delta_x = block.shift_x - block.block_x - 1
delta_y = block.shift_y - block.block_y - 1
env.block.update_sandbox(delta_x, delta_y, 0)

fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 20))
panels = ((ax1, env.block.sandbox), (ax2, env.block.original_sandbox))
for axis, image in panels:
    axis.imshow(image)
plt.show()

In [None]:
# Evaluate Block.distance at the block's current (block_x, block_y) with a
# third argument of 0 and print the result.
# NOTE(review): the third argument is presumably an angle — confirm.
current_distance = env.block.distance(env.block.block_x, env.block.block_y, 0)
print(current_distance)

In [None]:
# Draw a random action from the environment's action space and apply it.
xx = env.action_space.sample()
# env.step is unpacked into three values here; the third is presumably a
# terminal flag — confirm against Env.step.
state, reward, t = env.step(xx)

# Show the sandbox after the step, then report the reward, the sampled action
# and the shape of the action bounds.
plt.imshow(env.block.sandbox)
plt.show()
print(reward, xx, env.action_space.action_bounds.shape)

In [None]:
# Print the block position, the shift values, and the difference vector
# [shift_x - block_x, shift_y - block_y, rotate_angle].
block = env.block
fit_action = [
    block.shift_x - block.block_x,
    block.shift_y - block.block_y,
    block.rotate_angle,
]
print(env.block.block_x, env.block.block_y, block.shift_x, block.shift_y, fit_action)

### Absolute Fit
The absolute fit is when the block is placed square on top of the slot.


  [block.shift_x- block.block_x, block.shift_y - block.block_y, block.rotate_angle]


In [None]:
# Apply one fixed action to the environment and display the resulting sandbox
# next to the original sandbox.
# NOTE(review): `block` is not referenced again within this cell.
block = env.block
# NOTE(review): hard-coded action. The accompanying text gives the absolute-fit
# action as [shift_x - block_x, shift_y - block_y, rotate_angle]; confirm that
# [50, 50, -180] is what is intended here.
xx = [50, 50, -180]
state, reward, t = env.step(xx)
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 20))
ax1.imshow(env.block.sandbox)
ax2.imshow(env.block.original_sandbox)
# Report the reward, the action used and the shape of the action bounds.
print(reward, xx, env.action_space.action_bounds.shape)

### Converting sandbox image to tensors for CNN consumption
The sandbox image needs to be converted into a tensor that the CNN can consume.

In [None]:
# Self-contained cell: re-import the dependencies, build a fresh environment,
# wrap it in a DDPG agent and run the agent.
from block_world.env import Env
from agents.ddpg_agent import DDPGAgent
import numpy as np
env = Env()
agent = DDPGAgent(env)
# NOTE(review): the meaning of 10000 (episodes vs. steps) is not visible here;
# confirm against DDPGAgent.play.
actions =  agent.play(10000)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])





  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
Using TensorFlow backend.
  conv_model_1 = Conv2D(64, 5, 5,  activation='relu')(conv_model)
  conv_model_1 = Conv2D(64, 4, 4,  activation='relu')(conv_model_1)
  conv_model_1 = Conv2D(64, 3, 3,  activation='relu')(conv_model_1)








  conv_model_1 = Conv2D(64, 5, 5,  activation='relu')(conv_model)
  conv_model_1 = Conv2D(64, 4, 4,  activation='relu')(conv_model_1)
  conv_model_1 = Conv2D(64, 3, 3,  activation='relu')(conv_model_1)


False [ 0.         -1.         -0.86101228] 0 1 [44, 11, 148.86101227998734, 28, 32, 73.74879720914709]


  actor_weights = np.array(self.actor_target.get_weights())
  target_weights = np.array(self.actor.get_weights())
  model_weights = np.array(self.critic.get_weights())
  target_weights = np.array(self.critic_target.get_weights())


False [ 0.         -2.         -0.47330749] 0.20152237003318252 2 [44, 9, 149.33431977033615, 28, 32, 72.5306547237095]
False [ 0.         -1.         -2.07675457] 0 3 [44, 8, 151.41107434034348, 28, 32, 72.71871964082193]
False [-0.         -2.         -2.40163946] 0.20211415739614325 4 [44, 6, 153.8127138018608, 28, 32, 72.39634016287614]
False [-1.         -2.         -2.06717062] 0.2073551589173117 5 [43, 4, 155.8798844218254, 28, 32, 71.21333216174395]
False [-0.        -3.        -2.0756104] 0.21158707101549246 6 [43, 1, 157.95549482107162, 28, 32, 70.26661910956886]
False [ 1.         -3.         -2.22160292] -1 7 [43, 1, 157.95549482107162, 28, 32, 70.26661910956886]
Episode: 1 Return: -0.17742124263787007 episode_length: 7 stats (m, s) [0, 0]
False [-2.         -0.          0.75582552] 0 1 [41, 34, 15.244174480438232, 23, 30, 15.574078351886854]
False [-2.         -0.          1.30189109] 0.5791085206077455 2 [39, 34, 13.942283391952515, 23, 30, 14.070569139609756]
False [-2. 

Episode: 17 Return: -1 episode_length: 4 stats (m, s) [0, 0]
False [-8. -6. 10.] 0 1 [10, 28, -35.0, 34, 21, 10.462617944600629]
False [-8. -6. 10.] 0 2 [2, 22, -45.0, 34, 21, 18.358432310708185]
False [-8. -7. 10.] -1 3 [2, 22, -45.0, 34, 21, 18.358432310708185]
Episode: 18 Return: -1 episode_length: 3 stats (m, s) [0, 0]
False [-8. -8. 10.] 0 1 [4, 45, -142.0, 22, 26, 63.97408007777405]
False [-8. -8. 10.] -1 2 [4, 45, -142.0, 22, 26, 63.97408007777405]
Episode: 19 Return: -1 episode_length: 2 stats (m, s) [0, 0]
False [-8. -7. 10.] 0 1 [13, 10, 32.0, 33, 30, 30.950857243313084]
False [-8. -8. 10.] 0 2 [5, 2, 22.0, 33, 30, 39.21521844579056]
False [-8. -8. 10.] -1 3 [5, 2, 22.0, 33, 30, 39.21521844579056]
Episode: 20 Return: -1 episode_length: 3 stats (m, s) [0, 0]
False [-8. -7. 10.] 0 1 [33, 7, -141.0, 26, 27, 71.15279304285649]
False [-8.         -5.          8.58778572] 0.23664081404577775 2 [25, 2, -149.5877857208252, 26, 27, 62.281734780290584]
False [-8. -3. 10.] -1 3 [25, 2, 

In [None]:
# Take the environment held by the agent and refresh its sandbox
# (update_sandbox is called with no arguments here).
env = agent.env
env.block.update_sandbox()

# Current sandbox on the left, original sandbox on the right.
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 20))
ax1.imshow(env.block.sandbox)
ax2.imshow(env.block.original_sandbox)
plt.show()

# Report the block position and the shift values.
final_block = env.block
print(final_block.block_x, final_block.block_y, final_block.shift_x, final_block.shift_y)