In [1]:
# for protein structural modelling
from modeller import *
from modeller.automodel import *
from Bio.SVDSuperimposer import SVDSuperimposer
import numpy as np
import biovec
import pickle
import glob
from tqdm import tqdm
# from utils functions
from utils.encoder_decoder import *
from utils.sequence import *
from utils.reward import *
from utils.environment import *

# for deep learning
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np

# for environment creation
import gymnasium as gym
from gymnasium import Env
from gymnasium.spaces import MultiDiscrete
from gymnasium.spaces import Discrete
from gymnasium.spaces import Box

#for reading PDB files and processing them
from biopandas.pdb import PandasPdb
import pandas as pd
from utils.sequence import *

# for generating structures through esm instead of modeller
import esm
import biotite.structure as struc
import biotite.structure.io as strucio

# for general utility
import random
import os
import subprocess
import time
import matplotlib.pyplot as plt
from datetime import datetime
import pandas as pd
import numpy as np 
import matplotlib.pyplot as plt
import seaborn as sns
# Global matplotlib look-and-feel for every figure in this notebook.
plt.rcParams['figure.dpi'] = 200
# matplotlib 3.6 renamed the bundled seaborn style sheets to 'seaborn-v0_8*'
# and later releases dropped the old 'seaborn' alias, so use whichever name
# the installed version actually ships.
_seaborn_style = 'seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn'
plt.style.use(_seaborn_style)
# Render text through LaTeX (requires a working LaTeX installation).
plt.rcParams.update({"text.usetex": True})

RuntimeError: CUDA out of memory. Tried to allocate 50.00 MiB (GPU 0; 10.91 GiB total capacity; 1.71 GiB already allocated; 53.81 MiB free; 1.78 GiB reserved in total by PyTorch) If reserved memory is >> allocated memory try setting max_split_size_mb to avoid fragmentation.  See documentation for Memory Management and PYTORCH_CUDA_ALLOC_CONF

In [5]:
def plot_rewards(array_name,window_size):
    """Plot the rolling mean of a sequence of per-episode rewards.

    Args:
        array_name: sequence of reward values, one per episode.
        window_size: number of consecutive episodes averaged per point.

    The first ``window_size - 1`` positions have no full window and are
    dropped before plotting. Draws on the current matplotlib axes and
    returns nothing.
    """
    reward_frame = pd.DataFrame(array_name, columns=['reward'])
    smoothed = reward_frame['reward'].rolling(window=window_size).mean()
    smoothed = smoothed.dropna()
    plt.plot(smoothed)
    plt.xlabel('Number of episodes')
    plt.ylabel(f'Rolling average of total rewards (window = {window_size})')

In [6]:
class PolicyNetwork():
    """REINFORCE-style policy: one hidden ReLU layer followed by a softmax.

    Attributes:
        model: ``nn.Sequential`` mapping a state vector to action probabilities.
        optimizer: Adam optimizer over ``model``'s parameters.
        entropy_weight: coefficient of the entropy bonus applied in ``update``.
    """

    def __init__(self, n_state, n_action, n_hidden=50,lr=0.001,entropy_weight=0.01):
        """Build the network and its optimizer.

        Args:
            n_state: size of the state vector fed to the first linear layer.
            n_action: number of discrete actions (size of the softmax output).
            n_hidden: width of the hidden layer.
            lr: Adam learning rate.
            entropy_weight: entropy-bonus coefficient; numeric strings such as
                ``'1e-05'`` are accepted and converted to ``float``.
        """
        self.model = nn.Sequential(nn.Linear(n_state, n_hidden),
                                   nn.ReLU(),
                                   nn.Linear(n_hidden, n_action),
                                   nn.Softmax(dim=-1), )
        self.optimizer = torch.optim.Adam(self.model.parameters(), lr)
        # Callers in this notebook pass the coefficient as a string taken from
        # a file name; coerce it so the arithmetic in update() cannot fail.
        self.entropy_weight = float(entropy_weight)

    def predict(self, s):
        """Return the action-probability tensor for state ``s``."""
        return self.model(torch.Tensor(s))

    def update(self, returns, log_probs,entropies):
        """Take one policy-gradient step from a finished episode.

        Args:
            returns: per-step returns ``Gt``.
            log_probs: per-step log-probabilities of the chosen actions
                (tensors still attached to the autograd graph).
            entropies: per-step policy entropies (tensors attached to the graph).

        An empty episode is a no-op.
        """
        # Entropy is a *bonus*: it must be subtracted from the loss so that
        # minimising the loss increases entropy (more exploration). Adding it,
        # as the previous version did, pushes the policy to collapse.
        step_losses = [
            (-log_prob * Gt) - (self.entropy_weight * entropy)
            for log_prob, Gt, entropy in zip(log_probs, returns, entropies)
        ]
        if not step_losses:
            # torch.stack() raises on an empty list; nothing to learn from anyway.
            return
        loss = torch.stack(step_losses).sum()
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    def get_action(self, s):
        """Sample an action for state ``s``.

        Returns:
            ``(action, log_prob, entropy)`` where ``action`` is a Python int,
            ``log_prob`` is the log-probability of that action and ``entropy``
            is the entropy of the full action distribution.
        """
        probs = self.predict(s)
        action = torch.multinomial(probs, 1).item()
        log_prob = torch.log(probs[action])
        # The 1e-9 offset keeps log() finite when a probability underflows to 0.
        entropy = -torch.sum(probs * torch.log(probs + 1e-9))
        return action, log_prob, entropy

In [7]:
# this is the evaluation model
def evaluate_model(estimator, env, num_episodes):
    """Roll out ``estimator`` in ``env`` and collect per-episode statistics.

    Args:
        estimator: object exposing ``get_action(state)`` that returns
            ``(action, log_prob, entropy)``.
        env: environment whose ``reset()`` returns three values (state first)
            and whose ``step(action)`` returns
            ``(next_state, reward, terminated, truncated, info)``.
        num_episodes: number of episodes to run.

    Returns:
        A 5-tuple ``(total_reward_episode, mean_reward, std_reward,
        actions_taken_in_episodes, number_of_mutations_per_episode)`` where
        ``actions_taken_in_episodes`` maps the episode index to the list of
        ``info`` objects returned by ``env.step`` and
        ``number_of_mutations_per_episode`` counts the steps of each episode.
    """
    total_reward_episode = [0] * num_episodes
    actions_taken_in_episodes = {}
    number_of_mutations_per_episode = [0] * num_episodes
    # Evaluation never back-propagates, so skip autograd bookkeeping.
    with torch.no_grad():
        for episode in tqdm(range(num_episodes)):
            actions_taken_in_episodes[episode] = []
            # This environment's reset() yields three values; only the state is used.
            state, info, _ = env.reset()
            while True:
                action, _log_prob, _entropy = estimator.get_action(state)
                next_state, reward, terminated, truncated, info = env.step(action)
                total_reward_episode[episode] += reward
                # `info` from step() is stored as the record of the action taken.
                actions_taken_in_episodes[episode].append(info)
                number_of_mutations_per_episode[episode] += 1
                if terminated or truncated:
                    break
                state = next_state
    return (
        total_reward_episode,
        np.mean(total_reward_episode),
        np.std(total_reward_episode),
        actions_taken_in_episodes,
        number_of_mutations_per_episode,
    )

In [8]:
# Validation environment shared by every evaluation cell below.
# NOTE(review): PeptideEvolution is not defined in this notebook (presumably it
# comes from one of the `utils` star-imports); see its definition for the
# meaning of each argument.
env = PeptideEvolution(
    folder_containing_pdb_files='../DrugResistance/folder_for_machine_learning/tp_30_validate',
    folder_to_save_validation_files='validation_structures',
    unique_path_to_give_for_file='valid',
    structure_generator='esm_sse',
    reward_cutoff=30,
    validation=True,
)

In [9]:
# Network input/output sizes are read off the environment's spaces.
n_state, n_action = env.observation_space.shape[0], env.action_space.n
n_hidden = 128
# Learning rate, discount factor and entropy coefficient; lr, gamma and the
# entropy coefficient also appear in the name of the checkpoint loaded next.
lr, gamma, entropic_factor = 0.0007, 0.95, 0.005

In [10]:
# Rebuild the policy with the training-time architecture, then restore its weights.
loaded_estimator = PolicyNetwork(n_state, n_action, n_hidden, lr, entropy_weight=entropic_factor)
# map_location='cpu': the network above lives on the CPU, so deserialize the
# checkpoint there too instead of onto whatever device it was saved from
# (avoids needing free GPU memory just to load the weights).
loaded_estimator.model.load_state_dict(
    torch.load(
        'saved_models/saved_rl_model_1_lr_0.0007_gamma_0.95_ep_16000_entropic_factor_0.005.pth',
        map_location='cpu',
    )
)


<All keys matched successfully>

In [11]:
# Roll out the restored policy for 500 validation episodes.
(
    total_reward_array,
    mean_validation_reward,
    standard_deviation,
    actions_taken,
    mutations_array,
) = evaluate_model(estimator=loaded_estimator, env=env, num_episodes=500)


100%|███████████████████████████████████████████████████████████████████████████████████████| 500/500 [32:54<00:00,  3.95s/it]


In [12]:
# Persist the per-episode action log, named after the entropy coefficient of
# the evaluated model (str(0.005) == '0.005', so the file name is unchanged
# for the current configuration).
os.makedirs('validation_results', exist_ok=True)  # open() will not create the folder
with open(f'validation_results/{entropic_factor}.pkl', 'wb') as file:
    pickle.dump(actions_taken, file)

In [None]:
# Save the action log under the entropy coefficient of the model evaluated
# above. The previous version interpolated `entropy`, which is only bound by
# the sweep loop further down and raises NameError on a top-to-bottom run.
with open(f'validation_results/{entropic_factor}.pkl','wb') as file:
    pickle.dump(actions_taken,file)

In [7]:
# Entropy coefficients to sweep, kept as strings because they are spliced
# verbatim into checkpoint and result file names.
given_entropies = [
    '0.01',
    '0.001',
    '0.0001',
    '1e-05',
]

In [8]:
# Accumulates [entropy, per-episode step counts] pairs, one per evaluated model.
mutations_array_for_all_entropies = list()

In [9]:
# Evaluate one saved model per entropy coefficient and store its results.
os.makedirs('validation_results', exist_ok=True)  # open() will not create the folder
for entropy in given_entropies:
    saved_model_file_path = f'saved_models/saved_rl_model_1_lr_0.0007_gamma_0.95_ep_8000_entropic_factor_{entropy}_co_30.0_trained_on_tp_30_training.pth'
    # `entropy` is a string (it is spliced into file names); the network needs a number.
    loaded_estimator_each_entropy = PolicyNetwork(n_state, n_action, n_hidden, lr, entropy_weight=float(entropy))
    # The network is built on the CPU, so load the checkpoint onto the CPU as well.
    loaded_estimator_each_entropy.model.load_state_dict(torch.load(saved_model_file_path, map_location='cpu'))
    total_reward_array,mean_validation_reward, standard_deviation,actions_taken, mutations_array = evaluate_model(estimator=loaded_estimator_each_entropy, env=env,num_episodes=500)
    with open(f'validation_results/{entropy}.pkl','wb') as file:
        pickle.dump(actions_taken,file)
    mutations_array_for_all_entropies.append([entropy,mutations_array])

100%|███████████████████████████████████████████████████████████████████████████████████████| 500/500 [42:36<00:00,  5.11s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████| 500/500 [41:05<00:00,  4.93s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████| 500/500 [37:16<00:00,  4.47s/it]
100%|███████████████████████████████████████████████████████████████████████████████████████| 500/500 [33:23<00:00,  4.01s/it]


In [3]:
def _parse_entropic_factor(model_path):
    """Split a checkpoint path into ``(file stem, entropy coefficient text)``.

    The coefficient is the number following ``entropic_factor_`` in the file
    name (e.g. ``'0.005'`` or ``'1e-05'``); ``None`` when the name has no such tag.
    """
    import re  # local import: `re` is not among the notebook-level imports

    file_name = os.path.basename(model_path)
    # splitext() strips only the final extension; splitting on the first '.'
    # would cut the name at the first decimal point (e.g. 'lr_0.0007').
    stem = os.path.splitext(file_name)[0]
    match = re.search(r'entropic_factor_(\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)', file_name)
    return stem, (match.group(1) if match else None)


def validate_entropy(entropy_file_path):
    """Evaluate the checkpoint at ``entropy_file_path`` for 500 episodes.

    Loads the weights into a fresh ``PolicyNetwork`` (sized from the notebook
    globals ``n_state``, ``n_action``, ``n_hidden``, ``lr``), runs
    ``evaluate_model`` on the global ``env``, pickles the per-episode actions
    to ``validation_results/<checkpoint stem>.pkl`` and appends
    ``[label, mutations_array]`` to ``mutations_array_for_all_entropies``.
    ``label`` is the entropy coefficient parsed from the file name, or the
    file stem when the name carries no ``entropic_factor_`` tag.

    Args:
        entropy_file_path: path to a saved ``state_dict`` (``.pth``) file.
    """
    entropy_base_path, entropy_text = _parse_entropic_factor(entropy_file_path)
    if entropy_text is None:
        # No coefficient in the name: fall back to the class default (the
        # weight only matters for training, not for evaluation) and label the
        # results with the file stem.
        label = entropy_base_path
        loaded_estimator_each_entropy = PolicyNetwork(n_state, n_action, n_hidden, lr)
    else:
        label = entropy_text
        loaded_estimator_each_entropy = PolicyNetwork(n_state, n_action, n_hidden, lr, entropy_weight=float(entropy_text))
    # The network is built on the CPU, so load the checkpoint onto the CPU as well.
    loaded_estimator_each_entropy.model.load_state_dict(torch.load(entropy_file_path, map_location='cpu'))
    total_reward_array,mean_validation_reward, standard_deviation,actions_taken, mutations_array = evaluate_model(estimator=loaded_estimator_each_entropy, env=env,num_episodes=500)
    os.makedirs('validation_results', exist_ok=True)  # open() will not create the folder
    with open(f'validation_results/{entropy_base_path}.pkl','wb') as file:
        pickle.dump(actions_taken,file)
    mutations_array_for_all_entropies.append([label,mutations_array])

In [14]:
# Persist the [entropy, per-episode step counts] pairs gathered so far.
# (A bare `mutations_array_for_all_entropies` line used to precede this; as it
# was not the last line of the cell it displayed nothing and was dropped.)
with open('no_of_mutations_array_all_entropies.pkl', 'wb') as file:
    pickle.dump(mutations_array_for_all_entropies, file)

In [37]:
# Scratch copy of the most recent per-episode action log.
with open('delete.pkl', mode='wb') as scratch_file:
    pickle.dump(actions_taken, scratch_file)

In [1]:
# Reload the saved mutation counts. Despite its name, `loaded_dict` holds the
# pickled list of [entropy, counts] pairs, not a dict.
with open('no_of_mutations_array_all_entropies.pkl', mode='rb') as pickle_file:
    loaded_dict = pickle.load(pickle_file)

NameError: name 'pickle' is not defined

In [16]:
# Display the [entropy, per-episode step counts] pairs reloaded from disk.
loaded_dict

[['0.01',
  [6,
   5,
   2,
   8,
   3,
   9,
   15,
   8,
   2,
   6,
   2,
   5,
   5,
   7,
   15,
   6,
   7,
   5,
   7,
   5,
   13,
   3,
   2,
   4,
   3,
   2,
   3,
   15,
   4,
   5,
   3,
   7,
   3,
   7,
   15,
   3,
   4,
   11,
   3,
   4,
   11,
   15,
   15,
   6,
   2,
   15,
   6,
   15,
   4,
   7,
   15,
   5,
   2,
   2,
   5,
   11,
   7,
   5,
   5,
   9,
   5,
   5,
   2,
   2,
   5,
   2,
   2,
   9,
   3,
   5,
   3,
   15,
   2,
   4,
   15,
   15,
   5,
   15,
   15,
   15,
   9,
   3,
   7,
   3,
   14,
   15,
   2,
   15,
   2,
   2,
   5,
   4,
   2,
   3,
   9,
   2,
   4,
   15,
   6,
   2,
   12,
   8,
   4,
   2,
   4,
   2,
   7,
   6,
   4,
   2,
   5,
   5,
   6,
   15,
   15,
   7,
   4,
   15,
   9,
   6,
   7,
   15,
   6,
   5,
   8,
   6,
   3,
   4,
   3,
   4,
   7,
   15,
   8,
   5,
   8,
   6,
   2,
   2,
   10,
   3,
   4,
   7,
   5,
   3,
   15,
   7,
   15,
   7,
   6,
   6,
   9,
   15,
   14,
   6,
   2,
   5,
   3,
   15,
   15,


In [34]:
# Display the action log: episode index -> list of [position, amino_acid] records.
actions_taken

{0: [[18, 'D'],
  [23, 'P'],
  [10, 'P'],
  [12, 'P'],
  [23, 'P'],
  [12, 'P'],
  [18, 'D'],
  [12, 'P'],
  [12, 'P'],
  [12, 'P'],
  [24, 'F'],
  [25, 'P']],
 1: [[25, 'P'], [10, 'P']],
 2: [[12, 'P'],
  [10, 'P'],
  [10, 'P'],
  [10, 'P'],
  [25, 'P'],
  [25, 'P'],
  [10, 'D'],
  [22, 'T'],
  [10, 'P'],
  [18, 'D'],
  [26, 'P'],
  [23, 'P'],
  [12, 'P'],
  [18, 'D'],
  [23, 'P']],
 3: [[23, 'P'],
  [25, 'P'],
  [25, 'P'],
  [12, 'P'],
  [10, 'P'],
  [25, 'P'],
  [23, 'P'],
  [10, 'P'],
  [25, 'P'],
  [10, 'P'],
  [10, 'P'],
  [23, 'P'],
  [23, 'P'],
  [23, 'P'],
  [23, 'P']],
 4: [[10, 'P'], [18, 'D'], [25, 'P']],
 5: [[10, 'P'],
  [18, 'D'],
  [23, 'P'],
  [10, 'P'],
  [12, 'P'],
  [12, 'P'],
  [25, 'P']],
 6: [[25, 'P'], [11, 'M'], [12, 'P'], [18, 'D'], [10, 'P']],
 7: [[12, 'P'],
  [23, 'P'],
  [12, 'P'],
  [23, 'P'],
  [18, 'D'],
  [18, 'D'],
  [23, 'P'],
  [10, 'P']],
 8: [[25, 'P'], [18, 'D'], [18, 'D'], [10, 'P']],
 9: [[12, 'P'], [23, 'P']],
 10: [[18, 'D'],
  [12, 'P'],
  [

In [25]:
# One small frame per episode, stacked into a single table of all actions.
# Row labels restart at 0 for every episode because the index is not reset.
per_episode_frames = [
    pd.DataFrame(steps, columns=['position', 'amino_acid'])
    for steps in actions_taken.values()
]
df_of_actions = pd.concat(per_episode_frames)

In [None]:
# NOTE(review): this cell was left unfinished (`with open('delete.pl')` is a
# syntax error). Completed on the assumption that it was meant to reload the
# scratch action log written to 'delete.pkl' earlier -- confirm.
with open('delete.pkl', 'rb') as file:
    actions_taken = pickle.load(file)

In [35]:
# Display the action log: episode index -> list of [position, amino_acid] records.
actions_taken

{0: [[18, 'D'],
  [23, 'P'],
  [10, 'P'],
  [12, 'P'],
  [23, 'P'],
  [12, 'P'],
  [18, 'D'],
  [12, 'P'],
  [12, 'P'],
  [12, 'P'],
  [24, 'F'],
  [25, 'P']],
 1: [[25, 'P'], [10, 'P']],
 2: [[12, 'P'],
  [10, 'P'],
  [10, 'P'],
  [10, 'P'],
  [25, 'P'],
  [25, 'P'],
  [10, 'D'],
  [22, 'T'],
  [10, 'P'],
  [18, 'D'],
  [26, 'P'],
  [23, 'P'],
  [12, 'P'],
  [18, 'D'],
  [23, 'P']],
 3: [[23, 'P'],
  [25, 'P'],
  [25, 'P'],
  [12, 'P'],
  [10, 'P'],
  [25, 'P'],
  [23, 'P'],
  [10, 'P'],
  [25, 'P'],
  [10, 'P'],
  [10, 'P'],
  [23, 'P'],
  [23, 'P'],
  [23, 'P'],
  [23, 'P']],
 4: [[10, 'P'], [18, 'D'], [25, 'P']],
 5: [[10, 'P'],
  [18, 'D'],
  [23, 'P'],
  [10, 'P'],
  [12, 'P'],
  [12, 'P'],
  [25, 'P']],
 6: [[25, 'P'], [11, 'M'], [12, 'P'], [18, 'D'], [10, 'P']],
 7: [[12, 'P'],
  [23, 'P'],
  [12, 'P'],
  [23, 'P'],
  [18, 'D'],
  [18, 'D'],
  [23, 'P'],
  [10, 'P']],
 8: [[25, 'P'], [18, 'D'], [18, 'D'], [10, 'P']],
 9: [[12, 'P'], [23, 'P']],
 10: [[18, 'D'],
  [12, 'P'],
  [

In [32]:
# Display the stacked table of actions (columns: position, amino_acid).
df_of_actions

Unnamed: 0,position,amino_acid
0,18,D
1,23,P
2,10,P
3,12,P
4,23,P
...,...,...
0,25,P
1,25,P
2,12,P
3,10,P


In [31]:
# Display the stacked table of actions (columns: position, amino_acid).
df_of_actions

Unnamed: 0,position,amino_acid
0,18,D
1,23,P
2,10,P
3,12,P
4,23,P
...,...,...
0,25,P
1,25,P
2,12,P
3,10,P


In [13]:
# Display the stacked table of actions (columns: position, amino_acid).
df_of_actions

Unnamed: 0,position,amino_acid
0,18,D
1,23,P
2,10,P
3,12,P
4,23,P
...,...,...
0,25,P
1,25,P
2,12,P
3,10,P


In [10]:
# Frequency of each substituted amino acid across all recorded actions.
# Naming the axis and the counts explicitly yields the columns
# ['amino_acid', 'frequency'] on every pandas version; the previous
# rename({'index': ..., 'amino_acid': ...}) relied on pandas < 2.0 naming and
# mislabels the columns on pandas >= 2.0, where value_counts() names its
# result 'count' and keeps 'amino_acid' as the index name.
(
    df_of_actions['amino_acid']
    .value_counts()
    .rename_axis('amino_acid')
    .reset_index(name='frequency')
)

Unnamed: 0,amino_acid,frequency
0,P,1360
1,D,282
2,T,17
3,G,13
4,V,4
5,F,3
6,Y,3
7,L,2
8,N,2
9,E,2
