# About this Notebook
## This is for the Helix-only Model
This notebook is for training the model to disrupt helices. We can use the Stable Baselines3 implementations of different reinforcement learning algorithms to train and validate such models.

In [1]:
import Helix_only
import stable_baselines3
import pandas as pd
import numpy as np
import glob, os, shutil
import gymnasium as gym
import numpy as np
from tqdm import tqdm
import matplotlib.pyplot as plt
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.results_plotter import load_results, ts2xy


In order to get your model running, specify the following information:

In [None]:
# User configuration: fill in every value below before running the
# remaining cells.

# Folder path holding the PDB files of helices of a certain length
# (used to build the training environment).
folder_containing_pdb_files = ''
# Alias for the original misspelled name, kept so any code that still
# refers to it continues to work.
folder_contining_pdb_files = folder_containing_pdb_files
# Folder path holding the PDB files used to validate the model.
folder_containing_pdb_files_for_validation = ''
# Folder path where validation files are saved.
folder_to_save_validation_files = ''
# A unique name (can be anything) - this is to ensure two models can run
# in parallel.
unique_path_to_give_for_file = ''
sequence_encoding_type = 'esm'  # allowed options are 'esm' and 'biotite'
use_proline = True  # True or False: whether to use Proline as a mutation
reward_cutoff_percentage = 70  # rewards are given for 70% disruption
# The environment receives the complement of the percentage above.
reward_cutoff = 100 - reward_cutoff_percentage

In [None]:
# Training environment, built from the values chosen in the configuration
# cell. The unique file name and the proline switch are taken from the
# configuration instead of being hard-coded, so the user's settings are
# actually honoured.
env = Helix_only.PeptideEvolution(
    folder_containing_pdb_files=folder_containing_pdb_files,
    folder_to_save_validation_files=folder_to_save_validation_files,
    reward_cutoff=reward_cutoff,
    unique_path_to_give_for_file=unique_path_to_give_for_file,
    sequence_encoding_type=sequence_encoding_type,
    use_proline=use_proline,
)

# Proximal Policy Optimisation
Other Algorithms can also be used - refer to https://stable-baselines3.readthedocs.io/en/master/index.html for more information

In [1]:
from stable_baselines3 import PPO
from stable_baselines3.ppo import MlpPolicy
from stable_baselines3.common.evaluation import evaluate_policy

In [None]:
# Make sure the directory for PPO training logs exists
# (a no-op when it is already there).
log_dir = "ppo_training_log"
os.makedirs(name=log_dir, exist_ok=True)


## training

In [None]:
# Build a PPO agent using the 'MlpPolicy' policy on the training environment.
model = PPO(policy='MlpPolicy', env=env, verbose=1)
# Run training for 8000 * 7 = 56,000 timesteps, showing a progress bar.
model.learn(total_timesteps=8000 * 7, progress_bar=True)

In [None]:
# Persist the trained agent under saved_models/; the load cell below reads it
# back from the same path with a ".zip" suffix.
model.save("saved_models/ppo_training_with_proline_tp")

In [None]:
# Reload the saved agent from disk. No environment is passed here; the
# evaluation cell hands one to evaluate_policy directly.
trained_ppo_model = PPO.load('saved_models/ppo_training_with_proline_tp.zip')

# Evaluation and Validation 
Stable Baselines3 provides useful helper functions to evaluate the policy. 

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy

In [None]:
# Validation environment: same settings as training, but reading the
# validation PDB folder and with validation=True. The unique file name and
# the proline switch come from the configuration cell instead of being
# hard-coded, so the user's settings are actually honoured.
eval_env = Helix_only.PeptideEvolution(
    folder_containing_pdb_files=folder_containing_pdb_files_for_validation,
    folder_to_save_validation_files=folder_to_save_validation_files,
    reward_cutoff=reward_cutoff,
    unique_path_to_give_for_file=unique_path_to_give_for_file,
    sequence_encoding_type=sequence_encoding_type,
    use_proline=use_proline,
    validation=True,
)

In [None]:
# Score the reloaded policy on the validation environment over 10 episodes,
# with deterministic=False as in the original call.
mean_reward, std_reward = evaluate_policy(
    model=trained_ppo_model,
    env=eval_env,
    n_eval_episodes=10,
    deterministic=False,
)

# Report the mean episode reward together with its standard deviation.
print(f"mean_reward={mean_reward:.2f} +/- {std_reward}")