In [1]:
import optuna
from optuna.samplers import TPESampler

import os

import numpy as np
import pandas as pd
import copy
import logging

from collections import defaultdict

from typing import List, Tuple

from sklearn.model_selection import train_test_split
import torch
import pandas as pd
from botorch.models import SingleTaskGP
from gpytorch.mlls import ExactMarginalLogLikelihood
from botorch.fit import fit_gpytorch_model
from sklearn.decomposition import PCA
from botorch.acquisition import UpperConfidenceBound
from botorch.optim import optimize_acqf




In [2]:

original_peptide = "FHWWWPSPSVPS"

peptides = {
  original_peptide: 0.0, # original
  "FAWWWPSPSVPS": 3.0, # position=1, aac_index=0
  "FHAWWPSPSVPS": 3.0, # position=2, aac_index=0
  "FHWAWPSPSVPS": 4.0, # position=3, aac_index=0
  "FHWWAPSPSVPS": 5.0, # position=4, aac_index=0
  "FHWWWASPSVPS": 4.0, # position=5, aac_index=0
  "FHWWWPAPSVPS": 4.0, # position=6, aac_index=0
  "FHWWWPSASVPS": 5.0, # position=7, aac_index=0
  "FHWWWPSPAVPS": 3.0, # position=8, aac_index=0
  "FHWWWPSPSAPS": 3.0, # position=9, aac_index=0
  "FHWWWPSPSVAS": 3.5, # position=10, aac_index=0
  "FHWWWPSPSVPA": 4.0, # position=11, aac_index=0
}

# convert the peptides to the parameters.
params = []
for idx, one_peptide in enumerate(peptides):
  params.append({
    "position": idx,
    "aac_index": 0, # Change the aac to 'A'
    "objective": peptides[one_peptide],
  })

In [3]:


def change_amino_acid(original_peptide, position, aac_index, amino_acids=None):
  if amino_acids is None:
    amino_acids = 'ACDEFGHIKLMNPQRSTVWY'

  modified_peptide = list(original_peptide)
  modified_peptide[position] = amino_acids[aac_index]
  modified_peptide = ''.join(modified_peptide)
  return modified_peptide


def objective(trial, original_peptide, params):
  
  num_aac = len(original_peptide)
  amino_acids = 'ACDEFGHIKLMNPQRSTVWY'
  
  if trial.number < len(params):
    # Since Optuna cannot fit the surrogate model in batch mode, 
    # we provide the parameters one at a time.
    trail_position = params[trial.number]["position"]
    trail_aac_index = params[trial.number]["aac_index"]
    objective = params[trial.number]["objective"]
    
    # We already know the objective value for the given parameters,
    # so we just set the parameters and return the objective value.
    _ = trial.suggest_int("position", trail_position, trail_position)
    _ = trial.suggest_int("aac_index", trail_aac_index, trail_aac_index)
    return objective
  
  else:
    # run the optimization once.
    _ = trial.suggest_int("position", 0, num_aac-1)
    _ = trial.suggest_int("aac_index", 0, len(amino_acids)-1)
    
    # Since our goal is running optimization only once and we don't have objective values for the new parameters,
    # we return any value (e.g. -1) to indicate that the optimization should stop.
    return -1


study = optuna.create_study(
  sampler=TPESampler(), 
  direction="maximize")


# Following is any example of running the optimization and generate one peptide.
# You can generate more peptides by increasing n_trials.
study.optimize(lambda trial: objective(trial, original_peptide, params), n_trials=len(params)+1)


# Get the last trial and generate the final modified peptide.
last_trial = study.trials[-1]
position, aac_index = last_trial.params["position"], last_trial.params["aac_index"]

final_modified_peptide = change_amino_acid(original_peptide, position, aac_index)


# Perhaps you can estimate the predicted mean using SingleTaskGP to identify which peptide is optimal.
final_modified_peptide

[I 2024-10-30 12:49:59,143] A new study created in memory with name: no-name-3b94e5c2-9854-4c82-889f-e7fae993c542
[I 2024-10-30 12:49:59,146] Trial 0 finished with value: 0.0 and parameters: {'position': 0, 'aac_index': 0}. Best is trial 0 with value: 0.0.
[I 2024-10-30 12:49:59,147] Trial 1 finished with value: 3.0 and parameters: {'position': 1, 'aac_index': 0}. Best is trial 1 with value: 3.0.
[I 2024-10-30 12:49:59,149] Trial 2 finished with value: 3.0 and parameters: {'position': 2, 'aac_index': 0}. Best is trial 1 with value: 3.0.
[I 2024-10-30 12:49:59,151] Trial 3 finished with value: 4.0 and parameters: {'position': 3, 'aac_index': 0}. Best is trial 3 with value: 4.0.
[I 2024-10-30 12:49:59,152] Trial 4 finished with value: 5.0 and parameters: {'position': 4, 'aac_index': 0}. Best is trial 4 with value: 5.0.
[I 2024-10-30 12:49:59,153] Trial 5 finished with value: 4.0 and parameters: {'position': 5, 'aac_index': 0}. Best is trial 4 with value: 5.0.
[I 2024-10-30 12:49:59,155] 

'FHWWHPSPSVPS'