In [None]:
import torch
import torch.nn as nn
from transformers import AutoModelForCausalLM, AutoTokenizer

# ============================
# 1. LSTM Reward Model
# ============================
class Net(nn.Module):
    """Single-layer LSTM regressor used as the reward model.

    The network feeds a sequence of embedding vectors through a unidirectional
    LSTM, takes the final hidden state, and maps it through one hidden
    fully connected layer to the output (a single scalar, pIC50, by default).

    Args:
        lstm_size: Hidden size of the LSTM.
        hidden_size: Width of the fully connected hidden layer.
        dropout_rate: Dropout probability applied before and after the hidden layer.
        input_size: Size of each input embedding vector (defaults to 300).
        out_size: Number of predicted values per sample (defaults to 1).
    """

    def __init__(self, lstm_size, hidden_size, dropout_rate, input_size=300, out_size=1):
        super(Net, self).__init__()

        # Attribute names and creation order are kept stable so that existing
        # checkpoints / pickled models keep matching this class.
        self.lstm = nn.LSTM(
            input_size=input_size,
            hidden_size=lstm_size,
            num_layers=1,
            batch_first=True,
            bidirectional=False,
        )
        self.fc1 = nn.Linear(lstm_size, hidden_size)    # Fully connected hidden layer
        self.activation = nn.ReLU()                     # Non-linear activation
        self.fc_out = nn.Linear(hidden_size, out_size)  # Output layer
        self.dropout = nn.Dropout(dropout_rate)         # Dropout layer for regularization

    def forward(self, x):
        """Predict one output row per sequence.

        Args:
            x: Tensor of shape (batch, seq_len, input_size); the LSTM is
               constructed with ``batch_first=True``.

        Returns:
            Tensor of shape (batch, out_size).
        """
        _, (h_n, _) = self.lstm(x)
        hidden = h_n[-1]                 # Final hidden state of the (only) LSTM layer
        hidden = self.dropout(hidden)
        hidden = self.activation(self.fc1(hidden))
        hidden = self.dropout(hidden)
        return self.fc_out(hidden)

# The checkpoint stores the entire pickled `Net` object (not just a state dict),
# so the `Net` class above must be defined before loading.
# SECURITY: weights_only=False unpickles arbitrary Python objects and can execute
# code embedded in the file -- only load checkpoints from a trusted source.
# The flag is passed explicitly because newer PyTorch releases default to
# weights_only=True, under which loading a full pickled model fails.
reward_model = torch.load(
    "mol2vecLSTM.pth",
    map_location=torch.device('cpu'),
    weights_only=False,
)
reward_model.eval()  # Inference mode: disables dropout

print("Entire model loaded successfully!")


Entire model loaded successfully!


  reward_model = torch.load("mol2vecLSTM.pth", map_location=torch.device('cpu'))


In [None]:
# Mount Google Drive at /content/drive. This relies on the `google.colab`
# package, so the cell is expected to run inside a Colab runtime.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# SECURITY: never hardcode an access token in a notebook. The token is read from
# the HF_TOKEN environment variable, falling back to an interactive hidden prompt.
# Any token that was previously committed in this cell should be revoked.
import os
from getpass import getpass

from huggingface_hub import login, logout

token = os.environ.get("HF_TOKEN") or getpass("Hugging Face access token: ")
login(token)  # non-blocking login

In [None]:
# ============================
# 2. Policy Model (LLM)
# ============================
# Load the policy LLM and its tokenizer from the Hugging Face Hub.
# AutoModelForCausalLM / AutoTokenizer come from the `transformers` import in the
# first cell of the notebook, so that cell has to be executed before this one.
policy_model_name = "vonPipe/jak2InstructSFT"
policy_model = AutoModelForCausalLM.from_pretrained(policy_model_name)
tokenizer = AutoTokenizer.from_pretrained(policy_model_name)





NameError: name 'AutoModelForCausalLM' is not defined

In [None]:
!pip install selfies
!pip install rdkit
!pip install selfies
!pip install deepchem
!pip install gensim
!pip install torch
!pip install git+https://github.com/samoturk/mol2vec

Collecting git+https://github.com/samoturk/mol2vec
  Cloning https://github.com/samoturk/mol2vec to /tmp/pip-req-build-alzttblu
  Running command git clone --filter=blob:none --quiet https://github.com/samoturk/mol2vec /tmp/pip-req-build-alzttblu
  Resolved https://github.com/samoturk/mol2vec to commit 850d944d5f48a58e26ed0264332b5741f72555aa
  Preparing metadata (setup.py) ... [?25l[?25hdone


In [None]:
import pandas as pd
import numpy as np
import rdkit
from rdkit import Chem
from rdkit.Chem import AllChem, Descriptors
from rdkit.Chem import PandasTools
from rdkit.Chem.rdmolops import RemoveHs
from rdkit.Chem import rdmolfiles
from rdkit.Chem import rdMolDescriptors
from rdkit import DataStructs

import selfies as sf
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader

# For handling the embedding layer and Mol2Vec
from gensim.models import word2vec
from gensim.models import Word2Vec
import deepchem as dc

# Suppress warnings
# NOTE(review): filterwarnings('ignore') with no category silences every Python
# warning for the rest of the session, including deprecation notices.
import warnings
warnings.filterwarnings('ignore')


Instructions for updating:
experimental_relax_shapes is deprecated, use reduce_retracing instead


In [None]:
import selfies as sf

def selfies_to_molecule(selfies_string):
    """Decode a SELFIES string and parse the resulting SMILES with RDKit.

    Returns whatever ``Chem.MolFromSmiles`` returns for the decoded SMILES.
    If decoding or parsing raises, the exception is not propagated; instead
    the string ``"Error: <message>"`` is returned, so callers must check the
    type of the result.
    """
    try:
        smiles = sf.decoder(selfies_string)
        molecule = Chem.MolFromSmiles(smiles)
    except Exception as e:
        return f"Error: {e}"
    return molecule


In [None]:
from gensim.models import word2vec
# Word2Vec model loaded from a local file; its `.wv` keyed vectors are what
# molecule_to_vec and generate_trajectory pass to patched_sentences2vec.
# NOTE(review): the generic name `model` is easy to confuse with the policy and
# reward models defined elsewhere in this notebook.
model = word2vec.Word2Vec.load('model_300dim.pkl')
from mol2vec.features import mol2alt_sentence, mol2sentence, MolSentence, DfVec, sentences2vec
from gensim.models import word2vec

def patched_sentences2vec(sentences, keyed_vectors, unseen=None):
    """Embed each sentence as the mean of its word vectors (Gensim 4.x KeyedVectors).

    Args:
        sentences: Iterable of sentences, each an iterable of word keys.
        keyed_vectors: Object exposing ``key_to_index``, ``vector_size`` and
            ``keyed_vectors[word]`` lookup.
        unseen: Optional key substituted for out-of-vocabulary words. When it is
            ``None`` or itself missing from the vocabulary, such words are skipped.

    Returns:
        ``np.ndarray`` with one row per sentence. A sentence that contributes no
        vectors is represented by a zero vector of length ``vector_size``.
    """
    vocabulary = set(keyed_vectors.key_to_index.keys())

    def embed(sentence):
        # Collect the vectors of all words that can be resolved for this sentence.
        collected = []
        for token in sentence:
            if token in vocabulary:
                collected.append(keyed_vectors[token])
            elif unseen is not None and unseen in vocabulary:
                collected.append(keyed_vectors[unseen])
        if not collected:
            return np.zeros(keyed_vectors.vector_size)
        return np.mean(collected, axis=0)

    return np.array([embed(sentence) for sentence in sentences])

In [None]:
import numpy as np

def molecule_to_vec(mol):
    """Return the Mol2Vec embedding of a single RDKit molecule.

    The molecule is converted to a Mol2Vec sentence (radius 1) and embedded with
    the module-level Word2Vec ``model``; identifiers missing from the vocabulary
    fall back to the ``'UNK'`` vector.

    Args:
        mol: RDKit molecule accepted by ``mol2alt_sentence``.

    Returns:
        1-D ``np.ndarray`` holding the embedding of the molecule.
    """
    sentence = MolSentence(mol2alt_sentence(mol, 1))
    # patched_sentences2vec expects a *collection* of sentences. Passing the bare
    # sentence made it iterate over each identifier string character by character,
    # so the sentence is wrapped in a list and the single resulting row is taken.
    mol_vec = DfVec(patched_sentences2vec([sentence], model.wv, unseen='UNK')[0])
    return np.array(mol_vec.vec)

In [None]:
# ============================
# 3. Generate Trajectories
# ============================


def generate_trajectory(policy_model, tokenizer, reward_model):
    """Sample SELFIES completions from the policy model and score them with the reward model.

    One prompt is built from the Alpaca-style template below, a completion is
    sampled from ``policy_model``, and every completion that decodes to a
    non-empty RDKit molecule is embedded with Mol2Vec (module-level ``model``)
    and scored by ``reward_model``.

    Side effects: both models are moved to ``cuda:0`` when CUDA is available
    (otherwise CPU), and progress / generated strings are printed.

    Args:
        policy_model: Causal LM exposing ``generate`` and ``to``.
        tokenizer: Tokenizer matching ``policy_model``.
        reward_model: Module mapping a (batch, 1, embedding) tensor to predictions.

    Returns:
        dict with
          "input_ids": prompt token ids of the last generated batch,
          "generated_ids": sequences returned by ``generate`` for the last batch,
          "reward": mean reward-model prediction over the valid molecules, or 0
                    when no completion was a valid molecule.
    """
    alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.

    ### Instruction:
    {}

    ### Input:
    {}

    ### Response:
    {}"""

    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Move models to the same device
    policy_model.to(device)
    reward_model.to(device)

    # Number of examples to generate
    num_examples = 1
    batch_size = 1  # Adjust batch size based on your GPU memory

    def decode_to_mol(selfies_str):
        """Return the RDKit molecule for a SELFIES string, or None if unusable."""
        try:
            mol = Chem.MolFromSmiles(sf.decoder(selfies_str))
        except Exception:
            return None
        # An empty completion decodes to a molecule with no atoms, for which no
        # Mol2Vec sentence can be built; treat it as invalid as well.
        if mol is None or mol.GetNumAtoms() == 0:
            return None
        return mol

    responses = []
    num_batches = num_examples // batch_size
    for batch_num in range(num_batches):
        print(f"Generating batch {batch_num + 1}/{num_batches}")

        # Prepare prompts for the batch
        prompts = [alpaca_prompt.format(
            "You love and excel generating SELFIES strings for drug-like molecules. Generate a SELFIES representation of a molecule that could inhibit the JAK2 protein",
            "",
            "",
        ) for _ in range(batch_size)]

        # Tokenize inputs and move tensors to device
        inputs = tokenizer(prompts, return_tensors='pt', padding=True, truncation=True).to(device)

        outputs = policy_model.generate(
            input_ids=inputs['input_ids'],
            attention_mask=inputs['attention_mask'],
            max_new_tokens=256,
            num_return_sequences=1,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            pad_token_id=tokenizer.pad_token_id,
        )

        # Keep only the newly generated tokens (everything after the prompt)
        for i in range(batch_size):
            input_length = inputs['input_ids'][i].shape[0]
            generated_tokens = outputs[i][input_length:]
            response_text = tokenizer.decode(generated_tokens, skip_special_tokens=True).strip()
            responses.append(response_text)

    # Decode each completion once and keep the molecules that are usable
    valid_mols = []
    for idx, response in enumerate(responses):
        mol = decode_to_mol(response)
        if mol is not None:
            valid_mols.append(mol)
            print(response)
        else:
            print(f"Invalid SELFIES at index {idx}: {response}")

    if valid_mols:
        sentences = [MolSentence(mol2alt_sentence(mol, 1)) for mol in valid_mols]
        X = patched_sentences2vec(sentences, model.wv, unseen='UNK')

        x_test_tensor = torch.tensor(X, dtype=torch.float32).to(device)
        x_test_tensor = x_test_tensor.unsqueeze(1)  # Add sequence length dimension

        # Scoring is inference only; no autograd graph is needed for the reward model
        with torch.no_grad():
            y_pred_test = reward_model(x_test_tensor)

        y_array = y_pred_test.detach().cpu().numpy()  # Ensure numpy conversion happens on CPU
        reward = np.mean(y_array)
    else:
        # Nothing to score: fall back to a zero reward instead of crashing on an empty batch
        reward = np.float32(0.0)

    return {
        "input_ids": inputs.input_ids,
        "generated_ids": outputs,
        "reward": reward
    }

# Smoke test: sample and score one trajectory with the currently loaded models.
# Requires policy_model, tokenizer and reward_model to be defined by earlier cells.
generate_trajectory(policy_model, tokenizer, reward_model)



NameError: name 'policy_model' is not defined

In [None]:
# ============================
# 4. PPO Class
# ============================
class PPO:
    """Minimal clipped-surrogate policy updater for a causal-LM policy.

    Each trajectory triggers exactly one gradient step. The "old" log-probabilities
    are the detached current ones, so the importance ratio evaluates to 1 and the
    step reduces to an advantage-weighted policy-gradient update.

    Args:
        policy_model: Module whose forward pass on token ids returns an object
            with a ``logits`` attribute of shape (batch, seq_len, vocab).
        reward_model: Stored for reference; not used by the update itself.
        lr: Adam learning rate. The previous default of 1.41 was orders of
            magnitude too large for fine-tuning; 1.41e-5 is used instead.
        gamma: Discount factor.
        eps_clip: Clipping range of the importance ratio.
    """

    def __init__(self, policy_model, reward_model, lr=1.41e-5, gamma=0.99, eps_clip=0.2):
        self.policy_model = policy_model
        self.reward_model = reward_model
        self.optimizer = torch.optim.Adam(policy_model.parameters(), lr=lr)
        self.gamma = gamma  # Discount factor
        self.eps_clip = eps_clip  # Clipping parameter

    @staticmethod
    def _as_float_tensor(data):
        """Return ``data`` as a detached float32 tensor without re-wrapping tensors."""
        if torch.is_tensor(data):
            return data.detach().to(dtype=torch.float32)
        return torch.tensor(data, dtype=torch.float32)

    def compute_advantages(self, rewards, values):
        """Compute discounted advantages from per-step rewards and value estimates.

        Args:
            rewards: 1-D sequence or tensor of rewards.
            values: 1-D sequence or tensor of value estimates, same length.

        Returns:
            1-D float32 CPU tensor of advantages.

        Raises:
            ValueError: If ``rewards`` and ``values`` differ in length.
        """
        rewards = self._as_float_tensor(rewards).to("cpu")
        values = self._as_float_tensor(values).to("cpu")

        if len(rewards) != len(values):  # Validate shapes
            raise ValueError(f"Rewards and values must have the same length, but got {len(rewards)} and {len(values)}.")

        advantages = torch.zeros_like(rewards)
        gae = 0
        for t in reversed(range(len(rewards))):
            next_value = values[t + 1] if t + 1 < len(values) else 0
            delta = rewards[t] + self.gamma * next_value - values[t]
            gae = delta + self.gamma * gae
            advantages[t] = gae
        return advantages

    def update(self, trajectories):
        """Run one optimisation step per trajectory.

        Args:
            trajectories: Iterable of dicts with keys
                "input_ids" (prompt token ids, shape (batch, prompt_len)),
                "generated_ids" (prompt followed by the sampled completion,
                shape (batch, total_len)) and "reward" (scalar or one value
                per batch row).

        Raises:
            ValueError: If the number of rewards does not match the batch size.
        """
        # Run on whatever device the policy currently lives on; forcing the inputs
        # to CPU breaks as soon as the model has been moved to a GPU.
        device = next(self.policy_model.parameters()).device

        for trajectory in trajectories:
            generated_ids = trajectory["generated_ids"].to(device)
            prompt_len = trajectory["input_ids"].shape[1]

            logits = self.policy_model(generated_ids).logits  # (batch, total_len, vocab)

            # Logits at position t score the token at position t + 1.
            log_probs = torch.nn.functional.log_softmax(logits[:, :-1, :], dim=-1)
            targets = generated_ids[:, 1:].unsqueeze(-1)
            token_log_probs = log_probs.gather(-1, targets).squeeze(-1)

            # Only the sampled completion tokens are actions of the policy.
            # NOTE(review): padding after EOS is not masked here -- confirm for batch > 1.
            action_log_probs = token_log_probs[:, prompt_len - 1:]
            if action_log_probs.numel() == 0:
                continue  # Nothing was generated, so there is nothing to optimise

            # NOTE(review): the policy has no value head; the max logit at the last
            # prompt position is kept as a heuristic baseline and carries no gradient.
            values = logits[:, prompt_len - 1, :].max(dim=-1)[0].detach()

            rewards = self._as_float_tensor(trajectory["reward"]).flatten()
            if len(rewards) != values.size(0):  # Check compatibility
                raise ValueError(f"Mismatch in rewards and values shapes: {len(rewards)} vs {values.size(0)}")

            # One advantage per sequence, broadcast over that sequence's tokens
            advantages = self.compute_advantages(rewards, values).to(device).unsqueeze(-1)

            # PPO clipped surrogate objective
            ratio = torch.exp(action_log_probs - action_log_probs.detach())
            clipped_ratio = torch.clamp(ratio, 1 - self.eps_clip, 1 + self.eps_clip)
            loss = -torch.min(ratio * advantages, clipped_ratio * advantages).mean()

            # Optimize
            self.optimizer.zero_grad()
            loss.backward()
            self.optimizer.step()


In [None]:
import torch

# ============================
# 5. Training Loop
# ============================

# Build the updater around the already-loaded policy and reward models.
ppo = PPO(policy_model, reward_model)

num_epochs = 16
torch.cuda.empty_cache()  # Clear any residual memory at the start

# NOTE(review): `scaler` is created but never used in the visible cells, so no
# mixed-precision scaling is applied during the updates below.
scaler = torch.cuda.amp.GradScaler()  # Mixed precision scaler

# Each epoch samples one fresh trajectory and performs one update on it.
for epoch in range(num_epochs):
    trajectories = []
    trajectory = generate_trajectory(policy_model, tokenizer, reward_model)
    trajectories.append(trajectory)

    ppo.update(trajectories)

    print(f"Epoch {epoch + 1} completed.")

Generating batch 1/1
[C][C][C][Branch1][Ring1][#C][C][N][C][=C][Branch1][Branch1][C][=N][Ring1][Branch1][C][=N][C][=Branch1][O][=C][N][N][=C][C][=C][Ring1][=Branch1][C][Branch1][C][F][Branch1][C][F][F][C][=C][Ring1][#Branch1]




In [None]:
# Persist the fine-tuned policy and its tokenizer locally, then publish both.
policy_model.save_pretrained("PPOJak2") # Local saving
tokenizer.save_pretrained("PPOJak2")

model_name_on_hub = "PPOJak2"
# `model` in this notebook is the gensim Word2Vec embedding model, which has no
# push_to_hub method; the object to publish is the trained policy model.
policy_model.push_to_hub(model_name_on_hub)
tokenizer.push_to_hub(model_name_on_hub)