In [4]:
# Predict the probability of hepatotoxicity for a chemical

In [1]:
# Predict the probability of hepatotoxicity for a chemical
import torch  
from transformers import AutoTokenizer, AutoModelForSequenceClassification, AutoConfig  
import torch.nn as nn  
# Load pre-trained model and tokenizer
# The tokenizer, config and model are all read from the same `model_path` directory.
model_path = "saved_model"  # Make sure the path points to the correct model folder
tokenizer = AutoTokenizer.from_pretrained(model_path)
config = AutoConfig.from_pretrained(model_path)
# Ensure the model outputs hidden states
# (generate_enhanced_embeddings below indexes outputs.hidden_states[-1], so this
# flag has to be set before the model is built from the config).
config.output_hidden_states = True
# Load model - directly use CPU
# No explicit device move is performed here; the model stays wherever
# from_pretrained places it.
model = AutoModelForSequenceClassification.from_pretrained(model_path, config=config)
# Define classifier
class SimpleClassifier(nn.Module):
    """Single linear layer that maps an input feature vector to one raw score.

    The output is an unnormalised logit; callers apply the sigmoid themselves.
    """

    def __init__(self, input_dim):
        """Create the layer.

        Args:
            input_dim: Number of features in each input vector.
        """
        super().__init__()
        # The attribute name `linear` determines the state-dict keys
        # ("linear.weight", "linear.bias"), so it must not be renamed.
        self.linear = nn.Linear(in_features=input_dim, out_features=1)

    def forward(self, x):
        """Return the raw score for `x`; the last dimension becomes size 1."""
        score = self.linear(x)
        return score
# Dynamically set the input dimension of the classifier
# The classifier consumes the vector built in generate_enhanced_embeddings:
# the first-token hidden state concatenated with the sigmoid of the model logits,
# hence hidden_size + num_labels input features.
classifier = SimpleClassifier(config.hidden_size + config.num_labels)
# Load model weights - add map_location parameter to ensure loading to CPU
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source; consider weights_only=True if the checkpoint holds nothing but
# tensors and plain containers (confirm against the training script).
checkpoint = torch.load("best_model.pth", map_location=torch.device('cpu'))
# The checkpoint is indexed like a dict holding one state dict per module.
model.load_state_dict(checkpoint['model_state_dict'])
classifier.load_state_dict(checkpoint['classifier_state_dict'])
# Set to evaluation mode
model.eval()
classifier.eval()
# Define function to generate enhanced embeddings
def generate_enhanced_embeddings(smiles, tokenizer, model):
    """Build the classifier input for one or more SMILES strings.

    The input is tokenized and run through `model`. The last-layer hidden state
    of the first token is then concatenated with the sigmoid of the model logits.

    Args:
        smiles: SMILES text passed straight to `tokenizer`.
        tokenizer: Callable accepting `padding`, `truncation` and
            `return_tensors` keyword arguments and returning a mapping of
            keyword arguments for `model`.
        model: Callable whose output exposes `hidden_states` (indexable, last
            entry used) and `logits`. The model must be configured to return
            hidden states, otherwise `hidden_states` is not available.

    Returns:
        A tuple `(enhanced_embeddings, predictions)` where `predictions` is
        `sigmoid(logits)` and `enhanced_embeddings` is the first-token
        embedding with `predictions` appended along the last dimension.
        Both tensors are detached from autograd.
    """
    # Encode SMILES input
    inputs = tokenizer(smiles, padding=True, truncation=True, return_tensors="pt")

    # This function is used for inference only: disable autograd so that no
    # computation graph is built and the returned tensors do not keep the
    # model's activations alive.
    with torch.no_grad():
        outputs = model(**inputs)

        # Extract embeddings from the last layer (first token position)
        embeddings = outputs.hidden_states[-1][:, 0, :]

        # Get model prediction probabilities
        predictions = torch.sigmoid(outputs.logits)

        # Concatenate embeddings and prediction probabilities
        enhanced_embeddings = torch.cat((embeddings, predictions), dim=-1)

    return enhanced_embeddings, predictions
# Define a prediction function
def predict(smiles):
    """Return the classifier's probability for a single SMILES string.

    Uses the module-level `tokenizer`, `model` and `classifier`.

    Args:
        smiles: One SMILES string. The result is extracted with `.item()`,
            which needs a one-element tensor, so a batch of several strings
            is not supported here.

    Returns:
        The sigmoid of the classifier output as a Python float, rounded to
        two decimal places.
    """
    # Pure inference: run the whole path (base model + classifier head)
    # without autograd so no graph is recorded.
    with torch.no_grad():
        # Only the enhanced embeddings feed the classifier; the per-label
        # probabilities returned alongside are not needed here.
        enhanced_embeddings, _ = generate_enhanced_embeddings(smiles, tokenizer, model)

        # Input enhanced embeddings to the classifier for final prediction
        classifier_predictions = classifier(enhanced_embeddings)

        # Get the final prediction probability from the classifier
        final_predictions = torch.sigmoid(classifier_predictions).item()

    # Output prediction probability (rounded to two decimal places)
    return round(final_predictions, 2)
# Input a SMILES expression
# predict() is called with a single string and returns one rounded float.
smiles = "COC1=C2C3=C(C(=O)CC3)C(=O)OC2=C4[C@@H]5C=CO[C@@H]5OC4=C1"  # Example SMILES structure
prediction = predict(smiles)
# Output prediction result
# The value printed was already rounded to two decimals inside predict().
print(f"Predicted probability: {prediction}")

  from .autonotebook import tqdm as notebook_tqdm
Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Predicted probability: 1.0


In [3]:
# Predict the probability value for each of the 19 assays

In [2]:
# Predict the probability value for each of the 19 assays
import torch  
from transformers import AutoTokenizer, AutoModelForSequenceClassification  
# Define label names
# 19 assay names. get_predictions pairs labels[i] with the i-th flattened
# model output, so the order here must match the order of the model's logits
# (the logit order itself is not visible in this notebook - confirm against training).
labels = [
    "Caspase-3/7 HepG2 qHTS", "CYP1A2 Antag qHTS", "CYP2C19 Antag qHTS",
    "CYP2C9 Antag qHTS", "CYP3A4 Antag Reporter qHTS", "CYP3A7 Antag Cell qHTS",
    "ARE Agon qHTS", "MMP qHTS", "ER Stress", "ER-beta Agon qHTS: Summary",
    "PPARg Agon qHTS: Summary", "RAR Agon qHTS", "ERR Antag qHTS", "GR Antag qHTS",
    "PPARd Antag qHTS", "PPARg Antag Summary qHTS", "TR Antag Summary qHTS", "MDR-1",
    "HPGD Inhib qHTS"
]
# Load model and tokenizer
# This rebinds the kernel-global `tokenizer` and `model`; here the model is loaded
# with its saved config unchanged (no output_hidden_states override).
model_path = "saved_model"  # Ensure the path points to the correct model folder
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)
# Load model weights - add map_location parameter to ensure loading to CPU
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source; consider weights_only=True if the checkpoint holds nothing but
# tensors and plain containers (confirm against the training script).
model_save_path = "best_model.pth"
checkpoint = torch.load(model_save_path, map_location=torch.device('cpu'))
# Only the base model weights are restored in this cell.
model.load_state_dict(checkpoint['model_state_dict'])
model.eval()
# Define function to generate prediction probabilities  
def get_predictions(smiles):
    """Map each assay label to the model's sigmoid probability for `smiles`.

    Uses the module-level `tokenizer`, `model` and `labels`. The model output
    is flattened and entry i is paired with `labels[i]`.

    Args:
        smiles: SMILES text passed straight to the tokenizer.

    Returns:
        A dict keyed by assay label. Values are the unrounded NumPy scalars
        taken from the flattened probability array; any rounding is left to
        the caller.
    """
    encoded = tokenizer(smiles, padding=True, truncation=True, return_tensors="pt")
    # Inference only, so autograd is switched off for the forward pass.
    with torch.no_grad():
        logits = model(**encoded).logits
        # Sigmoid turns each logit into an independent per-assay probability.
        probabilities = torch.sigmoid(logits).numpy().flatten()
    # Pair every label with the probability at the same position.
    return {name: probabilities[position] for position, name in enumerate(labels)}
# Input SMILES expression
smiles_input = "COC1=C2C3=C(C(=O)CC3)C(=O)OC2=C4[C@@H]5C=CO[C@@H]5OC4=C1"  # Example SMILES, can be replaced with any SMILES
# `predictions` is a dict mapping each assay label to its probability.
predictions = get_predictions(smiles_input)
# Output prediction results
# Rounding to two decimals happens only here, in the format spec.
for label, prob in predictions.items():
    print(f"{label}: {prob:.2f}")

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


Caspase-3/7 HepG2 qHTS: 0.04
CYP1A2 Antag qHTS: 0.04
CYP2C19 Antag qHTS: 0.05
CYP2C9 Antag qHTS: 1.00
CYP3A4 Antag Reporter qHTS: 0.99
CYP3A7 Antag Cell qHTS: 0.99
ARE Agon qHTS: 1.00
MMP qHTS: 0.97
ER Stress: 0.04
ER-beta Agon qHTS: Summary: 0.98
PPARg Agon qHTS: Summary: 0.99
RAR Agon qHTS: 0.98
ERR Antag qHTS: 0.06
GR Antag qHTS: 0.99
PPARd Antag qHTS: 0.03
PPARg Antag Summary qHTS: 1.00
TR Antag Summary qHTS: 0.05
MDR-1: 0.05
HPGD Inhib qHTS: 0.98
