In [None]:
from Solubility_Predictor import predict_logS
from Toxicity_Predictor import predict_toxicity
import numpy as np
from rdkit import Chem
from rdkit.Chem import Descriptors
from rdkit.Chem import Crippen, Lipinski, QED
from rdkit.Chem.rdMolDescriptors import CalcTPSA, CalcNumRotatableBonds

def get_rdkit_properties(smiles) -> dict:
    """Compute basic RDKit descriptors for a molecule given as SMILES.

    Args:
        smiles: SMILES string describing the molecule.

    Returns:
        A dict mapping display labels to values: canonical SMILES,
        molecular formula, molecular weight, LogP, H-bond donor and
        acceptor counts, TPSA, rotatable-bond count, formal charge and
        QED score. Float descriptors are rounded to 3 decimals.

        If ``smiles`` is not a non-blank string, or RDKit cannot parse
        it, the result is ``{"Error": "Invalid SMILES"}`` instead.
    """
    # Reject None / non-strings / blank input before touching RDKit:
    # non-string arguments make MolFromSmiles raise, and an empty string
    # yields an atom-less molecule rather than None, which would
    # otherwise be reported as if it were a real compound.
    if not isinstance(smiles, str) or not smiles.strip():
        return {"Error": "Invalid SMILES"}

    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        return {"Error": "Invalid SMILES"}

    # Imported explicitly so the formula lookup does not depend on
    # `Chem.rdMolDescriptors` having been loaded by some other import.
    from rdkit.Chem.rdMolDescriptors import CalcMolFormula

    return {
        "Canonical SMILES": Chem.MolToSmiles(mol),
        "Molecular Formula": CalcMolFormula(mol),
        "Molecular Weight": round(Descriptors.MolWt(mol), 3),
        "LogP": round(Crippen.MolLogP(mol), 3),
        "H-Bond Donors": Lipinski.NumHDonors(mol),
        "H-Bond Acceptors": Lipinski.NumHAcceptors(mol),
        "TPSA (Å²)": round(CalcTPSA(mol), 3),
        "Rotatable Bonds": CalcNumRotatableBonds(mol),
        "Formal Charge": Chem.GetFormalCharge(mol),
        "QED Score": round(QED.qed(mol), 3),
    }

def predict_all_properties(smiles):
    """Combine RDKit descriptors with the ML solubility/toxicity predictions.

    Args:
        smiles: SMILES string describing the molecule.

    Returns:
        The dict produced by ``get_rdkit_properties`` with two extra
        entries, ``"Solubility (LogS)"`` and ``"Toxicity Probability"``.
        If the RDKit step reports an error, that error dict is returned
        unchanged and neither ML model is called.
    """
    results = get_rdkit_properties(smiles)

    # If RDKit fails, skip ML parts too
    if "Error" in results:
        return results

    # Round LogS to 3 decimals like the other float fields; the raw model
    # output otherwise surfaces as e.g. -3.2179999351501465. Anything that
    # is not convertible to float is stored untouched.
    log_s = predict_logS(smiles)
    try:
        log_s = round(float(log_s), 3)
    except (TypeError, ValueError):
        pass
    results["Solubility (LogS)"] = log_s

    # NOTE(review): the recorded sample run prints this value as
    # "🧪 Toxicity Probability: 0.352", i.e. predict_toxicity appears to
    # return an already-labelled string rather than a number. Confirm in
    # Toxicity_Predictor before changing the type stored here.
    results["Toxicity Probability"] = predict_toxicity(smiles)

    return results

if __name__ == "__main__":
    # Demo run: print every predicted property of one example molecule,
    # one "label: value" pair per line.
    test_smiles = "Cc1occc1C(=O)Nc2ccccc2"
    props = predict_all_properties(test_smiles)
    for label, value in props.items():
        line = f"{label}: {value}"
        print(line)


Some weights of RobertaForSequenceClassification were not initialized from the model checkpoint at seyonec/ChemBERTa-zinc-base-v1 and are newly initialized: ['classifier.dense.bias', 'classifier.dense.weight', 'classifier.out_proj.bias', 'classifier.out_proj.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


Canonical SMILES: Cc1occc1C(=O)Nc1ccccc1
Molecular Formula: C12H11NO2
Molecular Weight: 201.225
LogP: 2.84
H-Bond Donors: 1
H-Bond Acceptors: 2
TPSA (Å²): 42.24
Rotatable Bonds: 2
Formal Charge: 0
QED Score: 0.811
Solubility (LogS): -3.2179999351501465
Toxicity Probability: 🧪 Toxicity Probability: 0.352
