In [7]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit import DataStructs

def calculate_tanimoto_similarity_gpu(smiles1, smiles2):
    mol1 = Chem.MolFromSmiles(smiles1)
    mol2 = Chem.MolFromSmiles(smiles2)

    if mol1 is not None and mol2 is not None:
        # TODO: Consider using RDKit-GPU for GPU-accelerated fingerprint calculations
        fp1 = AllChem.GetMorganFingerprintAsBitVect(mol1, 2, nBits=1024)
        fp2 = AllChem.GetMorganFingerprintAsBitVect(mol2, 2, nBits=1024)

        # Calculate Tanimoto similarity on CPU
        similarity = DataStructs.FingerprintSimilarity(fp1, fp2)
        return similarity
    else:
        return 0.0  # Return 0.0 if there's an issue with molecule conversion

def save_smiles_with_non_100_similarity(molecule_file, pdb_file):
    # Read SMILES from generated_molecules.txt
    with open(molecule_file, 'r') as file:
        molecule_smiles = [line.strip() for line in file if line.strip()]

    # Read and filter valid SMILES from pdb.txt
    with open(pdb_file, 'r') as file:
        pdb_smiles = [line.strip() for line in file if line.strip()]
        pdb_smiles = [smiles for smiles in pdb_smiles if Chem.MolFromSmiles(smiles) is not None]

    # List to store SMILES with less than 100% similarity
    non_100_similarity_smiles = []

    # Iterate through each SMILES from pdb.txt and check Tanimoto similarity with all SMILES from generated_molecules.txt
    for pdb_smile in pdb_smiles:
        # Check if all similarities are less than 1.0
        if all(calculate_tanimoto_similarity_gpu(pdb_smile, molecule_smile) < 1.0 for molecule_smile in molecule_smiles):
            print(f"No 100% Tanimoto similarity found for SMILES: {pdb_smile}")
            non_100_similarity_smiles.append(pdb_smile)

    # Save SMILES with less than 100% similarity to a new file named non_100_similarity_inhibitor_smiles.txt
    with open('non_100_similarity_inhibitor_smiles.txt', 'w') as output_file:
        for smile in non_100_similarity_smiles:
            output_file.write(smile + '\n')

if __name__ == "__main__":
    # Replace 'generated_molecules.txt' and 'pdb.txt' with the actual file paths
    molecule_file_path = '/home/piyush22194/RNN/smiles-gpt-master/smiles-gpt-master/notebooks/generated_molecules.txt'
    pdb_file_path = '/home/piyush22194/RNN/smiles-gpt-master/smiles-gpt-master/notebooks/inhibitor_generated_molecules.txt'

    # Find and save SMILES with less than 100% similarity
    save_smiles_with_non_100_similarity(molecule_file_path, pdb_file_path)


KeyboardInterrupt: 

In [7]:
from rdkit import Chem
from rdkit.Chem import AllChem
import os

def convert_smiles_to_pdb(smiles_file):
    # Read the SMILES file
    with open(smiles_file, 'r') as f:
        smiles_list = f.readlines()

    # Remove newline characters and leading/trailing whitespaces from each SMILES string
    smiles_list = [smiles.strip() for smiles in smiles_list]

    # Get the directory of the SMILES file
    directory = os.path.dirname(smiles_file)

    # Convert each SMILES string to PDB using RDKit
    for i, smiles in enumerate(smiles_list):
        mol = Chem.MolFromSmiles(smiles)
        if mol:
            mol = Chem.AddHs(mol)
            AllChem.EmbedMolecule(mol)
            AllChem.MMFFOptimizeMolecule(mol)
            pdb_file = os.path.join(directory, f"molecule_{i}.pdb")
            Chem.MolToPDBFile(mol, pdb_file)

if __name__ == "__main__":
    smiles_file = "/home/piyush22194/RNN/smiles-gpt-master/smiles-gpt-master/notebooks/non_100_similarity_smiles.txt"
    convert_smiles_to_pdb(smiles_file)


In [10]:
from rdkit import Chem
from rdkit.Chem import AllChem
import os

def convert_smiles_to_pdb(smiles_file):
    # Read the SMILES file
    with open(smiles_file, 'r') as f:
        smiles_list = f.readlines()

    # Remove newline characters and leading/trailing whitespaces from each SMILES string
    smiles_list = [smiles.strip() for smiles in smiles_list]

    # Get the directory of the SMILES file
    directory = os.path.dirname(smiles_file)

    # Convert each SMILES string to PDB using RDKit
    for i, smiles in enumerate(smiles_list):
        mol = Chem.MolFromSmiles(smiles)
        if mol:
            mol = Chem.AddHs(mol)
            try:
                AllChem.EmbedMolecule(mol)
                AllChem.MMFFOptimizeMolecule(mol)
                pdb_file = os.path.join(directory, f"molecule_{i}.pdb")
                Chem.MolToPDBFile(mol, pdb_file)
            except Exception as e:
                print(f"Failed to generate molecule {i + 1} with SMILES: {smiles}")
                print(f"Error message: {str(e)}")

if __name__ == "__main__":
    smiles_file = "/home/piyush22194/RNN/smiles-gpt-master/smiles-gpt-master/notebooks/non_100_similarity_inhibitor_smiles.txt"
    convert_smiles_to_pdb(smiles_file)


Failed to generate molecule 7 with SMILES: Cc1cc(Cl)cnc1C(=O)Nc1ccc(F)c([C@]2([C@@H](F)F)C[C@@H]3C[C@H]3C(N)=N2)c1
Error message: Bad Conformer Id
