In [None]:
%%capture
!pip install chemdataextractor

In [None]:
%%capture
!pip install rdkit

In [None]:
%%capture
!pip install py3Dmol

In [None]:
import chemdataextractor as cde

In [None]:
def get_compounds(pdf):
    """Collect every chemical name ChemDataExtractor reports for a document.

    Args:
        pdf: Path to the document to parse, or an already opened binary
            file-like object.

    Returns:
        list: Names taken from the ``'names'`` entry of each serialized
        record, in record order. Names are not de-duplicated, so a compound
        that appears in several records is listed several times.
    """
    if hasattr(pdf, 'read'):
        # Caller owns the handle; parse it as-is.
        doc = cde.Document.from_file(pdf)
    else:
        # Open the file here so the handle is closed as soon as the document
        # has been read, instead of leaving handle management to the library.
        with open(pdf, 'rb') as article_file:
            doc = cde.Document.from_file(article_file)

    compounds = []
    for entity in doc.records.serialize():
        # Records without a 'names' entry contribute nothing.
        compounds.extend(entity.get('names', []))
    return compounds
# Set the path to the article to be parsed here.
compounds = get_compounds(pdf='Article.pdf')

Пример с библиотекой PubChemPy: получение SMILES по названию соединения из базы данных PubChem

In [None]:
%%capture
!pip install PyMuPDF pubchempy

In [None]:
import pubchempy as pcp

def resolve_smiles(compound_name):
    """Look a compound up by name in PubChem and return its canonical SMILES.

    Args:
        compound_name: Name to search for.

    Returns:
        The ``canonical_smiles`` of the first match, or ``None`` when nothing
        is found or the lookup raises. In both failure cases a message is
        printed instead of propagating an error.
    """
    try:
        matches = pcp.get_compounds(compound_name, 'name')
        if not matches:
            print(f"Соединение '{compound_name}' не найдено в базе данных PubChem.")
            return None
        # Only the first hit is used.
        return matches[0].canonical_smiles
    except Exception as err:
        # Best-effort lookup: report the problem and let the caller skip this name.
        print(f"Ошибка при получении SMILES для соединения '{compound_name}': {err}")
        return None

    
# Resolve every extracted name through PubChem.
# `smiles_list` keeps the resolved SMILES in lookup order; `d` maps each
# resolved name to its SMILES. Names that could not be resolved are skipped.
compound_names = compounds

smiles_list, d = [], {}
for compound_name in compound_names:
    smiles = resolve_smiles(compound_name)
    if smiles is None:
        continue
    d[compound_name] = smiles
    smiles_list.append(smiles)

In [None]:
# Show the compound name -> SMILES mapping built in the previous cell.
d

In [None]:
from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem.rdMolDescriptors import CalcNumHBA, CalcNumHBD
from rdkit.Chem.Draw import IPythonConsole
from rdkit.Chem import Draw
from rdkit.Chem import rdDepictor
import numpy as np
import py3Dmol

# Ask RDKit's depictor to prefer CoordGen when generating 2D coordinates.
rdDepictor.SetPreferCoordGen(True)


def check_hydrogen_bonds(mol):
    """Tell whether a molecule has both H-bond acceptors and H-bond donors.

    Args:
        mol: Molecule passed to ``CalcNumHBA`` and ``CalcNumHBD``.

    Returns:
        bool: True only when both counts are greater than zero.
    """
    acceptor_count = CalcNumHBA(mol)
    donor_count = CalcNumHBD(mol)
    has_acceptor = acceptor_count > 0
    has_donor = donor_count > 0
    return has_acceptor and has_donor


def visualize_molecule(smiles):
    """Show a molecule as a 2D drawing and, if embedding succeeds, in 3D.

    Molecules for which ``check_hydrogen_bonds`` is false are skipped without
    any output.

    Args:
        smiles: SMILES string of the molecule to show.

    Returns:
        None. Everything is printed or displayed as a side effect.
    """
    mol = Chem.MolFromSmiles(smiles)
    if mol is None:
        print(f"Не удалось создать молекулу из SMILES: {smiles}")
        return

    if not check_hydrogen_bonds(mol):
        return

    print("Визуализация молекулы в 2D:")
    display(Draw.MolToImage(mol))

    # Explicit hydrogens are added before generating 3D coordinates.
    mol_3d = Chem.AddHs(mol)

    params = AllChem.ETKDGv3()
    # The embedding-attempt limit on the parameter object is `maxIterations`.
    # Assigning `maxAttempts` only creates an unused Python attribute, so the
    # intended limit of 1000 was never applied.
    params.maxIterations = 1000

    if AllChem.EmbedMolecule(mol_3d, params) == -1:
        print(f"Не удалось сгенерировать конформер для молекулы: {smiles}")
        return

    try:
        AllChem.MMFFOptimizeMolecule(mol_3d)
    except ValueError as e:
        # A failed optimisation is reported; the unoptimised conformer is still shown.
        print(f"Ошибка при оптимизации: {str(e)}")

    print("Визуализация молекулы в 3D:")
    mol_block = Chem.MolToMolBlock(mol_3d)

    viewer = py3Dmol.view(width=400, height=400)
    viewer.addModel(mol_block, 'mol')
    viewer.setStyle({'stick': {}})
    viewer.setBackgroundColor('white')
    viewer.zoomTo()
    viewer.show()

    
# Visualize each resolved compound once.
# Iterate over the name -> SMILES mapping `d` so every SMILES is labelled with
# the name it was resolved from: `smiles_list` omits unresolved names, so
# zipping it with `compound_names` pairs names with the wrong structures.
for compound_name, smiles in d.items():
    print(f"{compound_name}: {smiles}")
    # visualize_molecule takes only the SMILES, shows its output itself and
    # returns None, so it is called once and its result is not inspected.
    visualize_molecule(smiles)

Пример с библиотекой ChemSpiPy (доступ к базе данных ChemSpider)

In [None]:
%%capture
!pip install chemspipy

In [None]:
import chemspipy
from chemspipy import ChemSpider
from rdkit import Chem
from rdkit.Chem import Draw

import os

# Take the ChemSpider API key from the CHEMSPIDER_API_KEY environment variable
# so the secret never has to be saved inside the notebook. When the variable
# is unset the key is an empty string, the same value used before.
API_KEY = os.environ.get('CHEMSPIDER_API_KEY', '')

cs = ChemSpider(API_KEY)

def resolve_smiles(compound_name):
    """Search ChemSpider for a compound name and return the first hit's SMILES.

    Args:
        compound_name: Name to search for.

    Returns:
        The ``smiles`` attribute of the first search result, or ``None`` when
        the search yields nothing or raises. In both failure cases a message
        is printed instead of propagating an error.
    """
    try:
        hits = cs.search(compound_name)
        if not hits:
            print(f"Соединение '{compound_name}' не найдено в базе данных ChemSpider.")
            return None
        # Only the first hit is used.
        first_hit = hits[0]
        return first_hit.smiles
    except Exception as err:
        # Best-effort lookup: report the problem and let the caller skip this name.
        print(f"Ошибка при получении SMILES для соединения '{compound_name}': {err}")
        return None

# Resolve every extracted name through ChemSpider.
# `smiles_list` keeps the resolved SMILES in lookup order; `c` maps each
# resolved name to its SMILES. Names that could not be resolved are skipped.
compound_names = compounds

smiles_list, c = [], {}
for compound_name in compound_names:
    smiles = resolve_smiles(compound_name)
    if smiles is None:
        continue
    c[compound_name] = smiles
    smiles_list.append(smiles)


# Print and draw every compound resolved via ChemSpider, reporting entries
# whose SMILES is empty or cannot be turned into a molecule.
for compound_name, smiles in c.items():
    if not smiles:
        print(f"SMILES для соединения '{compound_name}' не найден.")
        continue

    mol = Chem.MolFromSmiles(smiles)
    if not mol:
        print(f"Не удалось создать молекулу из SMILES для соединения '{compound_name}'.")
        continue

    print(f"{compound_name}: {smiles}")
    display(Draw.MolToImage(mol))
