In [None]:
%conda install pdbfixer openmm

In [13]:
from pdbfixer import PDBFixer
from openmm.app import PDBFile

# Load the AlphaFold model into PDBFixer
fixer = PDBFixer(
    filename="/home/liuyan/projects/package/biorange/biorange/dock/todo/AF-P31749-F1-model_v4.pdb"
)

# Locate missing residues and heavy atoms, then add the missing atoms
fixer.findMissingResidues()
fixer.findMissingAtoms()
fixer.addMissingAtoms()

# Add hydrogens
fixer.addMissingHydrogens(7.4)  # pH 7.4

# Save the repaired structure; the context manager guarantees the file is
# flushed and closed even if writing fails part-way through.
with open('output.pdb', 'w') as output_file:
    PDBFile.writeFile(fixer.topology, fixer.positions, output_file)

In [14]:
# Add hydrogens to a protein PDB file downloaded from AlphaFold
from pdbfixer import PDBFixer
from openmm.app import PDBFile
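# Add hydrogens with PDBFixer (built on OpenMM).
# NOTE(review): the original note described this as an advanced deep-learning method;
# PDBFixer's documentation describes pH-based hydrogen placement instead - confirm.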
def fix_pdb(input_path, output_path, ph=7.4):
    """
    Repair a PDB structure with PDBFixer and write the result to a new file.

    The structure is checked for missing residues and missing atoms, the
    missing atoms are added, hydrogens are added for the requested pH, and
    the resulting topology/positions are written out in PDB format.

    Parameters
    ----------
    input_path : str
        Path of the PDB file to repair.
    output_path : str
        Path the repaired PDB file is written to.
    ph : float
        pH value passed to ``addMissingHydrogens``; defaults to 7.4.
    """
    fixer = PDBFixer(input_path)

    # Detect missing residues and atoms, then add the missing atoms.
    fixer.findMissingResidues()
    fixer.findMissingAtoms()
    fixer.addMissingAtoms()

    # Add hydrogens for the requested pH.
    fixer.addMissingHydrogens(ph)

    # Use a context manager so the output file is always flushed and closed,
    # instead of leaking the handle returned by a bare open().
    with open(output_path, 'w') as output_file:
        PDBFile.writeFile(fixer.topology, fixer.positions, output_file)

    print(f"Structure has been fixed and saved to {output_path}")

# Example usage: repair an AlphaFold model and write it to fixed_output2.pdb
fix_pdb('/home/liuyan/projects/package/biorange/biorange/dock/todo/AF-P05177-F1-model_v4.pdb', 'fixed_output2.pdb')

Structure has been fixed and saved to fixed_output2.pdb


In [16]:
# 绘制分子对接结果的2D图
import MDAnalysis as mda
import prolif as plf
from rdkit import Chem
from typing import Union, List, Optional, Tuple
import pandas as pd


class ProteinLigandAnalyzer:
    """Protein-ligand interaction analyzer.

    Computes ProLIF interaction fingerprints between a protein and a ligand
    and exposes them as a DataFrame, a 2D ligand network, or a 3D view.
    Every public method loads whatever it needs, so the methods can be called
    independently and in any order.

    The most recently loaded protein, ligand and fingerprint are kept on the
    instance (``protein_mol``, ``ligand_mol``, ``fingerprint``); the cached
    molecules are reused when a later call omits the corresponding file path.
    """

    # Values accepted for the ``protein_method`` argument.
    _PROTEIN_METHODS = ("mda", "rdkit")

    def __init__(self):
        self.protein_mol = None
        self.ligand_mol = None
        self.fingerprint = None

    def _ensure_structures(
        self,
        protein_file: Optional[str] = None,
        ligand_file: Optional[str] = None,
        protein_method: str = "mda",
    ) -> Tuple[plf.Molecule, plf.Molecule]:
        """Return the protein and ligand molecules, loading them if needed.

        A molecule is (re)loaded when its file path is given or when nothing
        is cached yet; otherwise the cached molecule is returned.

        Args:
            protein_file: Optional path to the protein structure file.
            ligand_file: Optional path to the ligand SDF file.
            protein_method: How to load the protein: ``"mda"`` (MDAnalysis,
                with guessed bonds) or ``"rdkit"`` (RDKit PDB parser). Only
                consulted when the protein is actually loaded.

        Returns:
            protein_mol, ligand_mol: The loaded molecule objects.

        Raises:
            ValueError: If a required file path is missing, if
                ``protein_method`` is not supported, or if RDKit cannot parse
                the protein file.
        """
        protein_mol = self.protein_mol
        ligand_mol = self.ligand_mol

        if protein_file is not None or protein_mol is None:
            if protein_file is None:
                raise ValueError("Protein file is required")
            # Reject unknown methods up front. Previously an unknown value
            # skipped both loading branches and silently left the protein as
            # None (or as a stale cached molecule).
            if protein_method not in self._PROTEIN_METHODS:
                raise ValueError(
                    f"Unsupported protein_method {protein_method!r}; "
                    f"expected one of {self._PROTEIN_METHODS}"
                )
            if protein_method == "mda":
                universe = mda.Universe(protein_file)
                # Bonds are guessed, with explicit van der Waals radii
                # supplied for H and O.
                universe.atoms.guess_bonds(vdwradii={"H": 1.05, "O": 1.48})
                protein_mol = plf.Molecule.from_mda(universe)
            else:  # "rdkit"
                rdkit_prot = Chem.MolFromPDBFile(protein_file, removeHs=False)
                # RDKit signals a parse failure by returning None rather than
                # raising; surface that as a clear error here.
                if rdkit_prot is None:
                    raise ValueError(
                        f"RDKit could not parse protein file {protein_file!r}"
                    )
                protein_mol = plf.Molecule(rdkit_prot)
            self.protein_mol = protein_mol

        if ligand_file is not None or ligand_mol is None:
            if ligand_file is None:
                raise ValueError("Ligand file is required")
            # Only the first molecule in the SDF file is used.
            ligand_mol = plf.sdf_supplier(ligand_file)[0]
            self.ligand_mol = ligand_mol

        return protein_mol, ligand_mol

    def _ensure_fingerprint(
        self, protein_mol: plf.Molecule, ligand_mol: plf.Molecule, count: bool = True
    ) -> plf.Fingerprint:
        """Compute the interaction fingerprint for the given pair.

        The fingerprint is recomputed on every call (a previously stored one
        is never reused) and the result is stored on ``self.fingerprint``.

        Args:
            protein_mol: Protein molecule object.
            ligand_mol: Ligand molecule object.
            count: Whether to count all possible interaction combinations.

        Returns:
            fingerprint: The newly computed fingerprint object.
        """
        fingerprint = plf.Fingerprint(count=count)
        fingerprint.run_from_iterable([ligand_mol], protein_mol)
        self.fingerprint = fingerprint
        return fingerprint

    def get_interaction_df(
        self,
        protein_file: Optional[str] = None,
        ligand_file: Optional[str] = None,
        protein_method: str = "mda",
        count: bool = True,
    ) -> pd.DataFrame:
        """Return the interactions as a DataFrame.

        Args:
            protein_file: Optional path to the protein structure file.
            ligand_file: Optional path to the ligand SDF file.
            protein_method: Protein loading method (``"mda"`` or ``"rdkit"``).
            count: Whether to count all possible interaction combinations.

        Returns:
            DataFrame produced by the fingerprint's ``to_dataframe()``.
        """
        protein_mol, ligand_mol = self._ensure_structures(
            protein_file, ligand_file, protein_method
        )
        fingerprint = self._ensure_fingerprint(protein_mol, ligand_mol, count)
        return fingerprint.to_dataframe()

    def visualize_2d(
        self,
        protein_file: Optional[str] = None,
        ligand_file: Optional[str] = None,
        protein_method: str = "mda",
        display_all: bool = True,
        count: bool = True,
    ):
        """Build the 2D ligand interaction network.

        Args:
            protein_file: Optional path to the protein structure file.
            ligand_file: Optional path to the ligand SDF file.
            protein_method: Protein loading method (``"mda"`` or ``"rdkit"``).
            display_all: Whether to display all possible interactions.
            count: Whether to count all possible interaction combinations.

        Returns:
            The 2D network view object returned by ``plot_lignetwork``.
        """
        protein_mol, ligand_mol = self._ensure_structures(
            protein_file, ligand_file, protein_method
        )
        fingerprint = self._ensure_fingerprint(protein_mol, ligand_mol, count)
        return fingerprint.plot_lignetwork(
            ligand_mol, kind="frame", frame=0, display_all=display_all
        )

    def visualize_3d(
        self,
        protein_file: Optional[str] = None,
        ligand_file: Optional[str] = None,
        protein_method: str = "mda",
        display_all: bool = False,
        count: bool = True,
    ):
        """Build the 3D structure view.

        Args:
            protein_file: Optional path to the protein structure file.
            ligand_file: Optional path to the ligand SDF file.
            protein_method: Protein loading method (``"mda"`` or ``"rdkit"``).
            display_all: Whether to display all possible interactions.
            count: Whether to count all possible interaction combinations.

        Returns:
            The 3D view object returned by ``plot_3d``.
        """
        protein_mol, ligand_mol = self._ensure_structures(
            protein_file, ligand_file, protein_method
        )
        fingerprint = self._ensure_fingerprint(protein_mol, ligand_mol, count)
        return fingerprint.plot_3d(
            ligand_mol, protein_mol, frame=0, display_all=display_all
        )


if __name__ == "__main__":
    import os  # local import: only this driver needs it

    protein_file = "/home/liuyan/projects/package/biorange/notebooks/fixed_output2.pdb"
    ligand_file = "/home/liuyan/projects/package/biorange/biorange/dock/todo/ligand1_docked__4e01f06f-33fd-45cd-bd88-15bb4e3e3d59_1.sdf"
    view_2d = ProteinLigandAnalyzer().visualize_2d(protein_file, ligand_file)

    output_html = "results/dock/saving4.html"
    # Create the output directory first: open() raises FileNotFoundError when
    # the parent directory does not exist.
    os.makedirs(os.path.dirname(output_html), exist_ok=True)
    # TODO: the saving logic could move into the class behind a `save` parameter.
    with open(output_html, "w", encoding="utf-8") as f:
        f.write(view_2d.data)  # .data is used to obtain the HTML content

    # view_3d = ProteinLigandAnalyzer().visualize_3d(protein_file, ligand_file)
    # view_3d.write_html(
    #     "results/dock/dd3.html"
    # )  # Hard to inspect interactively outside Jupyter; view it in a notebook instead





  0%|          | 0/1 [00:00<?, ?it/s]

In [28]:
%pip install py3Dmol

Looking in indexes: https://pypi.tuna.tsinghua.edu.cn/simple
Collecting py3Dmol
  Using cached https://pypi.tuna.tsinghua.edu.cn/packages/63/a6/ccb9b29ad5aa0857b140426a0429ed363e5856513303ddcb233b30906bb1/py3Dmol-2.4.0-py2.py3-none-any.whl (7.0 kB)
Installing collected packages: py3Dmol
Successfully installed py3Dmol-2.4.0
Note: you may need to restart the kernel to use updated packages.


In [None]:
# With the workflow above, proteins downloaded from AlphaFold now work; the SDF splitting step still fails, so let me try re-splitting.

In [15]:
# 分割

from openbabel import pybel
from pathlib import Path

# 将 SDF 文件拆分为单独的分子文件
def split_sdf_file(sdf_path):
    """
    Split an SDF file into separate files, one per molecule.

    Each output file is written next to the input file and named
    ``<stem>_<n>.sdf``, with ``n`` counting up from 1. Existing files with
    the same name are overwritten.

    Parameters
    ----------
    sdf_path: str or pathlib.Path
        Path to SDF file that should be split.
    """
    source = Path(sdf_path)
    out_dir, base_name = source.parent, source.stem

    # Write every molecule read from the input into its own numbered SDF file.
    index = 0
    for record in pybel.readfile("sdf", str(source)):
        index += 1
        target = out_dir / f"{base_name}_{index}.sdf"
        record.write("sdf", str(target), overwrite=True)
# Example usage
# NOTE(review): this path has no `.sdf` extension, unlike the call in the next cell - confirm the file name on disk.
split_sdf_file("/home/liuyan/projects/package/biorange/biorange/dock/todo/ligand1_docked__4e01f06f-33fd-45cd-bd88-15bb4e3e3d59")

In [16]:

# Split the SDF file

# The split files are written back next to the input; SDF data are not ordinary Python objects here, so operating directly on the files is fine.
split_sdf_file("/home/liuyan/projects/package/biorange/biorange/dock/todo/Conformer3D_COMPOUND_CID_6041.sdf")

In [None]:
# 用加氢的

In [None]:
# The conda install did succeed; pip is odd, though - every package was prepared, yet it still refused to install.
