In [None]:
import py3Dmol
import pandas as pd
import numpy as np
import plotly.express as px
import requests
import os
import re
import glob
import time

def _read_text(path):
    """Return the whole content of the text file at ``path``, closing it afterwards."""
    with open(path) as handle:
        return handle.read()


def view_pymol(receptor, ligand, docked, receptor_highlight=None, sticks=False):
    """Build a py3Dmol view of a receptor, a reference ligand and docked poses.

    Parameters
    ----------
    receptor : str
        Path to the receptor structure file; added as the first model.
    ligand : str
        Path to the reference ligand file; styled through the ``{'model': 1}``
        selector as thin grey-carbon sticks.
    docked : str
        Path to the docked poses file; loaded with ``addModelsAsFrames`` and
        styled through the ``{'model': 2}`` selector as green-carbon sticks.
    receptor_highlight : int, optional
        When truthy, atoms whose ``serial`` lies in
        ``range(receptor_highlight - 3, receptor_highlight + 3)`` are drawn
        in yellow on the most recently added model (the receptor at that
        point). NOTE(review): the selector key is ``serial`` (an atom serial,
        not a residue number) and the range is asymmetric (-3 .. +2); confirm
        both are intended.
    sticks : bool, optional
        When true, the receptor gets thin sticks in addition to the cartoon.

    Returns
    -------
    The configured ``py3Dmol`` view object.
    """
    v = py3Dmol.view()

    # Receptor: styled before any other model exists, so the selector-less
    # setStyle call only affects receptor atoms.
    v.addModel(_read_text(receptor))
    if sticks:
        v.setStyle({'cartoon': {}, 'stick': {'radius': .1}})
    else:
        v.setStyle({'cartoon': {}})

    if receptor_highlight:
        for i in range(receptor_highlight - 3, receptor_highlight + 3):
            # 'model': -1 addresses the last model added so far (the receptor).
            v.setStyle({'model': -1, 'serial': i},
                       {"cartoon": {'color': 'yellow'}, 'stick': {'radius': .3, 'color': 'yellow'}})

    # Reference ligand.
    v.addModel(_read_text(ligand))
    v.setStyle({'model': 1}, {'stick': {'colorscheme': 'dimgrayCarbon', 'radius': .125}})

    # Docked poses, one per animation frame.
    v.addModelsAsFrames(_read_text(docked))
    v.setStyle({'model': 2}, {'stick': {'colorscheme': 'greenCarbon'}})

    v.zoomTo({'model': 1})
    v.rotate(90)
    v.animate({'interval': 5000})  # presumably milliseconds between frames - confirm
    return v

from rdkit import Chem
from rdkit.Chem import AllChem
import numpy as np
from Bio.PDB import PDBParser, NeighborSearch

def _has_carbonyl_oxygen(c_atom, exclude_idx):
    """Return True if ``c_atom`` is double-bonded to an oxygen other than ``exclude_idx``."""
    c_idx = c_atom.GetIdx()
    for attached in c_atom.GetBonds():
        if attached.GetBondTypeAsDouble() != 2.0:
            continue
        begin, end = attached.GetBeginAtom(), attached.GetEndAtom()
        # Compare by index: RDKit hands out fresh wrapper objects per call.
        other = end if begin.GetIdx() == c_idx else begin
        if other.GetAtomicNum() == 8 and other.GetIdx() != exclude_idx:
            return True
    return False


def find_ester_bond(mol):
    """Return the C-O bond of the first ester-like group found in ``mol``.

    A bond qualifies when it joins a carbon and an oxygen, is not itself a
    double (or higher-order) bond, and the carbon is additionally
    double-bonded to a *different* oxygen, i.e. the ``C(=O)-O`` pattern.
    Requiring the C=O keeps acetals and similar C(O)(O) carbons from
    matching, and skipping double bonds guarantees the carbonyl bond itself
    is never returned regardless of bond ordering in the input file.

    The substituent on the single-bonded oxygen is not inspected, so
    carboxylic acids, carbonates and carbamates also match.

    Parameters
    ----------
    mol : rdkit.Chem.Mol
        Molecule whose bonds are scanned in ``mol.GetBonds()`` order.

    Returns
    -------
    rdkit.Chem.Bond or None
        The first matching bond, or ``None`` when no bond qualifies.
    """
    for bond in mol.GetBonds():
        # The carbonyl C=O is part of the pattern but not the bond we want.
        if bond.GetBondTypeAsDouble() >= 2.0:
            continue

        begin, end = bond.GetBeginAtom(), bond.GetEndAtom()
        if begin.GetAtomicNum() == 6 and end.GetAtomicNum() == 8:
            c_atom, o_atom = begin, end
        elif begin.GetAtomicNum() == 8 and end.GetAtomicNum() == 6:
            c_atom, o_atom = end, begin
        else:
            continue

        if _has_carbonyl_oxygen(c_atom, o_atom.GetIdx()):
            return bond
    return None

def is_part_of_cyclopropane(atom):
    """Tell whether ``atom`` belongs to any three-membered ring.

    Only ring size is examined (via the owning molecule's ring info); the
    element types of the ring members are not checked.

    Returns ``True`` if at least one ring of exactly three atoms contains
    the atom's index, otherwise ``False``.
    """
    own_idx = atom.GetIdx()
    all_rings = atom.GetOwningMol().GetRingInfo().AtomRings()
    return any(len(members) == 3 and own_idx in members for members in all_rings)

def _find_protein_atom(structure, residue_number, atom_name):
    """Return the first atom called ``atom_name`` in a residue numbered ``residue_number``, else None."""
    for model in structure:
        for chain in model:
            for residue in chain:
                if residue.id[1] != residue_number:
                    continue
                for atom in residue:
                    if atom.name == atom_name:
                        # Stop at the first hit so later chains/models with the
                        # same residue number cannot silently replace it.
                        return atom
    return None


def calculate_distances(protein_file, ligand_file, residue_number, atom_name):
    """Distance from one protein atom to the ester-bond midpoint of each ligand pose.

    Parameters
    ----------
    protein_file : str
        Path to the protein structure, read with ``PDBParser``.
    ligand_file : str
        Path to an SD file read with ``Chem.SDMolSupplier(removeHs=False)``;
        every record is treated as one pose.
    residue_number : int
        Residue sequence number (``residue.id[1]``) holding the target atom.
    atom_name : str
        Name of the target atom inside that residue (e.g. ``"OG"``).

    Returns
    -------
    list
        One entry per SD record, in file order: the Euclidean distance between
        the target atom and the midpoint of the bond returned by
        ``find_ester_bond``, or ``None`` when the record could not be parsed
        or has no such bond. Units are those of the input coordinates
        (presumably angstroms).

    Raises
    ------
    ValueError
        If no atom with ``atom_name`` exists in a residue numbered
        ``residue_number``.

    Notes
    -----
    The first matching atom in model/chain/residue iteration order is used.
    """
    # Load the protein structure and locate the reference atom.
    parser = PDBParser()
    structure = parser.get_structure("protein", protein_file)
    target_atom = _find_protein_atom(structure, residue_number, atom_name)

    if target_atom is None:
        raise ValueError(f"Atom {atom_name} not found in residue {residue_number}")

    # Load the ligand with all conformations (hydrogens kept as in the file).
    suppl = Chem.SDMolSupplier(ligand_file, removeHs=False)

    distances = []
    for mol in suppl:
        # The supplier yields None for records it cannot parse; keep a
        # placeholder so list positions still correspond to pose order.
        if mol is None:
            distances.append(None)
            continue

        ester_bond = find_ester_bond(mol)
        if ester_bond is None:
            distances.append(None)
            continue

        # Midpoint of the ester bond in this pose's coordinates.
        conf = mol.GetConformer()
        pos1 = conf.GetAtomPosition(ester_bond.GetBeginAtomIdx())
        pos2 = conf.GetAtomPosition(ester_bond.GetEndAtomIdx())
        midpoint = np.array([
            (pos1.x + pos2.x) / 2,
            (pos1.y + pos2.y) / 2,
            (pos1.z + pos2.z) / 2,
        ])

        distances.append(np.linalg.norm(target_atom.coord - midpoint))

    return distances

def load_docking_results(protein_file, ligand_file, gnina_log_file, residue_number, atom_name):
    """Tabulate the score rows of a gnina log together with per-pose distances.

    The distances come from ``calculate_distances`` (called first, with the
    protein/ligand/residue/atom arguments passed through unchanged). The log
    is then scanned for lines made of one integer followed by four numeric
    fields, which become the ``Mode``, ``Affinity``, ``Intramol``,
    ``CNN_score`` and ``CNN_affinity`` columns.

    Distances are attached by position: the i-th distance goes to the i-th
    matched log row. A length mismatch is not reported; missing values show
    up as NaN.

    Returns a ``pandas.DataFrame`` with columns
    ``Mode, Affinity, Intramol, CNN_score, CNN_affinity, Distance``.
    """
    column_types = {
        'Mode': int,
        'Affinity': float,
        'Intramol': float,
        'CNN_score': float,
        'CNN_affinity': float,
    }
    row_regex = re.compile(
        r"^\s*(\d+)\s+([-\d.]+)\s+([-\d.]+)\s+([-\d.]+)\s+([-\d.]+)\s*$",
        re.MULTILINE,
    )

    # Distances first, so structure/ligand problems surface before log parsing.
    pose_distances = calculate_distances(protein_file, ligand_file, residue_number, atom_name)

    with open(gnina_log_file, 'r') as log_handle:
        score_rows = row_regex.findall(log_handle.read())

    table = pd.DataFrame(score_rows, columns=list(column_types)).astype(column_types)
    table['Distance'] = pd.Series(pose_distances)

    return table[[*column_types, 'Distance']]

In [None]:
# Example usage
gene_id = 'AGAP006227'
ligand = 'deltamethrin'

protein_file = f"../receptors/{gene_id}.pdb"
ligand_file = f"../docking/{gene_id}_{ligand}.sdf"
gnina_log_file = f"../docking/{gene_id}_{ligand}.log"
residue_number = 192  # residue number of the reference residue
atom_name = "OG"      # atom name for Serine's hydroxyl oxygen

# Use the variables defined above so this call and the later
# load_docking_results call always refer to the same atom.
distances = calculate_distances(protein_file, ligand_file, residue_number, atom_name)

# calculate_distances yields None for poses that could not be parsed or have
# no ester bond; report those instead of failing on the float format.
for i, distance in enumerate(distances, 1):
    if distance is None:
        print(f"Conformation {i}: Distance = n/a")
    else:
        print(f"Conformation {i}: Distance = {distance:.2f} Å")

# Calculate and print statistics over the poses that produced a distance
valid_distances = [d for d in distances if d is not None]
if valid_distances:
    print(f"\nMinimum distance: {min(valid_distances):.2f} Å")
    print(f"Maximum distance: {max(valid_distances):.2f} Å")
    print(f"Average distance: {sum(valid_distances) / len(valid_distances):.2f} Å")
else:
    print("No valid conformations found.")

In [None]:
# Build the per-pose table (log scores plus distance column) and display it.
result_df = load_docking_results(
    protein_file=protein_file,
    ligand_file=ligand_file,
    gnina_log_file=gnina_log_file,
    residue_number=residue_number,
    atom_name=atom_name,
)
result_df