In [None]:
import os
import prody as pr
import networkx as nx
import numpy as np
import requests
import gzip
import shutil
import pandas as pd
from scipy.spatial.distance import cdist
import math
import matplotlib.pyplot as plt
import matplotlib.colors as mcolors
import plotly.graph_objects as go
from Bio import PDB
import py3Dmol
from pyvis.network import Network
from IPython.core.display import display, HTML

# Function to fetch molecular weight from PubChem API
def get_molecular_weight(ligand_id, timeout=10.0):
    """Look up a compound's molecular weight by name via the PubChem PUG REST API.

    Args:
        ligand_id: Compound name/identifier sent to PubChem's ``compound/name``
            endpoint.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The molecular weight reported by PubChem as a float. If the lookup
        fails (network error, HTTP error, unexpected payload), a coarse
        fallback is returned instead: 50.0 for identifiers of three characters
        or fewer, 180.0 otherwise.
    """
    from urllib.parse import quote

    name = str(ligand_id)
    # Percent-encode the identifier so characters such as '/', '+' or spaces
    # cannot alter the request path.
    url = (
        "https://pubchem.ncbi.nlm.nih.gov/rest/pug/compound/name/"
        f"{quote(name, safe='')}/property/MolecularWeight/JSON"
    )
    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        data = response.json()
        return float(data["PropertyTable"]["Properties"][0]["MolecularWeight"])
    except (requests.RequestException, KeyError, IndexError, TypeError, ValueError):
        # Only lookup/parse failures trigger the fallback; KeyboardInterrupt
        # and genuine programming errors are allowed to propagate.
        # NOTE(review): the length-based guess is a rough heuristic, not a
        # measured value -- confirm it is acceptable for downstream use.
        if len(name) <= 3:  # Likely a small ion (e.g., Na+, Cl-)
            return 50.0
        return 180.0  # Small organic molecule

def download_and_extract_cif(pdb_id, timeout=30.0):
    """Download and decompress the first biological-assembly mmCIF file from RCSB.

    The compressed download and the extracted file are both written to the
    current working directory; the ``.gz`` file is always removed afterwards.

    Args:
        pdb_id: PDB identifier; surrounding whitespace is stripped and the id
            is upper-cased before use.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The name of the extracted ``<ID>-assembly1.cif`` file, or ``None`` if
        the download or the extraction failed (a message is printed).
    """
    pdb_id = pdb_id.upper().strip()
    url = f"https://files.rcsb.org/download/{pdb_id}-assembly1.cif.gz"
    cif_gz_file = f"{pdb_id}-assembly1.cif.gz"
    cif_file = f"{pdb_id}-assembly1.cif"

    try:
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as exc:
        # Same "return None" contract as the not-found case below, so callers
        # only need one failure check.
        print(f"Error: Could not download {url}: {exc}")
        return None

    if response.status_code != 200:
        print(f"Error: No Biological Assembly CIF file found for {pdb_id} on RCSB PDB.")
        return None

    with open(cif_gz_file, "wb") as f:
        f.write(response.content)
    print(f"Downloaded CIF file: {cif_gz_file}")

    try:
        with gzip.open(cif_gz_file, "rb") as f_in, open(cif_file, "wb") as f_out:
            shutil.copyfileobj(f_in, f_out)
    except (OSError, EOFError) as exc:
        # Corrupt or truncated archive: do not leave a partial CIF behind.
        print(f"Error: Could not extract {cif_gz_file}: {exc}")
        if os.path.exists(cif_file):
            os.remove(cif_file)
        return None
    finally:
        # The compressed copy is never needed again, success or failure.
        os.remove(cif_gz_file)

    print(f"Extracted CIF file: {cif_file}")
    return cif_file

def convert_cif_to_pdb(cif_file, pdb_output):
    """Re-write an mmCIF file as a PDB file using ProDy.

    Args:
        cif_file: Path of the mmCIF file handed to ``pr.parseMMCIF``.
        pdb_output: Destination path handed to ``pr.writePDB``.

    Returns:
        ``pdb_output``, unchanged, so the call can be chained.
    """
    atoms = pr.parseMMCIF(cif_file)
    pr.writePDB(pdb_output, atoms)

    message = f"Converted {cif_file} → {pdb_output}"
    print(message)
    return pdb_output

def get_ligand_residues(pdb_file):
    """Return the distinct residue names of all non-protein, non-water atoms.

    Args:
        pdb_file: Path of the PDB file to parse.

    Returns:
        A list of unique residue names (in no particular order), or an empty
        list when the selection matches nothing.
    """
    atoms = pr.parsePDB(pdb_file)
    hetero = atoms.select("not protein and not water")

    if hetero is not None:
        unique_names = set(hetero.getResnames())
        return list(unique_names)

    return []

def visualize_pdb(pdb_file, highlight_residues=None):
    """Render a PDB file in py3Dmol with ligands as sticks and optional highlights.

    Args:
        pdb_file: Path of the PDB file to display. If it does not exist, an
            error is printed and nothing is drawn.
        highlight_residues: Optional iterable of
            ``(chain, resname, resnum, color)`` tuples, e.g.
            ``("A", "ASP", 50, "red")``; each is drawn as spheres in its color.

    Returns:
        Whatever ``view.show()`` returns, or ``None`` when the file is missing.
    """
    if not os.path.exists(pdb_file):
        print(f"Error: File {pdb_file} does not exist.")
        return

    with open(pdb_file, "r") as handle:
        pdb_text = handle.read()

    viewer = py3Dmol.view(width=800, height=600)
    viewer.addModel(pdb_text, "pdb")
    viewer.setStyle({"cartoon": {"color": "spectrum"}})

    # Every non-protein, non-water residue name is drawn as green-carbon sticks.
    for ligand_name in get_ligand_residues(pdb_file):
        viewer.addStyle(
            {"resn": ligand_name, "hetflag": True},
            {"stick": {"colorscheme": "greenCarbon", "radius": 0.3}},
        )

    # Caller-selected residues, each with its own sphere color.
    for chain, resname, resnum, color in highlight_residues or ():
        selection = {"chain": chain, "resn": resname, "resi": str(resnum)}
        viewer.addStyle(selection, {"sphere": {"color": color, "radius": 1.5}})

    viewer.zoomTo()
    return viewer.show()

# Example usage:
# visualize_pdb("example.pdb", highlight_residues=[("A", "ASP", 50, "red")])  # Highlights ASP 50 in chain A in red


def process_pdb_id(pdb_id, save_directory=".", highlight_residues=None):
    """Download a PDB entry, convert it to PDB format, visualize and analyze it.

    Args:
        pdb_id: PDB identifier. It is stripped and upper-cased here so the
            output file names match the id that ``download_and_extract_cif``
            actually fetches.
        save_directory: Directory for the converted ``<ID>.pdb`` file; created
            if it does not exist.
        highlight_residues: Optional list of ``(chain, resname, resnum, color)``
            tuples passed to ``visualize_pdb``. When omitted, the original
            hard-coded pair (VAL 122 green, ASP 170 red on chain A) is used.

    Returns:
        None. If the download fails, the function returns without doing
        anything further.
    """
    pdb_id = pdb_id.strip().upper()

    cif_file = download_and_extract_cif(pdb_id)
    if not cif_file:
        return

    if highlight_residues is None:
        # NOTE(review): these defaults look specific to one structure --
        # confirm they make sense for other entries.
        highlight_residues = [
            ("A", "VAL", 122, "green"),  # VAL 122 on chain A in green
            ("A", "ASP", 170, "red"),    # ASP 170 on chain A in red
        ]

    if save_directory:
        os.makedirs(save_directory, exist_ok=True)
    pdb_output = os.path.join(save_directory, f"{pdb_id}.pdb")
    pdb_file = convert_cif_to_pdb(cif_file, pdb_output)

    visualize_pdb(pdb_file, highlight_residues=highlight_residues)
    analyze_protein_network(pdb_file, pdb_id)



# Function to detect ligand-protein hydrogen bonds
def detect_hydrogen_bonds(protein_atoms, ligand_atoms, distance_cutoff=3.5, max_pairs=200):
    """Find ligand/protein N-O atom pairs that lie within a distance cutoff.

    This is a distance-only test between nitrogen/oxygen atoms; no hydrogen
    positions or angles are checked, so the result is a list of *candidate*
    hydrogen bonds.

    Args:
        protein_atoms: Atom selection for the protein side, or ``None``.
        ligand_atoms: Atom selection for the ligand side, or ``None``.
        distance_cutoff: Maximum N/O-N/O distance (inclusive) for a pair to be
            reported, in the coordinate units of the structure.
        max_pairs: Maximum number of pairs returned; ``None`` disables the cap.

    Returns:
        A list of ``(ligand_res_id, protein_res_id)`` tuples, one per atom pair
        (so a residue pair can appear more than once), ordered by ligand atom
        and then protein atom. Ids have the form ``chain-RESNAMEresnum``.
        The list is empty when either side is missing or has no N/O atoms.
    """
    hbond_pairs = []

    if protein_atoms is None or ligand_atoms is None:
        print("Warning: No valid protein or ligand atoms found. Skipping H-bond detection.")
        return hbond_pairs

    protein_polar = protein_atoms.select("element N O")
    ligand_polar = ligand_atoms.select("element N O")

    if protein_polar is None or ligand_polar is None:
        print("Warning: No donor/acceptor atoms found in protein or ligand.")
        return hbond_pairs

    def residue_ids(atoms):
        # One "chain-RESNAMEresnum" label per atom, in atom order.
        return [
            f"{chid}-{resname}{resnum}"
            for chid, resname, resnum in zip(
                atoms.getChids(), atoms.getResnames(), atoms.getResnums()
            )
        ]

    # One vectorised distance matrix (rows: ligand atoms, columns: protein
    # atoms) instead of a Python-level distance call per atom pair.
    distances = cdist(ligand_polar.getCoords(), protein_polar.getCoords())

    # argwhere walks the matrix row by row, which keeps the original
    # "ligand atom outer, protein atom inner" ordering before truncation.
    hits = np.argwhere(distances <= distance_cutoff)[:max_pairs]

    ligand_ids = residue_ids(ligand_polar)
    protein_ids = residue_ids(protein_polar)
    hbond_pairs = [(ligand_ids[i], protein_ids[j]) for i, j in hits]

    return hbond_pairs

# Function to analyze protein-ligand network
def _residue_labels(atoms):
    """Return one ``chain-RESNAMEresnum`` label per atom in *atoms*."""
    return [
        f"{chid}-{resname}{resnum}"
        for chid, resname, resnum in zip(
            atoms.getChids(), atoms.getResnames(), atoms.getResnums()
        )
    ]


def analyze_protein_network(pdb_file, pdb_id, distance_cutoff=7.0, network_type="CA"):
    """Build a residue contact network (plus contacting ligands) and score it.

    Protein residues become nodes (one per CA or CB atom) and are connected
    when their representative atoms are closer than ``distance_cutoff``.
    Ligand residues that have at least one N/O contact with a protein residue
    (see ``detect_hydrogen_bonds``) are added as nodes flagged with the node
    attribute ``is_ligand=True`` and positioned at the centroid of their atoms.

    Side effects: writes ``<pdb_id>_network_centrality_with_ligands.xlsx`` to
    the current directory and shows the 3D and Pyvis plots colored by
    betweenness centrality.

    Args:
        pdb_file: Path of the PDB file to analyze.
        pdb_id: Identifier used in the output file name.
        distance_cutoff: Residue-residue contact cutoff (exclusive).
        network_type: ``"CA"`` to use alpha carbons; any other value uses
            atoms named CB.

    Returns:
        None.
    """
    structure = pr.parsePDB(pdb_file)

    # Restrict to protein residues: a bare "name CA" also matches calcium ions,
    # which would then be counted both as residues and as ligands.
    atom_name = "CA" if network_type == "CA" else "CB"
    protein_atoms = structure.select(f"protein and name {atom_name}")
    ligand_atoms = structure.select("not protein and not water")

    if protein_atoms is None:
        print(" Warning: No protein atoms found in structure.")
    if ligand_atoms is None:
        print(" Warning: No ligand atoms found in structure.")

    G = nx.Graph()
    node_positions = {}

    # Protein nodes and distance-based edges.
    if protein_atoms is not None:
        labels = _residue_labels(protein_atoms)
        coords = protein_atoms.getCoords()

        for label, xyz in zip(labels, coords):
            node_positions[label] = xyz
            G.add_node(label)

        # Upper triangle (k=1) keeps each unordered pair once and skips i == j.
        in_contact = np.triu(cdist(coords, coords) < distance_cutoff, k=1)
        for i, j in np.argwhere(in_contact):
            G.add_edge(labels[i], labels[j], weight=2.0)

    # Normalize protein-protein edge weights (done before ligand edges are
    # added, so ligand edges keep their weight of 1.0).
    if G.number_of_edges() > 0:
        avg_weight = np.mean([d["weight"] for _, _, d in G.edges(data=True)])
        for _, _, d in G.edges(data=True):
            d["weight"] /= avg_weight * 10.0

    # Ligand nodes and edges from N/O contacts.
    if ligand_atoms is not None and protein_atoms is not None:
        # Contacts need every protein atom: the CA/CB-only selection above
        # contains no nitrogen or oxygen atoms at all.
        hbond_pairs = detect_hydrogen_bonds(structure.select("protein"), ligand_atoms)

        # Atom coordinates grouped per ligand residue, for centroid positions.
        ligand_coords = {}
        for label, xyz in zip(_residue_labels(ligand_atoms), ligand_atoms.getCoords()):
            ligand_coords.setdefault(label, []).append(xyz)

        for ligand_res, protein_res in hbond_pairs:
            if protein_res not in G:
                # e.g. a residue without a CB atom in a CB-based network.
                continue
            if ligand_res not in G:
                G.add_node(ligand_res, is_ligand=True)
                node_positions[ligand_res] = np.mean(ligand_coords[ligand_res], axis=0)
            G.add_edge(ligand_res, protein_res, weight=1.0)

    if G.number_of_nodes() == 0:
        # Eigenvector centrality and the plots cannot handle an empty graph.
        print(" Warning: Network is empty. Skipping centrality analysis.")
        return

    # Each measure is computed exactly once (betweenness is the costly one).
    centralities = {
        "degree": nx.degree_centrality(G),
        "betweenness_centrality": nx.betweenness_centrality(G, weight="weight"),
        "closeness_centrality": nx.closeness_centrality(G),
        "eigenvector_centrality": nx.eigenvector_centrality_numpy(G, weight="weight"),
        "pageRank_centrality": nx.pagerank(G, alpha=0.85),
    }
    betweenness_centrality = centralities["betweenness_centrality"]

    # Save to Excel: one row per node, one column per measure.
    rows = [
        [node] + [scores[node] for scores in centralities.values()]
        for node in G.nodes
    ]
    df = pd.DataFrame(rows, columns=["Node"] + list(centralities.keys()))
    df.to_excel(f"{pdb_id}_network_centrality_with_ligands.xlsx", index=False)

    print(f"Network centrality measures saved to {pdb_id}_network_centrality_with_ligands.xlsx")

    plot_network_3d(G, betweenness_centrality, "Betweenness Centrality", node_positions)
    plot_network_pyvis(G, betweenness_centrality, "Betweenness Centrality")

def plot_network_3d(G, centrality_measure, centrality_name, node_positions):
    """Show the network as an interactive Plotly 3D scatter plot.

    Args:
        G: Graph whose edges are drawn as gray line segments.
        centrality_measure: Mapping of node -> score used for marker color and
            hover text; nodes missing from it are treated as 0.
        centrality_name: Label used in the title, color bar and hover text.
        node_positions: Mapping of node -> (x, y, z) coordinates. Every node
            in this mapping is drawn, and every edge endpoint must be in it.
    """
    names = list(node_positions)
    points = [node_positions[name] for name in names]
    scores = [centrality_measure.get(name, 0) for name in names]
    hover = [
        f"Residue: {name}<br>{centrality_name}: {score:.4f}"
        for name, score in zip(names, scores)
    ]

    # Each segment is "start, end, None"; the None breaks the line so that
    # consecutive edges are not joined together.
    seg_x, seg_y, seg_z = [], [], []
    for src, dst in G.edges():
        x0, y0, z0 = node_positions[src]
        x1, y1, z1 = node_positions[dst]
        seg_x += [x0, x1, None]
        seg_y += [y0, y1, None]
        seg_z += [z0, z1, None]

    edge_trace = go.Scatter3d(
        x=seg_x, y=seg_y, z=seg_z,
        mode='lines',
        line=dict(color='gray', width=2),
        hoverinfo='none',
        showlegend=False
    )
    node_trace = go.Scatter3d(
        x=[p[0] for p in points],
        y=[p[1] for p in points],
        z=[p[2] for p in points],
        mode='markers',
        marker=dict(
            size=8,
            color=scores,
            colorscale='Plasma',
            colorbar=dict(title=centrality_name, len=0.5),
            opacity=0.8
        ),
        text=hover,
        hoverinfo='text',
        showlegend=False
    )

    fig = go.Figure()
    for trace in (edge_trace, node_trace):
        fig.add_trace(trace)

    fig.update_layout(
        title=f'Protein Residue Interaction Network ({centrality_name})',
        scene=dict(xaxis_title='X', yaxis_title='Y', zaxis_title='Z'),
        margin=dict(l=0, r=0, b=0, t=80)
    )

    fig.show()

def plot_network_pyvis(G, centrality_measure, centrality_name):
    """Write and display an interactive Pyvis view of the network.

    Protein residues are colored by their centrality score on the "coolwarm"
    colormap. A node is drawn as a ligand (larger, green, bold label) when it
    carries a truthy ``is_ligand`` node attribute or its id starts with
    ``"LIG-"``.

    Side effects: writes ``protein_network.html`` to the current directory and
    displays it in an IFrame.

    Args:
        G: Graph to draw.
        centrality_measure: Mapping of node -> score; nodes missing from it
            are treated as 0.
        centrality_name: Name of the score, shown in each node's hover text.
    """
    # IPython.display is the supported import location; IPython.core.display
    # emits a deprecation warning.
    from IPython.display import IFrame, display

    net = Network(notebook=True, height="600px", width="1000px", bgcolor="white", font_color="black", directed=False)

    # Colormap and normalization; the defaults keep min()/max() from failing
    # on an empty mapping.
    cmap = plt.get_cmap("coolwarm")
    scores = list(centrality_measure.values())
    norm = mcolors.Normalize(vmin=min(scores, default=0.0), vmax=max(scores, default=0.0))

    for node in G.nodes():
        score = centrality_measure.get(node, 0)
        # Node ids have the form "chain-RESNAMEresnum", so a name prefix alone
        # cannot identify ligands; prefer the explicit node attribute.
        is_ligand = bool(G.nodes[node].get("is_ligand")) or str(node).startswith("LIG-")

        if is_ligand:
            color = "green"  # Ligands are explicitly green
            size = 30  # Make ligands larger
            font_style = {"size": 20, "color": "black", "bold": True}
        else:
            color = mcolors.to_hex(cmap(norm(score)))
            size = 15  # Default size for protein residues
            font_style = {"size": 12, "color": "black", "bold": False}

        net.add_node(
            node,
            label=f"{node} ({score:.4f})",
            title=f"{centrality_name}: {score:.4f}",
            size=size,
            color=color,
            font=font_style
        )

    for source, target in G.edges():
        net.add_edge(source, target)

    # Generate and display the network
    net.show("protein_network.html")
    display(IFrame("protein_network.html", width="1000px", height="600px"))


# Notebook entry point: run the whole pipeline (download, convert, visualize,
# analyze) for a single PDB entry.
pdb_id = "3ZHB"
process_pdb_id(pdb_id)


  from IPython.core.display import display, HTML
@> 4161 atoms and 1 coordinate set(s) were parsed in 0.06s.
@> 4161 atoms and 1 coordinate set(s) were parsed in 0.03s.


Downloaded CIF file: 3ZHB-assembly1.cif.gz
Extracted CIF file: 3ZHB-assembly1.cif
Converted 3ZHB-assembly1.cif → ./3ZHB.pdb


@> 4161 atoms and 1 coordinate set(s) were parsed in 0.04s.


✅ Network centrality measures saved to 3ZHB_network_centrality_with_ligands.xlsx


protein_network.html



Consider using IPython.display.IFrame instead

