<a href="https://colab.research.google.com/github/nibaskumar93n-debug/Morphoinformatics/blob/main/Druggable_pocket.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install py3Dmol biopython requests pandas openpyxl -q

import requests
import py3Dmol
from collections import Counter
from Bio.PDB import PDBParser
import pandas as pd

# UniProt accession used for the AlphaFold and UniProt lookups in this cell.
uniprot_id = "Q5LHT1"

# Residue numbers that define the pocket, ascending, ten per row.
pocket_residues = [
    138, 141, 142, 196, 199, 200, 203, 206, 207, 208,
    209, 231, 232, 233, 254, 255, 256, 257, 258, 259,
    260, 262, 270, 271, 272, 273, 276, 293, 295, 297,
    298, 299, 300, 302, 307, 310, 311, 314, 315, 318,
    319, 320, 323, 324, 334, 335, 336, 337, 338, 339,
    340, 343, 351, 353, 354, 355, 356, 357, 358, 360,
    361, 362, 363, 364, 365, 366, 367, 369, 370, 371,
    373, 374, 377, 378, 379, 380, 381, 384, 385, 386,
    387, 388, 389, 390, 391, 392, 394, 395, 398, 400,
    401, 402, 403, 404,
]

# Fetch using AlphaFold API
# The prediction endpoint is queried by UniProt accession; its JSON answer is
# read further down to find the download URL of the model.
print(f"Fetching AlphaFold structure for {uniprot_id}...")

api_url = f"https://alphafold.ebi.ac.uk/api/prediction/{uniprot_id}"
# requests waits indefinitely unless a timeout is given; bound the wait so a
# silent server cannot hang the notebook cell.
response = requests.get(api_url, timeout=30)

if response.status_code == 200:
    data = response.json()
    # The lookup below takes the first prediction in the response; an empty
    # response would otherwise surface as an opaque IndexError.
    if not data:
        raise ValueError(f"AlphaFold API returned no prediction for {uniprot_id}")
    pdb_url = data[0]['pdbUrl']

    # Download structure
    pdb_response = requests.get(pdb_url, timeout=60)
    # Stop on an HTTP error instead of saving an error body under a .pdb name.
    pdb_response.raise_for_status()
    pdb_file = f"{uniprot_id}.pdb"

    # Binary mode: the downloaded bytes are written to disk unchanged.
    with open(pdb_file, 'wb') as f:
        f.write(pdb_response.content)

    print(f"‚úÖ Successfully downloaded: {pdb_file}\n")

    # Parse the structure
    # QUIET=True suppresses Biopython's parser warnings.
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure(uniprot_id, pdb_file)

    # Get protein information from UniProt
    # Name and organism only label the report, so every failure mode here
    # falls back to "Unknown" instead of aborting the analysis.
    uniprot_url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}.json"

    protein_name = "Unknown"
    organism = "Unknown"

    try:
        # Bounded wait: requests never times out on its own.
        uniprot_response = requests.get(uniprot_url, timeout=30)
    except requests.RequestException as exc:
        # Network trouble is treated like a non-200 answer: keep placeholders.
        uniprot_response = None
        print(f"UniProt lookup failed ({exc}); using placeholder metadata")

    if uniprot_response is not None and uniprot_response.status_code == 200:
        uniprot_data = uniprot_response.json()

        # Get protein name
        # Prefer 'recommendedName'; otherwise use the first 'submittedName'.
        description = uniprot_data.get('proteinDescription', {})
        if 'recommendedName' in description:
            protein_name = description['recommendedName']['fullName']['value']
        elif 'submittedName' in description:
            protein_name = description['submittedName'][0]['fullName']['value']

        organism = uniprot_data.get('organism', {}).get('scientificName', 'Unknown')

    # Extract residue details from pocket
    # Build the lookup set once so each membership test in the loop is O(1)
    # instead of a linear scan of the pocket_residues list.
    pocket_lookup = set(pocket_residues)
    pocket_residue_details = []

    for model in structure:
        for chain in model:
            chain_id = chain.get_id()
            for residue in chain:
                # Biopython residue ids are (hetero flag, sequence number,
                # insertion code); index 1 is the residue number.
                res_num = residue.get_id()[1]
                if res_num not in pocket_lookup:
                    continue
                pocket_residue_details.append({
                    'chain': chain_id,
                    'resnum': res_num,
                    'resname': residue.get_resname(),
                })

    # Sort by residue number
    pocket_residue_details.sort(key=lambda entry: entry['resnum'])

    # Generate pocket name based on characteristics
    residue_names = [entry['resname'] for entry in pocket_residue_details]

    total = len(residue_names)
    if total == 0:
        # Every ratio below divides by `total`; fail with a message that names
        # the real problem instead of a bare ZeroDivisionError.
        raise ValueError(
            f"No pocket residues were found in the structure for {uniprot_id}; "
            "check that pocket_residues matches the model's residue numbering"
        )

    # Classify residues
    # The classes overlap: PHE and TRP are listed as hydrophobic and aromatic,
    # TYR as polar and aromatic, HIS as positive and aromatic.
    hydrophobic = ['VAL', 'LEU', 'ILE', 'PHE', 'TRP', 'MET', 'ALA', 'PRO']
    positive = ['LYS', 'ARG', 'HIS']
    negative = ['ASP', 'GLU']
    polar = ['SER', 'THR', 'ASN', 'GLN', 'CYS', 'TYR', 'GLY']
    aromatic = ['PHE', 'TYR', 'TRP', 'HIS']

    # Tally each residue name once, then sum the tallies per class
    # (a Counter returns 0 for names that never occur).
    name_tally = Counter(residue_names)
    hydrophobic_count = sum(name_tally[name] for name in hydrophobic)
    positive_count = sum(name_tally[name] for name in positive)
    negative_count = sum(name_tally[name] for name in negative)
    polar_count = sum(name_tally[name] for name in polar)
    aromatic_count = sum(name_tally[name] for name in aromatic)

    # Determine pocket type
    # Several labels can apply at once; they are joined with '-' below.
    pocket_type = []
    if hydrophobic_count / total > 0.4:
        pocket_type.append("Hydrophobic")
    if aromatic_count >= 3:
        pocket_type.append("Aromatic")
    # A charge label needs a surplus of at least two residues of one sign.
    if positive_count > negative_count + 1:
        pocket_type.append("Cationic")
    elif negative_count > positive_count + 1:
        pocket_type.append("Anionic")
    if polar_count / total > 0.3:
        pocket_type.append("Polar")

    # No threshold met: fall back to a neutral label.
    if not pocket_type:
        pocket_type = ["Mixed"]

    pocket_name = f"{'-'.join(pocket_type)} Binding Pocket"

    # ========================================
    # CREATE TABLES
    # ========================================

    # Report banner: 80-column rules around an indented title.
    heavy_rule = "=" * 80
    light_rule = "-" * 80
    print(heavy_rule)
    print(" " * 25 + "POCKET ANALYSIS REPORT")
    print(heavy_rule)

    # Table 1: Protein Information
    # The chain reported is that of the first entry in pocket_residue_details.
    protein_rows = [
        ('UniProt ID', uniprot_id),
        ('Protein Name', protein_name),
        ('Organism', organism),
        ('Chain', pocket_residue_details[0]['chain']),
    ]
    protein_info = {
        'Property': [label for label, _ in protein_rows],
        'Value': [value for _, value in protein_rows],
    }
    df_protein = pd.DataFrame(protein_info)

    print("\nüìã PROTEIN INFORMATION")
    print(light_rule)
    print(df_protein.to_string(index=False))

    # Table 2: Pocket Summary
    # Every value is pre-formatted as text, so the table is a two-column
    # label/value listing.
    net_charge = positive_count - negative_count
    summary_rows = [
        ('Pocket Name', pocket_name),
        ('Location', f"Residues {min(pocket_residues)}-{max(pocket_residues)}"),
        ('Total Residues', f"{total} residues"),
        ('Net Charge', f"{net_charge:+d}"),
        ('Hydrophobic Ratio', f"{hydrophobic_count/total:.2f}"),
        ('Polar Ratio', f"{polar_count/total:.2f}"),
        ('Aromatic Count', f"{aromatic_count}"),
    ]
    pocket_summary = {
        'Property': [label for label, _ in summary_rows],
        'Value': [value for _, value in summary_rows],
    }
    df_pocket = pd.DataFrame(pocket_summary)

    print("\nüîç POCKET SUMMARY")
    print("-" * 80)
    print(df_pocket.to_string(index=False))

    # Table 3: Pocket Composition
    # Counts are listed in the same order as the row labels; each percentage
    # is that count's share of all pocket residues found.
    class_counts = [
        hydrophobic_count,
        positive_count,
        negative_count,
        polar_count,
        aromatic_count,
    ]
    composition_data = {
        'Residue Type': ['Hydrophobic', 'Positive Charged', 'Negative Charged', 'Polar Uncharged', 'Aromatic'],
        'Count': class_counts,
        'Percentage': [f"{count/total*100:.1f}%" for count in class_counts],
        'Color Code': ['üü° Yellow', 'üîµ Blue', 'üî¥ Red', 'üü¢ Green', 'üü£ Magenta'],
    }
    df_composition = pd.DataFrame(composition_data)

    print("\nüìä POCKET COMPOSITION")
    print("-" * 80)
    print(df_composition.to_string(index=False))

    # Table 4: Top Residues
    # The ten most frequent residue names, with their share of the pocket.
    res_counter = Counter(residue_names)
    top_residues = res_counter.most_common(10)

    top_names = []
    top_counts = []
    top_shares = []
    for name, occurrences in top_residues:
        top_names.append(name)
        top_counts.append(occurrences)
        top_shares.append(f"{occurrences/total*100:.1f}%")

    residue_freq = {
        'Residue': top_names,
        'Count': top_counts,
        'Frequency': top_shares,
    }
    df_residues = pd.DataFrame(residue_freq)

    print("\nüß¨ TOP RESIDUES IN POCKET")
    print("-" * 80)
    print(df_residues.to_string(index=False))

    # Table 5: Complete Residue List
    # Classes are tried in this order and the first match wins, so a residue
    # listed in several classes gets the earliest label; anything unmatched is
    # labelled "Other".
    type_order = (
        ("Hydrophobic", hydrophobic),
        ("Positive", positive),
        ("Negative", negative),
        ("Polar", polar),
    )

    residue_table_data = [
        {
            'Position': entry['resnum'],
            'Residue': entry['resname'],
            'Type': next(
                (label for label, members in type_order if entry['resname'] in members),
                "Other",
            ),
            'Chain': entry['chain'],
        }
        for entry in pocket_residue_details
    ]

    df_all_residues = pd.DataFrame(residue_table_data)

    print("\nüìù COMPLETE RESIDUE LIST")
    print("-" * 80)
    # Only the first 20 rows are printed; the full table goes to Excel.
    preview_rows = 20
    print(df_all_residues.head(preview_rows).to_string(index=False))
    hidden_rows = len(df_all_residues) - preview_rows
    if hidden_rows > 0:
        print(f"... and {hidden_rows} more residues")

    # Table 6: Formatted Pocket String
    # One RESNAME_RESNUM_CHAIN token per pocket residue; the tokens are
    # concatenated with no separator between them.
    formatted_residues = [
        f"{entry['resname']}_{entry['resnum']}_{entry['chain']}"
        for entry in pocket_residue_details
    ]
    pocket_string = "".join(formatted_residues)

    df_pocket_string = pd.DataFrame({'Formatted Pocket String': [pocket_string]})

    print("\nüìã FORMATTED POCKET STRING")
    print("-" * 80)
    # Console preview is cut to the first 100 characters.
    preview = pocket_string[:100]
    print(f"{preview}...")

    # ========================================
    # SAVE EXCEL FILE WITH MULTIPLE SHEETS
    # ========================================

    excel_filename = f"{uniprot_id}_pocket_analysis.xlsx"

    # Sheet name -> table, written in this order into a single workbook.
    workbook_sheets = (
        ('Protein Info', df_protein),
        ('Pocket Summary', df_pocket),
        ('Composition', df_composition),
        ('Top Residues', df_residues),
        ('All Residues', df_all_residues),
        ('Formatted String', df_pocket_string),
    )

    with pd.ExcelWriter(excel_filename, engine='openpyxl') as writer:
        for sheet_title, frame in workbook_sheets:
            frame.to_excel(writer, sheet_name=sheet_title, index=False)

    print(f"\n‚úÖ Excel file saved: {excel_filename}")

    # ========================================
    # CREATE PYMOL SCRIPT
    # ========================================

    # Build the text of a PyMOL command script (.pml) as one f-string. Only
    # the brace-delimited fields are filled in here (protein and pocket names,
    # the PDB file name, the '+'-joined residue list, output image names);
    # everything else is written to the file verbatim.
    # NOTE(review): the script calls util.cbay/cbab/cbar/cbag - confirm each
    # helper exists in the target PyMOL version, util.cbar in particular.
    # NOTE(review): the trailing `print "..."` lines use statement-style
    # print; confirm the target PyMOL build accepts them.
    # NOTE(review): `pseudoatom` is given both pos=[0,0,0] and
    # selection=pocket - confirm where the label actually ends up.
    pymol_script = f"""# PyMOL Script for Pocket Visualization
# Protein: {protein_name}
# UniProt ID: {uniprot_id}
# Pocket: {pocket_name}

# Load structure
load {pdb_file}

# Remove waters and hetero atoms
remove solvent
remove organic

# Select pocket residues
select pocket, resi {'+'.join(map(str, pocket_residues))}

# Basic styling
hide everything
show cartoon
color gray80, all

# Show pocket surface
show surface, pocket
color cyan, pocket
set transparency, 0.4, pocket

# Color residues by type

# Hydrophobic residues (Yellow)
select hydrophobic, pocket and (resn VAL+LEU+ILE+PHE+TRP+MET+ALA+PRO)
show sticks, hydrophobic
color yellow, hydrophobic
util.cbay hydrophobic

# Positively charged residues (Blue)
select positive, pocket and (resn LYS+ARG+HIS)
show sticks, positive
color blue, positive
util.cbab positive

# Negatively charged residues (Red)
select negative, pocket and (resn ASP+GLU)
show sticks, negative
color red, negative
util.cbar negative

# Polar uncharged residues (Green)
select polar, pocket and (resn SER+THR+ASN+GLN+CYS+TYR+GLY)
show sticks, polar
color green, polar
util.cbag polar

# Aromatic residues (Magenta highlight)
select aromatic, pocket and (resn PHE+TYR+TRP+HIS)
show sticks, aromatic

# Visual settings
bg_color white
set ray_shadows, 0
set antialias, 2
set cartoon_fancy_helices, 1
set cartoon_smooth_loops, 1
set stick_radius, 0.3

# Center view on pocket
zoom pocket
orient pocket

# Label the pocket
pseudoatom pocket_label, pos=[0,0,0], selection=pocket
label pocket_label, "{pocket_name}"
set label_size, 20
set label_color, black

print "="*60
print "Pocket Visualization Loaded!"
print "="*60
print "Color Legend:"
print "  Yellow = Hydrophobic"
print "  Blue   = Positive charged"
print "  Red    = Negative charged"
print "  Green  = Polar"
print "  Cyan   = Pocket surface"
print "="*60

# Commands to save high-quality images:
# For publication quality PNG (300 DPI):
# ray 2400, 2400
# png {uniprot_id}_pocket_highres.png, dpi=300

# For standard PNG:
# png {uniprot_id}_pocket.png, width=1200, height=1200

# For publication quality (ray-traced):
# set ray_trace_mode, 1
# ray 3000, 3000
# png {uniprot_id}_pocket_raytrace.png, dpi=300
"""

    # The script is saved under a relative name, i.e. in the current working
    # directory, which is also where the downloaded .pdb file was written.
    pymol_script_file = f"{uniprot_id}_pymol_visualization.pml"
    with open(pymol_script_file, 'w') as f:
        f.write(pymol_script)

    print(f"‚úÖ PyMOL script saved: {pymol_script_file}")

    # ========================================
    # CREATE HTML INTERACTIVE VISUALIZATION
    # ========================================

    # Load the structure text that was saved to disk earlier in this cell.
    with open(pdb_file) as pdb_handle:
        pdb_data = pdb_handle.read()

    # Viewer canvas with the whole model drawn as a light-gray cartoon.
    view = py3Dmol.view(width=1000, height=800)
    view.addModel(pdb_data, 'pdb')
    view.setStyle({'cartoon': {'color': 'lightgray'}})

    # Translucent cyan VDW surface restricted to the pocket residues.
    surface_look = {'opacity': 0.4, 'color': 'cyan'}
    view.addSurface(py3Dmol.VDW, surface_look, {'resi': pocket_residues})

    # Stick overlays, one colour per residue class, added in this order:
    # hydrophobic, positive, negative, polar.
    stick_classes = (
        ('yellow', ['VAL', 'LEU', 'ILE', 'PHE', 'TRP', 'MET', 'ALA', 'PRO']),
        ('blue', ['LYS', 'ARG', 'HIS']),
        ('red', ['ASP', 'GLU']),
        ('green', ['SER', 'THR', 'ASN', 'GLN', 'CYS', 'TYR', 'GLY']),
    )
    for stick_color, class_resnames in stick_classes:
        view.addStyle(
            {'resi': pocket_residues, 'resn': class_resnames},
            {'stick': {'color': stick_color, 'radius': 0.3}},
        )

    # Text label anchored to the first residue number in pocket_residues.
    label_look = {
        'fontColor': 'black',
        'fontSize': 18,
        'backgroundColor': 'white',
        'backgroundOpacity': 0.9,
    }
    view.addLabel(pocket_name, label_look, {'resi': pocket_residues[0]})

    view.zoomTo({'resi': pocket_residues})
    view.setBackgroundColor('white')

    # Export the configured viewer as a standalone HTML page.
    html_filename = f"{uniprot_id}_pocket_interactive.html"
    view.write_html(html_filename)

    print(f"‚úÖ Interactive HTML saved: {html_filename}")

    # ========================================
    # SUMMARY
    # ========================================

    section_rule = "=" * 80

    # Closing banner.
    print("\n" + section_rule)
    print("‚úÖ ANALYSIS COMPLETE!")
    print(section_rule)

    # One entry per generated file, each followed by usage hints.
    generated_files_lines = [
        "\nüì¶ Files Generated:",
        f"   1. üìä {excel_filename}",
        "      - Contains 6 sheets with all analysis data",
        f"\n   2. üß™ {pymol_script_file}",
        "      - Load in PyMOL: File ‚Üí Run Script ‚Üí Select this file",
        f"      - Or in PyMOL command: @{pymol_script_file}",
        "      - To save image in PyMOL: ray 2400, 2400; png output.png, dpi=300",
        f"\n   3. üåê {html_filename}",
        "      - Open in any web browser",
        "      - Interactive 3D - rotate, zoom, inspect",
        "      - Right-click to save as image",
    ]
    for line in generated_files_lines:
        print(line)

    # Sheet-by-sheet description of the workbook.
    excel_contents_lines = [
        "\nüìä Excel File Contains:",
        "   ‚Ä¢ Protein Info - Basic information",
        "   ‚Ä¢ Pocket Summary - Key characteristics",
        "   ‚Ä¢ Composition - Residue type breakdown",
        "   ‚Ä¢ Top Residues - Most frequent residues",
        "   ‚Ä¢ All Residues - Complete list with types",
        "   ‚Ä¢ Formatted String - Pocket string format",
    ]
    for line in excel_contents_lines:
        print(line)

    # Colour key for the 3D views.
    color_legend_lines = [
        "\nüé® Color Legend:",
        "   üü° Yellow  = Hydrophobic residues",
        "   üîµ Blue    = Positive charged residues",
        "   üî¥ Red     = Negative charged residues",
        "   üü¢ Green   = Polar uncharged residues",
        "   üîµ Cyan    = Pocket surface",
    ]
    for line in color_legend_lines:
        print(line)
    print(section_rule)

    # Display interactive view
    print("\nüî¨ Interactive 3D Visualization:")
    view.show()

else:
    print(f"‚ùå Failed (Status: {response.status_code})")

Fetching AlphaFold structure for Q5LHT1...
✅ Successfully downloaded: Q5LHT1.pdb

                         POCKET ANALYSIS REPORT

📋 PROTEIN INFORMATION
--------------------------------------------------------------------------------
    Property                                                                                                                          Value
  UniProt ID                                                                                                                         Q5LHT1
Protein Name                                                                                          Riboflavin biosynthesis protein RibBA
    Organism Bacteroides fragilis (strain ATCC 25285 / DSM 2151 / CCUG 4856 / JCM 11019 / LMG 10263 / NCTC 9343 / Onslow / VPI 2553 / EN-2)
       Chain                                                                                                                              A

🔍 POCKET SUMMARY
----------------------------------------

In [None]:
!pip install py3Dmol biopython requests pandas openpyxl -q

import requests
import py3Dmol
from collections import Counter
from Bio.PDB import PDBParser
import pandas as pd

# UniProt accession used for the AlphaFold and UniProt lookups in this cell.
uniprot_id = "A0A096CN98"

# Residue numbers that define the pocket, ascending, ten per row.
pocket_residues = [
    19, 20, 21, 22, 23, 24, 25, 26, 28, 31,
    32, 47, 50, 51, 52, 91, 92, 95, 122, 123,
    125, 126, 131, 133, 134, 153, 155, 182, 183, 184,
    185, 186, 187, 190, 203, 204, 205, 206, 207, 208,
    209, 210, 211, 212, 213, 215, 216, 224, 225, 226,
    227, 228, 229, 230, 231, 252, 253, 255, 256, 257,
    259, 321, 322, 341, 342, 343, 344, 349, 351, 352,
    353, 354, 355, 356, 358, 359, 360, 361, 362, 363,
    364, 365, 366, 367, 369, 373, 375, 376, 377,
]

# Fetch using AlphaFold API
# The prediction endpoint is queried by UniProt accession; its JSON answer is
# read further down to find the download URL of the model.
print(f"Fetching AlphaFold structure for {uniprot_id}...")

api_url = f"https://alphafold.ebi.ac.uk/api/prediction/{uniprot_id}"
# requests waits indefinitely unless a timeout is given; bound the wait so a
# silent server cannot hang the notebook cell.
response = requests.get(api_url, timeout=30)

if response.status_code == 200:
    data = response.json()
    # The lookup below takes the first prediction in the response; an empty
    # response would otherwise surface as an opaque IndexError.
    if not data:
        raise ValueError(f"AlphaFold API returned no prediction for {uniprot_id}")
    pdb_url = data[0]['pdbUrl']

    # Download structure
    pdb_response = requests.get(pdb_url, timeout=60)
    # Stop on an HTTP error instead of saving an error body under a .pdb name.
    pdb_response.raise_for_status()
    pdb_file = f"{uniprot_id}.pdb"

    # Binary mode: the downloaded bytes are written to disk unchanged.
    with open(pdb_file, 'wb') as f:
        f.write(pdb_response.content)

    print(f"‚úÖ Successfully downloaded: {pdb_file}\n")

    # Parse the structure
    # QUIET=True suppresses Biopython's parser warnings.
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure(uniprot_id, pdb_file)

    # Get protein information from UniProt
    # Name and organism only label the report, so every failure mode here
    # falls back to "Unknown" instead of aborting the analysis.
    uniprot_url = f"https://rest.uniprot.org/uniprotkb/{uniprot_id}.json"

    protein_name = "Unknown"
    organism = "Unknown"

    try:
        # Bounded wait: requests never times out on its own.
        uniprot_response = requests.get(uniprot_url, timeout=30)
    except requests.RequestException as exc:
        # Network trouble is treated like a non-200 answer: keep placeholders.
        uniprot_response = None
        print(f"UniProt lookup failed ({exc}); using placeholder metadata")

    if uniprot_response is not None and uniprot_response.status_code == 200:
        uniprot_data = uniprot_response.json()

        # Get protein name
        # Prefer 'recommendedName'; otherwise use the first 'submittedName'.
        description = uniprot_data.get('proteinDescription', {})
        if 'recommendedName' in description:
            protein_name = description['recommendedName']['fullName']['value']
        elif 'submittedName' in description:
            protein_name = description['submittedName'][0]['fullName']['value']

        organism = uniprot_data.get('organism', {}).get('scientificName', 'Unknown')

    # Extract residue details from pocket
    # Build the lookup set once so each membership test in the loop is O(1)
    # instead of a linear scan of the pocket_residues list.
    pocket_lookup = set(pocket_residues)
    pocket_residue_details = []

    for model in structure:
        for chain in model:
            chain_id = chain.get_id()
            for residue in chain:
                # Biopython residue ids are (hetero flag, sequence number,
                # insertion code); index 1 is the residue number.
                res_num = residue.get_id()[1]
                if res_num not in pocket_lookup:
                    continue
                pocket_residue_details.append({
                    'chain': chain_id,
                    'resnum': res_num,
                    'resname': residue.get_resname(),
                })

    # Sort by residue number
    pocket_residue_details.sort(key=lambda entry: entry['resnum'])

    # Generate pocket name based on characteristics
    residue_names = [entry['resname'] for entry in pocket_residue_details]

    total = len(residue_names)
    if total == 0:
        # Every ratio below divides by `total`; fail with a message that names
        # the real problem instead of a bare ZeroDivisionError.
        raise ValueError(
            f"No pocket residues were found in the structure for {uniprot_id}; "
            "check that pocket_residues matches the model's residue numbering"
        )

    # Classify residues
    # The classes overlap: PHE and TRP are listed as hydrophobic and aromatic,
    # TYR as polar and aromatic, HIS as positive and aromatic.
    hydrophobic = ['VAL', 'LEU', 'ILE', 'PHE', 'TRP', 'MET', 'ALA', 'PRO']
    positive = ['LYS', 'ARG', 'HIS']
    negative = ['ASP', 'GLU']
    polar = ['SER', 'THR', 'ASN', 'GLN', 'CYS', 'TYR', 'GLY']
    aromatic = ['PHE', 'TYR', 'TRP', 'HIS']

    # Tally each residue name once, then sum the tallies per class
    # (a Counter returns 0 for names that never occur).
    name_tally = Counter(residue_names)
    hydrophobic_count = sum(name_tally[name] for name in hydrophobic)
    positive_count = sum(name_tally[name] for name in positive)
    negative_count = sum(name_tally[name] for name in negative)
    polar_count = sum(name_tally[name] for name in polar)
    aromatic_count = sum(name_tally[name] for name in aromatic)

    # Determine pocket type
    # Several labels can apply at once; they are joined with '-' below.
    pocket_type = []
    if hydrophobic_count / total > 0.4:
        pocket_type.append("Hydrophobic")
    if aromatic_count >= 3:
        pocket_type.append("Aromatic")
    # A charge label needs a surplus of at least two residues of one sign.
    if positive_count > negative_count + 1:
        pocket_type.append("Cationic")
    elif negative_count > positive_count + 1:
        pocket_type.append("Anionic")
    if polar_count / total > 0.3:
        pocket_type.append("Polar")

    # No threshold met: fall back to a neutral label.
    if not pocket_type:
        pocket_type = ["Mixed"]

    pocket_name = f"{'-'.join(pocket_type)} Binding Pocket"

    # ========================================
    # CREATE TABLES
    # ========================================

    # Report banner: 80-column rules around an indented title.
    heavy_rule = "=" * 80
    light_rule = "-" * 80
    print(heavy_rule)
    print(" " * 25 + "POCKET ANALYSIS REPORT")
    print(heavy_rule)

    # Table 1: Protein Information
    # The chain reported is that of the first entry in pocket_residue_details.
    protein_rows = [
        ('UniProt ID', uniprot_id),
        ('Protein Name', protein_name),
        ('Organism', organism),
        ('Chain', pocket_residue_details[0]['chain']),
    ]
    protein_info = {
        'Property': [label for label, _ in protein_rows],
        'Value': [value for _, value in protein_rows],
    }
    df_protein = pd.DataFrame(protein_info)

    print("\nüìã PROTEIN INFORMATION")
    print(light_rule)
    print(df_protein.to_string(index=False))

    # Table 2: Pocket Summary
    # Every value is pre-formatted as text, so the table is a two-column
    # label/value listing.
    net_charge = positive_count - negative_count
    summary_rows = [
        ('Pocket Name', pocket_name),
        ('Location', f"Residues {min(pocket_residues)}-{max(pocket_residues)}"),
        ('Total Residues', f"{total} residues"),
        ('Net Charge', f"{net_charge:+d}"),
        ('Hydrophobic Ratio', f"{hydrophobic_count/total:.2f}"),
        ('Polar Ratio', f"{polar_count/total:.2f}"),
        ('Aromatic Count', f"{aromatic_count}"),
    ]
    pocket_summary = {
        'Property': [label for label, _ in summary_rows],
        'Value': [value for _, value in summary_rows],
    }
    df_pocket = pd.DataFrame(pocket_summary)

    print("\nüîç POCKET SUMMARY")
    print("-" * 80)
    print(df_pocket.to_string(index=False))

    # Table 3: Pocket Composition
    # Counts are listed in the same order as the row labels; each percentage
    # is that count's share of all pocket residues found.
    class_counts = [
        hydrophobic_count,
        positive_count,
        negative_count,
        polar_count,
        aromatic_count,
    ]
    composition_data = {
        'Residue Type': ['Hydrophobic', 'Positive Charged', 'Negative Charged', 'Polar Uncharged', 'Aromatic'],
        'Count': class_counts,
        'Percentage': [f"{count/total*100:.1f}%" for count in class_counts],
        'Color Code': ['üü° Yellow', 'üîµ Blue', 'üî¥ Red', 'üü¢ Green', 'üü£ Magenta'],
    }
    df_composition = pd.DataFrame(composition_data)

    print("\nüìä POCKET COMPOSITION")
    print("-" * 80)
    print(df_composition.to_string(index=False))

    # Table 4: Top Residues
    # The ten most frequent residue names, with their share of the pocket.
    res_counter = Counter(residue_names)
    top_residues = res_counter.most_common(10)

    top_names = []
    top_counts = []
    top_shares = []
    for name, occurrences in top_residues:
        top_names.append(name)
        top_counts.append(occurrences)
        top_shares.append(f"{occurrences/total*100:.1f}%")

    residue_freq = {
        'Residue': top_names,
        'Count': top_counts,
        'Frequency': top_shares,
    }
    df_residues = pd.DataFrame(residue_freq)

    print("\nüß¨ TOP RESIDUES IN POCKET")
    print("-" * 80)
    print(df_residues.to_string(index=False))

    # Table 5: Complete Residue List
    # Classes are tried in this order and the first match wins, so a residue
    # listed in several classes gets the earliest label; anything unmatched is
    # labelled "Other".
    type_order = (
        ("Hydrophobic", hydrophobic),
        ("Positive", positive),
        ("Negative", negative),
        ("Polar", polar),
    )

    residue_table_data = [
        {
            'Position': entry['resnum'],
            'Residue': entry['resname'],
            'Type': next(
                (label for label, members in type_order if entry['resname'] in members),
                "Other",
            ),
            'Chain': entry['chain'],
        }
        for entry in pocket_residue_details
    ]

    df_all_residues = pd.DataFrame(residue_table_data)

    print("\nüìù COMPLETE RESIDUE LIST")
    print("-" * 80)
    # Only the first 20 rows are printed; the full table goes to Excel.
    preview_rows = 20
    print(df_all_residues.head(preview_rows).to_string(index=False))
    hidden_rows = len(df_all_residues) - preview_rows
    if hidden_rows > 0:
        print(f"... and {hidden_rows} more residues")

    # Table 6: Formatted Pocket String
    # One RESNAME_RESNUM_CHAIN token per pocket residue; the tokens are
    # concatenated with no separator between them.
    formatted_residues = [
        f"{entry['resname']}_{entry['resnum']}_{entry['chain']}"
        for entry in pocket_residue_details
    ]
    pocket_string = "".join(formatted_residues)

    df_pocket_string = pd.DataFrame({'Formatted Pocket String': [pocket_string]})

    print("\nüìã FORMATTED POCKET STRING")
    print("-" * 80)
    # Console preview is cut to the first 100 characters.
    preview = pocket_string[:100]
    print(f"{preview}...")

    # ========================================
    # SAVE EXCEL FILE WITH MULTIPLE SHEETS
    # ========================================

    excel_filename = f"{uniprot_id}_pocket_analysis_pocket2.xlsx"

    # Sheet name -> table, written in this order into a single workbook.
    workbook_sheets = (
        ('Protein Info', df_protein),
        ('Pocket Summary', df_pocket),
        ('Composition', df_composition),
        ('Top Residues', df_residues),
        ('All Residues', df_all_residues),
        ('Formatted String', df_pocket_string),
    )

    with pd.ExcelWriter(excel_filename, engine='openpyxl') as writer:
        for sheet_title, frame in workbook_sheets:
            frame.to_excel(writer, sheet_name=sheet_title, index=False)

    print(f"\n‚úÖ Excel file saved: {excel_filename}")

    # ========================================
    # CREATE PYMOL SCRIPT
    # ========================================

    # Build the text of a PyMOL command script (.pml) as one f-string. Only
    # the brace-delimited fields are filled in here (protein and pocket names,
    # the PDB file name, the '+'-joined residue list, output image names);
    # everything else is written to the file verbatim.
    # NOTE(review): the script calls util.cbay/cbab/cbar/cbag - confirm each
    # helper exists in the target PyMOL version, util.cbar in particular.
    # NOTE(review): the trailing `print "..."` lines use statement-style
    # print; confirm the target PyMOL build accepts them.
    # NOTE(review): `pseudoatom` is given both pos=[0,0,0] and
    # selection=pocket - confirm where the label actually ends up.
    pymol_script = f"""# PyMOL Script for Pocket Visualization
# Protein: {protein_name}
# UniProt ID: {uniprot_id}
# Pocket: {pocket_name}

# Load structure
load {pdb_file}

# Remove waters and hetero atoms
remove solvent
remove organic

# Select pocket residues
select pocket, resi {'+'.join(map(str, pocket_residues))}

# Basic styling
hide everything
show cartoon
color gray80, all

# Show pocket surface
show surface, pocket
color cyan, pocket
set transparency, 0.4, pocket

# Color residues by type

# Hydrophobic residues (Yellow)
select hydrophobic, pocket and (resn VAL+LEU+ILE+PHE+TRP+MET+ALA+PRO)
show sticks, hydrophobic
color yellow, hydrophobic
util.cbay hydrophobic

# Positively charged residues (Blue)
select positive, pocket and (resn LYS+ARG+HIS)
show sticks, positive
color blue, positive
util.cbab positive

# Negatively charged residues (Red)
select negative, pocket and (resn ASP+GLU)
show sticks, negative
color red, negative
util.cbar negative

# Polar uncharged residues (Green)
select polar, pocket and (resn SER+THR+ASN+GLN+CYS+TYR+GLY)
show sticks, polar
color green, polar
util.cbag polar

# Aromatic residues (Magenta highlight)
select aromatic, pocket and (resn PHE+TYR+TRP+HIS)
show sticks, aromatic

# Visual settings
bg_color white
set ray_shadows, 0
set antialias, 2
set cartoon_fancy_helices, 1
set cartoon_smooth_loops, 1
set stick_radius, 0.3

# Center view on pocket
zoom pocket
orient pocket

# Label the pocket
pseudoatom pocket_label, pos=[0,0,0], selection=pocket
label pocket_label, "{pocket_name}"
set label_size, 20
set label_color, black

print "="*60
print "Pocket Visualization Loaded!"
print "="*60
print "Color Legend:"
print "  Yellow = Hydrophobic"
print "  Blue   = Positive charged"
print "  Red    = Negative charged"
print "  Green  = Polar"
print "  Cyan   = Pocket surface"
print "="*60

# Commands to save high-quality images:
# For publication quality PNG (300 DPI):
# ray 2400, 2400
# png {uniprot_id}_pocket2_highres.png, dpi=300

# For standard PNG:
# png {uniprot_id}_pocket2.png, width=1200, height=1200

# For publication quality (ray-traced):
# set ray_trace_mode, 1
# ray 3000, 3000
# png {uniprot_id}_pocket2_raytrace.png, dpi=300
"""

    # The script is saved under a relative name, i.e. in the current working
    # directory, which is also where the downloaded .pdb file was written.
    pymol_script_file = f"{uniprot_id}_pymol_visualization_pocket2.pml"
    with open(pymol_script_file, 'w') as f:
        f.write(pymol_script)

    print(f"‚úÖ PyMOL script saved: {pymol_script_file}")

    # ========================================
    # CREATE HTML INTERACTIVE VISUALIZATION
    # ========================================

    # Load the structure text that was saved to disk earlier in this cell.
    with open(pdb_file) as pdb_handle:
        pdb_data = pdb_handle.read()

    # Viewer canvas with the whole model drawn as a light-gray cartoon.
    view = py3Dmol.view(width=1000, height=800)
    view.addModel(pdb_data, 'pdb')
    view.setStyle({'cartoon': {'color': 'lightgray'}})

    # Translucent cyan VDW surface restricted to the pocket residues.
    surface_look = {'opacity': 0.6, 'color': 'cyan'}
    view.addSurface(py3Dmol.VDW, surface_look, {'resi': pocket_residues})

    # Stick overlays, one colour per residue class, added in this order:
    # hydrophobic, positive, negative, polar.
    stick_classes = (
        ('yellow', ['VAL', 'LEU', 'ILE', 'PHE', 'TRP', 'MET', 'ALA', 'PRO']),
        ('blue', ['LYS', 'ARG', 'HIS']),
        ('red', ['ASP', 'GLU']),
        ('green', ['SER', 'THR', 'ASN', 'GLN', 'CYS', 'TYR', 'GLY']),
    )
    for stick_color, class_resnames in stick_classes:
        view.addStyle(
            {'resi': pocket_residues, 'resn': class_resnames},
            {'stick': {'color': stick_color, 'radius': 0.3}},
        )

    # Text label anchored to the first residue number in pocket_residues.
    label_look = {
        'fontColor': 'black',
        'fontSize': 18,
        'backgroundColor': 'white',
        'backgroundOpacity': 0.9,
    }
    view.addLabel(f"Pocket: {pocket_name}", label_look, {'resi': pocket_residues[0]})

    view.zoomTo({'resi': pocket_residues})
    view.setBackgroundColor('white')

    # Export the configured viewer as a standalone HTML page.
    html_filename = f"{uniprot_id}_pocket_interactive_pocket2.html"
    view.write_html(html_filename)

    print(f"‚úÖ Interactive HTML saved: {html_filename}")

    # ========================================
    # SUMMARY
    # ========================================

    section_rule = "=" * 80

    # Closing banner.
    print("\n" + section_rule)
    print("‚úÖ ANALYSIS COMPLETE - POCKET 2!")
    print(section_rule)

    # One entry per generated file, each followed by usage hints.
    generated_files_lines = [
        "\nüì¶ Files Generated:",
        f"   1. üìä {excel_filename}",
        "      - Contains 6 sheets with all analysis data",
        f"\n   2. üß™ {pymol_script_file}",
        "      - Load in PyMOL: File ‚Üí Run Script ‚Üí Select this file",
        f"      - Or in PyMOL command: @{pymol_script_file}",
        "      - To save image in PyMOL: ray 2400, 2400; png output.png, dpi=300",
        f"\n   3. üåê {html_filename}",
        "      - Open in any web browser",
        "      - Interactive 3D - rotate, zoom, inspect",
        "      - Right-click to save as image",
    ]
    for line in generated_files_lines:
        print(line)

    # Sheet-by-sheet description of the workbook.
    excel_contents_lines = [
        "\nüìä Excel File Contains:",
        "   ‚Ä¢ Protein Info - Basic information",
        "   ‚Ä¢ Pocket Summary - Key characteristics",
        "   ‚Ä¢ Composition - Residue type breakdown",
        "   ‚Ä¢ Top Residues - Most frequent residues",
        "   ‚Ä¢ All Residues - Complete list with types",
        "   ‚Ä¢ Formatted String - Pocket string format",
    ]
    for line in excel_contents_lines:
        print(line)

    # Colour key for the 3D views.
    color_legend_lines = [
        "\nüé® Color Legend:",
        "   üü° Yellow  = Hydrophobic residues",
        "   üîµ Blue    = Positive charged residues",
        "   üî¥ Red     = Negative charged residues",
        "   üü¢ Green   = Polar uncharged residues",
        "   üîµ Cyan    = Pocket surface",
    ]
    for line in color_legend_lines:
        print(line)
    print(section_rule)

    # Display interactive view
    print("\nüî¨ Interactive 3D Visualization:")
    view.show()

else:
    print(f"‚ùå Failed (Status: {response.status_code})")

Fetching AlphaFold structure for A0A096CN98...
✅ Successfully downloaded: A0A096CN98.pdb

                         POCKET ANALYSIS REPORT

📋 PROTEIN INFORMATION
--------------------------------------------------------------------------------
    Property                             Value
  UniProt ID                        A0A096CN98
Protein Name          Histidinol dehydrogenase
    Organism Flavonifractor plautii 1_3_50AFAA
       Chain                                 A

🔍 POCKET SUMMARY
--------------------------------------------------------------------------------
         Property                                             Value
      Pocket Name Hydrophobic-Aromatic-Anionic-Polar Binding Pocket
         Location                                   Residues 19-377
   Total Residues                                       89 residues
       Net Charge                                                -3
Hydrophobic Ratio                                              0.40
      Po