# Interactive UniProt Structure Viewer with Decision Buttons

This notebook allows you to:
1. Process a list of UniProt IDs sequentially
2. View the AlphaFold structure for each protein
3. Categorize each protein as "Accept", "Refuse", or "Indecisive"
4. Get a summary of your decisions

## Instructions
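- Provide your UniProt IDs: either define the `uniprot_ids` list directly, or point `UNIPROT_IDS_PATH` at a text file with one ID per line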
- The structures will be displayed one at a time
- Click one of the three buttons to categorize each structure
- After processing all structures, a summary will be displayed

In [39]:
import os
import sys
import argparse
import requests
from typing import List, Dict, Optional, Tuple, Callable, Any, Union

# Check for nglview installation
try:
    import nglview as nv
    from IPython.display import display, clear_output
    import ipywidgets as widgets
    NGLVIEW_AVAILABLE = True
except ImportError:
    NGLVIEW_AVAILABLE = False
    print("Warning: nglview, ipywidgets, or IPython not installed. Visualization will not be available.")
    print("Install with: pip install nglview ipywidgets")
    print("For Jupyter notebook support, also run: jupyter-nbextension enable --py --sys-prefix widgetsnbextension")

def download_alphafold_pdb(uniprot_id: str, output_dir: Optional[str] = None, timeout: float = 30.0) -> str:
    """
    Download AlphaFold structure PDB file for a given UniProt ID.
    If the file already exists locally, it will be reused instead of downloading again.

    The download is streamed into a temporary ``<uniprot_id>.pdb.part`` file that is
    renamed to ``<uniprot_id>.pdb`` only once the transfer has completed. An interrupted
    download therefore never leaves a truncated ``.pdb`` file behind that a later call
    would mistake for a valid cached structure.
    
    Parameters:
    -----------
    uniprot_id : str
        UniProt identifier for the protein
    output_dir : str, optional
        Directory where the PDB file will be saved. If None, current directory is used.
    timeout : float, optional
        Seconds to wait for the server before giving up (passed to ``requests.get``).
        Without a timeout a stalled connection would block the notebook indefinitely.
    
    Returns:
    --------
    str
        Path to the downloaded PDB file, or an empty string if the download failed
    """
    # Create output directory if needed
    if output_dir is None:
        output_dir = os.getcwd()
    os.makedirs(output_dir, exist_ok=True)
    
    # Define the output file path
    output_file = os.path.join(output_dir, f"{uniprot_id}.pdb")
    
    # Check if file already exists
    if os.path.exists(output_file):
        print(f"Using existing PDB file for {uniprot_id} at {output_file}")
        return output_file
    
    # URL for AlphaFold DB
    url = f"https://alphafold.ebi.ac.uk/files/AF-{uniprot_id}-F1-model_v4.pdb"
    
    # Stream into a side file first; it only becomes the real file on success
    partial_file = output_file + ".part"
    
    print(f"Downloading AlphaFold structure for {uniprot_id}...")
    try:
        response = requests.get(url, stream=True, timeout=timeout)
        try:
            response.raise_for_status()
            
            with open(partial_file, 'wb') as f:
                for chunk in response.iter_content(chunk_size=8192):
                    f.write(chunk)
        finally:
            # Streamed responses hold the connection open until closed
            response.close()
        
        # Atomic rename: the final path never holds a half-written file
        os.replace(partial_file, output_file)
        
        print(f"Downloaded structure to {output_file}")
        return output_file
    
    except requests.exceptions.RequestException as e:
        print(f"Error downloading structure: {e}")
        return ""
    
    finally:
        # Remove leftovers from a failed/interrupted transfer (no-op after a successful rename)
        if os.path.exists(partial_file):
            os.remove(partial_file)

def visualize_structure(pdb_file: str) -> Optional[Any]:
    """
    Build an nglview widget showing the structure stored in a PDB file.
    
    Parameters:
    -----------
    pdb_file : str
        Location of the PDB file to render
        
    Returns:
    --------
    nglview.widget.NGLWidget or None
        The configured widget, or None when nglview could not be imported
        or the file does not exist
    """
    can_render = NGLVIEW_AVAILABLE and os.path.exists(pdb_file)
    if not can_render:
        print("Cannot visualize: nglview not available or PDB file not found")
        return None
    
    widget = nv.show_file(pdb_file)
    
    # Enlarge the canvas, then center the model and disable distance clipping
    widget._remote_call('setSize', target='Widget', args=['800px', '600px'])
    widget.center()
    widget.stage.set_parameters(clip_dist=0)
    
    # Replace the default representations with a single cartoon
    # colored by secondary structure ('sstruc')
    widget.clear_representations()
    cartoon_options = {'quality': 'high', 'color': 'sstruc', 'smoothSheet': True}
    widget.add_representation('cartoon', **cartoon_options)
    
    # FIXME: view.update_settings() does not exist, so the impostor / panSpeed /
    # backgroundColor tuning that was planned here is not applied.
    
    return widget


In [40]:
from typing import List, Optional, Callable, Any, Set
from IPython.display import display, clear_output
import ipywidgets as widgets
import os
import pathlib

# Create data structures to categorize UniProt IDs
class ProteinDecisionManager:
    """
    Class to manage the processing and categorization of UniProt IDs based on user decisions.

    Decisions are appended to three text files (accepted / refused / indecisive, one ID
    per line) inside a decisions directory. On construction these files are read back and
    every ID already present in any of them is dropped from the work list, so a review
    session can be interrupted and resumed.

    Typical use: build the manager, call `create_interface()` and display the returned
    widget, then call `start()`. Status text and the summary are only rendered once the
    interface has been created.
    """
    def __init__(self, uniprot_ids: List[str], output_pdb: str = '.', output_decisions: str = '.', input_file_path: Optional[str] = None):
        """
        Initialize the decision manager with a list of UniProt IDs.
        
        Parameters:
        -----------
        uniprot_ids : List[str]
            List of UniProt IDs to process
        output_pdb : str
            Directory to save downloaded PDB files
        output_decisions : str
            Directory to save decisions
        input_file_path : str, optional
            Path to the file containing the UniProt IDs. When given, its base name
            (without extension) names the decisions sub-directory; otherwise the
            sub-directory is called "decisions".
        """
        self.uniprot_ids: List[str] = uniprot_ids
        self.output_dir: str = output_pdb
        self.current_index: int = 0
        
        # Determine the decisions directory name based on input file
        decisions_dirname = "decisions"
        if input_file_path:
            # Extract the base name without extension
            base_name = os.path.splitext(os.path.basename(input_file_path))[0]
            decisions_dirname = base_name
            
        # Files to store decisions
        self.decisions_dir = os.path.join(output_decisions, decisions_dirname)
        os.makedirs(self.decisions_dir, exist_ok=True)
        
        print(f"Using decisions directory: {self.decisions_dir}")
        
        self.accept_file = os.path.join(self.decisions_dir, 'accepted_ids.txt')
        self.refuse_file = os.path.join(self.decisions_dir, 'refused_ids.txt')
        self.indecisive_file = os.path.join(self.decisions_dir, 'indecisive_ids.txt')
        
        # Load previous decisions if files exist
        self.accepted_ids: Set[str] = self._load_decisions(self.accept_file)
        self.refused_ids: Set[str] = self._load_decisions(self.refuse_file)
        self.indecisive_ids: Set[str] = self._load_decisions(self.indecisive_file)
        
        # Remove already processed IDs
        self._filter_processed_ids()
        
        # Current view and output widgets (populated by create_interface / process_current_id)
        self.current_view: Optional[Any] = None
        self.status_output: Optional[widgets.Output] = None
        self.button_area: Optional[widgets.HBox] = None
        self.main_output: Optional[widgets.VBox] = None
    
    def _load_decisions(self, file_path: str) -> Set[str]:
        """
        Load previously made decisions from a file.
        
        Parameters:
        -----------
        file_path : str
            Path to the decision file
            
        Returns:
        --------
        Set[str]
            Set of UniProt IDs from the file (empty if the file does not exist)
        """
        if os.path.exists(file_path):
            with open(file_path, 'r') as f:
                # One ID per line; surrounding whitespace and blank lines are ignored
                return set(line.strip() for line in f if line.strip())
        return set()
    
    def _filter_processed_ids(self) -> None:
        """
        Remove already processed IDs from the list of IDs to process.

        Duplicate entries in the input list are collapsed as well (first occurrence
        wins, order preserved), so the same protein is never presented twice and
        cannot end up in two different decision files.
        """
        processed_ids = self.accepted_ids | self.refused_ids | self.indecisive_ids
        # dict.fromkeys de-duplicates while keeping the original ordering
        remaining = dict.fromkeys(
            uid for uid in self.uniprot_ids if uid not in processed_ids
        )
        self.uniprot_ids = list(remaining)
        print(f"Found {len(processed_ids)} previously processed IDs. {len(self.uniprot_ids)} IDs remaining to process.")
    
    def _save_decision(self, decision_type: str, uniprot_id: str) -> None:
        """
        Save a decision to the appropriate file.
        
        Parameters:
        -----------
        decision_type : str
            Type of decision ('accept', 'refuse', or 'indecisive'). Any value other
            than 'accept' or 'refuse' is recorded as indecisive.
        uniprot_id : str
            UniProt ID to save
        """
        if decision_type == 'accept':
            file_path = self.accept_file
            bucket = self.accepted_ids
        elif decision_type == 'refuse':
            file_path = self.refuse_file
            bucket = self.refused_ids
        else:  # indecisive
            file_path = self.indecisive_file
            bucket = self.indecisive_ids
        
        # Append to file first: if the write fails, the in-memory set is left
        # untouched and stays consistent with what is on disk
        with open(file_path, 'a') as f:
            f.write(f"{uniprot_id}\n")
        bucket.add(uniprot_id)
    
    def get_current_id(self) -> Optional[str]:
        """
        Get the current UniProt ID to process.
        
        Returns:
        --------
        Optional[str]
            Current UniProt ID or None if all IDs have been processed
        """
        if self.current_index < len(self.uniprot_ids):
            return self.uniprot_ids[self.current_index]
        return None
    
    def process_current_id(self) -> None:
        """
        Process the current UniProt ID, downloading and visualizing its structure.

        IDs whose structure cannot be downloaded are recorded as 'indecisive' and
        skipped automatically. Skipping is done in a loop rather than by recursion,
        so a long run of consecutive failures (e.g. working offline) cannot exhaust
        the interpreter's recursion limit. When no IDs remain, the summary is shown.
        """
        while True:
            current_id = self.get_current_id()
            if current_id is None:
                self.show_summary()
                return
            
            # Download (or reuse the cached file); an empty string signals failure
            pdb_file = download_alphafold_pdb(current_id, self.output_dir)
            if pdb_file:
                break
            
            # Handle download failure: park the ID as indecisive and try the next one
            self._save_decision('indecisive', current_id)
            self.current_index += 1
        
        # Clear any existing visualization
        if self.status_output:
            with self.status_output:
                clear_output(wait=True)
                print(f"Processing {current_id} ({self.current_index + 1}/{len(self.uniprot_ids)})")
        
        # Visualize the new structure
        self.current_view = visualize_structure(pdb_file)
        if self.current_view is not None and self.status_output:
            with self.status_output:
                display(self.current_view)
    
    def on_decision(self, decision: str) -> Callable:
        """
        Create a decision callback function.
        
        Parameters:
        -----------
        decision : str
            The decision to make ('accept', 'refuse', or 'indecisive')
            
        Returns:
        --------
        Callable
            Callback function for the button
        """
        def decision_callback(button: "widgets.Button") -> None:
            current_id = self.get_current_id()
            if current_id is None:
                # Everything has been processed; late clicks are ignored
                return
            
            # Save the decision
            self._save_decision(decision, current_id)
            
            # Move to the next ID
            self.current_index += 1
            self.process_current_id()
        
        return decision_callback
    
    def show_summary(self) -> None:
        """
        Display a summary of the categorized UniProt IDs.

        The summary is written into the status output widget; nothing is shown if
        the interface has not been created. IDs are listed in sorted order so the
        output is stable between runs.
        """
        if self.status_output:
            with self.status_output:
                clear_output(wait=True)
                print("Processing complete! Summary:")
                print(f"Accepted IDs ({len(self.accepted_ids)}): {', '.join(sorted(self.accepted_ids))}")
                print(f"Refused IDs ({len(self.refused_ids)}): {', '.join(sorted(self.refused_ids))}")
                print(f"Indecisive IDs ({len(self.indecisive_ids)}): {', '.join(sorted(self.indecisive_ids))}")
                
                print("\nDecision files saved at:")
                print(f"Accepted: {self.accept_file}")
                print(f"Refused: {self.refuse_file}")
                print(f"Indecisive: {self.indecisive_file}")
    
    def create_interface(self) -> "widgets.VBox":
        """
        Create the interactive interface with buttons and display area.
        
        Returns:
        --------
        widgets.VBox
            The main interface widget
        """
        # Create status output area
        self.status_output = widgets.Output()
        
        # Create decision buttons
        accept_button = widgets.Button(
            description="Accept",
            button_style="success",
            tooltip="Accept this structure",
            icon="check"
        )
        refuse_button = widgets.Button(
            description="Refuse",
            button_style="danger",
            tooltip="Refuse this structure",
            icon="times"
        )
        indecisive_button = widgets.Button(
            description="Indecisive",
            button_style="warning",
            tooltip="Mark as indecisive",
            icon="question"
        )
        
        # Attach callbacks
        accept_button.on_click(self.on_decision('accept'))
        refuse_button.on_click(self.on_decision('refuse'))
        indecisive_button.on_click(self.on_decision('indecisive'))
        
        # Arrange buttons
        self.button_area = widgets.HBox([accept_button, refuse_button, indecisive_button])
        
        # Create main interface
        self.main_output = widgets.VBox([self.status_output, self.button_area])
        
        return self.main_output
    
    def start(self) -> None:
        """
        Start the processing of UniProt IDs.
        """
        self.process_current_id()

In [41]:
# Input: the UniProt IDs to review.
# For a quick demo, skip the file below and define the list inline instead:
# uniprot_ids = [
#     'P01308',  # Insulin
#     'P01133',  # EGF
#     'P00533'   # EGFR
# ]

# Read the IDs from a text file (one ID per line; blank lines are skipped)
UNIPROT_IDS_PATH = '../production1/armadillo_proteins_entries.txt'
with open(UNIPROT_IDS_PATH, 'r') as f:
    uniprot_ids = list(filter(None, map(str.strip, f)))

In [None]:
# Where downloaded structures and the recorded decisions are stored
output_pdb = '../../data/PDB'
output_decisions = '../../data/ARM_eyeball'

# Make sure both directories exist before anything is written to them
os.makedirs(output_pdb, exist_ok=True)
os.makedirs(output_decisions, exist_ok=True)

# Build the manager; the input file name determines the decisions sub-directory
decision_manager = ProteinDecisionManager(
    uniprot_ids=uniprot_ids,
    output_pdb=output_pdb,
    output_decisions=output_decisions,
    input_file_path=UNIPROT_IDS_PATH,
)

# Show the status area and the three decision buttons
interface = decision_manager.create_interface()
display(interface)

# Load the first structure
decision_manager.start()

Using decisions directory: ../../data/ARM_eyeball/armadillo_proteins_entries
Found 0 previously processed IDs. 348 IDs remaining to process.


VBox(children=(Output(), HBox(children=(Button(button_style='success', description='Accept', icon='check', sty…

Using existing PDB file for A5D8W1 at ../../data/PDB/A5D8W1.pdb
