In [1]:
import pandas as pd
import plotly.graph_objects as go
import plotly.express as px
from ipywidgets import interact, Dropdown, VBox, HBox, HTML, Text, Button, Output, interactive
import numpy as np

# Load and prepare the data
def load_data(filename='db.csv'):
    """Read the compound table from a CSV file.

    Column headers are stripped of surrounding whitespace and every row
    lacking a D, P or H value is discarded. A short summary of the result
    is printed.

    Returns the cleaned DataFrame, or None when the file is missing or
    any other error occurs while loading (the error is printed).
    """
    try:
        table = pd.read_csv(filename)

        # Headers may carry stray spaces, e.g. " D " instead of "D".
        table.columns = table.columns.str.strip()

        # Only rows with all three coordinates can be plotted.
        table = table.dropna(subset=['D', 'P', 'H'])

        print(f"Loaded {len(table)} compounds with D, P, H values")
        print(f"Available columns: {list(table.columns)}")
    except FileNotFoundError:
        print(f"Error: Could not find file '{filename}'")
        print("Make sure the CSV file is in the same directory as your notebook")
        return None
    except Exception as e:
        print(f"Error loading data: {e}")
        return None
    else:
        return table

# Get numeric columns for color coding
def get_numeric_columns(df):
    """Return the sorted names of columns usable for numeric color coding.

    A column qualifies when it is not one of the coordinate/text columns
    listed below and it holds at least one value that is numeric, either
    natively or after ``pd.to_numeric(..., errors='coerce')`` for
    object/string columns. Columns that are entirely empty, or whose text
    cannot be parsed as numbers at all, are left out.

    Parameters:
    df (DataFrame): The compound table.

    Returns:
    list: Sorted column names.
    """
    # Coordinate columns and known free-text columns are never color variables.
    exclude_cols = {'D', 'P', 'H', 'Name', 'CAS', 'SMILES', 'alias', 'synonyms', 'Note'}

    numeric_cols = []
    for col in df.columns:
        if col in exclude_cols:
            continue

        series = df[col]
        if pd.api.types.is_numeric_dtype(series):
            has_values = series.notna().any()
        elif pd.api.types.is_object_dtype(series) or pd.api.types.is_string_dtype(series):
            # Judge the column by what survives numeric conversion, not by
            # the raw values; otherwise any text column would qualify.
            try:
                converted = pd.to_numeric(series, errors='coerce')
            except (TypeError, ValueError):
                continue
            has_values = converted.notna().any()
        else:
            # Other dtypes (e.g. datetimes) are not offered for color coding.
            continue

        if has_values:
            numeric_cols.append(col)

    return sorted(numeric_cols)

# Search function to find matching compounds
def search_compounds(df, search_terms):
    """
    Search for compounds based on name, CAS, or synonyms

    ``search_terms`` is a comma-separated string. Each term is matched as a
    case-insensitive literal substring against the 'Name', 'CAS' and
    'synonyms' columns (whichever of them exist). A compound matches when
    any term is found in any of those columns.

    Returns a list of index labels of matching compounds, in DataFrame
    order. An empty or missing query returns an empty list.
    """
    if not search_terms or not search_terms.strip():
        return []

    # Split search terms by comma and clean them
    terms = [term.strip().lower() for term in search_terms.split(',') if term.strip()]
    if not terms:
        return []

    matched = np.zeros(len(df), dtype=bool)

    for col in ('Name', 'CAS', 'synonyms'):
        if col not in df.columns:
            continue

        # Missing values become empty strings so they can never match;
        # str(NaN) would be 'nan' and match queries such as "na" or "an".
        present = df[col].notna()
        haystack = df[col].astype(str).where(present, '').str.lower()

        for term in terms:
            # regex=False: terms are literal text (CAS numbers, names with
            # parentheses, ...), not patterns.
            matched |= haystack.str.contains(term, regex=False).to_numpy(dtype=bool)

    return df.index[matched].tolist()

# Create the 3D plot with search highlighting (highlight while showing all)
def create_3d_plot_with_search(df, color_by=None, search_terms=""):
    """Create 3D scatter plot highlighting searched compounds while showing all

    Every row of ``df`` is plotted at (D, P, H). Compounds returned by
    ``search_compounds(df, search_terms)`` are drawn as a separate trace of
    large diamonds; the remaining compounds are drawn smaller and more
    transparent. When ``color_by`` names a column of ``df``, markers are
    colored by that column (coerced to numeric) on a Viridis scale with a
    colorbar, and all traces share one color range so that marker colors
    agree with the colorbar.

    Parameters:
    df (DataFrame): Compound table with 'D', 'P' and 'H' columns.
    color_by (str or None): Column used for color coding; ignored when it
        is falsy or not a column of ``df``.
    search_terms (str): Comma-separated search query.

    Returns:
    plotly.graph_objects.Figure: The assembled figure.

    Prints a notice when a non-empty query matches nothing.
    """
    # Find matching compounds
    highlighted_indices = search_compounds(df, search_terms)
    has_query = bool(search_terms.strip())

    if has_query and not highlighted_indices:
        # No matches found - show message but still show all data
        print(f"❌ No compounds found matching '{search_terms}' - showing all compounds")

    use_color = bool(color_by) and color_by in df.columns

    # One boolean per row; replaces repeated `idx in list` scans.
    highlight_mask = np.asarray(df.index.isin(highlighted_indices), dtype=bool)

    # Prepare hover text (one entry per row, in row order)
    hover_color_col = color_by if use_color else None
    hover_text = [
        _hover_text_for_row(row, hover_color_col, is_hit)
        for (_, row), is_hit in zip(df.iterrows(), highlight_mask)
    ]

    if use_color:
        # Convert to numeric if possible; unparseable values become NaN
        color_values = pd.to_numeric(df[color_by], errors='coerce')
        color_range = _shared_color_range(color_values)
    else:
        color_values = None
        color_range = (None, None)

    # Decide which traces to draw: (role, row mask, legend name, showlegend)
    if highlighted_indices:
        groups = []
        if not highlight_mask.all():
            groups.append(('background', ~highlight_mask, 'Other Compounds', False))
        groups.append((
            'highlight',
            highlight_mask,
            f'Highlighted ({len(highlighted_indices)})',
            True,
        ))
    else:
        groups = [('all', np.ones(len(df), dtype=bool), 'All Compounds', False)]

    fig = go.Figure()

    # The colorbar is attached to the first trace drawn, so it is still
    # present when every compound is highlighted.
    colorbar_pending = use_color
    for role, mask, trace_name, show_in_legend in groups:
        trace_colors = color_values[mask] if use_color else None
        marker = _marker_style(role, trace_colors, color_range)
        if colorbar_pending:
            marker['colorbar'] = _colorbar_style(color_by)
            colorbar_pending = False

        fig.add_trace(go.Scatter3d(
            x=df.loc[mask, 'D'],
            y=df.loc[mask, 'P'],
            z=df.loc[mask, 'H'],
            mode='markers',
            marker=marker,
            text=[text for text, keep in zip(hover_text, mask) if keep],
            hovertemplate='%{text}<extra></extra>',
            name=trace_name,
            showlegend=show_in_legend
        ))

    # Update layout
    if has_query and highlighted_indices:
        title_text = f'3D Plot: {len(highlighted_indices)} compounds highlighted for "{search_terms}" (showing all {len(df)} compounds)'
    else:
        title_text = f'3D Scatter Plot: Hansen Solubility Parameters ({len(df)} compounds)'

    fig.update_layout(
        title={
            'text': title_text,
            'x': 0.5,
            'font': {'size': 18}
        },
        scene=dict(
            xaxis_title='D (Dispersion)',
            yaxis_title='P (Polarity)',
            zaxis_title='H (Hydrogen Bonding)',
            camera=dict(eye=dict(x=1.5, y=1.5, z=1.5))
        ),
        width=900,
        height=600,
        margin=dict(l=0, r=0, b=0, t=50),
        legend=dict(
            yanchor="top",
            y=0.99,
            xanchor="left",
            x=0.01
        )
    )

    return fig


def _hover_text_for_row(row, color_by, is_highlighted):
    """Build the HTML hover label for one compound row."""
    name = row.get('Name')
    text = f"<b>{name if pd.notna(name) else 'Unknown'}</b><br>"
    text += f"D: {row['D']}<br>"
    text += f"P: {row['P']}<br>"
    text += f"H: {row['H']}<br>"

    if color_by:
        color_val = row.get(color_by)
        if pd.notna(color_val):
            text += f"{color_by}: {color_val}<br>"

    cas = row.get('CAS')
    if pd.notna(cas):
        text += f"CAS: {cas}<br>"

    synonyms = row.get('synonyms')
    if pd.notna(synonyms):
        synonyms_text = str(synonyms)
        # Long synonym lists are truncated to keep the tooltip readable.
        text += f"Synonyms: {synonyms_text[:100]}{'...' if len(synonyms_text) > 100 else ''}"

    # Add highlight indicator for searched compounds
    if is_highlighted:
        text = f"🔍 HIGHLIGHTED: {text}"

    return text


def _shared_color_range(values):
    """Return (min, max) of the non-null values, or (None, None) if there are none."""
    finite = values.dropna()
    if finite.empty:
        return (None, None)
    return (float(finite.min()), float(finite.max()))


def _colorbar_style(title):
    """Return the colorbar settings, using the nested ``title`` form."""
    # `titleside` / `titlefont` are deprecated aliases that newer plotly
    # releases reject; the nested title dict is accepted by old and new.
    return dict(
        title=dict(text=title, side="right", font=dict(size=14)),
        thickness=20,
        len=0.8
    )


def _marker_style(role, colors=None, color_range=(None, None)):
    """Return the marker dict for a trace.

    ``role`` is 'all', 'background' or 'highlight'. ``colors`` is the
    per-point numeric color data, or None for flat coloring.
    """
    colored = colors is not None

    if role == 'highlight':
        marker = dict(
            size=12,
            color='red',
            opacity=1.0,
            line=dict(color='red' if colored else 'darkred', width=4),
            symbol='diamond'
        )
    elif role == 'background':
        marker = dict(
            size=5,
            color='lightblue',
            # Background points are more transparent so highlights stand out.
            opacity=0.5 if colored else 0.4,
            line=dict(color='rgba(50, 50, 50, 0.2)', width=0.5)
        )
    else:
        marker = dict(
            size=6,
            color='#1f77b4',
            opacity=0.8,
            line=dict(color='rgba(50, 50, 50, 0.3)', width=0.5)
        )

    if colored:
        # Pin cmin/cmax so every trace maps values to colors identically.
        marker.update(
            color=colors,
            colorscale='Viridis',
            cmin=color_range[0],
            cmax=color_range[1]
        )

    return marker

# Simple interactive function that works
def interactive_plot_with_search(df, numeric_columns):
    """Create interactive plot with dropdown widget and search functionality

    Wires a "Color by" dropdown and a "Search" text box to a callback that
    rebuilds and shows the 3D figure, then prints a short summary of the
    search matches.

    Parameters:
    df (DataFrame): Compound table passed through to the plot builder.
    numeric_columns (list): Column names offered in the dropdown.
    """

    def update_plot(color_variable='None', search_terms=''):
        """Redraw the figure for the current widget values."""
        color_by = None if color_variable == 'None' else color_variable
        fig = create_3d_plot_with_search(df, color_by, search_terms)
        fig.show()

        # Show search results info
        if not search_terms.strip():
            return

        matches = search_compounds(df, search_terms)
        if not matches:
            # create_3d_plot_with_search already printed the no-match
            # notice; repeating it here would show the message twice.
            return

        print(f"✅ Found {len(matches)} compounds matching '{search_terms}'")

        # Show first few matches; tolerate a missing Name column and
        # non-string names instead of raising inside the widget callback.
        if 'Name' not in df.columns:
            return
        match_names = df.loc[matches[:5], 'Name'].dropna().astype(str).tolist()
        if match_names:
            print(f"Examples: {', '.join(match_names)}")
            if len(matches) > 5:
                print(f"... and {len(matches) - 5} more")

    # Create dropdown options
    dropdown_options = ['None'] + list(numeric_columns)
    # Prefer 'mw' as the initial color variable, else the first available.
    if 'mw' in dropdown_options:
        default_color = 'mw'
    elif len(dropdown_options) > 1:
        default_color = dropdown_options[1]
    else:
        default_color = 'None'

    # Use interact with fixed parameters
    interact(update_plot,
             color_variable=Dropdown(
                 options=dropdown_options,
                 value=default_color,
                 description='Color by:'
             ),
             search_terms=Text(
                 value='',
                 placeholder='Enter compound names, CAS numbers, or keywords (comma-separated)',
                 description='Search:',
                 style={'description_width': 'initial'}
             ))

# Enhanced main execution function
def create_interactive_3d_visualization_with_search(csv_filename='db.csv'):
    """
    Load the compound table and launch the interactive 3D plot with search

    Parameters:
    csv_filename (str): Path to your CSV file

    Returns the loaded DataFrame, or None when loading failed.
    """
    print("Loading data...")
    compounds = load_data(csv_filename)

    # Nothing to plot if the file could not be read.
    if compounds is None:
        return None

    print("\nGetting numeric columns for color coding...")
    color_choices = get_numeric_columns(compounds)
    print(f"Available color variables: {color_choices}")

    # Announce what the widget is about to do.
    for banner in (
        "\nCreating interactive plot with search functionality...",
        "🔍 SEARCH BEHAVIOR: When you enter search terms, matching compounds are highlighted while showing ALL compounds!",
        "💡 Highlighted compounds appear as larger red diamonds",
    ):
        print(banner)

    interactive_plot_with_search(compounds, color_choices)

    return compounds

# Function to show example searches
def show_search_examples(df):
    """Show some example searches based on the actual data

    Renders an HTML panel of suggested queries. The first two non-null
    values of the 'Name' column are used as concrete examples; generic
    names are substituted when fewer than two are available (including
    when there is no 'Name' column). Does nothing when ``df`` is None.
    """
    if df is None:
        return

    from html import escape

    # Get some example compound names, escaped because they are
    # interpolated into HTML below.
    if 'Name' in df.columns:
        sample_names = [escape(str(name)) for name in df['Name'].dropna().head(5)]
    else:
        sample_names = []

    # Fall back to generic names so indexing never fails on sparse data.
    first_name = sample_names[0] if sample_names else 'acetone'
    second_name = sample_names[1] if len(sample_names) > 1 else 'ethanol'

    examples_html = f"""
    <div style='background-color: #e8f5e8; padding: 15px; border-radius: 5px; border-left: 4px solid #28a745; margin-top: 10px;'>
        <b>🔍 Search Examples (try these):</b><br>
        • <code>{first_name}</code> - Highlight this compound<br>
        • <code>alcohol</code> - Highlight compounds with "alcohol" in name/synonyms<br>
        • <code>acid</code> - Highlight all acids<br>
        • <code>{first_name}, {second_name}</code> - Highlight multiple specific compounds<br>
        • <code>benzene, toluene</code> - Highlight aromatic compounds<br>
        • <i>Leave empty to show all {len(df)} compounds without highlighting</i><br>
    </div>
    """

    display(HTML(examples_html))

# Instructions for use
def show_instructions():
    """Render the static usage guide as an HTML panel in the notebook."""
    # Static markup: covers the two controls, search behavior and features.
    help_panel = """
    <div style='background-color: #f0f8ff; padding: 15px; border-radius: 5px; border-left: 4px solid #2196F3;'>
        <h3>🎯 How to Use:</h3>
        <b>Color by:</b> Select which variable colors the data points<br>
        <b>Search:</b> Enter compound names, CAS numbers, or keywords<br><br>
        
        <h3>🔍 Search Behavior:</h3>
        • <b>With search terms:</b> Highlights matching compounds as red diamonds while showing all data<br>
        • <b>Empty search:</b> Shows all compounds normally<br>
        • <b>Multiple terms:</b> Separate with commas<br>
        • <b>Partial matching:</b> "alcohol" finds "methanol", "ethanol", etc.<br><br>
        
        <h3>✨ Features:</h3>
        • 3D plotting of D, P, H values<br>
        • Interactive color coding by any numeric column<br>
        • Search in names, CAS numbers, and synonyms<br>
        • Hover information with compound details<br>
        • 3D navigation (rotate, zoom, pan)<br>
    </div>
    """

    panel_widget = HTML(help_panel)
    display(panel_widget)

# Run the enhanced application
# Entry point of the notebook cell: load the data and build the widget.
print("🚀 Starting 3D Visualization with Search...")
df = create_interactive_3d_visualization_with_search(csv_filename='db.csv')

# The help panels are only rendered when loading succeeded (df is not None).
if df is not None:
    show_instructions()
    show_search_examples(df)

🚀 Starting 3D Visualization with Search...
Loading data...
Loaded 257 compounds with D, P, H values
Available columns: ['No.', 'CAS', 'Name', 'D', 'P', 'H', 'Mole_vol', 'ims_idx', 'bp', 'mw', 'viscosity(mPa.s@25)', 'vis_temp', 'heat_of_vap', 'hov_temp', 'SMILES', 'alias', 'synonyms', 'Note', 'DN']

Getting numeric columns for color coding...
Available color variables: ['DN', 'Mole_vol', 'No.', 'bp', 'heat_of_vap', 'hov_temp', 'ims_idx', 'mw', 'vis_temp', 'viscosity(mPa.s@25)']

Creating interactive plot with search functionality...
🔍 SEARCH BEHAVIOR: When you enter search terms, matching compounds are highlighted while showing ALL compounds!
💡 Highlighted compounds appear as larger red diamonds


interactive(children=(Dropdown(description='Color by:', index=8, options=('None', 'DN', 'Mole_vol', 'No.', 'bp…

HTML(value='\n    <div style=\'background-color: #f0f8ff; padding: 15px; border-radius: 5px; border-left: 4px …

HTML(value='\n    <div style=\'background-color: #e8f5e8; padding: 15px; border-radius: 5px; border-left: 4px …