# Interactive Data Visualizer

This notebook creates an interactive data visualization tool using Plotly and ipywidgets. Run this with Voila to create a user-friendly GUI for exploring the dataset.

In [1]:
# Import required libraries
import pandas as pd
import numpy as np
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import ipywidgets as widgets
from IPython.display import display, clear_output

In [2]:
# Load the data
df = pd.read_csv('db.csv')

# Identify numerical columns.
# 'number' selects every numeric dtype rather than only float64/int64, so
# columns stored as e.g. int32 or float32 are offered in the dropdowns too.
numerical_columns = df.select_dtypes(include='number').columns.tolist()

# Add a 'None' option for the color parameter ('None' is the string sentinel
# the plotting callback compares against to mean "no color mapping")
color_options = ['None'] + numerical_columns

print(f"Dataset loaded with {len(df)} rows and {len(df.columns)} columns")
print(f"Numerical columns available: {numerical_columns}")

Dataset loaded with 257 rows and 19 columns
Numerical columns available: ['No.', 'D', 'P', 'H', 'bp', 'mw', 'viscosity(mPa.s@25)', 'vis_temp', 'heat_of_vap', 'hov_temp', 'DN']


In [3]:
# Build the selector widgets and the output area that hosts the plot

# Default selections: X starts on the first numerical column and Y on the
# second. Y falls back to the first column when only one exists, and both
# are None when the dataset has no numerical columns at all.
default_x = numerical_columns[0] if numerical_columns else None
default_y = numerical_columns[1] if len(numerical_columns) > 1 else default_x

x_dropdown = widgets.Dropdown(
    options=numerical_columns,
    value=default_x,
    description='X-axis:',
    style=dict(description_width='initial'),
)

y_dropdown = widgets.Dropdown(
    options=numerical_columns,
    value=default_y,
    description='Y-axis:',
    style=dict(description_width='initial'),
)

# Coloring is optional, so this selector starts on the 'None' sentinel
color_dropdown = widgets.Dropdown(
    options=color_options,
    value='None',
    description='Color by:',
    style=dict(description_width='initial'),
)

# Container the plot and its summary text are rendered into
output = widgets.Output()

In [4]:
# Function to update the plot
def update_plot(*args):
    """Redraw the scatter plot inside `output` from the current dropdown values.

    Used both as the observer callback for the three dropdowns and as a plain
    call for the initial render, so any positional arguments are accepted and
    ignored.

    Reads the notebook-level `df`, `x_dropdown`, `y_dropdown`,
    `color_dropdown` and `output`. Returns None; the figure and a short
    row-count summary are written to `output`.
    """
    with output:
        # wait=True defers clearing until the replacement output is available
        clear_output(wait=True)

        # Get selected values ('None' is the string sentinel for "no coloring")
        x_col = x_dropdown.value
        y_col = y_dropdown.value
        color_col = color_dropdown.value if color_dropdown.value != 'None' else None

        # The axis dropdowns are created with value=None when the dataset has
        # no numerical columns; stop here instead of failing inside dropna().
        if x_col is None or y_col is None:
            print("No numerical columns available to plot")
            return

        # Filter out rows with missing values in the selected columns
        columns_to_check = [x_col, y_col]
        if color_col:
            columns_to_check.append(color_col)
        filtered_df = df.dropna(subset=columns_to_check)

        # Hover label: the 'Name' column when the dataset has one, otherwise
        # the row index so the plot still renders for other datasets.
        if 'Name' in filtered_df.columns:
            hover_text = filtered_df['Name']
        else:
            hover_text = filtered_df.index.astype(str)

        # Marker settings and hover lines shared by both modes; the color
        # mapping and its hover line are added only when a column is selected.
        marker = dict(size=10)
        hover_lines = [
            '<b>%{text}</b>',
            f'{x_col}: %{{x}}',
            f'{y_col}: %{{y}}',
        ]
        if color_col:
            marker.update(
                color=filtered_df[color_col],
                colorscale='RdYlBu',  # Red-Yellow-Blue color scale
                showscale=True,
                colorbar=dict(title=color_col),
            )
            hover_lines.append(f'{color_col}: %{{marker.color}}')

        # <extra></extra> suppresses the secondary hover box with the trace name
        hovertemplate = '<br>'.join(hover_lines) + '<extra></extra>'

        # Create the figure
        fig = go.Figure()
        fig.add_trace(
            go.Scatter(
                x=filtered_df[x_col],
                y=filtered_df[y_col],
                mode='markers',
                marker=marker,
                text=hover_text,
                hovertemplate=hovertemplate,
            )
        )

        # Update layout
        fig.update_layout(
            title=f'Scatter Plot of {y_col} vs {x_col}' + (f' colored by {color_col}' if color_col else ''),
            xaxis_title=x_col,
            yaxis_title=y_col,
            height=600,
            width=800,
            template='plotly_white',
            hoverlabel=dict(bgcolor="white", font_size=12),
            margin=dict(l=50, r=50, t=50, b=50)
        )

        # Show the plot
        display(fig)

        # Show some statistics
        print(f"Displaying {len(filtered_df)} data points out of {len(df)} total records")
        if len(filtered_df) < len(df):
            print(f"{len(df) - len(filtered_df)} points were excluded due to missing values")

In [5]:
# Redraw the plot whenever any of the three selections changes
for selector in (x_dropdown, y_dropdown, color_dropdown):
    selector.observe(update_plot, names='value')

# Lay the selectors out in a row, with the plot area underneath
controls = widgets.HBox(children=[x_dropdown, y_dropdown, color_dropdown])
ui = widgets.VBox(children=[controls, output])

# Render the interface
display(ui)

# Draw the first plot from the default selections
update_plot()

VBox(children=(HBox(children=(Dropdown(description='X-axis:', options=('No.', 'D', 'P', 'H', 'bp', 'mw', 'visc…

## How to use this visualization tool

1. Select the variable for the X-axis from the first dropdown
2. Select the variable for the Y-axis from the second dropdown
3. Optionally, select a third variable to use for coloring the data points
   - If "None" is selected, all points will have the same color
   - When a variable is selected, points will be colored using the RdYlBu (Red-Yellow-Blue) color scheme

Note: Data points with missing values in any of the selected variables will not be displayed in the plot.