In [None]:
!pip install dash_bootstrap_templates
!pip install jupyter-dash
!pip install dash
!pip install plotly
!pip install pandas

In [None]:
from dash import Dash, html, dcc, Input, Output, State, clientside_callback, callback, dash_table
from dash_bootstrap_templates import load_figure_template
import dash_bootstrap_components as dbc
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd

In [None]:
# Load the light and dark "minty" figure templates used by the charts
load_figure_template(["minty", "minty_dark"])

# Read the dataset. To use your own CSV, change the path below; the file must
# provide the same columns as the dataset this notebook was written for.
df = pd.read_csv('dataset.csv')

# Distinct domain names (missing values removed) for the domain search dropdown
domain_names = (
    df["Name"]
    .dropna()
    .unique()
    .tolist()
)

In [None]:
# Create the Dash application.
# suppress_callback_exceptions is enabled because some callback targets (e.g. the
# "dropdown-compare" dropdown) are only inserted into the page by another callback
# and therefore do not exist in the initial layout.
app = Dash(
    __name__,
    suppress_callback_exceptions=True,
    external_stylesheets=[dbc.themes.MINTY, dbc.icons.FONT_AWESOME],
)

In [None]:
# Sidebar: one vertical navigation pill per dashboard page, built from
# (link text, URL path) pairs so every link shares the same settings.
sidebar = dbc.Nav(
    [
        dbc.NavLink(link_text, href=link_path, active="exact", className="nav-link")
        for link_text, link_path in (
            ("Overview", "/overview"),
            ("Character Analysis", "/char-analysis"),
            ("Length and Ratio Metrics", "/length-ratio"),
            ("Entropy and Complexity", "/entropy-complexity"),
            ("Domain Name Analysis", "/domain-analysis"),
            ("Domain Attribute Viewer", "/domain-attributes"),
        )
    ],
    className="bg-light p-3",
    pills=True,
    vertical=True,
)

# Multi-select dropdown used to restrict the charts to one or more families.
# Missing values are dropped (as is done for the domain names) so that a NaN
# entry never ends up as a dropdown option.
family_filter = dcc.Dropdown(
    id='family-filter',
    options=[
        {'label': family, 'value': family}
        for family in df['Family'].dropna().unique()
    ],
    placeholder="Filter by Family",
    style={'width': '100%'},
    multi=True
)

# Theme toggle: a switch flanked by a moon icon (off position) and a sun icon
# (on position). Both labels point at the switch via html_for.
color_mode_switch = html.Span(
    children=[
        dbc.Label(html_for="switch", className="fa fa-moon"),
        dbc.Switch(
            id="switch",
            value=False,
            persistence=True,
            className="d-inline-block ms-1",
        ),
        dbc.Label(html_for="switch", className="fa fa-sun"),
    ]
)

# Page skeleton: a narrow sidebar column next to a wide content column, plus a
# hidden element that only serves as the output of the theme-switch callback.
app.layout = dbc.Container(
    fluid=True,
    children=[
        dbc.Row(
            className="vh-100",
            children=[
                # Left: navigation
                dbc.Col(sidebar, width=2),
                # Right: header, theme toggle, URL tracker, family filter and page body
                dbc.Col(
                    width=10,
                    children=[
                        html.Div(["Dataset Dashboard"], className="bg-primary text-white h3 p-2"),
                        color_mode_switch,
                        dcc.Location(id='url', refresh=False),
                        # The filter sits in its own container so it can be shown/hidden as a unit
                        html.Div(id="family-filter-container", children=family_filter),
                        dbc.Container(id="page-content", className="mt-4"),
                    ],
                ),
            ],
        ),
        html.Div(id="dummy-output", style={"display": "none"}),
    ],
)


In [None]:
# Page body for the "Domain Attribute Viewer"
def domain_attribute_viewer_layout():
    """Build the component tree of the domain attribute viewer page.

    Returns:
        An ``html.Div`` holding the page heading, a multi-select dropdown with id
        ``"dropdown-compare"`` (created with no options; they are supplied by a
        callback) and an empty ``"output-compare"`` container for the results.
    """
    domain_picker = dcc.Dropdown(
        id="dropdown-compare",
        placeholder="Search and select domains",
        multi=True,   # one or several domains may be chosen
        options=[],   # filled in dynamically
        style={"width": "100%"},
    )
    results_area = html.Div(id="output-compare", className="mt-4")

    picker_section = html.Div([
        html.Label("Select Domains"),
        domain_picker,
        results_area,
    ])
    heading = html.H3("Domain Attribute Viewer")
    return html.Div([heading, picker_section])

# Dynamically load the domain options for the search dropdown
@app.callback(
    Output("dropdown-compare", "options"),
    [Input("dropdown-compare", "search_value")],
    [State("dropdown-compare", "value")],
)
def load_all_domains(search_value, selected_values=None):
    """Return the dropdown options matching the current search text.

    Args:
        search_value: Text typed into the dropdown; empty/None when nothing is typed.
        selected_values: Domains currently selected in the dropdown (list, single
            string or None). They are always kept in the returned options, because
            a multi-select dropdown whose options are rebuilt on every keystroke
            would otherwise lose its selected entries from the option list as soon
            as the search text changes or is cleared.

    Returns:
        A list of ``{"label": name, "value": name}`` dicts. Without a search text
        only the first 100 domains are offered to avoid overloading the UI for
        large datasets; with a search text every domain containing it
        (case-insensitive) is offered.
    """
    if not search_value:
        # Slicing creates a new list, so domain_names itself is never modified
        candidates = domain_names[:100]
    else:
        needle = search_value.lower()  # hoisted: constant for the whole scan
        candidates = [name for name in domain_names if needle in str(name).lower()]

    # Normalise the current selection to a list
    if not selected_values:
        selected = []
    elif isinstance(selected_values, str):
        selected = [selected_values]
    else:
        selected = list(selected_values)

    # Append selected domains that the search/limit would otherwise drop
    already_listed = set(candidates)
    candidates = candidates + [name for name in selected if name not in already_listed]

    return [{"label": name, "value": name} for name in candidates]

# Handle single-domain details or a two-domain comparison
@app.callback(
    Output("output-compare", "children"),
    [Input("dropdown-compare", "value")]
)
def display_domains(selected_domains):
    """Render the details of one domain or a side-by-side comparison of two.

    Args:
        selected_domains: List of domain names chosen in the dropdown (or None).

    Returns:
        * no selection: a red hint paragraph;
        * one domain: a heading plus a bullet list ``column: value`` taken from the
          first row of ``df`` whose ``Name`` equals the domain;
        * two domains: a ``DataTable`` with one row per attribute, one column per
          domain and a ``Difference`` column ("True"/"False");
        * more than two domains, or missing/ambiguous data: a red error paragraph.
    """
    if not selected_domains:
        return html.P("No domain selected. Please choose one or two domains.", style={"color": "red"})

    if len(selected_domains) == 1:
        # Single domain selected: display its details
        domain = selected_domains[0]
        filtered_df = df[df["Name"] == domain]
        if filtered_df.empty:
            return html.P("No data found for the selected domain.", style={"color": "red"})

        return html.Div([
            html.H4(f"Details for {domain}"),
            html.Ul([html.Li(f"{col}: {filtered_df[col].values[0]}") for col in filtered_df.columns]),
        ])

    if len(selected_domains) > 2:
        return html.P("Please select at most two domains to compare.", style={"color": "red"})

    # Two domains selected: display comparison table.
    # Each domain is looked up separately, in selection order. A single
    # `isin` filter would return the rows in DataFrame order, so labelling the
    # transposed columns with the selection order could swap the two domains.
    first, second = selected_domains
    per_domain_rows = [df[df["Name"] == name] for name in selected_domains]
    if any(len(rows) != 1 for rows in per_domain_rows):
        return html.P("Could not find data for one or both domains.", style={"color": "red"})

    # Attributes become rows, the two domains become columns
    comparison_table = pd.concat(per_domain_rows).transpose()
    comparison_table.columns = [first, second]

    left, right = comparison_table[first], comparison_table[second]
    # NaN != NaN evaluates to True, so two missing values are explicitly treated as equal
    differs = (left != right) & ~(left.isna() & right.isna())
    # Store the flag as text: the highlight rule below matches the string "True",
    # which a raw boolean cell would not reliably satisfy.
    comparison_table["Difference"] = differs.astype(str)

    # Format the data for the DataTable
    comparison_data = comparison_table.reset_index().rename(columns={"index": "Attribute"}).to_dict("records")

    # Highlight differences in the DataTable
    style_data_conditional = [
        {
            "if": {"filter_query": f'{{Difference}} contains "True"', "column_id": "Difference"},
            "backgroundColor": "tomato",
            "color": "white",
        }
    ]

    # Return the comparison table
    return dash_table.DataTable(
        columns=[
            {"name": "Attribute", "id": "Attribute"},
            {"name": first, "id": first},
            {"name": second, "id": second},
            {"name": "Difference", "id": "Difference"},
        ],
        data=comparison_data,
        style_data_conditional=style_data_conditional,
        style_table={"overflowX": "auto"},
        style_cell={"textAlign": "left"},
    )

# The family filter has no use on the domain attribute page, so hide it there
@app.callback(
    Output("family-filter-container", "style"),
    [Input("url", "pathname")]
)
def toggle_family_filter(pathname):
    """Return the CSS style that shows or hides the family filter container.

    Args:
        pathname: Current URL path reported by the ``url`` location component.

    Returns:
        ``{"display": "none"}`` on ``/domain-attributes``, otherwise
        ``{"display": "block"}``.
    """
    on_domain_page = pathname == "/domain-attributes"
    display_mode = "none" if on_domain_page else "block"
    return {"display": display_mode}


In [None]:
def _distribution_figure(values, axis_label, title, template):
    """Build a histogram of ``values`` with vertical Mean / Median / STD marker lines.

    Args:
        values: pandas Series with the numeric column to plot.
        axis_label: Name of the histogram trace and title of the x-axis.
        title: Figure title.
        template: Name of the Plotly template to apply.

    Returns:
        The assembled ``go.Figure``.
    """
    fig = go.Figure()
    fig.add_trace(go.Histogram(x=values, name=axis_label))

    # Each statistic is drawn as a vertical line from 0 up to the number of
    # non-null values.
    # NOTE(review): the "STD" line is placed at x = std, i.e. the standard deviation
    # is plotted as a position on the value axis - confirm this is intended
    # (mean +/- std may be what was meant).
    line_extent = [0, values.count()]
    statistics = (
        ("Mean", values.mean()),
        ("Median", values.median()),
        ("STD", values.std()),
    )
    for stat_name, stat_value in statistics:
        fig.add_trace(go.Scatter(x=[stat_value] * 2, y=line_extent, mode='lines', name=stat_name))

    fig.update_layout(title=title, xaxis_title=axis_label, yaxis_title="Count", template=template)
    return fig


@app.callback(
    Output("page-content", "children"),
    [
        Input("switch", "value"),
        Input("url", "pathname"),
        Input("family-filter", "value"),
    ]
)
def display_page(switch_on, pathname, selected_family):
    """Render the content of the page selected in the sidebar.

    Args:
        switch_on: State of the theme switch; truthy selects the "minty" template,
            otherwise "minty_dark" is used.
        pathname: Current URL path; decides which page is rendered.
        selected_family: List of families chosen in the family filter, or
            None/empty for no filtering.

    Returns:
        A ``dcc.Graph`` for the chart pages, the domain attribute viewer layout for
        ``/domain-attributes``, and a histogram of the ``Label`` column for any
        other path.
    """
    template = "minty" if switch_on else "minty_dark"

    # Filter data based on selected family
    if selected_family:
        filtered_df = df[df['Family'].isin(selected_family)]
    else:
        filtered_df = df

    if pathname == "/overview":
        # Share of each family, in percent of the (filtered) rows
        family_counts = filtered_df['Family'].value_counts()
        family_percentages = (family_counts / family_counts.sum()) * 100

        fig = px.bar(
            family_percentages,
            x=family_percentages.index,  # Categories (Family names)
            y=family_percentages.values,  # Percentage for each category
            labels={"x": "Family", "y": "Percentage (%)"},
            title="Family Distribution (Percentage)",
        )
        fig.update_layout(template=template, yaxis_tickformat=".2f")  # Format y-axis for percentages
        return dcc.Graph(figure=fig)

    if pathname == "/char-analysis":
        # Columns named "Freq_" + one character hold the per-character frequencies
        letters = [col for col in df.columns if col.startswith('Freq_') and len(col) == 6]
        # Presence/absence (1/0) per domain, summed per character. The vectorised
        # comparison replaces the deprecated DataFrame.applymap.
        letter_totals = (filtered_df[letters] > 0).astype(int).sum()

        fig = px.histogram(
            x=letter_totals.index.str[-1],
            y=letter_totals.values,
            title="Character Presence Across Domain Names",
            labels={'x': 'Letter', 'y': 'Presence Count'}
        )
        fig.update_layout(template=template)
        return dcc.Graph(figure=fig)

    if pathname == "/length-ratio":
        fig = _distribution_figure(
            filtered_df['Length'], "Length", "Domain Name Length Distribution", template
        )
        return dcc.Graph(figure=fig)

    if pathname == "/entropy-complexity":
        fig = _distribution_figure(
            filtered_df['Entropy'], "Entropy", "Domain Name Entropy Distribution", template
        )
        return dcc.Graph(figure=fig)

    if pathname == "/domain-analysis":
        # 2D histogram of the longest letter sequence against the entropy
        heatmap_fig = go.Figure()
        heatmap_fig.add_trace(go.Histogram2d(
            x=filtered_df['Max_Let_Seq'],
            y=filtered_df['Entropy'],
            colorscale='Viridis',  # Choose a colormap
            nbinsx=50,  # Adjust the number of bins for resolution
            nbinsy=50,
            name='Density'
        ))
        heatmap_fig.update_layout(
            title="Max Letter Sequence vs Entropy (Density Heatmap)",
            xaxis_title="Max Letter Sequence",
            yaxis_title="Entropy",
            template=template
        )
        return dcc.Graph(figure=heatmap_fig)

    if pathname == "/domain-attributes":
        return domain_attribute_viewer_layout()

    # Landing page / unknown path. The family filter is visible here as well, so
    # the chart uses the filtered data like every other page.
    return dcc.Graph(figure=px.histogram(filtered_df, x="Label", template=template))


In [None]:
# Browser-side theme switch: whenever the "switch" value changes, the JavaScript
# below sets the `data-bs-theme` attribute of the document's root element to
# 'light' (switch on) or 'dark' (switch off). The function returns null, which is
# written to the hidden "dummy-output" element; that output exists only because
# the callback needs an output to be registered.
app.clientside_callback(
    """
    function(switchOn) {
        if(switchOn) {
            document.documentElement.setAttribute('data-bs-theme', 'light');
        } else {
            document.documentElement.setAttribute('data-bs-theme', 'dark');
        }
        return null;  // No update required
    }
    """,
    Output("dummy-output", "children"),  # Use a dummy output to avoid conflicts
    Input("switch", "value")
)

# Run the app
if __name__ == "__main__":
    # `Dash.run_server` has been deprecated in favour of `Dash.run` and is not
    # available in recent Dash releases. Prefer `run` and only fall back to
    # `run_server` on old versions that do not provide it yet (the `or`
    # short-circuits, so `run_server` is never touched when `run` exists).
    run_app = getattr(app, "run", None) or app.run_server
    try:
        # The reloader is disabled because it restarts the process, which does not
        # work from inside a notebook kernel.
        run_app(debug=True, port=8050, use_reloader=False, threaded=True)
    except SystemExit as e:
        # Report the exit (e.g. the port is already in use) instead of letting it
        # propagate as an exception in the notebook
        print(f"SystemExit occurred: {e}")


In [None]:
# Debugging aid: IPython magic that prints the traceback of the most recent
# exception raised in this kernel. Not needed for the dashboard itself.
%tb