# How to Make a Tutorial Visualization

In [None]:
import datamapplot
import pandas as pd
import numpy as np
import random
from sklearn.datasets import make_blobs

# Step 1: Generate synthetic clustered data using sklearn's make_blobs
n_samples, n_features, n_clusters = 1000, 2, 5

# random_state pins the blob layout so the tutorial plot looks the same on every run
X, y = make_blobs(
    n_samples=n_samples,
    n_features=n_features,
    centers=n_clusters,
    cluster_std=1.5,
    random_state=42,
)

# Step 2: Put the 2-D coordinates in a DataFrame and attach a readable topic name to every point.
# The column names mimic a t-SNE projection; the cluster id returned by make_blobs selects the topic.
topic_names = dict(enumerate(['Browsing', 'Social Media', 'Cats', 'Dogs', 'Fashion']))
viz_data = pd.DataFrame({'x_tsne': X[:, 0], 'y_tsne': X[:, 1]})
viz_data['topic'] = pd.Series(y).map(topic_names)

# Step 3: Add other metadata for apps, app categories, and participants.
# The values are drawn from a seeded NumPy Generator: the unseeded global NumPy RNG would give
# different metadata (and therefore a different plot) every time the cell is run, even though
# the blobs and the colours are already pinned with fixed seeds.
rng = np.random.default_rng(42)
viz_data['app'] = rng.choice(['Instagram', 'Twitter', 'Google Chrome', 'Gmail'], n_samples)
viz_data['app_category'] = rng.choice(['Social', 'Communication', 'Gaming', 'System UI'], n_samples)
viz_data['participant'] = rng.choice(['Participant_1', 'Participant_2', 'Participant_3'], n_samples)

# Step 4: Build a plain-text hover string for every point from its metadata columns
def _tutorial_hover_text(row):
    """Return the one-line hover summary for a single row of `viz_data`."""
    return f"App: {row['app']}, App Category: {row['app_category']}, Participant: {row['participant']}, Topic: {row['topic']}"

viz_data['hover_text'] = viz_data.apply(_tutorial_hover_text, axis=1)

# Step 5: Give every app a random hex colour, used for both the legend and the markers
apps = viz_data['app'].unique()
random.seed(42)  # fixed seed so each app keeps the same colour between runs
app_color_mapping = {}
for app in apps:
    # three draws per app, in red/green/blue order
    red, green, blue = (random.randint(0, 255) for _ in range(3))
    app_color_mapping[app] = f"#{red:02x}{green:02x}{blue:02x}"
viz_data['color'] = viz_data['app'].map(app_color_mapping)

# Step 6: Add one explicit label point per topic, placed at the centroid (mean x / mean y) of
# that topic's cluster. The rows are derived from `topic_names`, so the label points always
# match the configured clusters instead of a hand-written list of exactly five entries.
label_points = pd.DataFrame([
    {
        'x_tsne': X[y == cluster_id][:, 0].mean(),
        'y_tsne': X[y == cluster_id][:, 1].mean(),
        'app': '',                    # label points do not belong to any app ...
        'app_category': 'Label',      # ... they are tagged with the pseudo-category "Label"
        'participant': '',
        'topic': topic_name,
        'hover_text': f'Topic Label: {topic_name}',
        'color': '#000000',           # drawn in black so they stand out from the app colours
    }
    for cluster_id, topic_name in topic_names.items()
])

# Step 7: Combine label points with the original dataset (appended after the real points)
viz_data = pd.concat([viz_data, label_points], ignore_index=True)

# Step 8: Prepare the data for the interactive plot
xy_tsne = viz_data[['x_tsne', 'y_tsne']].to_numpy()  # one row of coordinates per point
hover_data = viz_data['hover_text'].to_numpy()       # not used by the plot call in this cell; kept for convenience

# Step 9: Define custom CSS for the legend
# .row / .box style one legend entry (a small coloured square followed by its name);
# #legend pins the legend to the top-right corner and lets it scroll when it is taller than the page.
custom_css = """
.row {
    display: flex;
    align-items: center;
}
.box {
    height: 10px;
    width: 10px;
    border-radius: 2px;
    margin-right: 5px;
    padding: 0px 0 1px 0;
    text-align: center;
    color: white;
    font-size: 12px;
}
#legend {
    position: absolute;
    top: 0;
    right: 0;
    max-height: 100%;
    overflow-y: scroll;
}
#title-container {
    max-width: 75%;
}
"""

# HTML for custom legend with app-based colors.
# Each name is HTML-escaped before it is placed in the `id` attribute and in the visible text, so a
# name containing quotes, `<` or `&` cannot break the markup. The browser decodes the entities
# again, so the element id still equals the raw app name that the legend JavaScript compares against.
import html  # standard library; imported here so this step is self-contained

custom_html = """
<div id="legend" class="container-box">
"""
for app, color in app_color_mapping.items():
    safe_app = html.escape(str(app), quote=True)
    custom_html += f'    <div class="row"><div id="{safe_app}" class="box" style="background-color:{color};"></div>{safe_app}</div>\n'
custom_html += "</div>\n"

# Step 10: Custom JavaScript for legend interaction
# Clicking a coloured box toggles that app in a set of active filters; the selection is then
# recomputed from the `app` column of the point metadata (no active filter = every point selected).
# Only elements with class "box" are treated as toggles: the legend container carries an id as well
# ("legend"), so without this check a click on the container would be stored as a filter value and
# its whole content would be replaced by a check mark.
custom_js = """
const legend = document.getElementById("legend");
const selectedPrimaryFields = new Set();

legend.addEventListener('click', function (event) {
    // Ignore clicks that did not land on one of the coloured legend boxes
    const isLegendBox = event.target.classList.contains("box");
    const selectedField = isLegendBox ? event.target.id : "";

    if (selectedField) {
        if (selectedPrimaryFields.has(selectedField)) {
            selectedPrimaryFields.delete(selectedField);
            event.target.innerHTML = "";  // Remove check mark
        } else {
            selectedPrimaryFields.add(selectedField);
            event.target.innerHTML = "✓";  // Add check mark
        }
    }

    const selectedIndices = [];
    datamap.metaData.app.forEach((app, i) => {
        if (selectedPrimaryFields.size === 0 || selectedPrimaryFields.has(app)) {
            selectedIndices.push(i);
        }
    });

    datamap.addSelection(selectedIndices, "legend");
});
"""

# Step 11: Hover Text Template
# The {placeholders} are column names of the DataFrame handed over as `extra_point_data` below.
hover_text_template = """
<div style="font-size:12pt;padding:2px;">
    <strong>App:</strong> {app}<br>
    <strong>App Category:</strong> {app_category}<br>
    <strong>Participant:</strong> {participant}<br>
    <strong>Topic:</strong> {topic}
</div>
"""

# Step 12: Create the interactive plot
# All keyword options are collected first so the call itself only shows the positional inputs:
# the point coordinates and the per-point topic names (passed as the label layer).
plot_options = dict(
    initial_zoom_fraction=0.9,
    point_radius_min_pixels=1,
    point_radius_max_pixels=16,
    enable_search=True,
    marker_color_array=viz_data['color'].to_numpy(),  # one hex colour per point (by app)
    custom_css=custom_css,
    custom_html=custom_html,
    custom_js=custom_js,
    title="Interactive Plot Tutorial",
    sub_title="using simulated data",
    extra_point_data=viz_data,  # columns referenced by the hover template and the legend script
    hover_text_html_template=hover_text_template,
)
plot = datamapplot.create_interactive_plot(xy_tsne, viz_data['topic'], **plot_options)
plot.save("interactive_tutorial.html")

In [None]:
# How to Make an App-Based Visualization

In [None]:
import datamapplot
import seaborn as sns
import pandas as pd
import numpy as np
import random

# Step 0: Load your data here, e.g. a CSV file that contains reduced embeddings and metadata
viz_data = pd.read_csv('/path/to/data/file', low_memory=False)

# Steps 1-2: Copy the metadata columns out of `viz_data` and give them more readable names
# (the participant column keeps its name)
column_renames = {'app': 'application', 'app_category': 'category', 'participant_anonymized': 'participant_anonymized'}
app_extra_data = viz_data[list(column_renames)].copy().rename(columns=column_renames)

# Step 3: Collect the 2-D coordinates (use your t-SNE or UMAP columns here; this cell reads the UMAP ones)
xy_tsne = np.array(viz_data[['x_umap', 'y_umap']].to_numpy().tolist())

# Step 4: Generate hover text from the original columns of `viz_data` and store it in `app_extra_data`
def _app_hover_text(row):
    """Return the one-line hover summary for a single row of `viz_data`."""
    return f"App: {row['app']}, App Category: {row['app_category']}, Participant: {row['participant_anonymized']}"

app_extra_data['hover_text'] = viz_data.apply(_app_hover_text, axis=1)

# Step 5: Assign colors to each application
applications = np.unique(app_extra_data['application'])  # sorted distinct application names
random.seed(42)  # for reproducibility

# One random "#rrggbb" colour per application: three 0-255 draws, each rendered as two hex digits
app_color_mapping = {
    app: "#" + "".join(f"{random.randint(0, 255):02x}" for _ in range(3))
    for app in applications
}

# Add color column to `app_extra_data` based on application
app_extra_data['app_color'] = app_extra_data['application'].map(app_color_mapping)

# Step 6: Construct the custom legend for applications
# .row / .box style one legend entry (a small coloured square followed by its name);
# #legend pins the legend to the top-right corner and lets it scroll when it is taller than the page.
custom_css_app = """
.row {
    display: flex;
    align-items: center;
}
.box {
    height: 10px;
    width: 10px;
    border-radius: 2px;
    margin-right: 5px;
    padding: 0px 0 1px 0;
    text-align: center;
    color: white;
    font-size: 12px;
}
#legend {
    position: absolute;
    top: 0;
    right: 0;
    max-height: 100%;
    overflow-y: scroll;
}
#title-container {
    max-width: 75%;
}
"""

# HTML legend construction for app-based coloring.
# Application names come straight from the loaded data, so they are HTML-escaped before being
# placed in the `id` attribute and in the visible text; a name containing quotes, `<` or `&`
# would otherwise break the markup. The browser decodes the entities again, so the element id
# still equals the raw name that the legend JavaScript compares against.
import html  # standard library; imported here so this step is self-contained

app_legend_html = """
<div id="legend" class="container-box">
"""
for app, color in app_color_mapping.items():
    safe_app = html.escape(str(app), quote=True)
    app_legend_html += f'    <div class="row"><div id="{safe_app}" class="box" style="background-color:{color};"></div>{safe_app}</div>\n'
app_legend_html += "</div>\n"

# Step 7: Custom JavaScript for legend interaction
# Clicking a coloured box toggles that application in a set of active filters; the selection is
# then recomputed from the `application` column of the point metadata (no active filter = every
# point selected). Only elements with class "box" are treated as toggles: the legend container
# carries an id as well ("legend"), so without this check a click on the container would be
# stored as a filter value and its whole content would be replaced by a check mark.
custom_js_app = """
const legend = document.getElementById("legend");
const selectedPrimaryFields = new Set();

legend.addEventListener('click', function (event) {
    // Ignore clicks that did not land on one of the coloured legend boxes
    const isLegendBox = event.target.classList.contains("box");
    const selectedField = isLegendBox ? event.target.id : "";

    if (selectedField) {
        if (selectedPrimaryFields.has(selectedField)) {
            selectedPrimaryFields.delete(selectedField);
            event.target.innerHTML = "";  // Remove check mark
        } else {
            selectedPrimaryFields.add(selectedField);
            event.target.innerHTML = "✓";  // Add check mark
        }
    }

    const selectedIndices = [];
    datamap.metaData.application.forEach((application, i) => {
        if (selectedPrimaryFields.size === 0 || selectedPrimaryFields.has(application)) {
            selectedIndices.push(i);
        }
    });

    datamap.addSelection(selectedIndices, "legend");
});
"""

# Custom HTML hover text template.
# The {placeholders} are column names of `app_extra_data`, which is handed over as `extra_point_data` below.
hover_text_template_app = """
<div style="font-size:12pt;padding:2px;">
    <strong>App:</strong> {application}<br>
    <strong>Category:</strong> {category}<br>
    <strong>Participant:</strong> {participant_anonymized}
</div>
"""

# Label color mapping for applications: the labels are the application names and each label is
# colored like its own markers. `app_color_mapping` already holds exactly one entry for every
# distinct application, so it is copied instead of being rebuilt by zipping over every row.
label_color_mapping_app = dict(app_color_mapping)

# Step 8: Create the interactive plot for app-based coloring
plot_app = datamapplot.create_interactive_plot(
    xy_tsne,
    app_extra_data['application'],  # per-point application name, passed as the label layer
    initial_zoom_fraction=0.9,
    point_radius_min_pixels=1,
    point_radius_max_pixels=16,
    hover_text=app_extra_data['hover_text'].to_numpy(),  # Use the hover text from `app_extra_data`
    enable_search=True,
    label_color_map=label_color_mapping_app,  # Color labels based on the application itself
    marker_color_array=app_extra_data['app_color'].to_numpy(),  # Marker color by application
    custom_css=custom_css_app,
    custom_html=app_legend_html,
    custom_js=custom_js_app,
    title="Media Content Atlas",
    sub_title="Using App-Based Classification and Coloring of Screenshot Embeddings",
    extra_point_data=app_extra_data,  # Add extra data, including categories and colors
    hover_text_html_template=hover_text_template_app
)
plot_app.save("app_based_visualization.html")


# How to Make a Participant-Based Visualization

In [None]:
import datamapplot
import seaborn as sns
import pandas as pd
import numpy as np
import random


# Step 0: Load your data here, e.g. a CSV file that contains reduced embeddings and metadata
viz_data = pd.read_csv('/path/to/data/file', low_memory=False)

# Steps 1-2: Copy the metadata columns out of `viz_data` and give them more readable names
# (the participant column keeps its name)
column_renames = {'app': 'application', 'app_category': 'category', 'participant_anonymized': 'participant_anonymized'}
app_extra_data = viz_data[list(column_renames)].copy().rename(columns=column_renames)

# Step 3: Collect the 2-D coordinates (use your t-SNE or UMAP columns here)
xy_tsne = np.array(viz_data[['x_tsne', 'y_tsne']].to_numpy().tolist())

# Step 4: Generate hover text from the original columns of `viz_data` and store it in `app_extra_data`
def _participant_hover_text(row):
    """Return the one-line hover summary for a single row of `viz_data`."""
    return f"App: {row['app']}, App Category: {row['app_category']}, Participant: {row['participant_anonymized']}"

app_extra_data['hover_text'] = viz_data.apply(_participant_hover_text, axis=1)

# Step 5: Assign colors to participants
participants = np.unique(app_extra_data['participant_anonymized'])  # sorted distinct participant ids
random.seed(42)  # for reproducibility

# One random "#rrggbb" colour per participant: three 0-255 draws, each rendered as two hex digits
participant_color_mapping = {
    participant: "#" + "".join(f"{random.randint(0, 255):02x}" for _ in range(3))
    for participant in participants
}

# Add color column to `app_extra_data` based on participants
app_extra_data['participant_color'] = app_extra_data['participant_anonymized'].map(participant_color_mapping)

# Step 6: Construct the custom legend for participants
# .row / .box style one legend entry (a small coloured square followed by its name);
# #legend pins the legend to the top-right corner and lets it scroll when it is taller than the page.
custom_css_participant = """
.row {
    display: flex;
    align-items: center;
}
.box {
    height: 10px;
    width: 10px;
    border-radius: 2px;
    margin-right: 5px;
    padding: 0px 0 1px 0;
    text-align: center;
    color: white;
    font-size: 12px;
}
#legend {
    position: absolute;
    top: 0;
    right: 0;
    max-height: 100%;
    overflow-y: scroll;
}
#title-container {
    max-width: 75%;
}
"""

# HTML legend construction for participant-based coloring.
# Participant ids come straight from the loaded data, so they are converted to text and
# HTML-escaped before being placed in the `id` attribute and in the visible text; an id
# containing quotes, `<` or `&` would otherwise break the markup. The browser decodes the
# entities again, so the element id still equals the raw text the legend JavaScript sees.
import html  # standard library; imported here so this step is self-contained

participant_legend_html = """
<div id="legend" class="container-box">
"""
for participant, color in participant_color_mapping.items():
    safe_participant = html.escape(str(participant), quote=True)
    participant_legend_html += f'    <div class="row"><div id="{safe_participant}" class="box" style="background-color:{color};"></div>{safe_participant}</div>\n'
participant_legend_html += "</div>\n"

# Step 7: Custom JavaScript for legend interaction
# Clicking a coloured box toggles that participant in a set of active filters; the selection is
# then recomputed from the `participant_anonymized` column of the point metadata (no active
# filter = every point selected). Only elements with class "box" are treated as toggles: the
# legend container carries an id as well ("legend"), so without this check a click on the
# container would be stored as a filter value and its whole content replaced by a check mark.
custom_js_participant = """
const legend = document.getElementById("legend");
const selectedPrimaryFields = new Set();

legend.addEventListener('click', function (event) {
    // Ignore clicks that did not land on one of the coloured legend boxes
    const isLegendBox = event.target.classList.contains("box");
    const selectedField = isLegendBox ? event.target.id : "";

    if (selectedField) {
        if (selectedPrimaryFields.has(selectedField)) {
            selectedPrimaryFields.delete(selectedField);
            event.target.innerHTML = "";  // Remove check mark
        } else {
            selectedPrimaryFields.add(selectedField);
            event.target.innerHTML = "✓";  // Add check mark
        }
    }

    const selectedIndices = [];
    datamap.metaData.participant_anonymized.forEach((participant_anonymized, i) => {
        if (selectedPrimaryFields.size === 0 || selectedPrimaryFields.has(participant_anonymized)) {
            selectedIndices.push(i);
        }
    });

    datamap.addSelection(selectedIndices, "legend");
});
"""

# Custom HTML hover text template
# The {placeholders} are column names of `app_extra_data`, which is handed over as `extra_point_data` below.
hover_text_template_participant = """
<div style="font-size:12pt;padding:2px;">
    <strong>App:</strong> {application}<br>
    <strong>Category:</strong> {category}<br>
    <strong>Participant:</strong> {participant_anonymized}
</div>
"""

# Label color mapping for participants: labels are application names, colors come from participants.
# NOTE(review): the pairs are taken row by row, so when several participants use the same
# application the label ends up with the color of the participant in the last such row —
# confirm that this is the intended coloring.
label_color_mapping_participant = dict(
    zip(app_extra_data['application'], app_extra_data['participant_color'])
)

# Step 8: Create the interactive plot for participant-based coloring
# Keyword options are collected first so the call itself only shows the positional inputs:
# the point coordinates and the per-point application names (passed as the label layer).
participant_plot_options = dict(
    initial_zoom_fraction=0.9,
    point_radius_min_pixels=1,
    point_radius_max_pixels=16,
    hover_text=app_extra_data['hover_text'].to_numpy(),  # plain-text hover string per point
    enable_search=True,
    label_color_map=label_color_mapping_participant,  # label colors derived from participants
    marker_color_array=app_extra_data['participant_color'].to_numpy(),  # marker color by participant
    custom_css=custom_css_participant,
    custom_html=participant_legend_html,
    custom_js=custom_js_participant,
    title="Media Content Atlas",
    sub_title="using App Name and Participant-Based Coloring of Screenshot Embeddings",
    extra_point_data=app_extra_data,  # columns referenced by the hover template and the legend script
    hover_text_html_template=hover_text_template_participant,
)
plot_participant = datamapplot.create_interactive_plot(
    xy_tsne, app_extra_data['application'], **participant_plot_options
)

plot_participant.save("app_name__example_coloredbyparticipant.html")

# How to Make a Content-Based Visualization (MCA)

In [None]:
import datamapplot
import seaborn as sns
import pandas as pd
import numpy as np
import random
import textwrap

# Step 0: Load your data here, e.g. a CSV file that contains reduced embeddings and metadata
data_file = '/path/to/data/file'  # placeholder path - point this at your own file
viz_data = pd.read_csv(data_file, low_memory=False)

line_width = 50  # Adjust the value to control wrapping width

def wrap_description(description, width=None):
    """Wrap `description` with `textwrap.wrap` and join the pieces back into one line.

    The wrapped pieces are joined with a single space, so the result contains no line
    breaks; words longer than the width are split by the wrapping and therefore end up
    separated by a space.

    Parameters
    ----------
    description : object
        Text to process. Missing values (None / NaN) give an empty string; any other
        non-string value is converted with `str()` first.
    width : int, optional
        Wrapping width. Defaults to the module-level `line_width` at call time.

    Returns
    -------
    str
        The re-joined, single-line text.
    """
    if width is None:
        width = line_width
    if not isinstance(description, str):
        # Cells without text are read from CSV as NaN; textwrap only accepts strings
        if pd.isna(description):
            return ''
        description = str(description)
    return ' '.join(textwrap.wrap(description, width=width))
# Wrap the description for the hover text
viz_data['wrapped_description'] = viz_data['Document'].apply(wrap_description)

# Copy the metadata columns used by the plot and give them shorter names
app_extra_data = viz_data[['app', 'app_category', 'participant_id', 'cluster_1_labels', 'detailed_topic_1_labels']].copy()
app_extra_data = app_extra_data.rename(columns={'app': 'application', 'app_category': 'category', 'participant_id': 'participant', 'detailed_topic_1_labels': 'topic', 'cluster_1_labels': 'bigger_topic'})
xy_tsne = np.array(viz_data[['x_tsne', 'y_tsne']].values.tolist())

max_length = 150  # Maximum number of description characters shown (the more you show, the more cluttered and slow the plot will be)
description_offset = 23  # Leading characters of every description that are skipped - presumably a fixed boilerplate prefix; confirm against your data


def _content_hover_html(row):
    """Return the HTML hover snippet for one row of `viz_data`.

    The description is shown from `description_offset` onwards, limited to `max_length`
    characters; "..." is appended only when text was actually cut off at the end.
    """
    description = row['wrapped_description']
    cut_at = description_offset + max_length
    shown = description[description_offset:cut_at]
    ellipsis = '...' if len(description) > cut_at else ''
    # Line breaks of the template are removed below; the <br> tags provide the visible breaks
    return f"""
    <strong>App:</strong> {row['app']}<br>
    <strong>Category:</strong> {row['app_category']}<br>
    <strong>Participant:</strong> {row['participant_id']}<br>
    <strong>Topic:</strong> {row['detailed_topic_1_labels']}<br>
    <strong>Desc:</strong>{shown}{ellipsis}<br>
""".replace("\n", "").replace("\r", "").strip()


app_extra_data['hover_text'] = viz_data.apply(_content_hover_html, axis=1)
# Assign a color to every detailed topic
topics = np.unique(app_extra_data['topic'])  # sorted distinct topic names
random.seed(42)  # for reproducibility

# One random "#rrggbb" colour per topic: three 0-255 draws, each rendered as two hex digits
color_mapping = {
    topic: "#" + "".join(f"{random.randint(0, 255):02x}" for _ in range(3))
    for topic in topics
}

# This topic always gets light grey, replacing whatever random color it was given above
color_mapping["Mobile App User Interface"] = "#cccccc"

app_extra_data['color'] = app_extra_data['topic'].map(color_mapping)



# Step 6: Construct the custom legend for topics
# .row / .box style one legend entry (a small coloured square followed by its name);
# #legend pins a 300px-wide legend to the top-right corner and lets it scroll vertically.
custom_css = """
.row {
    display: flex;
    align-items: center;
}
.box {
    height: 10px;
    width: 10px;
    border-radius: 2px;
    margin-right: 5px;
    padding: 0px 0 1px 0;
    text-align: center;
    color: white;
    font-size: 10px;
}
#legend {
    position: absolute;
    top: 0;
    right: 0;
    max-height: 100%;
    overflow-y: scroll;
    overflow-x: hidden;
    width: 300px; 
}
#title-container {
    max-width: 75%;
}
"""

# HTML legend with one entry per topic.
# Topic labels come straight from the loaded data, so they are HTML-escaped before being placed
# in the `id` attribute and in the visible text; a label containing quotes, `<` or `&` would
# otherwise break the markup. The browser decodes the entities again, so the element id still
# equals the raw label that the legend JavaScript compares against.
import html  # standard library; imported here so this step is self-contained

custom_html = """
<div id="legend" class="container-box">
"""
for topic, color in color_mapping.items():
    safe_topic = html.escape(str(topic), quote=True)
    custom_html += f'    <div class="row"><div id="{safe_topic}" class="box" style="background-color:{color};"></div>{safe_topic}</div>\n'
custom_html += "</div>\n"



# Custom JavaScript for legend interaction
# Clicking a coloured box toggles that topic in a set of active filters; the selection is then
# recomputed from the `topic` column of the point metadata (no active filter = every point
# selected). Only elements with class "box" are treated as toggles: the legend container carries
# an id as well ("legend"), so without this check a click on the container would be stored as a
# filter value and its whole content would be replaced by a check mark.
custom_js = """
const legend = document.getElementById("legend");
const selectedPrimaryFields = new Set();

legend.addEventListener('click', function (event) {
    // Ignore clicks that did not land on one of the coloured legend boxes
    const isLegendBox = event.target.classList.contains("box");
    const selectedField = isLegendBox ? event.target.id : "";

    if (selectedField) {
        if (selectedPrimaryFields.has(selectedField)) {
            selectedPrimaryFields.delete(selectedField);
            event.target.innerHTML = "";  // Remove check mark
        } else {
            selectedPrimaryFields.add(selectedField);
            event.target.innerHTML = "✓";  // Add check mark
        }
    }

    const selectedIndices = [];
    datamap.metaData.topic.forEach((topic, i) => {
        if (selectedPrimaryFields.size === 0 || selectedPrimaryFields.has(topic)) {
            selectedIndices.push(i);
        }
    });

    datamap.addSelection(selectedIndices, "legend");
});

"""
# Keep the per-point marker colors as an array before the color column is dropped below
colors_np = app_extra_data['color'].to_numpy()

# The tooltip simply shows the pre-built HTML stored in the `hover_text` column
hover_text_template = """
    <div style="font-size:12pt;padding:2px;">
        {hover_text}
    </div>
"""

# Only the topic columns and the hover text are handed to the plot as extra point data
app_extra_data = app_extra_data.drop(columns=['application', 'category', 'participant', 'color'])

# Step 8: Create the interactive plot
# Two label layers are passed positionally after the coordinates: the detailed topic and the bigger topic.
content_plot_options = dict(
    initial_zoom_fraction=0.9,
    point_radius_min_pixels=1,
    point_radius_max_pixels=16,
    enable_search=True,
    marker_color_array=colors_np,  # marker color by detailed topic
    custom_css=custom_css,
    custom_html=custom_html,
    custom_js=custom_js,
    title="Media Content Atlas",
    noise_label="Mobile App User Interface",
    extra_point_data=app_extra_data,  # columns referenced by the hover template and the legend script
    hover_text_html_template=hover_text_template,
)
plot = datamapplot.create_interactive_plot(
    xy_tsne,
    app_extra_data['topic'],
    app_extra_data['bigger_topic'],
    **content_plot_options
)

plot.save("MCA_topic_based_example.html")