In [1]:
import os

import pandas as pd
import plotly.graph_objects as go
import plotly.io as pio

In [15]:
plots_dir = "../results/plots/"

x_title = "Margin"
y_title = "Annotations / Data Point"

In [3]:
results = pd.read_csv("../results/results_full.csv")
results["gap"] = results.apply(lambda row: f"{round(row['gap_rmse'], 3)}", axis=1)
results_df = results[['k', 'm', 'q', 'p', 'eps', 'rank_success', 'gap']]

In [20]:
k=1000
q=0.75
p=0.75

df = results_df[(results_df["k"] == k ) & (results_df["q"] == q) & (results_df["p"] == p)]

# Convert M to strings
df["m"] = df["m"].astype(str)

# Create pivot tables for heatmaps
rank_success_pivot = df.pivot(index="m", columns="eps", values="rank_success")
gap_pivot = df.pivot(index="m", columns="eps", values="gap")

# Define the numerical order for the y-axis
m_order = sorted(df["m"].astype(float).unique())  # Sort numerically
m_order = [str(int(m)) if m.is_integer() else str(m) for m in m_order]  # Convert to strings in the same format

# Ensure pivot tables match this order
rank_success_pivot = rank_success_pivot.loc[m_order]
gap_pivot = gap_pivot.loc[m_order]

# Heatmap for rank_success
heatmap_rank_success = go.Figure(data=go.Heatmap(z=rank_success_pivot.loc[m_order].values,
                                                    x=rank_success_pivot.columns,
                                                    y=rank_success_pivot.index,
                                                    zmin=0.5,
                                                    zmax=1,        
                                                    text=rank_success_pivot.values.round(3).astype(str),
                                                    texttemplate="%{text}",
                                                    colorbar_title="Rank Success",
                                                    textfont={"size": 12},
                                                    showscale=False))
heatmap_rank_success.update_layout(
    xaxis=dict(
        title=dict(
            text=x_title,
            font=dict(
                size=18
            )
        )
    ),
    yaxis=dict(
        title=dict(
            text=y_title,
            font=dict(
                size=12
            )
        )
    ), margin=dict(t=0,b=0,l=0,r=10), 
                                    height=200, width=500)

# Heatmap for gap
heatmap_gap = go.Figure(data=go.Heatmap(z=[[float(val.split(" ± ")[0]) for val in row] for row in gap_pivot.loc[m_order].values],
                                        x=gap_pivot.columns,
                                        y=gap_pivot.index,
                                        text=gap_pivot.values,
                                        reversescale=True,                                                    
                                        texttemplate="%{text}", 
                                        colorbar_title="Gap",
                                        textfont={"size": 12},
                                        showscale=False))
heatmap_gap.update_layout(xaxis=dict(
        title=dict(
            text=x_title,
            font=dict(
                size=18
            )
        )
    ),
    yaxis=dict(
        title=dict(
            text=y_title,
            font=dict(
                size=12
            )
        )
    ), margin=dict(t=0,b=0,l=0,r=10), 
                            height=200, width=500) 

pio.write_image(heatmap_rank_success, os.path.join(plots_dir, f"../rank-k={k}-q={q}-p={p}.pdf"), format="pdf")
pio.write_image(heatmap_gap, os.path.join(plots_dir, f"../gap-k={k}-q={q}-p={p}.pdf"), format="pdf")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



In [7]:
for k in set(results_df["k"].to_list()):
    for q in set(results_df["q"].to_list()):
        for p in set(results_df["p"].to_list()):
            df = results_df[(results_df["k"] == k ) & (results_df["q"] == q) & (results_df["p"] == p)]

            # Convert M to strings
            df["m"] = df["m"].astype(str)

            # Create pivot tables for heatmaps
            rank_success_pivot = df.pivot(index="m", columns="eps", values="rank_success")
            gap_pivot = df.pivot(index="m", columns="eps", values="gap")

            # Define the numerical order for the y-axis
            m_order = sorted(df["m"].astype(float).unique())  # Sort numerically
            m_order = [str(int(m)) if m.is_integer() else str(m) for m in m_order]  # Convert to strings in the same format

            # Ensure pivot tables match this order
            rank_success_pivot = rank_success_pivot.loc[m_order]
            gap_pivot = gap_pivot.loc[m_order]

            # Heatmap for rank_success
            heatmap_rank_success = go.Figure(data=go.Heatmap(z=rank_success_pivot.loc[m_order].values,
                                                             x=rank_success_pivot.columns,
                                                             y=rank_success_pivot.index,
                                                             zmin=0.5,
                                                             zmax=1,        
                                                             text=rank_success_pivot.values.round(3).astype(str),
                                                             texttemplate="%{text}",
                                                             colorbar_title="Rank Success",
                                                             textfont={"size": 12},
                                                             showscale=False))
            heatmap_rank_success.update_layout(title={"text": f"Raking Success (Budget: {k}, Annotator Accuracy: {q}, Base Accuracy: {p})", "x": 0.5, "xanchor": "center", "font": {"size": 13}}, 
                                               xaxis_title=x_title, yaxis_title=y_title, margin=dict(t=50,b=0,l=0,r=10), 
                                               height=300, width=500)

            # Heatmap for gap
            heatmap_gap = go.Figure(data=go.Heatmap(z=[[float(val.split(" ± ")[0]) for val in row] for row in gap_pivot.loc[m_order].values],
                                                    x=gap_pivot.columns,
                                                    y=gap_pivot.index,
                                                    text=gap_pivot.values,
                                                    reversescale=True,                                                    
                                                    texttemplate="%{text}", 
                                                    colorbar_title="Gap",
                                                    textfont={"size": 12},
                                                    showscale=False))
            heatmap_gap.update_layout(title={"text": f"Gap (Budget: {k}, Annotator Accuracy: {q}, Base Accuracy: {p})", "x": 0.5, "xanchor": "center", "font": {"size": 13}}, 
                                      xaxis_title=x_title, yaxis_title=y_title, margin=dict(t=50,b=0,l=0,r=10), 
                                      height=300, width=500) 

            pio.write_image(heatmap_rank_success, os.path.join(plots_dir, f"rank-k={k}-q={q}-p={p}.pdf"), format="pdf")
            pio.write_image(heatmap_gap, os.path.join(plots_dir, f"gap-k={k}-q={q}-p={p}.pdf"), format="pdf")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [10]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os

# Define the unique values of k, p, and q
unique_k_values = set(results_df["k"].to_list())
unique_p_values = [0.6, 0.75, 0.9]
unique_q_values = [0.6, 0.75, 0.9]

# Directory to save plots
os.makedirs(plots_dir, exist_ok=True)

for k in unique_k_values:
    # Create a subplot grid without subplot titles
    fig = make_subplots(
        rows=len(unique_q_values), cols=len(unique_p_values),
        vertical_spacing=0.05, horizontal_spacing=0.02
    )

    for i, q in enumerate(unique_q_values, start=1):  # Rows
        for j, p in enumerate(unique_p_values, start=1):  # Columns
            # Filter data for specific k, q, p
            df = results_df[(results_df["k"] == k) & (results_df["q"] == q) & (results_df["p"] == p)]

            if df.empty:
                continue  # Skip if no data for this combination

            # Convert M to strings
            df["m"] = df["m"].astype(str)

            # Create pivot table for heatmap
            rank_success_pivot = df.pivot(index="m", columns="eps", values="rank_success")

            # Define the order for the y-axis
            m_order = sorted(df["m"].astype(float).unique())  # Sort numerically
            m_order = [str(int(m)) if m.is_integer() else str(m) for m in m_order]
            rank_success_pivot = rank_success_pivot.loc[m_order]

            # Extract gap_bias values and create text values for heatmap
            z_values = rank_success_pivot.loc[m_order].values
            text_values = rank_success_pivot.values  # Use original gap values for display

            # Add heatmap to subplot
            heatmap = go.Heatmap(
                z=z_values,
                x=rank_success_pivot.columns,
                y=m_order,
                showscale=False,
                text=text_values,  # Display text in heatmap cells
                texttemplate="%{text}",  # Use text values in cells
                textfont={"size": 13},  # Adjust text size
            )
            fig.add_trace(heatmap, row=i, col=j)

    # Update layout for the "heatmap of heatmaps"
    fig.update_layout(
        # title={"text": f"Budget: {k}", "x": 0.5, "xanchor": "center", "font": {"size": 24}},
        height=200 * len(unique_q_values),  # Adjust height based on rows
        width=500 * len(unique_p_values),  # Adjust width based on columns
        margin=dict(t=10, b=10, l=0, r=0)
    )

    # Add custom ticks only to the outer axes
    for i in range(1, len(unique_q_values) + 1):  # Rows
        fig.update_yaxes(
            title_text=f"Annotator Acc.: {unique_q_values[i-1]}",  # Add tick label
            row=i,
            col=1,  # Only the first column gets y-axis ticks
            ticks="outside"
        )
    for j in range(1, len(unique_p_values) + 1):  # Columns
        fig.update_xaxes(
            title_text=f"Better Classifier Acc.: {unique_p_values[j-1]}",  # Add tick label
            row=len(unique_q_values),  # Only the last row gets x-axis ticks
            col=j,
            ticks="outside"
        )

    # Save the grid of heatmaps to a PDF
    output_file = os.path.join(plots_dir, f"rank-heatmap_of_heatmaps_k={k}.pdf")
    fig.write_image(output_file, format="pdf")



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [None]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import os

# Define the unique values of k, p, and q
unique_k_values = set(results_df["k"].to_list())
unique_p_values = [0.6, 0.75, 0.9]
unique_q_values = [0.6, 0.75, 0.9]

# Directory to save plots
os.makedirs(plots_dir, exist_ok=True)

for k in unique_k_values:
    # Create a subplot grid without subplot titles
    fig = make_subplots(
        rows=len(unique_q_values), cols=len(unique_p_values),
        vertical_spacing=0.05, horizontal_spacing=0.02
    )

    for i, q in enumerate(unique_q_values, start=1):  # Rows
        for j, p in enumerate(unique_p_values, start=1):  # Columns
            # Filter data for specific k, q, p
            df = results_df[(results_df["k"] == k) & (results_df["q"] == q) & (results_df["p"] == p)]

            if df.empty:
                continue  # Skip if no data for this combination

            # Convert M to strings
            df["m"] = df["m"].astype(str)

            # Create pivot table for heatmap
            gap_pivot = df.pivot(index="m", columns="eps", values="gap")

            # Define the order for the y-axis
            m_order = sorted(df["m"].astype(float).unique())  # Sort numerically
            m_order = [str(int(m)) if m.is_integer() else str(m) for m in m_order]
            gap_pivot = gap_pivot.loc[m_order]

            # Extract gap_bias values and create text values for heatmap
            z_values = [[float(val.split(" ± ")[0]) for val in row] for row in gap_pivot.values]
            text_values = gap_pivot.values  # Use original gap values for display

            # Add heatmap to subplot
            heatmap = go.Heatmap(
                z=z_values,
                x=gap_pivot.columns,
                y=m_order,
                showscale=False,
                reversescale=True,
                text=text_values,  # Display text in heatmap cells
                texttemplate="%{text}",  # Use text values in cells
                textfont={"size": 13},  # Adjust text size
            )
            fig.add_trace(heatmap, row=i, col=j)

    # Update layout for the "heatmap of heatmaps"
    fig.update_layout(
        # title={"text": f"Budget: {k}", "x": 0.5, "xanchor": "center", "font": {"size": 24}},
        height=200 * len(unique_q_values),  # Adjust height based on rows
        width=500 * len(unique_p_values),  # Adjust width based on columns
        margin=dict(t=10, b=10, l=0, r=0)
    )

    # Add custom ticks only to the outer axes
    for i in range(1, len(unique_q_values) + 1):  # Rows
        fig.update_yaxes(
            title_text=f"Annotator Acc.: {unique_q_values[i-1]}",  # Add tick label
            row=i,
            col=1,  # Only the first column gets y-axis ticks
            ticks="outside"
        )
    for j in range(1, len(unique_p_values) + 1):  # Columns
        fig.update_xaxes(
            title_text=f"Better Classifier Acc.: {unique_p_values[j-1]}",  # Add tick label
            row=len(unique_q_values),  # Only the last row gets x-axis ticks
            col=j,
            ticks="outside"
        )

    # Save the grid of heatmaps to a PDF
    output_file = os.path.join(plots_dir, f"heatmap_of_heatmaps_k={k}.pdf")
    fig.write_image(output_file, format="pdf")