In [None]:
import pathlib
import pandas as pd
import wandb
# Client for the W&B public API; later cells call `api.sweep(...)` on it to fetch sweeps.
api = wandb.Api()


### Download all runs from a sweep and store the data in a CSV file.

In [None]:
# ID of the W&B sweep analysed below (the "labels all" sweep).
sweep_id = "fh8c8zkv"

# "Results" folder inside the current working directory.  For Jupyter notebook.
save_path = pathlib.Path.cwd().joinpath("Results")

In [None]:
# Fetch every run of the sweep and flatten it into one record (row) per run.
# Project is specified by <entity/project-name>
sweep = api.sweep(f"marko-krizmancic/gnn_fiedler_approx_v2/{sweep_id}")  # labels_all
runs = sweep.runs

summary_fields = ['best_train_loss', 'best_val_loss', 'mean_err', 'stddev_err', 'duration', 'good_within']
config_fields = ['selected_features']
base_features = {'degree', 'betweenness_centrality'}

records = []
skipped_runs = []  # (name, id, missing key) of runs that lack a required field
for run in runs:
    try:
        summary = {}
        for k in summary_fields:
            if k == "good_within":
                # Nested mapping: flatten it into "good_within.<key>" columns.
                good_within = {f"{k}.{k2}": v2 for k2, v2 in run.summary[k].items()}
                summary.update(good_within)
            else:
                summary[k] = run.summary[k]
        selected_features = set(run.config["selected_features"])
    except KeyError as missing_key:
        # A run that did not log every field (presumably crashed or unfinished)
        # must not abort the whole cell; remember it and report it below.
        skipped_runs.append((run.name, run.id, str(missing_key)))
        continue

    # Split the selected features into a "base" feature (one of `base_features`)
    # and the feature under test.
    config = {}
    base_feature_set = selected_features & base_features
    if len(base_feature_set) == 0:
        config["base_feature"] = "none"
    elif len(base_feature_set) == 1:
        config["base_feature"] = base_feature_set.pop()
    else:
        # Both base features are present: "degree" is treated as the base.
        config["base_feature"] = "degree"

    test_feature_set = selected_features - {config["base_feature"]}
    if len(test_feature_set) == 0:
        # Only the base feature was selected: report it as the tested feature instead.
        config["test_feature"], config["base_feature"] = config["base_feature"], "none"
    else:
        # min() instead of set.pop(): pop() returns an arbitrary element, so with
        # more than one candidate the result could change between kernel restarts.
        config["test_feature"] = min(test_feature_set)

    info = {"name": run.name, "id": run.id}
    records.append({**info, **config, **summary})

if skipped_runs:
    print(f"Skipped {len(skipped_runs)} run(s) with missing fields: {skipped_runs}")

df = pd.DataFrame.from_records(records)
# Ordered categorical (reverse alphabetical) so rows sort by base feature in that order.
df['base_feature'] = pd.Categorical(df['base_feature'], categories=sorted(df['base_feature'].unique(), reverse=True), ordered=True)
df = df.sort_values('base_feature').reset_index(drop=True)
df


In [None]:
import plotly.express as px

# Plot straight from the frame built in the previous cell. This cell used to
# read `working_df`, which is only assigned further down the notebook (from a
# different sweep), so it raised NameError on a fresh kernel.
# NOTE(review): assumes `df` is still the per-run frame of the feature sweep.
feature_df = df


def get_baseline(frame, base_feature, test_feature, metric="good_within.99"):
    """Return `metric` of the first run with the given base/test feature pair.

    Args:
        frame: DataFrame with 'base_feature', 'test_feature' and `metric` columns.
        base_feature: value to match in the 'base_feature' column.
        test_feature: value to match in the 'test_feature' column.
        metric: name of the column to read the value from.

    Raises:
        IndexError: if no row of `frame` matches the pair.
    """
    is_match = (frame['base_feature'] == base_feature) & (frame['test_feature'] == test_feature)
    values = frame.loc[is_match, metric].values
    if len(values) == 0:
        raise IndexError(
            f"No run with base_feature={base_feature!r} and test_feature={test_feature!r}"
        )
    return values[0]


only_degree_baseline = get_baseline(feature_df, 'none', 'degree')
only_betweenness_baseline = get_baseline(feature_df, 'none', 'betweenness_centrality')
degree_and_betweenness_baseline = get_baseline(feature_df, 'degree', 'betweenness_centrality')

print(only_degree_baseline)
# Grouped bars: one group per tested feature, one colour per base feature.
fig = px.histogram(
    feature_df,
    x="test_feature",
    y="good_within.99",
    color="base_feature",
    barmode="group",
)
# Dashed horizontal reference lines, one per baseline run.
fig.add_hline(y=only_degree_baseline, line_dash="dash", line_color=px.colors.qualitative.Plotly[0])
fig.add_hline(y=degree_and_betweenness_baseline, line_dash="dash", line_color=px.colors.qualitative.Plotly[1])
fig.add_hline(y=only_betweenness_baseline, line_dash="dash", line_color=px.colors.qualitative.Plotly[2])

fig.show()

### Analyze selected features


In [None]:
# ID of the W&B sweep analysed in this section.
# NOTE(review): the original comment here read "Labels all", the same as the
# first sweep cell although the ID differs — confirm which sweep this is.
sweep_id = "uw292q9u"

# "Results" folder inside the current working directory.  For Jupyter notebook.
save_path = pathlib.Path.cwd().joinpath("Results")

In [None]:
# Fetch the sweep and build one flat record per run.
# Project is specified by <entity/project-name>
sweep = api.sweep(f"marko-krizmancic/gnn_fiedler_approx_v2/{sweep_id}")  # labels_all
runs = sweep.runs

summary_fields = ['best_train_loss', 'best_val_loss', 'mean_err', 'stddev_err', 'duration', 'good_within']

records = []
for run in runs:
    # Summary metrics; the nested "good_within" mapping becomes "good_within.<key>" columns.
    metrics = {}
    for field in summary_fields:
        value = run.summary[field]
        if field != "good_within":
            metrics[field] = value
            continue
        for sub_key, sub_value in value.items():
            metrics[f"{field}.{sub_key}"] = sub_value

    # Hyperparameters of interest, read from the run config.
    hyperparams = {name: run.config[name] for name in ("selected_features", "learning_rate")}

    records.append({"name": run.name, "id": run.id, **hyperparams, **metrics})

df = pd.DataFrame.from_records(records)
df


In [None]:
import plotly.graph_objects as go

# Base feature sets ("options"); each is plotted alone and with one extra variant.
options = [
    ["degree", "degree_centrality"],
    ["degree", "degree_centrality", "betweenness_centrality"],
    ["degree", "degree_centrality", "core_number", "triangles", "clustering", "close_centrality"]
]
variants = ["K_cycle_count_matrix", "A_matrix_row", "random_walk_pe"]


def get_option_label(option):
    """Join the feature names of one option into a single legend label."""
    return ", ".join(option)


# X-axis groups: the bare option first, then one group per added variant.
group_labels = ["No additional", *variants]
option_labels = list(map(get_option_label, options))

working_df = df[df['learning_rate'] > 0.003]  # Adjust this value as needed

# One entry per (group, option) pair; the value is None when no run matches.
plot_data = []
for group in group_labels:
    extra_features = [] if group == "No additional" else [group]
    for label, option in zip(option_labels, options):
        wanted = set(option + extra_features)
        # A run matches when its selected features equal the wanted set (order ignored).
        is_match = working_df['selected_features'].apply(lambda feats: set(feats) == wanted)
        match = working_df[is_match]
        if match.empty:
            y_val = None
        else:
            y_val = match['good_within.99'].values[0]
        plot_data.append({"group": group, "option": label, "good_within.99": y_val})

# Convert to DataFrame for easier plotting
plot_df = pd.DataFrame(plot_data)

# Grouped bar chart: one trace per option, one bar per group.
fig = go.Figure()
for option in option_labels:
    rows_for_option = plot_df[plot_df['option'] == option]
    heights = []
    for group in group_labels:
        cell = rows_for_option.loc[rows_for_option['group'] == group, 'good_within.99']
        heights.append(cell.values[0])
    fig.add_trace(go.Bar(x=group_labels, y=heights, name=option))

fig.update_layout(
    barmode='group',
    xaxis_title="Variant",
    yaxis_title="good_within.99",
    title="Grouped Bar Chart of Experiments by Selected Features and Variant"
)
fig.show()

