In [6]:
import pandas as pd
import numpy as np
import tensorflow as tf
import networkx as nx
import json
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score
import random
import pickle

print("Libraries imported successfully.")


Libraries imported successfully.


In [7]:
# Section 2: Preprocessing and Graph Construction

# File paths
# original_features_path = "../foursquare_data/features.json"
# original_edges_path = "../foursquare_data/edges.csv"

original_features_path = "../gowalla_data/gowalla_checkins.json"
original_edges_path = "../gowalla_data/gowalla_edges.csv"

# Load data
def load_data():
    print("Loading original features and edges...")
    with open(original_features_path, "r") as f:
        features = json.load(f)
    print(f"Loaded {len(features)} nodes with features.")
    
    edges = pd.read_csv(
        original_edges_path, names=["source", "target"], skiprows=1
    )  # Skip header row
    print(f"Loaded {len(edges)} edges.")
    return features, edges

# Create graph and extract largest connected component
def create_graph(features, edges):
    print("Creating graph from edges...")
    edges["source"] = edges["source"].astype(str)
    edges["target"] = edges["target"].astype(str)
    G = nx.from_pandas_edgelist(edges, source="source", target="target")
    print(f"Graph created with {len(G.nodes)} nodes and {len(G.edges)} edges.")

    print("Identifying the largest connected component...")
    largest_cc = max(nx.connected_components(G), key=len)
    G_lcc = G.subgraph(largest_cc).copy()
    print(f"Largest connected component has {len(G_lcc.nodes)} nodes and {len(G_lcc.edges)} edges.")

    print("Assigning features to nodes...")
    for node, feats in features.items():
        if node in G_lcc.nodes:
            G_lcc.nodes[node]["features"] = feats
    print("Node features assigned.")
    
    return G_lcc

def standardize_features(G, output_dim=128):
    print("Standardizing features to fixed dimensions...")

    # Extract nodes with features
    nodes_with_features = [
        node for node, feats in nx.get_node_attributes(G, "features").items() if feats
    ]
    print(f"Number of nodes with features: {len(nodes_with_features)}")

    # Extract feature lists for nodes with features
    feature_list = [
        set(G.nodes[node]["features"]) for node in nodes_with_features
    ]
    
    mlb = MultiLabelBinarizer()
    binary_features = mlb.fit_transform(feature_list)
    print(f"Initial feature matrix shape: {binary_features.shape}")

    # # Apply PCA if necessary
    # if binary_features.shape[1] > output_dim:
    #     print(f"Reducing dimensions to {output_dim} using PCA...")
    #     pca = PCA(n_components=output_dim)
    #     reduced_features = pca.fit_transform(binary_features)
    #     print(f"Feature matrix shape after PCA: {reduced_features.shape}")
    # else:
    #     print(f"No dimensionality reduction needed. Retaining shape {binary_features.shape}")
    #     reduced_features = binary_features

    # Assign standardized features back to the corresponding nodes
    # print("Assigning standardized features back to nodes...")
    # for idx, node in enumerate(nodes_with_features):
    #     G.nodes[node]["features"] = reduced_features[idx]
    
    # print("Feature standardization complete.")



# Load, process, and standardize graph
print("Starting graph preprocessing...")
features, edges = load_data()
G = create_graph(features, edges)
standardize_features(G, output_dim=128)
print(f"Graph preprocessing complete. Final graph has {len(G.nodes)} nodes and {len(G.edges)} edges.")


Starting graph preprocessing...
Loading original features and edges...
Loaded 107092 nodes with features.
Loaded 1900653 edges.
Creating graph from edges...
Graph created with 196591 nodes and 950327 edges.
Identifying the largest connected component...
Largest connected component has 196591 nodes and 950327 edges.
Assigning features to nodes...
Node features assigned.
Standardizing features to fixed dimensions...
Number of nodes with features: 107092
Initial feature matrix shape: (107092, 103736)
Graph preprocessing complete. Final graph has 196591 nodes and 950327 edges.


In [None]:
def check_connectivity_bfs(G):
    print("Performing BFS to ensure all nodes are connected...")
    start_node = next(iter(G.nodes))  # Get an arbitrary starting node
    visited = set()
    queue = [start_node]

    while queue:
        node = queue.pop(0)
        if node not in visited:
            visited.add(node)
            queue.extend(neighbor for neighbor in G.neighbors(node) if neighbor not in visited)

    if len(visited) == len(G.nodes):
        print("All nodes are connected. The graph is a single connected component.")
    else:
        print(f"Graph is not fully connected. Only {len(visited)} out of {len(G.nodes)} nodes are reachable.")


check_connectivity_bfs(G)  # Ensure the graph is a single connected component

Performing BFS to ensure all nodes are connected...
All nodes are connected. The graph is a single connected component.


In [4]:
# THIS CELL TAKES TOO LONG
# %% 
# Section 3A: Create Feature Vectors

def create_feature_vectors(G, edges):
    print("Creating feature vectors for ML tasks...")

    # Precompute node features as numpy arrays for faster access
    node_features = {}
    for n in G.nodes():
        if "features" in G.nodes[n]:
            node_features[n] = np.array(G.nodes[n]["features"], dtype=np.float32)

    # Convert edges to a NumPy array for faster iteration
    edges_array = edges[["source", "target"]].values

    X_pos = []
    y_pos = []

    print("Processing positive samples (existing edges)...")
    # Iterate over edges without using iterrows
    for i, (src, tgt) in enumerate(edges_array):
        node1, node2 = str(src), str(tgt)
        if node1 in node_features and node2 in node_features:
            # Directly use precomputed numpy arrays
            feature_vector = node_features[node1] - node_features[node2]
            X_pos.append(feature_vector)
            y_pos.append(1)
        if i % 50000 == 0 and i > 0:
            print(f"Processed {i} positive samples.")

    print("Generating negative samples (random non-existing edges)...")
    X_neg = []
    y_neg = []
    all_nodes = list(node_features.keys())
    n_edges = len(edges_array)

    # Generate negative samples
    for i in range(n_edges):
        node1, node2 = np.random.choice(all_nodes, 2, replace=False)
        if not G.has_edge(node1, node2):
            feature_vector = node_features[node1] - node_features[node2]
            X_neg.append(feature_vector)
            y_neg.append(0)
        if i % 50000 == 0 and i > 0:
            print(f"Generated {i} negative samples.")

    # Convert to numpy arrays once at the end
    X = np.vstack([X_pos, X_neg])
    y = np.hstack([y_pos, y_neg])

    print(f"Feature vectors created. Total samples: {X.shape[0]}")
    return X, y

# Create and split feature vectors
print("Creating and splitting feature vectors...")
X, y = create_feature_vectors(G, edges)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f"Training set size: {X_train.shape[0]}, Test set size: {X_test.shape[0]}")

with open("feature_vectors.pkl", "wb") as f:
    pickle.dump((X, y, X_train, X_test, y_train, y_test), f)

print("Feature vectors and splits saved successfully.")


Creating and splitting feature vectors...
Creating feature vectors for ML tasks...
Processing positive samples (existing edges)...
Processed 50000 positive samples.
Processed 100000 positive samples.
Processed 150000 positive samples.
Processed 200000 positive samples.
Processed 250000 positive samples.
Processed 300000 positive samples.
Processed 350000 positive samples.
Processed 400000 positive samples.
Processed 450000 positive samples.
Processed 500000 positive samples.
Processed 550000 positive samples.
Processed 600000 positive samples.
Processed 650000 positive samples.
Processed 700000 positive samples.
Processed 750000 positive samples.
Processed 800000 positive samples.
Processed 850000 positive samples.
Processed 900000 positive samples.
Processed 950000 positive samples.
Processed 1000000 positive samples.
Processed 1050000 positive samples.
Processed 1100000 positive samples.
Processed 1150000 positive samples.
Processed 1200000 positive samples.
Processed 1250000 positiv

KeyboardInterrupt: 

In [11]:
def create_feature_vectors(G, edges):
    print("Creating feature vectors for ML tasks...")

    # Precompute node features as numpy arrays for faster access
    node_features = {}
    for n in G.nodes():
        if "features" in G.nodes[n]:
            node_features[n] = np.array(G.nodes[n]["features"], dtype=np.float32)

    # Convert edges to a NumPy array for faster iteration
    edges_array = edges[["source", "target"]].values

    # Build adjacency dictionary for O(1) edge checks
    # This allows checking if (node1,node2) is an edge via: node2 in adjacency[node1]
    print("Building adjacency dictionary...")
    adjacency = {}
    for src, tgt in edges_array:
        node1, node2 = str(src), str(tgt)
        if node1 not in adjacency:
            adjacency[node1] = set()
        if node2 not in adjacency:
            adjacency[node2] = set()
        adjacency[node1].add(node2)
        adjacency[node2].add(node1)

    # Positive samples
    X_pos = []
    y_pos = []
    print("Processing positive samples (existing edges)...")
    for i, (src, tgt) in enumerate(edges_array):
        node1, node2 = str(src), str(tgt)
        if node1 in node_features and node2 in node_features:
            feature_vector = node_features[node1] - node_features[node2]
            X_pos.append(feature_vector)
            y_pos.append(1)
        if i % 50000 == 0 and i > 0:
            print(f"Processed {i} positive samples.")

    # Negative samples
    print("Generating negative samples (random non-existing edges)...")
    X_neg = []
    y_neg = []
    all_nodes = list(node_features.keys())
    n_edges = len(edges_array)
    all_nodes_array = np.array(all_nodes)

    # We'll generate negative samples in batches until we have n_edges of them
    num_created = 0
    batch_size = 100000  # You may adjust this for memory/speed trade-offs
    while num_created < n_edges:
        # Generate a batch of random node pairs
        idx_batch = np.random.randint(len(all_nodes), size=(batch_size, 2))
        # Filter out self-loops
        valid_mask = idx_batch[:, 0] != idx_batch[:, 1]
        idx_batch = idx_batch[valid_mask]

        pairs = all_nodes_array[idx_batch]
        # Check for non-edges
        # If node2 not in adjacency[node1], then it's a non-edge
        for node1, node2 in pairs:
            # Quickly skip if we already have enough samples
            if num_created >= n_edges:
                break
            # Check if it's a non-edge
            if node2 not in adjacency.get(node1, ()):
                feature_vector = node_features[node1] - node_features[node2]
                X_neg.append(feature_vector)
                y_neg.append(0)
                num_created += 1
                if num_created % 50000 == 0:
                    print(f"Generated {num_created} negative samples.")

    # Convert to numpy arrays once at the end
    X = np.vstack([X_pos, X_neg])
    y = np.hstack([y_pos, y_neg])

    print(f"Feature vectors created. Total samples: {X.shape[0]}")
    return X, y

# Create and split feature vectors
print("Creating and splitting feature vectors...")
X, y = create_feature_vectors(G, edges)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print(f"Training set size: {X_train.shape[0]}, Test set size: {X_test.shape[0]}")

with open("feature_vectors.pkl", "wb") as f:
    pickle.dump((X, y, X_train, X_test, y_train, y_test), f)

print("Feature vectors and splits saved successfully.")



Creating and splitting feature vectors...
Creating feature vectors for ML tasks...
Building adjacency dictionary...
Processing positive samples (existing edges)...
Processed 50000 positive samples.
Processed 100000 positive samples.
Processed 150000 positive samples.
Processed 200000 positive samples.
Processed 250000 positive samples.
Processed 300000 positive samples.
Processed 350000 positive samples.
Processed 400000 positive samples.
Processed 450000 positive samples.
Processed 500000 positive samples.
Processed 550000 positive samples.
Processed 600000 positive samples.
Processed 650000 positive samples.
Processed 700000 positive samples.
Processed 750000 positive samples.
Processed 800000 positive samples.
Processed 850000 positive samples.
Processed 900000 positive samples.
Processed 950000 positive samples.
Processed 1000000 positive samples.
Processed 1050000 positive samples.
Processed 1100000 positive samples.
Processed 1150000 positive samples.
Processed 1200000 positive s

In [12]:
with open("feature_vectors.pkl", "rb") as f:
    X, y, X_train, X_test, y_train, y_test = pickle.load(f)

print("Feature vectors and splits loaded successfully.")
print(f"Training set size: {X_train.shape[0]}, Test set size: {X_test.shape[0]}")

Feature vectors and splits loaded successfully.
Training set size: 2251449, Test set size: 562863


In [13]:
# Section 3B: Train the Neural Network

# Define the neural network
print("Defining the neural network model...")
model = tf.keras.Sequential([
    tf.keras.layers.Dense(128, activation="relu", input_shape=(X_train.shape[1],), name="Input_Layer"),
    tf.keras.layers.Dense(64, activation="relu", name="Hidden_Layer_1"),
    tf.keras.layers.Dense(32, activation="relu", name="Hidden_Layer_2"),
    tf.keras.layers.Dense(1, activation="sigmoid", name="Output_Layer"),
])
print("Model defined successfully.")
model.summary()

# Compile the model
print("Compiling the model...")
model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])
print("Model compiled successfully.")

# Define a custom callback for logging
class TrainingLogger(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        print(f"\nEpoch {epoch + 1}:")
        print(
            f"  Training Loss: {logs['loss']:.4f}, Training Accuracy: {logs['accuracy']:.4f}"
        )
        print(
            f"  Validation Loss: {logs['val_loss']:.4f}, Validation Accuracy: {logs['val_accuracy']:.4f}"
        )

# Train the model
print("Starting model training...")
history = model.fit(
    X_train,
    y_train,
    epochs=20,
    batch_size=32,
    validation_split=0.2,
    callbacks=[TrainingLogger()],
    verbose=0  # Suppress default verbose to use custom logging
)
print("Model training complete.")

# Evaluate the model
model.save("trained_model.h5")
print("Model saved successfully to 'trained_model.h5'.")

# Save the training history
with open("training_history.pkl", "wb") as f:
    pickle.dump(history.history, f)
print("Training history saved successfully to 'training_history.pkl'.")

# Evaluate the model
print("Evaluating the model on the test set...")
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=1)
print(f"Test Loss: {test_loss:.4f}")
print(f"Test Accuracy: {test_accuracy:.4f}")


Defining the neural network model...
Model defined successfully.


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Compiling the model...
Model compiled successfully.
Starting model training...

Epoch 1:
  Training Loss: 16.0747, Training Accuracy: 0.6658
  Validation Loss: 0.6306, Validation Accuracy: 0.6756

Epoch 2:
  Training Loss: 0.6597, Training Accuracy: 0.6752
  Validation Loss: 0.6302, Validation Accuracy: 0.6756

Epoch 3:
  Training Loss: 0.6736, Training Accuracy: 0.6752
  Validation Loss: 0.6302, Validation Accuracy: 0.6756

Epoch 4:
  Training Loss: 0.6476, Training Accuracy: 0.6752
  Validation Loss: 0.6301, Validation Accuracy: 0.6756

Epoch 5:
  Training Loss: 0.6501, Training Accuracy: 0.6752
  Validation Loss: 0.6301, Validation Accuracy: 0.6756

Epoch 6:
  Training Loss: 0.6364, Training Accuracy: 0.6752
  Validation Loss: 0.6302, Validation Accuracy: 0.6756

Epoch 7:
  Training Loss: 0.6563, Training Accuracy: 0.6752
  Validation Loss: 0.6301, Validation Accuracy: 0.6756

Epoch 8:
  Training Loss: 0.6717, Training Accuracy: 0.6752
  Validation Loss: 0.6301, Validation Accuracy:




Epoch 20:
  Training Loss: 0.6359, Training Accuracy: 0.6752
  Validation Loss: 0.6302, Validation Accuracy: 0.6756
Model training complete.
Model saved successfully to 'trained_model.h5'.
Training history saved successfully to 'training_history.pkl'.
Evaluating the model on the test set...
[1m17590/17590[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 366us/step - accuracy: 0.6769 - loss: 0.6293
Test Loss: 0.6301
Test Accuracy: 0.6757


In [14]:
# Load the trained model
model = tf.keras.models.load_model("trained_model.h5")
print("Model loaded successfully from 'trained_model.h5'.")
model.summary()

# Load the training history
with open("training_history.pkl", "rb") as f:
    history = pickle.load(f)
print("Training history loaded successfully from 'training_history.pkl'.")

# Print the loaded training history (optional)
print("Loaded Training History:")
for key, values in history.items():
    print(f"{key}: {values[:5]}...")  # Show first 5 values as a preview



Model loaded successfully from 'trained_model.h5'.


Training history loaded successfully from 'training_history.pkl'.
Loaded Training History:
accuracy: [0.665799081325531, 0.6751697063446045, 0.6751769185066223, 0.675178587436676, 0.6751891374588013]...
loss: [16.074718475341797, 0.6597189903259277, 0.6735944151878357, 0.6475996375083923, 0.6501043438911438]...
val_accuracy: [0.6756379008293152, 0.6756334900856018, 0.6756379008293152, 0.6756356954574585, 0.6756334900856018]...
val_loss: [0.6305809020996094, 0.6302291750907898, 0.6301529407501221, 0.6300589442253113, 0.6301189661026001]...


In [15]:
# Helper function to find the best nodes
def find_best_nodes(G, neighbors, target_node):
    """
    Find the best nodes among neighbors based on shortest path distance to the target.
    Returns the list of tied 'best' nodes with the minimum distance.
    """
    distances = {
        neighbor: nx.shortest_path_length(G, source=neighbor, target=target_node)
        for neighbor in neighbors
    }
    min_distance = min(distances.values())
    best_nodes = [node for node, dist in distances.items() if dist == min_distance]
    return best_nodes

In [18]:
# def predict_next_node(model, G, current_node, target_node, visited, prediction_cache, debug=False):
#     """
#     Predict the next node from the current node and return step-by-step accuracy.
#     """
#     if debug:
#         print(f"Predicting next node from current node: {current_node}")

#     neighbors = [
#         neighbor for neighbor in G.neighbors(current_node)
#         if "features" in G.nodes[neighbor] and neighbor not in visited
#     ]

#     if not neighbors:
#         random_choice = random.choice(list(G.neighbors(current_node))) if G.neighbors(current_node) else None
#         if debug:
#             print("Opting For Random Choice")
#         return random_choice, False  # No "best choice" available, random node selected
    
#     if target_node in neighbors:
#         if debug:
#             print(f"Target node {target_node} is a direct neighbor. Automatically selecting it.")
#         return target_node, True  # Automatically move to the destination and mark as accurate

#     # Find the best nodes
#     best_nodes = find_best_nodes(G, neighbors, target_node)

#     # Predict probabilities for neighbors
#     predictions = []
#     for neighbor in neighbors:
#         feature_vector = G.nodes[neighbor]["features"] - G.nodes[target_node]["features"]
#         prob = model.predict(feature_vector.reshape(1, -1), verbose=0)[0][0]
#         predictions.append((neighbor, prob))

#     # Sort by probability
#     predictions.sort(key=lambda x: x[1], reverse=True)

#     if debug:
#         print(f"Neighbors Were Available, Chance of Containing Edge:{predictions[0][1]}, Selected Node {predictions[0][0]}" )
#     chosen_node = predictions[0][0]  # Node with the highest probability

#     is_accurate = chosen_node in best_nodes
#     return chosen_node, is_accurate

def predict_next_node(model, G, current_node, target_node, visited, prediction_cache, debug=False):
    if debug:
        print(f"Predicting next node from current node: {current_node}")

    neighbors = [
        neighbor for neighbor in G.neighbors(current_node)
        if "features" in G.nodes[neighbor] and neighbor not in visited
    ]

    if not neighbors:
        random_choice = random.choice(list(G.neighbors(current_node))) if len(list(G.neighbors(current_node))) > 0 else None
        if debug:
            print("Opting For Random Choice")
        return random_choice, False  # No "best choice" available, random node selected

    if target_node in neighbors:
        if debug:
            print(f"Target node {target_node} is a direct neighbor. Automatically selecting it.")
        return target_node, True  # Automatically move to the destination and mark as accurate

    # Find best nodes
    best_nodes = find_best_nodes(G, neighbors, target_node)

    # Predict probabilities for neighbors
    predictions = []
    target_features = np.array(G.nodes[target_node]["features"], dtype=np.float32)
    for neighbor in neighbors:
        neighbor_features = np.array(G.nodes[neighbor]["features"], dtype=np.float32)
        feature_vector = neighbor_features - target_features
        prob = model.predict(feature_vector.reshape(1, -1), verbose=0)[0][0]
        predictions.append((neighbor, prob))

    # Sort by probability
    predictions.sort(key=lambda x: x[1], reverse=True)

    if debug:
        print(f"Neighbors Available, Selected Node {predictions[0][0]} with Probability {predictions[0][1]}")
    chosen_node = predictions[0][0]  # Node with the highest probability

    is_accurate = chosen_node in best_nodes
    return chosen_node, is_accurate


def find_path(model, G, source, target, max_hops=40, debug=False):
    """
    Find the path while tracking accuracy at each step.
    Outputs path and per-move accuracy data.
    """
    if debug:
        print(f"Starting pathfinding from source: {source} to target: {target}, with max hops: {max_hops}")

    current_node = source
    visited = set()
    prediction_cache = {}
    path = [source]  # Track path regardless of success
    hops = 0
    correct_choices = 0
    step_actuals = []  # Ground truth: 1 when a "best choice" exists
    step_predictions = []  # Prediction: 1 if chosen move is among the best

    while hops < max_hops:
        if current_node == target:  # Success condition
            if debug:
                print(f"Pathfinding succeeded: target {target} reached.")
            return path, correct_choices, hops, step_actuals, step_predictions

        visited.add(current_node)
        next_node, is_accurate = predict_next_node(model, G, current_node, target, visited, prediction_cache, debug=debug)

        if not next_node:  # No valid moves left
            if debug:
                print(f"Pathfinding failed: no valid neighbors from {current_node}.")
            return path, correct_choices, hops, step_actuals, step_predictions

        # Log step accuracy
        step_actuals.append(1)  # A "best node" always exists
        step_predictions.append(1 if is_accurate else 0)

        if is_accurate:
            correct_choices += 1

        # Update path and state
        path.append(next_node)
        current_node = next_node
        hops += 1

    # Failure due to exceeding max hops
    if debug:
        print(f"Pathfinding failed: exceeded max hops ({max_hops}).")
    return path, correct_choices, hops, step_actuals, step_predictions


In [19]:
from sklearn.metrics import precision_score, recall_score, f1_score

def evaluate_pathfinding(model, G, max_hops=20, num_runs=20):
    """
    Evaluate the pathfinding algorithm, calculate metrics, and print paths.
    """
    total_hops = 0
    successful_runs = 0
    total_correct_choices = 0
    total_steps = 0
    all_actuals = []  # Tracks actual best node occurrences
    all_predictions = []  # Tracks predictions (1 if accurate, 0 otherwise)

    nodes_with_features = [node for node in G.nodes if "features" in G.nodes[node]]

    for run in range(num_runs):
        if len(nodes_with_features) < 2:
            print("Not enough nodes with features for evaluation.")
            break

        source_node, target_node = random.sample(nodes_with_features, 2)
        print(f"\nRun {run + 1}/{num_runs}: Source {source_node} -> Target {target_node}")

        # Run pathfinding
        path, correct_choices, steps, step_actuals, step_predictions = find_path(model, G, source_node, target_node, max_hops)

        # Print path regardless of success
        print(f"  Path: {path}")

        if path and path[-1] == target_node:
            print(f"  Path found in {steps} steps. Correct choices: {correct_choices}/{steps}")
            successful_runs += 1
            total_hops += steps
        else:
            print(f"  Path failed after {steps} steps.")

        # Update precision/recall data
        all_actuals.extend(step_actuals)
        all_predictions.extend(step_predictions)

        # Track total moves
        total_correct_choices += correct_choices
        total_steps += steps

    # Metrics
    success_rate = successful_runs / num_runs * 100
    average_hops = total_hops / successful_runs if successful_runs > 0 else float('inf')
    accuracy = total_correct_choices / total_steps if total_steps > 0 else 0.0
    f1 = f1_score(all_actuals, all_predictions, zero_division=0)

    # Print summary
    print(f"\n--- Summary ---")
    print(f"Success rate: {success_rate:.2f}% ({successful_runs}/{num_runs})")
    print(f"Average hops: {average_hops:.2f}")
    print(f"Accuracy: {accuracy:.2f}")
    print(f"F1-score: {f1:.2f}")

    return success_rate, average_hops, accuracy, precision, recall, f1




success_rate, average_hops, accuracy, precision, recall, f1 = evaluate_pathfinding(model, G, max_hops=20, num_runs=100)



Run 1/100: Source 9343 -> Target 65545
  Path: ['9343', '130', '0', '2', '1', '9', '10', '11', '256', '31', '7', '52', '66', '8', '253', '56', '116', '30', '148', '77', '141']
  Path failed after 20 steps.

Run 2/100: Source 31829 -> Target 141600
  Path: ['31829', '307', '141600']
  Path found in 2 steps. Correct choices: 2/2

Run 3/100: Source 31785 -> Target 175128
  Path: ['31785', '490', '0', '2', '1', '9', '10', '11', '256', '31', '7', '52', '66', '8', '253', '56', '116', '30', '148', '77', '141']
  Path failed after 20 steps.

Run 4/100: Source 103460 -> Target 32580
  Path: ['103460', '16959', '220', '0', '2', '1', '9', '10', '11', '256', '31', '7', '52', '66', '8', '253', '56', '116', '30', '148', '77']
  Path failed after 20 steps.

Run 5/100: Source 114245 -> Target 139449
  Path: ['114245', '22726', '275', '0', '2', '1', '9', '10', '11', '256', '31', '7', '52', '66', '8', '253', '56', '116', '30', '148', '77']
  Path failed after 20 steps.

Run 6/100: Source 8605 -> Target

NameError: name 'precision' is not defined