In [9]:
import joblib
import numpy as np
import os
import pandas as pd
import torch
import torch.nn as nn

from pathlib import Path
from sklearn.preprocessing import MinMaxScaler, FunctionTransformer, StandardScaler
from sklearn.pipeline import Pipeline
from torch_geometric.data import Data
from tqdm import tqdm

torch.set_printoptions(sci_mode=False)

DATASET_PATH = r"E:\gnn_data\processed_step_data_full_node_features"

In [10]:
pipeline = Pipeline([
    ("log", FunctionTransformer(np.log1p, validate=True)),
    ("scale", StandardScaler())
])
data = pd.read_csv(r"./data/synced_dataset_final.csv")

In [11]:
features = [
    "faces", "edges", "vertices", "quantity",
    "height", "width", "depth", "volume", "area",
    "bbox_height", "bbox_width", "bbox_depth", "bbox_volume",
    "bbox_area",
]
X = data[features].copy()
X_scaled = pipeline.fit_transform(X)
scaled_df = pd.DataFrame(X_scaled, columns=features)
scaled_df

Unnamed: 0,faces,edges,vertices,quantity,height,width,depth,volume,area,bbox_height,bbox_width,bbox_depth,bbox_volume,bbox_area
0,1.177123,1.243209,1.280306,-0.623065,1.100018,0.944946,0.903087,1.299111,1.311256,1.045717,0.655937,1.452008,1.425970,1.452008
1,1.177123,1.243209,1.280306,-0.623065,1.100018,0.944946,0.903087,1.299111,1.311256,1.045717,0.655937,1.452008,1.425970,1.452008
2,1.177123,1.243209,1.280306,-0.623065,1.100018,0.944946,0.903087,1.299111,1.311256,1.045717,0.655937,1.452008,1.425970,1.452008
3,-1.345891,-1.321817,-1.301147,-0.623065,0.773427,0.040955,0.092603,0.661516,0.190447,0.166351,-0.245134,1.103236,0.465420,1.103236
4,-1.204550,-1.387839,-1.401598,-0.623065,0.385735,-0.103938,0.199120,-0.009061,0.102957,0.281921,-0.389559,0.689213,0.266657,0.689213
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63286,1.177123,1.243209,1.280306,-0.623065,1.100018,0.944946,0.903087,1.299111,1.311256,1.045717,0.655937,1.452008,1.425970,1.452008
63287,0.103427,0.025129,-0.120699,1.972633,0.695243,0.448623,0.227919,0.751649,0.331204,0.313167,0.161217,1.019742,0.687079,1.019742
63288,1.177123,1.243209,1.280306,-0.623065,1.100018,0.944946,0.903087,1.299111,1.311256,1.045717,0.655937,1.452008,1.425970,1.452008
63289,1.177123,1.243209,1.280306,-0.623065,1.100018,0.944946,0.903087,1.299111,1.311256,1.045717,0.655937,1.452008,1.425970,1.452008


In [12]:
scaled_df.describe()

Unnamed: 0,faces,edges,vertices,quantity,height,width,depth,volume,area,bbox_height,bbox_width,bbox_depth,bbox_volume,bbox_area
count,63291.0,63291.0,63291.0,63291.0,63291.0,63291.0,63291.0,63291.0,63291.0,63291.0,63291.0,63291.0,63291.0,63291.0
mean,-3.745194e-16,2.775216e-16,-8.98128e-18,5.0295170000000006e-17,4.3559210000000006e-17,-2.514758e-16,-2.514758e-16,1.410061e-16,-3.457793e-16,-1.796256e-16,-2.586609e-16,1.023866e-16,-2.344114e-16,1.023866e-16
std,1.000008,1.000008,1.000008,1.000008,1.000008,1.000008,1.000008,1.000008,1.000008,1.000008,1.000008,1.000008,1.000008,1.000008
min,-2.788245,-3.000654,-3.01868,-0.6230655,-3.119035,-3.275045,-3.254481,-4.267127,-5.218936,-3.468265,-3.550652,-3.054336,-4.427959,-3.054336
25%,-0.6858142,-0.6419232,-0.6555887,-0.6230655,-0.6054304,-0.6965314,-0.7107615,-0.5799896,-0.5882728,-0.7033272,-0.6069614,-0.6097639,-0.5726225,-0.6097639
50%,-0.1353647,-0.09308507,-0.08880481,-0.3547146,-0.05136448,0.0409551,-0.006880988,-0.01419641,-0.03143936,0.00772936,0.07719783,-0.03923133,0.03755414,-0.03923133
75%,0.6022812,0.6270531,0.62331,0.2060566,0.6870038,0.683204,0.6755456,0.6657674,0.6566541,0.6556717,0.6313362,0.68663,0.6668648,0.68663
max,3.385108,3.33866,3.699658,9.585701,3.983474,3.723741,3.689107,3.173189,3.942813,3.549576,3.463669,3.810437,4.307408,3.810437


In [19]:
data["features"] = scaled_df.values.tolist()

In [21]:
non_feature_cols = data.drop(columns=features)
new_df = pd.concat([non_feature_cols, scaled_df], axis=1)
new_df

Unnamed: 0,item_id,step_file,technology_id,material_id,post_processing_id,download_file_url,technology_name,is_cnc,multiclass_labels,graphml_file,...,height,width,depth,volume,area,bbox_height,bbox_width,bbox_depth,bbox_volume,bbox_area
0,100035,E:\gnn_data\step_files\100035_MakerVerse_Sampl...,feb0f26f-94a5-4be2-9d40-761bb2857ab6,813,[1000],https://prod-mv-user-files-upload.s3.eu-centra...,CNC (Metal),1,2,E:\gnn_data\graphml_files\100035_MakerVerse_Sa...,...,1.100018,0.944946,0.903087,1.299111,1.311256,1.045717,0.655937,1.452008,1.425970,1.452008
1,100036,E:\gnn_data\step_files\100036_MakerVerse_Sampl...,feb0f26f-94a5-4be2-9d40-761bb2857ab6,813,[1000],https://prod-mv-user-files-upload.s3.eu-centra...,CNC (Metal),1,2,E:\gnn_data\graphml_files\100036_MakerVerse_Sa...,...,1.100018,0.944946,0.903087,1.299111,1.311256,1.045717,0.655937,1.452008,1.425970,1.452008
2,100040,E:\gnn_data\step_files\100040_00048125_Stator_...,feb0f26f-94a5-4be2-9d40-761bb2857ab6,813,[1000],https://prod-mv-user-files-upload.s3.eu-centra...,CNC (Metal),1,2,E:\gnn_data\graphml_files\100040_00048125_Stat...,...,1.100018,0.944946,0.903087,1.299111,1.311256,1.045717,0.655937,1.452008,1.425970,1.452008
3,100041,E:\gnn_data\step_files\100041_00048495_Table_L...,feb0f26f-94a5-4be2-9d40-761bb2857ab6,1311,[277],https://prod-mv-user-files-upload.s3.eu-centra...,CNC (Metal),1,2,E:\gnn_data\graphml_files\100041_00048495_Tabl...,...,0.773427,0.040955,0.092603,0.661516,0.190447,0.166351,-0.245134,1.103236,0.465420,1.103236
4,100042,E:\gnn_data\step_files\100042_00048569_Seal_Ho...,feb0f26f-94a5-4be2-9d40-761bb2857ab6,1315,[277],https://prod-mv-user-files-upload.s3.eu-centra...,CNC (Metal),1,2,E:\gnn_data\graphml_files\100042_00048569_Seal...,...,0.385735,-0.103938,0.199120,-0.009061,0.102957,0.281921,-0.389559,0.689213,0.266657,0.689213
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
63286,99989,E:\gnn_data\step_files\99989_MakerVerse_Sample...,feb0f26f-94a5-4be2-9d40-761bb2857ab6,804,[277],https://prod-mv-user-files-upload.s3.eu-centra...,CNC (Metal),1,2,E:\gnn_data\graphml_files\99989_MakerVerse_Sam...,...,1.100018,0.944946,0.903087,1.299111,1.311256,1.045717,0.655937,1.452008,1.425970,1.452008
63287,99990,E:\gnn_data\step_files\99990_240332A01.step,feb0f26f-94a5-4be2-9d40-761bb2857ab6,1301,[277],https://prod-mv-user-files-upload.s3.eu-centra...,CNC (Metal),1,2,E:\gnn_data\graphml_files\99990_240332A01.graphml,...,0.695243,0.448623,0.227919,0.751649,0.331204,0.313167,0.161217,1.019742,0.687079,1.019742
63288,99991,E:\gnn_data\step_files\99991_MakerVerse_Sample...,feb0f26f-94a5-4be2-9d40-761bb2857ab6,813,[1000],https://prod-mv-user-files-upload.s3.eu-centra...,CNC (Metal),1,2,E:\gnn_data\graphml_files\99991_MakerVerse_Sam...,...,1.100018,0.944946,0.903087,1.299111,1.311256,1.045717,0.655937,1.452008,1.425970,1.452008
63289,99993,E:\gnn_data\step_files\99993_MakerVerse_Sample...,feb0f26f-94a5-4be2-9d40-761bb2857ab6,813,[1000],https://prod-mv-user-files-upload.s3.eu-centra...,CNC (Metal),1,2,E:\gnn_data\graphml_files\99993_MakerVerse_Sam...,...,1.100018,0.944946,0.903087,1.299111,1.311256,1.045717,0.655937,1.452008,1.425970,1.452008


In [22]:
new_df.to_csv(r"./data/synced_dataset_final_scaled.csv", index=False)

In [4]:
data = torch.load(
    r"E:\gnn_data\processed_step_data_full_node_features\100064_00048907_Pin5_step_002.pt",
    weights_only=False)

In [46]:
data.x

tensor([[   228.0000,      2.0000,      0.0004,  ...,     -0.3974,
              0.0397,     -0.0896],
        [   331.0000,      2.0000,      0.0004,  ...,     -0.1461,
             -0.0026,      0.1902],
        [     3.0000,      3.0000,      0.0007,  ...,     -0.0340,
              0.0294,      0.0598],
        ...,
        [   181.0000,      2.0000,      0.0004,  ...,     -0.3073,
              0.4790,     -0.2663],
        [   182.0000,      2.0000,      0.0004,  ...,      0.3461,
              0.2407,      0.1121],
        [    72.0000,      1.0000,      0.0002,  ...,     -0.0518,
             -0.0519,     -0.0803]])

In [45]:
data.x[:, [1, 6]]

tensor([[2.0000, 3.0000],
        [2.0000, 2.0000],
        [3.0000, 2.5000],
        ...,
        [2.0000, 1.0000],
        [2.0000, 0.0000],
        [1.0000, 0.0000]])

In [25]:
min_linear_features = torch.full((4,), float('inf'))
max_linear_features = torch.full((4,), float('-inf'))

linear_features = data.x[:, 2:6]
max_linear_features = torch.max(max_linear_features,
                                linear_features.max(dim=0).values)
min_linear_features = torch.min(min_linear_features,
                                linear_features.min(dim=0).values)
max_linear_features, min_linear_features
# max_linear_features.shape, linear_features.max(dim=0).values.shape

(tensor([    0.0262,     0.0000,     0.0000,     0.0000]),
 tensor([    0.0000,     0.0000,     0.0000,     0.0000]))

In [27]:
def get_global_min_max(dataset_path):
    # Initialize min and max for log-transformed features (Node Degree, Avg Neighbor Degree)
    # These are features at index 0 and 5
    min_log_features = torch.full((2,), float('inf'))
    max_log_features = torch.full((2,), float('-inf'))

    # Initialize min and max for linearly scaled features (Degree Centrality, Betweenness, Closeness, PageRank)
    # These are features at index 1, 2, 3, 4
    min_linear_features = torch.full((4,), float('inf'))
    max_linear_features = torch.full((4,), float('-inf'))
    processed_files_count = 0

    for filename in tqdm(os.listdir(dataset_path)):
        if filename.endswith(".pt"):
            try:
                file_path = os.path.join(dataset_path, filename)
                data = torch.load(file_path, weights_only=False)
                if hasattr(data, "x") and data.x is not None:
                    node_degree = data.x[:,
                                  1:2]  # Assuming first column is node degree
                    avg_neighbor_degree = data.x[:, 6: 7]
                    log_node_degree = torch.log1p(node_degree)
                    log_avg_neighbor_degree = torch.log1p(avg_neighbor_degree)
                    # Update min/max for log features
                    min_log_features[0] = torch.min(min_log_features[0],
                                                    log_node_degree.min())
                    max_log_features[0] = torch.max(max_log_features[0],
                                                    log_node_degree.max())
                    min_log_features[1] = torch.min(min_log_features[1],
                                                    log_avg_neighbor_degree.min())
                    max_log_features[1] = torch.max(max_log_features[1],
                                                    log_avg_neighbor_degree.max())

                    # Linear features (Degree Centrality, Betweenness, Closeness, PageRank)
                    linear_features = data.x[:, 2:6]
                    max_linear_features = torch.max(max_linear_features[0],
                                                    linear_features.max(
                                                        dim=0).values)
                    min_linear_features = torch.min(min_linear_features[1],
                                                    linear_features.min(
                                                        dim=0).values)

                    processed_files_count += 1
                else:
                    print(
                        f"Invalid data in {filename}: 'x' attribute is missing or None")
            except Exception as e:
                print(f"Error processing {filename}: {e}")
    return min_log_features, max_log_features, min_linear_features, max_linear_features, processed_files_count

In [28]:
min_log_features, max_log_features, min_linear_features, max_linear_features, processed_files_count = get_global_min_max(
    DATASET_PATH)

100%|██████████| 64580/64580 [02:42<00:00, 397.88it/s] 


In [29]:
min_log_features, max_log_features, min_linear_features, max_linear_features

(tensor([0., 0.]),
 tensor([10.6986, 10.0054]),
 tensor([0., 0., 0., 0.]),
 tensor([0.9438, 0.9438, 0.9438, 0.9438]))

In [33]:
def initialize_minmax_scaler(min_vals: torch.Tensor, max_vals: torch.Tensor,
                             feature_range: tuple = (-1, 1)):
    """
    Initializes a MinMaxScaler with pre-defined min and max values.
    Handles cases where data_range_ might be zero to prevent division by zero.
    """
    scaler = MinMaxScaler(feature_range=feature_range)
    min_np = min_vals.cpu().numpy()
    max_np = max_vals.cpu().numpy()

    scaler.data_min_ = min_np
    scaler.data_max_ = max_np
    scaler.data_range_ = scaler.data_max_ - scaler.data_min_

    # If data_range_ is zero (min_np == max_np), set scale_ to 1.0 to avoid division by zero.
    scaler.scale_ = np.where(scaler.data_range_ == 0,
                             (feature_range[1] - feature_range[0]) / 2,
                             # Map to midpoint if range is zero
                             (feature_range[1] - feature_range[
                                 0]) / scaler.data_range_)

    # The min_ attribute is used in the transformation formula: X_scaled = X_std * (max - min) + min
    # where X_std = (X - data_min_) / data_range_
    # So, X_scaled = (X - data_min_) * scale_ + feature_range[0]
    # This implies min_ = feature_range[0] - data_min_ * scale_
    scaler.min_ = feature_range[0] - scaler.data_min_ * scaler.scale_

    return scaler

In [66]:
def apply_feature_scaling(
        directory_path: str,
        min_log_values: torch.Tensor,
        max_log_values: torch.Tensor,
        min_linear_values: torch.Tensor,
        max_linear_values: torch.Tensor,
        feature_range: tuple = (-1, 1),
        output_directory: str = None
        # Default to overwrite files in directory_path
):
    log_scaler = initialize_minmax_scaler(min_log_values, max_log_values,
                                          feature_range)
    print(
        f"Log-transform Scaler (Features 0, 5) initialized with feature_range={feature_range}")
    print(f"  data_min_: {log_scaler.data_min_}")
    print(f"  data_max_: {log_scaler.data_max_}")
    print(f"  scale_: {log_scaler.scale_}")
    print(f"  min_ (offset): {log_scaler.min_}")

    # Initialize the scaler for linearly scaled features (1, 2, 3, 4)
    linear_scaler = initialize_minmax_scaler(min_linear_values,
                                             max_linear_values, feature_range)
    print(
        f"\nLinear Scaler (Features 1,2,3,4) initialized with feature_range={feature_range}")
    print(f"  data_min_: {linear_scaler.data_min_}")
    print(f"  data_max_: {linear_scaler.data_max_}")
    print(f"  scale_: {linear_scaler.scale_}")
    print(f"  min_ (offset): {linear_scaler.min_}")

    # Set up output directory
    if output_directory and not os.path.exists(output_directory):
        os.makedirs(output_directory)
        print(f"\nCreated output directory: {output_directory}")
    elif output_directory and output_directory == directory_path:
        print(
            "\nWarning: output_directory is the same as directory_path. Files will be overwritten.")
    elif not output_directory:
        print(
            "\nWarning: No output_directory specified. Files will be overwritten in the original directory.")

    processed_files_count = 0
    skipped_files_count = 0

    print(f"Applying scaling to files in '{directory_path}'...")
    for filename in tqdm(os.listdir(directory_path)):
        if filename.endswith(".pt"):
            input_file_path = os.path.join(directory_path, filename)
            output_file_path = os.path.join(
                output_directory if output_directory else directory_path,
                filename)

            try:
                data = torch.load(input_file_path, weights_only=False)

                if (hasattr(data, "x") and hasattr(data, "global_features")
                        and data.x is not None and data.global_features is not None
                        and data.x.shape[1] >= 6):
                    # Ensure data.x is on CPU for numpy conversion and to avoid device issues
                    original_x = data.x.cpu()
                    original_device = data.x.device  # Store original device to move back

                    # --- Process Features 0 (Node Degree) and 5 (Average Neighbor Degree) ---
                    # Extract both features as a single (N, 2) array for the log_scaler
                    log_features_orig = original_x[:, [1, 6]].numpy()
                    log_transformed_features = np.log1p(log_features_orig)
                    scaled_log_features_np = log_scaler.transform(
                        log_transformed_features)

                    # Split the scaled log features back into individual tensors
                    scaled_degree_torch = torch.from_numpy(
                        scaled_log_features_np[:, 0:1]).to(original_device)
                    scaled_avg_degree_torch = torch.from_numpy(
                        scaled_log_features_np[:, 1:2]).to(original_device)

                    # --- Process Features 1, 2, 3, 4 (Centralities, PageRank) ---
                    linear_features_orig = original_x[:, 2:6].numpy()
                    scaled_linear_features_np = linear_scaler.transform(
                        linear_features_orig)
                    scaled_linear_features_torch = torch.from_numpy(
                        scaled_linear_features_np).to(original_device)

                    # --- Concatenate all scaled features and original embeddings ---
                    # Ensure the order is correct: [F0_scaled, F1-4_scaled, F5_scaled, Embeddings]
                    data.x = torch.cat((
                        original_x[:, 0:1].to(original_device),
                        # Node ID or other feature
                        scaled_degree_torch,
                        scaled_linear_features_torch,
                        scaled_avg_degree_torch,
                        original_x[:, 7:].to(original_device)
                    # Original embeddings (features 6 to 37)
                    ), dim=1)

                    original_global = data.global_features.to("cpu")

                    # Save the modified Data object
                    torch.save(data, output_file_path)
                    processed_files_count += 1

                else:
                    print(
                        f"Skipping '{filename}': 'data.x' not found or has fewer than 6 features.")
                    skipped_files_count += 1

            except Exception as e:
                print(f"Error processing '{filename}': {e}")
                skipped_files_count += 1

    print(
        f"\nScaling complete. Processed {processed_files_count} files, skipped {skipped_files_count} files.")
    if not output_directory:
        print("Original files have been overwritten.")
    elif output_directory != directory_path:
        print(
            f"Scaled files saved to '{output_directory}'. Original files remain in '{directory_path}'.")

In [67]:
apply_feature_scaling(
    DATASET_PATH,
    min_log_features, max_log_features,
    min_linear_features, max_linear_features,
    feature_range=(-1, 1),
    output_directory=r"E:\gnn_data\processed_step_data_full_node_features_scaled")  # Set to None to overwrite files in DATASET_PATH)

Log-transform Scaler (Features 0, 5) initialized with feature_range=(-1, 1)
  data_min_: [0. 0.]
  data_max_: [10.69856  10.005435]
  scale_: [0.18694106 0.19989136]
  min_ (offset): [-1. -1.]

Linear Scaler (Features 1,2,3,4) initialized with feature_range=(-1, 1)
  data_min_: [0. 0. 0. 0.]
  data_max_: [0.9437746 0.9437746 0.9437746 0.9437746]
  scale_: [2.1191502 2.1191502 2.1191502 2.1191502]
  min_ (offset): [-1. -1. -1. -1.]
Applying scaling to files in 'E:\gnn_data\processed_step_data_full_node_features'...


100%|██████████| 64580/64580 [12:27<00:00, 86.37it/s] 


Scaling complete. Processed 64579 files, skipped 0 files.
Scaled files saved to 'E:\gnn_data\processed_step_data_full_node_features_scaled'. Original files remain in 'E:\gnn_data\processed_step_data_full_node_features'.





In [101]:
feature_range = (-1, 1)
min_np = global_min_values.cpu().numpy()
max_np = global_max_values.cpu().numpy()
scaler = MinMaxScaler(feature_range=feature_range)
scaler.data_max_ = max_np
scaler.data_min_ = min_np
scaler.data_range_ = scaler.data_max_ - scaler.data_min_
scaler.scale_ = np.where(scaler.data_range_ == 0, 1.0, (
        feature_range[1] - feature_range[0]) / scaler.data_range_)
scaler.min_ = feature_range[0] - scaler.data_min_ * scaler.scale_
print(f"MinMaxScaler initialized with feature_range={feature_range}")
print(f"Scaler data_min_: {scaler.data_min_}")
print(f"Scaler data_max_: {scaler.data_max_}")
print(f"Scaler scale_: {scaler.scale_}")
print(f"Scaler min_ (offset): {scaler.min_}")

MinMaxScaler initialized with feature_range=(-1, 1)
Scaler data_min_: [0.000000e+00 0.000000e+00 0.000000e+00 0.000000e+00 2.077666e-06
 0.000000e+00]
Scaler data_max_: [4.4291000e+04 9.4377458e-01 8.0201477e-02 7.7898264e-01 2.9146492e-01
 2.2145500e+04]
Scaler scale_: [4.5155899e-05 2.1191502e+00 2.4937197e+01 2.5674512e+00 6.8619385e+00
 9.0311798e-05]
Scaler min_ (offset): [-1.        -1.        -1.        -1.        -1.0000143 -1.       ]


In [102]:
DATASET_SCALED_PATH = r"E:\gnn_data\processed_step_data_full_node_features_scaled"
os.makedirs(DATASET_SCALED_PATH, exist_ok=True)

processed_files_count = 0
skipped_files_count = 0

for filename in tqdm(os.listdir(DATASET_PATH)[:10]):
    if filename.endswith(".pt"):
        input_file_path = os.path.join(DATASET_PATH, filename)
        output_file_path = os.path.join(DATASET_SCALED_PATH, filename)
        try:
            data = torch.load(input_file_path, weights_only=False)
            if hasattr(data, "x") and data.x is not None:
                node_features = data.x[:, 1:2]
                avg_neighbor_degree = data.x[:, 6:7]
                scaled_features = scaler.transform(node_features.cpu().numpy())
                data.x[:, 1:7] = torch.tensor(scaled_features,
                                              dtype=data.x.dtype)
                torch.save(data, output_file_path)
                processed_files_count += 1
            else:
                print(
                    f"Invalid data in {filename}: 'x' attribute is missing or None")
                skipped_files_count += 1
        except Exception as e:
            print(f"Error processing {filename}: {e}")
            skipped_files_count += 1

100%|██████████| 10/10 [00:00<00:00, 80.07it/s]


In [59]:
original_data = torch.load(
    r"E:\gnn_data\processed_step_data_full_node_features\100045_00048907_Pin5_step_002.pt",
    weights_only=False)
scaled_data = torch.load(
    r"E:\gnn_data\processed_step_data_full_node_features_scaled\100045_00048907_Pin5_step_002.pt",
    weights_only=False)

In [60]:
max_log_features, max_linear_features

(tensor([10.6986, 10.0054]), tensor([0.9438, 0.9438, 0.9438, 0.9438]))

In [64]:
original_data.x

tensor([[   228.0000,     10.0000,      0.0355,  ...,     -0.3974,
              0.0397,     -0.0896],
        [   287.0000,      2.0000,      0.0071,  ...,     -0.2904,
              0.2174,     -0.0353],
        [    17.0000,      1.0000,      0.0035,  ...,     -0.1551,
              0.1201,     -0.3482],
        ...,
        [    59.0000,      1.0000,      0.0035,  ...,      0.3507,
             -0.0930,     -0.2071],
        [   146.0000,      1.0000,      0.0035,  ...,      0.1529,
             -0.1725,      0.4640],
        [   146.0000,      1.0000,      0.0035,  ...,      0.1529,
             -0.1725,      0.4640]])

In [63]:
scaled_data.x

tensor([[   228.0000,     -0.5517,     -0.9249,  ...,     -0.3974,
              0.0397,     -0.0896],
        [   287.0000,     -0.7946,     -0.9850,  ...,     -0.2904,
              0.2174,     -0.0353],
        [    17.0000,     -0.8704,     -0.9925,  ...,     -0.1551,
              0.1201,     -0.3482],
        ...,
        [    59.0000,     -0.8704,     -0.9925,  ...,      0.3507,
             -0.0930,     -0.2071],
        [   146.0000,     -0.8704,     -0.9925,  ...,      0.1529,
             -0.1725,      0.4640],
        [   146.0000,     -0.8704,     -0.9925,  ...,      0.1529,
             -0.1725,      0.4640]])

In [23]:
sample = torch.load(r"E:\gnn_data\processed_step_data_global_features\52791_MTG_coffeefilter.pt", weights_only=False)
sample

Data(x=[115205, 7], edge_index=[2, 262218], y=[1], global_features=[14])

In [7]:
data.global_features

tensor([   10.0000,    24.0000,    16.0000,     1.0000,   110.0000,    60.0000,
           60.0000, 198807.0781, 25498.2949,    60.0000,    60.0000,   110.0000,
        396000.0000,   110.0000])