In [2]:
from collections import defaultdict
import joblib
import optuna.importance
# autoimport
%load_ext autoreload
%autoreload 1

In [3]:
from src.gnn_models import *
%aimport src.gnn_models

In [4]:
PROCESSED_DATA_DIR = r"E:\gnn_data\processed_step_data_global_features"
# PROCESSED_DATA_DIR = r"E:\gnn_data\pyg_data_v2"
dataset = FastSTEPDataset(PROCESSED_DATA_DIR, start_index=0)
get_dataset_stats(dataset)

Fast dataset loaded:
- Total samples: 63043
- Processed successfully: 63043
- Failed processing: 54
Label counts in dataset:
Label 1: 40434 instances
Label 0: 22609 instances
Label 1: 64.14% of total instances
Label 0: 35.86% of total instances
Class weights for loss function: [1.5591581342434584, 2.7884028484231944]


In [5]:
dataset[0]

Data(x=[405, 7], edge_index=[2, 948], y=[1], global_features=[1, 14])

In [7]:
model = GINCombined(
    input_features=dataset[0].x.shape[1],
    global_feature_dim=dataset[0].global_features.shape[1],
    embedding_dim=16,
    hidden_sizes=[256, 256],
    conv_dropout_rate=0.1,
    classifier_dropout_rate=0.1,
    use_layer_norm=True,
    pool_hidden_size=128
)

Created Dynamic GIN model:
- Input features: 7
- Number of hidden layers: 2
- Hidden layer sizes: [256, 256]
- Output classes: 2
- Convolution dropout rate: 0.1
- Classifier dropout rate: 0.1
- Layer normalization: True


In [5]:
all_history = {}

In [8]:
for i in range(1):
    try:
        torch.cuda.empty_cache()
        model_save_path = "gin_model_combined.pth"
        history_save_path = "gin_model_combined_training_history.pkl"
        if Path(model_save_path).exists():
            print(f"Loading model from {model_save_path}")
            model.load_state_dict(torch.load(model_save_path))
        else:
            print(f"Model file {model_save_path} does not exist. Initializing a new model.")
        # save training history

        with open(history_save_path, "rb") as f:
            all_history = joblib.load(f)
        trained_model, history = simple_train_model_v3(
            dataset,
            gnn_model=model,
            num_epochs=50,
            batch_size=12,
            learning_rate=0.0001,
            start_index=0,
            num_graphs_to_use=63000,
        )
        for key in history.keys():
            if not key in all_history:
                all_history[key] = []
            all_history[key].extend(history[key])
        all_history["epoch"] = list(range(1, len(all_history["epoch"]) + 1))
        with open(history_save_path, "wb") as f:
            joblib.dump(all_history, f)
        torch.save(model.state_dict(), model_save_path)
    except Exception as e:
        print(f"An error occurred during training epoch {i}: {e}")
        continue

Loading model from gin_model_combined.pth
Label 1: 40414 instances
Label 0: 22586 instances
Label 1: 64.15% of total instances
Label 0: 35.85% of total instances
Class weights: tensor([2.7893, 1.5589], device='cuda:0')
Splitting dataset into train and validation sets
Train samples: 50400
Validation samples: 12600

Starting training for 50 epochs...


Epoch 1/50 [Train]:   0%|          | 0/4200 [00:00<?, ?it/s]

x shape: torch.Size([65527, 7])
edge_index shape: torch.Size([2, 146120])
batch shape: torch.Size([65527])
global_features shape: torch.Size([12, 14])


Epoch 1/50 [Train]:   0%|          | 3/4200 [00:00<11:48,  5.93it/s, loss=0.1460, acc=86.11%, f1=0.8621]

x shape: torch.Size([63817, 7])
edge_index shape: torch.Size([2, 145736])
batch shape: torch.Size([63817])
global_features shape: torch.Size([12, 14])
x shape: torch.Size([95164, 7])
edge_index shape: torch.Size([2, 227586])
batch shape: torch.Size([95164])
global_features shape: torch.Size([12, 14])


Epoch 1/50 [Train]:   0%|          | 5/4200 [00:00<09:54,  7.05it/s, loss=0.5315, acc=88.33%, f1=0.8838]

x shape: torch.Size([117263, 7])
edge_index shape: torch.Size([2, 274114])
batch shape: torch.Size([117263])
global_features shape: torch.Size([12, 14])
x shape: torch.Size([51274, 7])
edge_index shape: torch.Size([2, 120934])
batch shape: torch.Size([51274])
global_features shape: torch.Size([12, 14])


Epoch 1/50 [Train]:   0%|          | 7/4200 [00:01<08:51,  7.90it/s, loss=0.1863, acc=86.90%, f1=0.8693]

x shape: torch.Size([36958, 7])
edge_index shape: torch.Size([2, 85474])
batch shape: torch.Size([36958])
global_features shape: torch.Size([12, 14])
x shape: torch.Size([85232, 7])
edge_index shape: torch.Size([2, 193238])
batch shape: torch.Size([85232])
global_features shape: torch.Size([12, 14])


Epoch 1/50 [Train]:   0%|          | 8/4200 [00:01<08:29,  8.24it/s, loss=0.6495, acc=86.11%, f1=0.8617]

x shape: torch.Size([31701, 7])
edge_index shape: torch.Size([2, 74712])
batch shape: torch.Size([31701])
global_features shape: torch.Size([12, 14])
x shape: torch.Size([32789, 7])
edge_index shape: torch.Size([2, 76918])
batch shape: torch.Size([32789])
global_features shape: torch.Size([12, 14])
x shape: torch.Size([154491, 7])
edge_index shape: torch.Size([2, 372812])
batch shape: torch.Size([154491])
global_features shape: torch.Size([12, 14])


Epoch 1/50 [Train]:   0%|          | 12/4200 [00:01<07:40,  9.09it/s, loss=0.1793, acc=87.18%, f1=0.8726]

x shape: torch.Size([33555, 7])
edge_index shape: torch.Size([2, 75390])
batch shape: torch.Size([33555])
global_features shape: torch.Size([12, 14])
x shape: torch.Size([34735, 7])
edge_index shape: torch.Size([2, 80012])
batch shape: torch.Size([34735])
global_features shape: torch.Size([12, 14])
x shape: torch.Size([15593, 7])
edge_index shape: torch.Size([2, 36416])
batch shape: torch.Size([15593])
global_features shape: torch.Size([12, 14])


Epoch 1/50 [Train]:   0%|          | 14/4200 [00:01<09:09,  7.62it/s, loss=0.2030, acc=86.90%, f1=0.8698]


x shape: torch.Size([50032, 7])
edge_index shape: torch.Size([2, 112184])
batch shape: torch.Size([50032])
global_features shape: torch.Size([12, 14])


KeyboardInterrupt: 

In [8]:
all_history

{'train_loss': [0.6210318155693156,
  0.5884338162484624,
  0.5807581246058856,
  0.5784128614000621,
  0.5751571377641743,
  0.5726831243905638,
  0.5708053694754129,
  0.5699995922900382,
  0.567597364683946,
  0.5667620221028725,
  0.5674510450155608,
  0.5656477987482434,
  0.5645915722554283,
  0.5617489199074251,
  0.5628458397702447,
  0.5620067406915837,
  0.559581496432601,
  0.5604556760866017,
  0.5597009777730064,
  0.5600156294989089,
  0.5582568416744471,
  0.5587369639506298,
  0.557835575489416,
  0.5562462923622558,
  0.5561987818192159,
  0.5555666007083796,
  0.5553133543775904,
  0.5549816019389601,
  0.5563397216601741,
  0.5559190037278902,
  0.5546640211006715,
  0.5552630360176166,
  0.5537093390675173,
  0.5547770561242388,
  0.555516376777419,
  0.5539051185841007,
  0.5538902622958024,
  0.5519740365392395,
  0.5521404327993237,
  0.5517584696465305,
  0.5518727227344754,
  0.5511733574863701,
  0.5518009466527118,
  0.6181215317653758,
  0.5722680032608055,


In [9]:
torch.save(model.state_dict(), model_save_path)

In [10]:
torch.cuda.empty_cache()
model_save_path = "dynamic_gin_embedding_model.pth"
torch.save(model.state_dict(), model_save_path)
trained_model, history = simple_train_model_v2(
        dataset_trim,
        gnn_model=model,
        num_epochs=5,
        batch_size=4,
        learning_rate=0.002,
        start_index=0,
        num_graphs_to_use=60000,
    )

Label 1: 38182 instances
Label 0: 21818 instances
Label 1: 63.64% of total instances
Label 0: 36.36% of total instances
Class weights: tensor([2.7500, 1.5714], device='cuda:0')
Splitting dataset into train and validation sets
Train samples: 48000
Validation samples: 12000

Starting training for 5 epochs...


Epoch 1/5 [Train]: 100%|██████████| 12000/12000 [04:33<00:00, 43.87it/s, loss=0.2671, acc=81.23%]
Epoch 1/5 [Val]: 100%|██████████| 3000/3000 [00:50<00:00, 59.09it/s, loss=0.6085, acc=81.62%] 


Epoch 1/5 - Train Loss: 0.4351, Acc: 81.23% | Val Loss: 0.4540, Acc: 81.62% (Best Val: 81.62%)


Epoch 2/5 [Train]: 100%|██████████| 12000/12000 [05:10<00:00, 38.67it/s, loss=0.0419, acc=81.33%]
Epoch 2/5 [Val]: 100%|██████████| 3000/3000 [02:21<00:00, 21.18it/s, loss=0.5406, acc=82.01%] 


Epoch 2/5 - Train Loss: 0.4342, Acc: 81.33% | Val Loss: 0.4338, Acc: 82.01% (Best Val: 82.01%)


Epoch 3/5 [Train]: 100%|██████████| 12000/12000 [04:28<00:00, 44.69it/s, loss=0.1811, acc=81.44%] 
Epoch 3/5 [Val]: 100%|██████████| 3000/3000 [00:51<00:00, 58.78it/s, loss=0.6834, acc=81.19%] 


Epoch 3/5 - Train Loss: 0.4330, Acc: 81.44% | Val Loss: 0.4391, Acc: 81.19% (Best Val: 82.01%)


Epoch 4/5 [Train]: 100%|██████████| 12000/12000 [05:01<00:00, 39.86it/s, loss=0.2786, acc=81.44%]
Epoch 4/5 [Val]: 100%|██████████| 3000/3000 [02:22<00:00, 21.02it/s, loss=0.5135, acc=82.33%] 


Epoch 4/5 - Train Loss: 0.4297, Acc: 81.44% | Val Loss: 0.4412, Acc: 82.33% (Best Val: 82.33%)


Epoch 5/5 [Train]: 100%|██████████| 12000/12000 [04:19<00:00, 46.19it/s, loss=0.3382, acc=81.76%]
Epoch 5/5 [Val]: 100%|██████████| 3000/3000 [00:49<00:00, 60.11it/s, loss=0.3723, acc=80.85%] 


Epoch 5/5 - Train Loss: 0.4255, Acc: 81.76% | Val Loss: 0.4389, Acc: 80.85% (Best Val: 82.33%)
Training completed!
Training time: 1849.739828900012
Best validation accuracy: 82.33%


In [26]:
import joblib
import optuna
optuna_result = r"optuna_results_20250820_213139.pkl"
with open(optuna_result, "rb") as f:
    optuna_results = joblib.load(f)

In [18]:
[item.values for item in optuna_results["fold_0"]["study"].trials]

[[0.5],
 [0.5],
 [0.9350761771202087],
 [0.5],
 [0.9295566082000732],
 [0.9362394213676453],
 [0.9269653558731079],
 [0.9116346836090088],
 [0.5],
 [0.93639075756073],
 [0.9339721202850342],
 [0.93220055103302],
 [0.9363347887992859],
 [0.9348156452178955],
 [0.9363428354263306],
 [0.9329842925071716],
 [0.9324948787689209],
 [0.9326227307319641],
 [0.9329482913017273],
 [0.934323787689209],
 [0.9346429109573364],
 [0.9260116815567017],
 [0.9329156875610352],
 [0.9320093393325806],
 [0.9302478432655334],
 [0.9287922382354736],
 [0.9238104820251465],
 [0.9318323731422424],
 [0.9344959259033203],
 [0.9293798208236694],
 [0.9314045310020447],
 [0.9294335842132568],
 [0.9345998764038086],
 [0.9342455863952637],
 [0.9339947700500488],
 [0.9317659139633179],
 [0.9368284940719604],
 [0.933342456817627],
 [0.9371731281280518],
 [0.9343671798706055],
 [0.929416298866272],
 [0.9372449517250061],
 [0.9312037229537964],
 [0.9377567172050476],
 [0.9371228218078613],
 [0.937757134437561],
 [0.936129

In [20]:
optuna_results

{'fold_0': {'best_params': {'num_layers': 1,
   'hidden_size': 256,
   'conv_dropout_rate': 0.3,
   'classifier_dropout_rate': 0.1,
   'use_layer_norm': False,
   'pool_hidden_size': 256,
   'num_epochs': 200,
   'batch_size': 32,
   'learning_rate': 0.00016847952401702993,
   'optimizer_scheduler': 'ReduceLROnPlateau'},
  'best_score': 0.937757134437561,
  'study': <optuna.study.study.Study at 0x2364c6d1c10>}}

In [23]:
pickled_study = optuna_results["fold_0"]["study"]

In [35]:
pickled_study.study_name

'no-name-f04c1bde-e879-4d38-9aee-bb8a7454aaee'

In [39]:
print(f"Loaded study with {len(pickled_study.trials)} trials")
print(f"Best value: {pickled_study.best_value}")

# Create a new study with SQLite storage
sqlite_study = optuna.create_study(
    study_name="gine_study_fold_00",
    storage="sqlite:///optuna_gine_20252008_215100.db",
    direction="maximize",  # or "minimize" depending on your original study
    load_if_exists=True
)

# Copy all trials from pickled study to SQLite study
for trial in pickled_study.trials:
    # Add each trial to the new study
    sqlite_study.add_trial(trial)

print(f"✅ Converted {len(sqlite_study.trials)} trials to SQLite")
print(f"Best value in SQLite study: {sqlite_study.best_value}")

[I 2025-08-20 22:01:38,586] A new study created in RDB with name: gine_study_fold_00


Loaded study with 50 trials
Best value: 0.937757134437561
✅ Converted 50 trials to SQLite
Best value in SQLite study: 0.937757134437561


In [42]:
import optuna
optuna.importance.get_param_importances(pickled_study)

{'learning_rate': 0.7748953462439422,
 'use_layer_norm': 0.06467817437922377,
 'hidden_size': 0.03659707765069031,
 'batch_size': 0.035762766894939874,
 'classifier_dropout_rate': 0.02626405077028361,
 'conv_dropout_rate': 0.024877122686233894,
 'num_layers': 0.01748525653032421,
 'num_epochs': 0.013675642188101328,
 'pool_hidden_size': 0.005764199542641594,
 'optimizer_scheduler': 3.6311361930541533e-07}

In [41]:
pickled_study.best_params

{'num_layers': 1,
 'hidden_size': 256,
 'conv_dropout_rate': 0.3,
 'classifier_dropout_rate': 0.1,
 'use_layer_norm': False,
 'pool_hidden_size': 256,
 'num_epochs': 200,
 'batch_size': 32,
 'learning_rate': 0.00016847952401702993,
 'optimizer_scheduler': 'ReduceLROnPlateau'}

In [15]:
import sys
import numpy as np
import dill

class MockTensor:
    def __init__(self, data, dtype=None, device=None, requires_grad=False):
        if isinstance(data, np.ndarray):
            self.data = data
        else:
            self.data = np.array(data)
        self.dtype = dtype
        self.device = device
        self.requires_grad = requires_grad
        self.shape = self.data.shape
        self.grad = None

    def numpy(self):
        return self.data

    def cpu(self):
        return MockTensor(self.data, self.dtype, 'cpu', self.requires_grad)

    def cuda(self):
        return MockTensor(self.data, self.dtype, 'cuda', self.requires_grad)

    def detach(self):
        return MockTensor(self.data, self.dtype, self.device, False)

    def clone(self):
        return MockTensor(self.data.copy(), self.dtype, self.device, self.requires_grad)

    def __array__(self):
        return self.data

    def size(self, dim=None):
        if dim is None:
            return self.shape
        return self.shape[dim]

    def __getitem__(self, key):
        return MockTensor(self.data[key], self.dtype, self.device, self.requires_grad)

class MockStorage:
    def __init__(self, data):
        self.data = np.array(data)

    def __getitem__(self, key):
        return self.data[key]

    def __len__(self):
        return len(self.data)

class MockTorch:
    Tensor = MockTensor

    # Mock data types
    float32 = np.float32
    float64 = np.float64
    int32 = np.int32
    int64 = np.int64
    uint8 = np.uint8

    # Mock devices
    class device:
        def __init__(self, device_str):
            self.type = device_str

    @staticmethod
    def tensor(data, dtype=None, device=None, requires_grad=False):
        return MockTensor(data, dtype, device, requires_grad)

    @staticmethod
    def from_numpy(arr):
        return MockTensor(arr)

    # Critical: Mock the internal tensor reconstruction functions
    @staticmethod
    def _rebuild_tensor_v2(storage, storage_offset, size, stride, requires_grad, backward_hooks):
        """This is the function PyTorch uses internally to rebuild tensors from pickle"""
        # Convert storage to numpy array
        if hasattr(storage, 'data'):
            data = storage.data
        else:
            data = np.array(storage)

        # Reshape according to size
        try:
            if len(size) > 0:
                data = data.reshape(size)
        except:
            pass

        return MockTensor(data, requires_grad=requires_grad)

    @staticmethod
    def _rebuild_tensor(storage, storage_offset, size, stride):
        """Older version of tensor rebuild function"""
        if hasattr(storage, 'data'):
            data = storage.data
        else:
            data = np.array(storage)

        try:
            if len(size) > 0:
                data = data.reshape(size)
        except:
            pass

        return MockTensor(data)

    # Mock storage types
    @staticmethod
    def FloatStorage(*args, **kwargs):
        return MockStorage(np.array(args[0] if args else [], dtype=np.float32))

    @staticmethod
    def DoubleStorage(*args, **kwargs):
        return MockStorage(np.array(args[0] if args else [], dtype=np.float64))

    @staticmethod
    def LongStorage(*args, **kwargs):
        return MockStorage(np.array(args[0] if args else [], dtype=np.int64))

    @staticmethod
    def IntStorage(*args, **kwargs):
        return MockStorage(np.array(args[0] if args else [], dtype=np.int32))

# Create the mock modules
torch_mock = MockTorch()
sys.modules['torch'] = torch_mock

# Also add the rebuild functions to the torch module directly
torch_mock._rebuild_tensor_v2 = MockTorch._rebuild_tensor_v2
torch_mock._rebuild_tensor = MockTorch._rebuild_tensor

# Mock other common PyTorch modules
sys.modules['torch.nn'] = type(sys)('torch.nn')
sys.modules['torch.nn.functional'] = type(sys)('torch.nn.functional')
sys.modules['torch_geometric'] = type(sys)('torch_geometric')

# Now try loading your pickle file
try:
    with open('all_folds_best_params_results_20250822_022835_dill.pkl', 'rb') as f:
        data = dill.load(f)
    print("Successfully loaded!")
    print(f"Data type: {type(data)}")

    # You can now access tensor data as NumPy arrays
    # For example, if data contains tensors:
    # numpy_array = some_tensor.numpy()

except Exception as e:
    print(f"Error: {e}")
    # If you still get errors, print them so we can add more mocks

Error: Can't get attribute '_rebuild_tensor_v2' on <__main__.MockTorch object at 0x000001AF998F5910>


In [13]:
import sys
from safetensors.numpy import save_file, load_file
import numpy as np
import sys

class MockTensor:
    def __init__(self, data, dtype=None, device=None):
        if isinstance(data, np.ndarray):
            self.data = data
        else:
            self.data = np.array(data)
        self.dtype = dtype
        self.device = device
        self.shape = self.data.shape

    def numpy(self):
        return self.data

    def cpu(self):
        return MockTensor(self.data, self.dtype, 'cpu')

    def cuda(self):
        return MockTensor(self.data, self.dtype, 'cuda')

    def detach(self):
        return self

    def clone(self):
        return MockTensor(self.data.copy(), self.dtype, self.device)

    def __array__(self):
        return self.data

class MockTorch:
    Tensor = MockTensor
    float32 = np.float32
    float64 = np.float64
    int32 = np.int32
    int64 = np.int64

    @staticmethod
    def tensor(data, dtype=None):
        return MockTensor(data, dtype)

    @staticmethod
    def from_numpy(arr):
        return MockTensor(arr)


# Mock torch and related modules
sys.modules['torch'] = MockTorch()
sys.modules['torch.nn'] = MockTorch()
sys.modules['torch.nn.functional'] = MockTorch()
sys.modules['torch_geometric'] = MockTorch()
sys.modules['torch._utils'] = MockTorch()
sys.modules['torch.storage'] = MockTorch()

import dill
all_results = {}
for fold in range(1,10):
    with open(rf"fold_{fold}results.pkl", "rb") as f:
        fold_results = dill.load(f)
    all_results[f"fold_{fold}"] = fold_results

AttributeError: Can't get attribute '_rebuild_tensor_v2' on <__main__.MockTorch object at 0x000001AF998F5910>

In [9]:
fold_results

{'best_params': {'num_layers': 1,
  'hidden_size': 256,
  'conv_dropout_rate': 0.3,
  'classifier_dropout_rate': 0.1,
  'use_layer_norm': False,
  'pool_hidden_size': 256,
  'num_epochs': 200,
  'batch_size': 32,
  'learning_rate': 0.00016847952401702993,
  'optimizer_scheduler': 'ReduceLROnPlateau'},
 'train_tracker': {'acc': <MagicMock name='mock._rebuild_tensor_v2()' id='1853740023680'>,
  'auroc': <MagicMock name='mock._rebuild_tensor_v2()' id='1853740023680'>,
  'f1': <MagicMock name='mock._rebuild_tensor_v2()' id='1853740023680'>},
 'val_tracker': {'acc': <MagicMock name='mock._rebuild_tensor_v2()' id='1853740023680'>,
  'auroc': <MagicMock name='mock._rebuild_tensor_v2()' id='1853740023680'>,
  'f1': <MagicMock name='mock._rebuild_tensor_v2()' id='1853740023680'>}}