In [1]:
import torch
import scipy.io
import numpy as np
from torch_geometric.data import Data
from pretrain_gnns.bio.model import GNN
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
import torch_geometric.transforms as T

# Check if CUDA is available and use it if possible
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")


# Function to load and process the data
def load_graph_data(filepath):
    data = scipy.io.loadmat(filepath)
    x = torch.tensor(data['attrb'].todense(), dtype=torch.float32)
    edge_index = torch.tensor(data['network'].nonzero(), dtype=torch.long)
    edge_attr = torch.ones(edge_index.shape[1], 9)
    y = torch.tensor(data['group'].argmax(axis=1).squeeze(), dtype=torch.long)
    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)

# Load datasets
print("Loading datasets...")
train_data = load_graph_data('acmv9.mat')
test_data = load_graph_data('citationv1.mat')

# Apply node-wise normalization
transform = T.NormalizeFeatures()
train_data = transform(train_data)
test_data = transform(test_data)

# Keep data on CPU
train_data.x = train_data.x.cpu()
train_data.edge_index = train_data.edge_index.cpu()
train_data.edge_attr = train_data.edge_attr.cpu()
train_data.y = train_data.y.cpu()

test_data.x = test_data.x.cpu()
test_data.edge_index = test_data.edge_index.cpu()
test_data.edge_attr = test_data.edge_attr.cpu()
test_data.y = test_data.y.cpu()

# Custom sampler for batch training
class GraphSampler:
    def __init__(self, data, batch_size):
        self.data = data
        self.batch_size = batch_size
        self.num_nodes = data.x.size(0)

    def __iter__(self):
        node_indices = torch.randperm(self.num_nodes)
        for i in range(0, self.num_nodes, self.batch_size):
            batch_indices = node_indices[i:i+self.batch_size]
            edge_mask = (self.data.edge_index[0].unsqueeze(1) == batch_indices).any(1)
            batch_edge_index = self.data.edge_index[:, edge_mask]
            batch_edge_attr = self.data.edge_attr[edge_mask]
            yield batch_indices, batch_edge_index, batch_edge_attr

    def __len__(self):
        return (self.num_nodes + self.batch_size - 1) // self.batch_size

# Instantiate the pre-trained model from pretrain-gnns
print("Loading pre-trained model...")
num_node_features = train_data.num_node_features
num_classes = train_data.y.max().item() + 1
model = GNN(num_layer=5, emb_dim=300, gnn_type='gin', drop_ratio=0.5, JK="last")
model.load_state_dict(torch.load('pretrain_gnns/bio/model_gin/supervised.pth', map_location=device))
model = model.to(device)

# Set up optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()
scaler = GradScaler()

# Training loop with mixed precision
print("Starting training...")
model.train()
num_epochs = 25
batch_size = 500 

for epoch in range(num_epochs):
    total_loss = 0
    sampler = GraphSampler(train_data, batch_size)
    for batch_indices, batch_edge_index, batch_edge_attr in tqdm(sampler, desc=f"Epoch {epoch+1}/{num_epochs}"):
        optimizer.zero_grad()
        with autocast():
            batch_x = train_data.x[batch_indices].to(device)
            batch_edge_index = batch_edge_index.to(device)
            batch_edge_attr = batch_edge_attr.to(device)
            batch_y = train_data.y[batch_indices].to(device)
            
            output = model(batch_x, batch_edge_index, batch_edge_attr)
            loss = criterion(output, batch_y)
        
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        total_loss += loss.item()
    
    print(f'Epoch {epoch+1}, Loss: {total_loss/len(sampler):.4f}')

# Evaluation
print("Evaluating model...")
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    sampler = GraphSampler(test_data, batch_size)
    for batch_indices, batch_edge_index, batch_edge_attr in tqdm(sampler, desc="Evaluation"):
        with autocast():
            batch_x = test_data.x[batch_indices].to(device)
            batch_edge_index = batch_edge_index.to(device)
            batch_edge_attr = batch_edge_attr.to(device)
            
            output = model(batch_x, batch_edge_index, batch_edge_attr)
        
        predictions = torch.argmax(output, dim=1)
        all_preds.extend(predictions.cpu().numpy())
        all_labels.extend(test_data.y[batch_indices].numpy())

accuracy = accuracy_score(all_labels, all_preds)
micro_f1 = f1_score(all_labels, all_preds, average='micro')
print(f'Accuracy: {accuracy:.4f}')
print(f'Micro F1 Score: {micro_f1:.4f}')

KeyboardInterrupt: 

# Work in the below cell-best code

In [1]:
import torch
import scipy.io
import numpy as np
from torch_geometric.data import Data
from pretrain_gnns.bio.model import GNN
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
import torch_geometric.transforms as T

# Check if CUDA is available and use it if possible
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Function to load and process the data
def load_graph_data(filepath):
    data = scipy.io.loadmat(filepath)
    x = torch.tensor(data['attrb'].todense(), dtype=torch.float32)
    edge_index = torch.tensor(np.array(data['network'].nonzero()), dtype=torch.long)
    edge_attr = torch.ones(edge_index.shape[1], 9)  # Create edge attributes
    y = torch.tensor(data['group'].argmax(axis=1).squeeze(), dtype=torch.long)
    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)

# Load datasets
print("Loading datasets...")
train_data = load_graph_data('acmv9.mat')
test_data = load_graph_data('citationv1.mat')

# Apply node-wise normalization
transform = T.NormalizeFeatures()
train_data = transform(train_data)
test_data = transform(test_data)

# Keep data on CPU
train_data = train_data.to('cpu')
test_data = test_data.to('cpu')

# Custom sampler for batch training
class GraphSampler:
    def __init__(self, data, batch_size):
        self.data = data
        self.batch_size = batch_size
        self.num_nodes = data.x.size(0)

    def __iter__(self):
        node_indices = torch.randperm(self.num_nodes)
        for i in range(0, self.num_nodes, self.batch_size):
            batch_indices = node_indices[i:i+self.batch_size]
            edge_mask = (self.data.edge_index[0].unsqueeze(1) == batch_indices.unsqueeze(0)).any(1)
            batch_edge_index = self.data.edge_index[:, edge_mask]
            batch_edge_attr = self.data.edge_attr[edge_mask]
            
            # Remap node indices to ensure they are consecutive
            node_map = {int(idx.item()): i for i, idx in enumerate(batch_indices)}
            batch_edge_index = torch.tensor([[node_map.get(int(idx.item()), -1) for idx in batch_edge_index[0]],
                                             [node_map.get(int(idx.item()), -1) for idx in batch_edge_index[1]]], 
                                            dtype=torch.long)
            
            # Remove any edges that point to nodes not in this batch
            valid_edges = (batch_edge_index[0] != -1) & (batch_edge_index[1] != -1)
            batch_edge_index = batch_edge_index[:, valid_edges]
            batch_edge_attr = batch_edge_attr[valid_edges]
            
            # Ensure batch_edge_index is a 2D LongTensor
            batch_edge_index = batch_edge_index.long()
            
            yield batch_indices, batch_edge_index, batch_edge_attr

    def __len__(self):
        return (self.num_nodes + self.batch_size - 1) // self.batch_size

# Instantiate the pre-trained model
print("Loading pre-trained model...")
num_node_features = train_data.num_node_features
num_classes = train_data.y.max().item() + 1
model = GNN(num_layer=5, emb_dim=300, JK="last", drop_ratio=0.5, gnn_type='gin')
model.load_state_dict(torch.load('pretrain_gnns/bio/model_gin/supervised.pth', map_location=device))
model = model.to(device)

# Add a classification layer
model.classifier = torch.nn.Linear(300, num_classes).to(device)

# Set up optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()
scaler = GradScaler()

# Training loop with mixed precision and gradient accumulation
print("Starting training...")
model.train()
num_epochs = 20
batch_size = 1  # Reduced batch size
accumulation_steps = 1000  # Increased accumulation steps

for epoch in range(num_epochs):
    total_loss = 0
    sampler = GraphSampler(train_data, batch_size)
    optimizer.zero_grad()  # Zero gradients at the beginning of each epoch
    
    for i, (batch_indices, batch_edge_index, batch_edge_attr) in enumerate(tqdm(sampler, desc=f"Epoch {epoch+1}/{num_epochs}")):
        with autocast():
            batch_x = train_data.x.to(device)
            batch_edge_index = batch_edge_index.to(device)
            batch_edge_attr = batch_edge_attr.to(device)
            batch_y = train_data.y[batch_indices].to(device)
            
            node_representation = model(batch_x, batch_edge_index, batch_edge_attr, batch_indices)
            output = model.classifier(node_representation)
            loss = criterion(output, batch_y)
            loss = loss / accumulation_steps  # Normalize the loss
        
        scaler.scale(loss).backward()
        total_loss += loss.item() * accumulation_steps

        if (i + 1) % accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        # Clear unnecessary tensors from GPU
        del batch_x, batch_edge_index, batch_edge_attr, batch_y, node_representation, output
        torch.cuda.empty_cache()
    
    print(f'Epoch {epoch+1}, Loss: {total_loss/len(sampler):.4f}')

# Evaluation (keep this part as it is)
print("Evaluating model...")
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    sampler = GraphSampler(test_data, batch_size)
    for batch_indices, batch_edge_index, batch_edge_attr in tqdm(sampler, desc="Evaluation"):
        with autocast():
            batch_x = test_data.x[batch_indices].to(device)
            batch_edge_index = batch_edge_index.to(device)
            batch_edge_attr = batch_edge_attr.to(device)
            
            node_representation = model(batch_x, batch_edge_index, batch_edge_attr)
            output = model.classifier(node_representation)
        
        predictions = torch.argmax(output, dim=1)
        all_preds.extend(predictions.cpu().numpy())
        all_labels.extend(test_data.y[batch_indices].numpy())

        # Clear unnecessary tensors from GPU
        del batch_x, batch_edge_index, batch_edge_attr, node_representation, output
        torch.cuda.empty_cache()

accuracy = accuracy_score(all_labels, all_preds)
micro_f1 = f1_score(all_labels, all_preds, average='micro')
print(f'Accuracy: {accuracy:.4f}')
print(f'Micro F1 Score: {micro_f1:.4f}')

Using device: cuda
Loading datasets...
Loading pre-trained model...
Starting training...


Epoch 1/20:   0%|          | 0/9360 [00:07<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 70.87 GiB. GPU 0 has a total capacity of 4.00 GiB of which 2.51 GiB is free. Of the allocated memory 750.88 MiB is allocated by PyTorch, and 21.12 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

# Another try for the above code to work with memory efficiency

In [1]:
import torch
import scipy.io
import numpy as np
from torch_geometric.data import Data
from pretrain_gnns.bio.model import GNN
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
import torch_geometric.transforms as T

# Check if CUDA is available and use it if possible
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Function to load and process the data
def load_graph_data(filepath):
    data = scipy.io.loadmat(filepath)
    x = torch.tensor(data['attrb'].todense(), dtype=torch.float32)
    edge_index = torch.tensor(np.array(data['network'].nonzero()), dtype=torch.long)
    edge_attr = torch.ones(edge_index.shape[1], 9)  # Create edge attributes
    y = torch.tensor(data['group'].argmax(axis=1).squeeze(), dtype=torch.long)
    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)

# Load datasets
print("Loading datasets...")
train_data = load_graph_data('acmv9.mat')
test_data = load_graph_data('citationv1.mat')

# Apply node-wise normalization
transform = T.NormalizeFeatures()
train_data = transform(train_data)
test_data = transform(test_data)

# Keep data on CPU
train_data = train_data.to('cpu')
test_data = test_data.to('cpu')

# Custom sampler for batch training
class GraphSampler:
    def __init__(self, data, batch_size):
        self.data = data
        self.batch_size = batch_size
        self.num_nodes = data.x.size(0)

    def __iter__(self):
        node_indices = torch.randperm(self.num_nodes)
        for i in range(0, self.num_nodes, self.batch_size):
            batch_indices = node_indices[i:i+self.batch_size]
            edge_mask = (self.data.edge_index[0].unsqueeze(1) == batch_indices.unsqueeze(0)).any(1)
            batch_edge_index = self.data.edge_index[:, edge_mask]
            batch_edge_attr = self.data.edge_attr[edge_mask]
            
            # Remap node indices to ensure they are consecutive
            node_map = {int(idx.item()): i for i, idx in enumerate(batch_indices)}
            batch_edge_index = torch.tensor([[node_map.get(int(idx.item()), -1) for idx in batch_edge_index[0]],
                                             [node_map.get(int(idx.item()), -1) for idx in batch_edge_index[1]]], 
                                            dtype=torch.long)
            
            # Remove any edges that point to nodes not in this batch
            valid_edges = (batch_edge_index[0] != -1) & (batch_edge_index[1] != -1)
            batch_edge_index = batch_edge_index[:, valid_edges]
            batch_edge_attr = batch_edge_attr[valid_edges]
            
            # Ensure batch_edge_index is a 2D LongTensor
            batch_edge_index = batch_edge_index.long()
            
            yield batch_indices, batch_edge_index, batch_edge_attr

    def __len__(self):
        return (self.num_nodes + self.batch_size - 1) // self.batch_size

# Instantiate the pre-trained model
print("Loading pre-trained model...")
num_node_features = train_data.num_node_features
num_classes = train_data.y.max().item() + 1
model = GNN(num_layer=5, emb_dim=300, JK="last", drop_ratio=0.5, gnn_type='gin')
model.load_state_dict(torch.load('pretrain_gnns/bio/model_gin/supervised.pth', map_location=device))
model = model.to(device)

# Add a classification layer
model.classifier = torch.nn.Linear(300, num_classes).to(device)

# Set up optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.0005)  # Reduced learning rate
criterion = torch.nn.CrossEntropyLoss()
scaler = GradScaler()

print("Starting training...")
model.train()
num_epochs = 20
batch_size = 1  # Keep batch size at 1
accumulation_steps = 10000  # Increased accumulation steps

for epoch in range(num_epochs):
    total_loss = 0
    sampler = GraphSampler(train_data, batch_size)
    optimizer.zero_grad()  # Zero gradients at the beginning of each epoch
    
    for i, (batch_indices, batch_edge_index, batch_edge_attr) in enumerate(tqdm(sampler, desc=f"Epoch {epoch+1}/{num_epochs}")):
        with autocast():
            batch_x = train_data.x.to(device)
            batch_edge_index = batch_edge_index.to(device)
            batch_edge_attr = batch_edge_attr.to(device)
            batch_y = train_data.y[batch_indices].to(device)
            
            node_representation = model(batch_x, batch_edge_index, batch_edge_attr, batch_indices)
            output = model.classifier(node_representation[batch_indices])
            loss = criterion(output, batch_y)
            loss = loss / accumulation_steps  # Normalize the loss

        scaler.scale(loss).backward()
        total_loss += loss.item() * accumulation_steps

        if (i + 1) % accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        # Clear unnecessary tensors from GPU
        del batch_x, batch_edge_index, batch_edge_attr, batch_y, node_representation, output
        torch.cuda.empty_cache()
    
    print(f'Epoch {epoch+1}, Loss: {total_loss/len(sampler):.4f}')

# Evaluation
print("Evaluating model...")
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    sampler = GraphSampler(test_data, batch_size)
    for batch_indices, batch_edge_index, batch_edge_attr in tqdm(sampler, desc="Evaluation"):
        with autocast():
            batch_x = test_data.x.to(device)
            batch_edge_index = batch_edge_index.to(device)
            batch_edge_attr = batch_edge_attr.to(device)
            
            node_representation = model(batch_x, batch_edge_index, batch_edge_attr)
            output = model.classifier(node_representation[batch_indices])
        
        predictions = torch.argmax(output, dim=1)
        all_preds.extend(predictions.cpu().numpy())
        all_labels.extend(test_data.y[batch_indices].numpy())

        # Clear unnecessary tensors from GPU
        del batch_x, batch_edge_index, batch_edge_attr, node_representation, output
        torch.cuda.empty_cache()

accuracy = accuracy_score(all_labels, all_preds)
micro_f1 = f1_score(all_labels, all_preds, average='micro')
print(f'Accuracy: {accuracy:.4f}')
print(f'Micro F1 Score: {micro_f1:.4f}')

Using device: cuda
Loading datasets...
Loading pre-trained model...
Starting training...


Epoch 1/20:   0%|          | 0/9360 [00:10<?, ?it/s]


OutOfMemoryError: CUDA out of memory. Tried to allocate 70.87 GiB. GPU 0 has a total capacity of 4.00 GiB of which 2.51 GiB is free. Of the allocated memory 750.72 MiB is allocated by PyTorch, and 21.28 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

# another attempt

In [1]:
import torch
import scipy.io
import numpy as np
from torch_geometric.data import Data
from pretrain_gnns.bio.model import GNN
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
import torch_geometric.transforms as T
from torch.utils.checkpoint import checkpoint

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

def load_graph_data(filepath):
    data = scipy.io.loadmat(filepath)
    x = torch.tensor(data['attrb'].todense(), dtype=torch.float32)
    edge_index = torch.tensor(np.array(data['network'].nonzero()), dtype=torch.long)
    edge_attr = torch.ones(edge_index.shape[1], 9)
    y = torch.tensor(data['group'].argmax(axis=1).squeeze(), dtype=torch.long)
    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)

print("Loading datasets...")
train_data = load_graph_data('acmv9.mat')
test_data = load_graph_data('citationv1.mat')

transform = T.NormalizeFeatures()
train_data = transform(train_data)
test_data = transform(test_data)

class CPUOffloadGraphSampler:
    def __init__(self, data, batch_size):
        self.data = data
        self.batch_size = batch_size
        self.num_nodes = data.x.size(0)

    def __iter__(self):
        node_indices = torch.randperm(self.num_nodes)
        for i in range(0, self.num_nodes, self.batch_size):
            batch_indices = node_indices[i:i+self.batch_size]
            edge_mask = (self.data.edge_index[0].unsqueeze(1) == batch_indices.unsqueeze(0)).any(1)
            batch_edge_index = self.data.edge_index[:, edge_mask]
            batch_edge_attr = self.data.edge_attr[edge_mask]
            
            node_map = {int(idx.item()): i for i, idx in enumerate(batch_indices)}
            batch_edge_index = torch.tensor([[node_map.get(int(idx.item()), -1) for idx in batch_edge_index[0]],
                                             [node_map.get(int(idx.item()), -1) for idx in batch_edge_index[1]]], 
                                            dtype=torch.long)
            
            valid_edges = (batch_edge_index[0] != -1) & (batch_edge_index[1] != -1)
            batch_edge_index = batch_edge_index[:, valid_edges]
            batch_edge_attr = batch_edge_attr[valid_edges]
            
            batch_x = self.data.x[batch_indices]
            batch_y = self.data.y[batch_indices]
            
            yield batch_indices, batch_x, batch_edge_index, batch_edge_attr, batch_y

    def __len__(self):
        return (self.num_nodes + self.batch_size - 1) // self.batch_size

class MemoryEfficientGNN(GNN):
    def forward(self, x, edge_index, edge_attr, batch_indices):
        h_list = [x]
        for layer in range(self.num_layer):
            h = checkpoint(self.gnns[layer], h_list[layer], edge_index, edge_attr, use_reentrant=False)
            if layer == self.num_layer - 1:
                h = torch.nn.functional.dropout(h, self.drop_ratio, training=self.training)
            else:
                h = torch.nn.functional.dropout(torch.nn.functional.relu(h), self.drop_ratio, training=self.training)
            h_list.append(h)

        if self.JK == "last":
            node_representation = h_list[-1]
        elif self.JK == "sum":
            node_representation = sum(h_list[1:])

        return node_representation[batch_indices]

print("Loading pre-trained model...")
num_node_features = train_data.num_node_features
num_classes = train_data.y.max().item() + 1
model = MemoryEfficientGNN(num_layer=5, emb_dim=300, JK="last", drop_ratio=0.5, gnn_type='gin')
model.load_state_dict(torch.load('pretrain_gnns/bio/model_gin/supervised.pth', map_location=device))
model = model.to(device)

model.classifier = torch.nn.Linear(300, num_classes).to(device)

optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)
criterion = torch.nn.CrossEntropyLoss()
scaler = GradScaler()

print("Starting training...")
model.train()
num_epochs = 20
batch_size = 32
accumulation_steps = 256

for epoch in range(num_epochs):
    total_loss = 0
    sampler = CPUOffloadGraphSampler(train_data, batch_size)
    optimizer.zero_grad()
    
    for i, (batch_indices, batch_x, batch_edge_index, batch_edge_attr, batch_y) in enumerate(tqdm(sampler, desc=f"Epoch {epoch+1}/{num_epochs}")):
        batch_x = batch_x.to(device)
        batch_edge_index = batch_edge_index.to(device)
        batch_edge_attr = batch_edge_attr.to(device)
        batch_y = batch_y.to(device)
        
        with autocast():
            node_representation = model(batch_x, batch_edge_index, batch_edge_attr, torch.arange(len(batch_indices)))
            output = model.classifier(node_representation)
            loss = criterion(output, batch_y)
            loss = loss / accumulation_steps

        scaler.scale(loss).backward()
        total_loss += loss.item() * accumulation_steps

        if (i + 1) % accumulation_steps == 0:
            scaler.step(optimizer)
            scaler.update()
            optimizer.zero_grad()

        del batch_x, batch_edge_index, batch_edge_attr, batch_y, node_representation, output
        torch.cuda.empty_cache()
    
    print(f'Epoch {epoch+1}, Loss: {total_loss/len(sampler):.4f}')

print("Evaluating model...")
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    sampler = CPUOffloadGraphSampler(test_data, batch_size)
    for batch_indices, batch_x, batch_edge_index, batch_edge_attr, batch_y in tqdm(sampler, desc="Evaluation"):
        batch_x = batch_x.to(device)
        batch_edge_index = batch_edge_index.to(device)
        batch_edge_attr = batch_edge_attr.to(device)
        
        with autocast():
            node_representation = model(batch_x, batch_edge_index, batch_edge_attr, torch.arange(len(batch_indices)))
            output = model.classifier(node_representation)
        
        predictions = torch.argmax(output, dim=1)
        all_preds.extend(predictions.cpu().numpy())
        all_labels.extend(batch_y.numpy())

        del batch_x, batch_edge_index, batch_edge_attr, node_representation, output
        torch.cuda.empty_cache()

accuracy = accuracy_score(all_labels, all_preds)
micro_f1 = f1_score(all_labels, all_preds, average='micro')
print(f'Accuracy: {accuracy:.4f}')
print(f'Micro F1 Score: {micro_f1:.4f}')

Using device: cuda
Loading datasets...
Loading pre-trained model...
Starting training...


Epoch 1/20: 100%|██████████| 293/293 [23:44<00:00,  4.86s/it]


Epoch 1, Loss: nan


Epoch 2/20:   2%|▏         | 7/293 [00:35<24:20,  5.11s/it]


KeyboardInterrupt: 

In [None]:
import torch
import scipy.io
import numpy as np
from torch_geometric.data import Data
from pretrain_gnns.bio.model import GNN
from torch.cuda.amp import GradScaler, autocast
from tqdm import tqdm
from sklearn.metrics import accuracy_score, f1_score
import torch_geometric.transforms as T

# Check if CUDA is available and use it if possible
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Function to load and process the data
def load_graph_data(filepath):
    data = scipy.io.loadmat(filepath)
    x = torch.tensor(data['attrb'].todense(), dtype=torch.float32)
    edge_index = torch.tensor(data['network'].nonzero(), dtype=torch.long)
    edge_attr = torch.ones(edge_index.shape[1], 1)  # Changed to 1-dimensional edge attributes
    y = torch.tensor(data['group'].argmax(axis=1).squeeze(), dtype=torch.long)
    return Data(x=x, edge_index=edge_index, edge_attr=edge_attr, y=y)

# Load datasets
print("Loading datasets...")
train_data = load_graph_data('acmv9.mat')
test_data = load_graph_data('citationv1.mat')

# Apply node-wise normalization
transform = T.NormalizeFeatures()
train_data = transform(train_data)
test_data = transform(test_data)

# Ensure edge_index is in the correct format (2, num_edges)
train_data.edge_index = train_data.edge_index.t().contiguous()
test_data.edge_index = test_data.edge_index.t().contiguous()

# Move data to device
train_data = train_data.to(device)
test_data = test_data.to(device)

# Instantiate the pre-trained model from pretrain-gnns
print("Loading pre-trained model...")
num_node_features = train_data.num_node_features
num_classes = train_data.y.max().item() + 1
model = GNN(num_layer=5, emb_dim=300, gnn_type='gin', drop_ratio=0.5, JK="last")
model.load_state_dict(torch.load('pretrain_gnns/bio/model_gin/supervised.pth', map_location=device))
model = model.to(device)

# Set up optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()
scaler = GradScaler()

# Training loop with mixed precision
print("Starting training...")
model.train()
num_epochs = 25

for epoch in range(num_epochs):
    total_loss = 0
    model.train()
    for batch in tqdm(range(0, train_data.num_nodes, 1000), desc=f"Epoch {epoch+1}/{num_epochs}"):
        optimizer.zero_grad()
        with autocast():
            out = model(train_data.x, train_data.edge_index, train_data.edge_attr)
            loss = criterion(out[batch:batch+1000], train_data.y[batch:batch+1000])
        
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        total_loss += loss.item()
    
    print(f'Epoch {epoch+1}, Loss: {total_loss:.4f}')

# Evaluation
print("Evaluating model...")
model.eval()
all_preds = []
all_labels = []

with torch.no_grad():
    for batch in tqdm(range(0, test_data.num_nodes, 1000), desc="Evaluation"):
        with autocast():
            out = model(test_data.x, test_data.edge_index, test_data.edge_attr)
            predictions = torch.argmax(out[batch:batch+1000], dim=1)
        all_preds.extend(predictions.cpu().numpy())
        all_labels.extend(test_data.y[batch:batch+1000].cpu().numpy())

accuracy = accuracy_score(all_labels, all_preds)
micro_f1 = f1_score(all_labels, all_preds, average='micro')
print(f'Accuracy: {accuracy:.4f}')
print(f'Micro F1 Score: {micro_f1:.4f}')

Using device: cuda
Loading datasets...
Loading pre-trained model...
Starting training...


Epoch 1/25:   0%|          | 0/10 [00:02<?, ?it/s]


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 31158 but got size 2 for tensor number 1 in the list.

In [None]:
import torch
import scipy.io
from sklearn.metrics import accuracy_score, f1_score
from torch_geometric.data import Data, DataLoader
from torch.cuda.amp import GradScaler, autocast
from pretrain_gnns.bio.model import GNN

# Device configuration
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

# Data preparation
def custom_collate_fn(data_list):
    batch = Data.from_data_list(data_list)
    if 'edge_attr' in data_list[0]:
        batch.edge_attr = torch.cat([data.edge_attr for data in data_list], dim=0)
    return batch

# Load and prepare datasets
acm_data = scipy.io.loadmat('acmv9.mat')
citation_data = scipy.io.loadmat('citationv1.mat')
train_data = Data(
    x=torch.tensor(acm_data['attrb'].todense(), dtype=torch.float32),
    edge_index=torch.tensor(acm_data['network'].nonzero(), dtype=torch.long),
    edge_attr=torch.tensor(acm_data['network'].data, dtype=torch.float32).view(-1, 9),
    y=torch.tensor(acm_data['group'].argmax(axis=1).squeeze(), dtype=torch.long)
)

# Model initialization
model = GNN(num_layer=5, emb_dim=300, gnn_type='gin')
model.load_state_dict(torch.load('pretrain_gnns/bio/model_gin/supervised.pth'))
model.to(device)

# DataLoader
train_loader = DataLoader([train_data], batch_size=1, shuffle=True, collate_fn=custom_collate_fn)

# Optimizer and loss
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
scaler = GradScaler()

# Training loop with debugging
model.train()
for epoch in range(25):
    for batch in train_loader:
        optimizer.zero_grad()
        with autocast():
            x, edge_index, edge_attr = batch.x.to(device), batch.edge_index.to(device), batch.edge_attr.to(device)
            print(f"x shape: {x.shape}, edge_index shape: {edge_index.shape}, edge_attr shape: {edge_attr.shape}")
            output = model(x, edge_index, edge_attr)
            loss = criterion(output, batch.y.to(device))
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
        print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}')

# Evaluation
model.eval()
with torch.no_grad():
    x, edge_index, edge_attr, y = test_data.x.to(device), test_data.edge_index.to(device), test_data.edge_attr.to(device), test_data.y.to(device)
    output = model(x, edge_index, edge_attr)
    predictions = torch.argmax(output, dim=1)
    accuracy = accuracy_score(y.cpu(), predictions.cpu())
    micro_f1 = f1_score(y.cpu(), predictions.cpu(), average='micro')
    print(f'Accuracy: {accuracy:.4f}, Micro F1 Score: {micro_f1:.4f}')


Using device: cuda
x shape: torch.Size([9360, 6775]), edge_index shape: torch.Size([2, 31158]), edge_attr shape: torch.Size([3462, 9])


OutOfMemoryError: CUDA out of memory. Tried to allocate 70.87 GiB. GPU 0 has a total capacity of 4.00 GiB of which 0 bytes is free. Of the allocated memory 3.48 GiB is allocated by PyTorch, and 235.18 MiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)

In [None]:
#claude
import torch
import torch.nn.functional as F
import scipy.io
from sklearn.metrics import accuracy_score, f1_score
from torch_geometric.data import Data, DataLoader
from torch.cuda.amp import GradScaler, autocast
from pretrain_gnns.bio.loader import BioDataset
from pretrain_gnns.bio.dataloader import DataLoaderFinetune
from pretrain_gnns.bio.model import GNN

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

def custom_collate_fn(data_list):
    batch = Data.from_data_list(data_list)
    if 'edge_attr' in data_list[0]:
        batch.edge_attr = torch.cat([data.edge_attr for data in data_list], dim=0)
    return batch

print("Loading datasets...")
# Loading the .mat files into the format compatible with the model
acm_data = scipy.io.loadmat('acmv9.mat')
citation_data = scipy.io.loadmat('citationv1.mat')

# Create PyTorch Geometric data objects for training and testing
train_data = Data(
    x=torch.tensor(acm_data['attrb'].todense(), dtype=torch.float32),
    edge_index=torch.tensor(acm_data['network'].nonzero(), dtype=torch.long),
    edge_attr=torch.tensor(acm_data['network'].data, dtype=torch.float32),
    y=torch.tensor(acm_data['group'].argmax(axis=1).squeeze(), dtype=torch.long)
)

test_data = Data(
    x=torch.tensor(citation_data['attrb'].todense(), dtype=torch.float32),
    edge_index=torch.tensor(citation_data['network'].nonzero(), dtype=torch.long),
    edge_attr=torch.tensor(citation_data['network'].data, dtype=torch.float32),
    y=torch.tensor(citation_data['group'].argmax(axis=1).squeeze(), dtype=torch.long)
)

print("Loading pre-trained model...")
# Load the pre-trained model
model = GNN(num_layer=5, emb_dim=300, gnn_type='gin') # Adjust parameters if needed
model.load_state_dict(torch.load('pretrain_gnns/bio/model_gin/supervised.pth'))
model = model.to(device)

# Set up the DataLoader with the custom collate function
train_loader = DataLoader(
    [train_data], # wrap the data in a list
    batch_size=1, # since we are using a small dataset
    shuffle=True,
    collate_fn=custom_collate_fn
)

# Set up the optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
scaler = GradScaler()

print("Starting training...")
# Training Loop
model.train()
for epoch in range(25): # Assuming 25 epochs, adjust as needed
    for batch in train_loader:
        optimizer.zero_grad()
        with autocast():
            # Debug shapes before passing to the model
            print(f"x shape: {batch.x.shape}")
            print(f"edge_attr shape: {batch.edge_attr.shape}")

            x, edge_index = batch.x.to(device), batch.edge_index.to(device)
            edge_attr = batch.edge_attr.to(device) if batch.edge_attr is not None else None

            # If edge_attr is 1D, uncomment the following line:
            edge_attr = edge_attr.unsqueeze(1)

            # Forward pass
            output = model(x, edge_index, edge_attr)
            loss = criterion(output, batch.y.to(device))

        # Backward pass
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
    print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}')

print("Evaluating model...")
# Evaluation
model.eval()
with torch.no_grad():
    for batch in [test_data]: # wrap the test data in a list
        x, edge_index = batch.x.to(device), batch.edge_index.to(device)
        edge_attr = batch.edge_attr.to(device) if batch.edge_attr is not None else None

        # If edge_attr is 1D, uncomment the following line:
        # edge_attr = edge_attr.unsqueeze(1)

        output = model(x, edge_index, edge_attr)
        predictions = torch.argmax(output, dim=1)
        accuracy = accuracy_score(batch.y.cpu(), predictions.cpu())
        micro_f1 = f1_score(batch.y.cpu(), predictions.cpu(), average='micro')

print(f'Accuracy: {accuracy:.4f}')
print(f'Micro F1 Score: {micro_f1:.4f}')

Using device: cuda
Loading datasets...
Loading pre-trained model...
Starting training...
x shape: torch.Size([9360, 6775])
edge_attr shape: torch.Size([31158])


RuntimeError: Sizes of tensors must match except in dimension 0. Expected size 1 but got size 9 for tensor number 1 in the list.

In [None]:
#perplexity
import torch
import torch.nn.functional as F
import scipy.io
from sklearn.metrics import accuracy_score, f1_score
from torch_geometric.data import Data, DataLoader
from torch.cuda.amp import GradScaler, autocast
from pretrain_gnns.bio.model import GNN

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Using device: {device}")

def custom_collate_fn(data_list):
    batch = Data.from_data_list(data_list)
    if 'edge_attr' in data_list:
        batch.edge_attr = torch.cat([data.edge_attr for data in data_list], dim=0)
    return batch

print("Loading datasets...")
# Loading the .mat files into the format compatible with the model
acm_data = scipy.io.loadmat('acmv9.mat')
citation_data = scipy.io.loadmat('citationv1.mat')

# Create PyTorch Geometric data objects for training and testing
train_data = Data(
    x=torch.tensor(acm_data['attrb'].todense(), dtype=torch.float32),
    edge_index=torch.tensor(acm_data['network'].nonzero(), dtype=torch.long).t().contiguous(),
    edge_attr=torch.tensor(acm_data['network'].data.reshape(-1, 1), dtype=torch.float32),
    y=torch.tensor(acm_data['group'].argmax(axis=1).squeeze(), dtype=torch.long)
)

test_data = Data(
    x=torch.tensor(citation_data['attrb'].todense(), dtype=torch.float32),
    edge_index=torch.tensor(citation_data['network'].nonzero(), dtype=torch.long).t().contiguous(),
    edge_attr=torch.tensor(citation_data['network'].data.reshape(-1, 1), dtype=torch.float32),
    y=torch.tensor(citation_data['group'].argmax(axis=1).squeeze(), dtype=torch.long)
)

print("Loading pre-trained model...")
# Load the pre-trained model
model = GNN(num_layer=5, emb_dim=300, gnn_type='gin')
model.load_state_dict(torch.load('pretrain_gnns/bio/model_gin/supervised.pth', map_location=device))
model = model.to(device)

# Set up the DataLoader with the custom collate function
train_loader = DataLoader(
    [train_data],
    batch_size=1,
    shuffle=True,
    collate_fn=custom_collate_fn
)

# Set up the optimizer and loss function
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
criterion = torch.nn.CrossEntropyLoss()
scaler = GradScaler()

print("Starting training...")
# Training Loop
model.train()
for epoch in range(25):
    for batch in train_loader:
        optimizer.zero_grad()
        with autocast():
            x, edge_index = batch.x.to(device), batch.edge_index.to(device)
            edge_attr = batch.edge_attr.to(device) if 'edge_attr' in batch else None
            output = model(x, edge_index, edge_attr)
            loss = criterion(output, batch.y.to(device))
        
        scaler.scale(loss).backward()
        scaler.step(optimizer)
        scaler.update()
    print(f'Epoch {epoch+1}, Loss: {loss.item():.4f}')

Using device: cuda
Loading datasets...
Loading pre-trained model...
Starting training...


RuntimeError: Sizes of tensors must match except in dimension 1. Expected size 31158 but got size 2 for tensor number 1 in the list.