In [65]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import softmax

class AttentionModule(nn.Module):
    def __init__(self, in_features, out_features):
        super(AttentionModule, self).__init__()
        self.lin = nn.Linear(in_features, out_features, bias=False)
        self.att = nn.Parameter(torch.Tensor(1, out_features))
        nn.init.xavier_uniform_(self.att.data)

    def forward(self, x, index):
        x = self.lin(x)
        alpha = (x * self.att).sum(dim=-1)
        alpha = softmax(alpha, index=index)  # Pass the index to softmax
        return alpha


class StarMultigraphGNN(MessagePassing):
    def __init__(self, edge_features, hidden_dim, num_classes, num_heads=4):
        super(StarMultigraphGNN, self).__init__(aggr="add")
        self.edge_embed = nn.Linear(edge_features, hidden_dim)
        self.attention = nn.ModuleList(
            [AttentionModule(hidden_dim, hidden_dim) for _ in range(num_heads)]
        )
        self.combine = nn.Linear(num_heads * hidden_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, num_classes)
        self.num_heads = num_heads

    def forward(self, edge_index, edge_attr, edge_id, num_nodes):
        # Embed edge features
        edge_emb = self.edge_embed(edge_attr)

        # Perform message passing with attention
        x = self.propagate(
            edge_index, x=None, edge_attr=edge_emb, size=(num_nodes, num_nodes)
        )

        # Get center node features (assuming center node is 0)
        center_node_feat = x[0].unsqueeze(0)

        # Classification
        out = self.fc(center_node_feat)
        return F.log_softmax(out, dim=1)

    def message(self, edge_attr, index):
        # Apply multi-head attention
        alpha_list = [att(edge_attr, index=index) for att in self.attention]
        message_list = [alpha.unsqueeze(-1) * edge_attr for alpha in alpha_list]

        # Concatenate messages from all heads
        return torch.cat(message_list, dim=-1)

    def update(self, aggr_out):
        # Combine information from all heads
        return self.combine(aggr_out)


# Sử dụng mô hình



In [68]:
import glob
import os
import torch
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score~

# Load graph data and labels
directories = ["Star_graph/*/*.pt"]
file_count = {}
nodes_data = []
y = []

for directory in directories:
    file_paths = glob.glob(directory)
    file_count[directory] = len(file_paths)

    for file_path in file_paths:
        graph = torch.load(file_path)
        y.append(graph.y)
        edge_features = graph.edge_attr.size(1)
        model = StarMultigraphGNN(
            edge_features=edge_features, hidden_dim=64, num_classes=5, num_heads=8
        )
        model.eval()  # Set the model to evaluation mode
        with torch.no_grad():
            output = model(
                graph.edge_index,
                graph.edge_attr,
                graph.edge_id,
                graph.num_nodes,
            )
        nodes_data.append(output)

In [73]:
y

[1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,


In [69]:
nodes_data

[tensor([[    0.0000, -1422.3243,  -682.8345, -1811.3981, -2239.2217]]),
 tensor([[ -1075.9932,      0.0000,  -4922.2939,   -734.7773, -12259.6328]]),
 tensor([[-8687.9316,     0.0000, -2411.6318, -4465.9526, -8970.2002]]),
 tensor([[-4327.2974, -2037.3149, -4354.1025, -1582.3090,     0.0000]]),
 tensor([[-209605.6094,  -97140.6094, -196810.7188,       0.0000,  -32785.5898]]),
 tensor([[-4529.0195, -5952.1895, -6149.6050, -4639.3848,     0.0000]]),
 tensor([[-3620.0063,     0.0000, -3406.4004, -2411.2002, -6358.5322]]),
 tensor([[ -2062.1094,      0.0000,  -5391.1646, -49216.2031,   -486.3516]]),
 tensor([[-32581.6211, -56352.1953,   -152.5371, -33563.4375,      0.0000]]),
 tensor([[-120953.6875,       0.0000, -116522.9688,  -48953.5195, -123253.5469]]),
 tensor([[-3080.6008, -2926.9260, -3966.5474, -6966.1133,     0.0000]]),
 tensor([[-4821.4531, -3181.0488,     0.0000, -9711.3672, -5496.1895]]),
 tensor([[ -446.2107, -3878.2471, -2578.9648,  -373.5422,     0.0000]]),
 tensor([[-40087

In [70]:
probabilities = [F.softmax(logit, dim=1) for logit in nodes_data]
predicted_classes = [torch.argmax(prob, dim=1).item() for prob in probabilities]

In [71]:
predicted_classes

[0,
 1,
 1,
 4,
 3,
 4,
 1,
 1,
 4,
 1,
 4,
 2,
 4,
 4,
 0,
 4,
 3,
 1,
 4,
 0,
 0,
 0,
 3,
 2,
 1,
 4,
 2,
 4,
 2,
 3,
 4,
 2,
 2,
 2,
 1,
 4,
 1,
 1,
 2,
 3,
 1,
 4,
 0,
 0,
 2,
 2,
 0,
 2,
 3,
 4,
 4,
 0,
 1,
 4,
 3,
 0,
 3,
 0,
 4,
 1,
 1,
 2,
 0,
 4,
 0,
 2,
 0,
 3,
 2,
 4,
 3,
 4,
 3,
 3,
 0,
 0,
 3,
 1,
 3,
 3,
 0,
 2,
 2,
 4,
 1,
 1,
 1,
 0,
 1,
 2,
 0,
 4,
 3,
 0,
 2,
 0,
 3,
 4,
 0,
 2,
 2,
 3,
 1,
 4,
 0,
 4,
 3,
 4,
 1,
 3,
 3,
 1,
 4,
 3,
 3,
 0,
 1,
 4,
 1,
 0,
 0,
 4,
 0,
 2,
 1,
 2,
 4,
 0,
 2,
 1,
 1,
 3,
 4,
 4,
 0,
 3,
 4,
 1,
 3,
 1,
 2,
 2,
 4,
 2,
 0,
 2,
 1,
 4,
 3,
 2,
 2,
 3,
 2,
 3,
 2,
 2,
 2,
 1,
 4,
 4,
 4,
 0,
 4,
 0,
 0,
 1,
 2,
 3,
 0,
 2,
 0,
 0,
 4,
 3,
 0,
 1,
 2,
 1,
 4,
 4,
 0,
 2,
 0,
 0,
 0,
 4,
 1,
 0,
 4,
 2,
 4,
 3,
 0,
 3,
 4,
 2,
 2,
 0,
 1,
 3,
 2,
 2,
 0,
 2,
 4,
 2,
 2,
 3,
 2,
 2,
 1,
 2,
 1,
 0,
 0,
 3,
 4,
 2,
 2,
 4,
 4,
 4,
 4,
 0,
 0,
 1,
 0,
 1,
 4,
 4,
 3,
 0,
 2,
 3,
 3,
 3,
 0,
 4,
 0,
 1,
 0,
 3,
 0,
 4,
 1,
 0,
 1,
 1,
 2,
 4,


In [72]:
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    confusion_matrix,
)

# Example true labels (replace with your actual labels)
true_labels = y
# Calculate metrics
accuracy = accuracy_score(true_labels, predicted_classes)
precision = precision_score(true_labels, predicted_classes, average="weighted")
recall = recall_score(true_labels, predicted_classes, average="weighted")
f1 = f1_score(true_labels, predicted_classes, average="weighted")
conf_matrix = confusion_matrix(true_labels, predicted_classes)

print(f"Accuracy: {accuracy:.2f}")
print(f"Precision: {precision:.2f}")
print(f"Recall: {recall:.2f}")
print(f"F1 Score: {f1:.2f}")
print("Confusion Matrix:")
print(conf_matrix)

Accuracy: 0.20
Precision: 0.64
Recall: 0.20
F1 Score: 0.27
Confusion Matrix:
[[341 342 352 339 326]
 [ 22  20  20  18  24]
 [ 24  10  16  27  24]
 [ 21  25  23  16  27]
 [ 22  19  30  13  25]]


In [56]:
import numpy as np

# Assuming nodes_data has shape (num_samples, num_nodes, num_features)
# Aggregate node features by averaging or summing them
nodes_data_flat = [np.mean(data.numpy(), axis=0) for data in nodes_data]

# Convert to numpy array
nodes_data_flat = np.array(nodes_data_flat)


In [53]:
nodes_data_flat

array([[-1.7999916e+03, -5.3306616e+03, -1.7410391e+03, ...,
        -4.1084238e+03, -4.8195980e+04, -3.0416002e+06]], dtype=float32)

In [63]:
import numpy as np
import torch
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score


nodes_data_flat = np.mean(nodes_data, axis=1)
y = np.array(y)  

X_train, X_test, y_train, y_test = train_test_split(
    nodes_data_flat, y, test_size=0.2, random_state=42
)

clf = RandomForestClassifier(n_estimators=100, random_state=42)
clf.fit(X_train, y_train)


y_pred = clf.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
report = classification_report(y_test, y_pred)

print(f"Accuracy: {accuracy:.4f}")
print("Classification Report:")
print(report)

Accuracy: 0.7911
Classification Report:
              precision    recall  f1-score   support

           0       0.79      0.99      0.88       338
           1       0.00      0.00      0.00        19
           2       0.00      0.00      0.00        24
           3       0.00      0.00      0.00        28
           4       0.50      0.06      0.11        17

    accuracy                           0.79       426
   macro avg       0.26      0.21      0.20       426
weighted avg       0.65      0.79      0.70       426



  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))
  _warn_prf(average, modifier, f"{metric.capitalize()} is", len(result))


In [64]:
y

array([1, 1, 1, ..., 4, 4, 4])

In [59]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score


# Define the Autoencoder model
class Autoencoder(nn.Module):
    def __init__(self, input_dim, latent_dim):
        super(Autoencoder, self).__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, 128), nn.ReLU(), nn.Linear(128, latent_dim)
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, 128),
            nn.ReLU(),
            nn.Linear(128, input_dim),
            nn.Sigmoid(),  # Assuming input data is normalized between 0 and 1
        )

    def forward(self, x):
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return decoded, encoded


# Parameters
input_dim = nodes_data_flat.shape[1]
latent_dim = 50  # Number of dimensions in the compressed representation

# Initialize the autoencoder
autoencoder = Autoencoder(input_dim=input_dim, latent_dim=latent_dim)
criterion = nn.MSELoss()
optimizer = optim.Adam(autoencoder.parameters(), lr=1e-3)