In [None]:
حتما! اینجا کد کامل از ابتدا تا پایان هست که:

چندتا فایل JSON گراف با نود و یال و برچسب نود می‌خونه

گراف‌ها رو به صورت یک Batch می‌سازه

مدل GCN می‌سازه و نودها رو طبقه‌بندی می‌کنه

آموزش و ارزیابی می‌کنه

In [38]:
import json
import torch
import torch.nn.functional as F
from torch.nn import Linear, ModuleList
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data, DataLoader
from sklearn.metrics import classification_report, confusion_matrix

# Lookup tables that turn categorical (text) node attributes into integer codes.
# type_map and value_map start empty and are filled by encode_categorical as
# new node types / values are seen; being module-level, the codes are shared
# by every graph loaded in the same session.
type_map = {}
value_map = {}
# Class name -> integer class id used as the per-node training target.
label_map = {'bug-free': 0, 'insert-node': 1, 'update': 2}

def encode_categorical(val, mapping):
    """Return the integer code of ``val``, registering it on first sight.

    A value that is not yet a key of ``mapping`` is given the next free code
    (the size of ``mapping`` before insertion), so codes follow first-seen
    order. ``mapping`` is mutated in place.
    """
    # The default argument is evaluated before the insertion happens, so a new
    # value receives len(mapping) as it was prior to being added.
    return mapping.setdefault(val, len(mapping))

def load_graph_from_json(json_path):
    """Load one labelled graph from a JSON file into a PyG ``Data`` object.

    The JSON document is expected to contain
      * ``nodes``: entries whose first four items are
        ``[node_id, type, value, label]``;
      * ``edges``: entries whose first two items are ``[src_id, dst_id]``
        (further items, e.g. a weight, are ignored).

    Node ``type`` and ``value`` are converted to integer codes through the
    module-level ``type_map`` / ``value_map``, so codes stay consistent across
    all files loaded in the same session.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        ``Data`` with ``x`` (float, one ``[type_code, value_code]`` row per
        node), ``edge_index`` (long, shape ``[2, num_edges]``) and ``y``
        (long, one class id per node).

    Raises:
        KeyError: If a node label is not a key of ``label_map``.
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    x_list = []
    y_list = []
    id_map = {}  # original node id -> row index in x / y
    for idx, node in enumerate(data['nodes']):
        id_map[node[0]] = idx
        type_feat = encode_categorical(node[1], type_map)
        val_feat = encode_categorical(node[2], value_map)
        x_list.append([type_feat, val_feat])
        y_list.append(label_map[node[3]])

    # reshape keeps x two-dimensional even when the graph has no nodes.
    x = torch.tensor(x_list, dtype=torch.float).reshape(-1, 2)
    y = torch.tensor(y_list, dtype=torch.long)

    sources, targets = [], []
    for edge in data['edges']:
        # Only the two endpoints are used; a third item (weight/attribute),
        # when present, is currently ignored.
        src, dst = edge[0], edge[1]
        # Edges that reference an unknown node id are dropped.
        if src in id_map and dst in id_map:
            sources.append(id_map[src])
            targets.append(id_map[dst])

    # Built row-wise, the tensor already has shape [2, num_edges] and is
    # contiguous, so no transpose round-trip is needed.
    edge_index = torch.tensor([sources, targets], dtype=torch.long)

    return Data(x=x, edge_index=edge_index, y=y)

class GCN(torch.nn.Module):
    """Three stacked ``GCNConv`` layers followed by a linear classifier head."""

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        # Layer widths: input -> hidden -> hidden -> hidden.
        widths = [in_channels] + [hidden_channels] * 3
        self.convs = ModuleList(
            GCNConv(n_in, n_out) for n_in, n_out in zip(widths, widths[1:])
        )
        self.classifier = Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Apply every conv layer with ReLU, then the classifier, to ``x``."""
        hidden = x
        for layer in self.convs:
            hidden = F.relu(layer(hidden, edge_index))
        return self.classifier(hidden)

def train(model, loader, optimizer, device):
    """Run one optimisation pass over ``loader``; return the mean batch loss.

    Each batch is moved to ``device``, the cross-entropy between the model
    output and ``batch.y`` is back-propagated, and one optimizer step is
    taken. The returned value is the sum of the batch losses divided by
    ``len(loader)``.
    """
    model.train()
    batch_losses = []
    for graphs in loader:
        graphs = graphs.to(device)
        optimizer.zero_grad()
        logits = model(graphs.x, graphs.edge_index)
        batch_loss = F.cross_entropy(logits, graphs.y)
        batch_loss.backward()
        optimizer.step()
        batch_losses.append(batch_loss.item())
    return sum(batch_losses) / len(loader)

def test(model, loader, device):
    """Evaluate ``model`` on ``loader`` without gradient tracking.

    Returns:
        ``(acc, y_true, y_pred)`` where ``y_true`` / ``y_pred`` are flat lists
        of the labels and arg-max predictions gathered over all batches, and
        ``acc`` is the fraction of entries of ``batch.y`` predicted correctly.
    """
    model.eval()
    y_true, y_pred = [], []
    n_correct = 0
    n_labels = 0
    with torch.no_grad():
        for graphs in loader:
            graphs = graphs.to(device)
            pred = model(graphs.x, graphs.edge_index).argmax(dim=1)
            y_true += graphs.y.tolist()
            y_pred += pred.tolist()
            n_correct += (pred == graphs.y).sum().item()
            # One entry of y per labelled node, so this counts nodes
            # (not graphs) seen so far.
            n_labels += graphs.y.size(0)
    return n_correct / n_labels, y_true, y_pred

def main(json_files, batch_size=1, epochs=20, hidden_dim=64, lr=0.005):
    """Train a GCN node classifier on the given graph files and print metrics.

    NOTE: the same loader is used for training and for evaluation, so the
    printed accuracy / report describe the training data, not held-out data.

    Args:
        json_files: Iterable of paths accepted by ``load_graph_from_json``.
        batch_size: Number of graphs per ``DataLoader`` batch.
        epochs: Number of passes over the data.
        hidden_dim: Width of the hidden GCN layers.
        lr: Learning rate for Adam.

    Raises:
        ValueError: If ``json_files`` yields no graph.
    """
    dataset = [load_graph_from_json(f) for f in json_files]
    if not dataset:
        raise ValueError("json_files must contain at least one graph file")
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    in_dim = dataset[0].x.size(1)
    # Class names and ids are both taken from label_map so they stay aligned.
    class_names = list(label_map)
    class_ids = [label_map[name] for name in class_names]
    out_dim = len(class_names)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = GCN(in_dim, hidden_dim, out_dim).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    y_true = y_pred = None
    for epoch in range(1, epochs + 1):
        loss = train(model, loader, optimizer, device)
        acc, y_true, y_pred = test(model, loader, device)
        print(f"Epoch {epoch}, Loss: {loss:.4f}, Accuracy: {acc:.4f}")

    if y_true is None:
        # epochs < 1: no evaluation ran in the loop, so evaluate the
        # untrained model once instead of failing on unbound names.
        _, y_true, y_pred = test(model, loader, device)

    # Passing labels explicitly keeps the report working when a class is
    # absent from the data (otherwise target_names and the observed classes
    # disagree in length); zero_division=0 reports 0 for never-predicted
    # classes without emitting warnings.
    print("\n\U0001F4CA Classification Report:")
    print(classification_report(
        y_true, y_pred,
        labels=class_ids, target_names=class_names, zero_division=0,
    ))
    print("\n\U0001F9EE Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred, labels=class_ids))

if __name__ == '__main__':
    # Graph files to train on; the entry below is an absolute local path.
    json_files = [
        "C:/Users/Leila/datasetpy/buggy/output_graph_with_synthetic.json",
        # More graph files can be added here
    ]
    main(json_files, batch_size=1, epochs=20, hidden_dim=64, lr=0.005)




Epoch 1, Loss: 1.0815, Accuracy: 0.4396
Epoch 2, Loss: 1.3302, Accuracy: 0.5894
Epoch 3, Loss: 1.1028, Accuracy: 0.7910
Epoch 4, Loss: 0.5429, Accuracy: 0.7662
Epoch 5, Loss: 0.3400, Accuracy: 0.9882
Epoch 6, Loss: 0.2095, Accuracy: 0.9785
Epoch 7, Loss: 0.1785, Accuracy: 0.9626
Epoch 8, Loss: 0.1790, Accuracy: 0.9767
Epoch 9, Loss: 0.1504, Accuracy: 0.9866
Epoch 10, Loss: 0.1127, Accuracy: 0.9882
Epoch 11, Loss: 0.0884, Accuracy: 0.9882
Epoch 12, Loss: 0.0973, Accuracy: 0.9882
Epoch 13, Loss: 0.0884, Accuracy: 0.9882
Epoch 14, Loss: 0.0728, Accuracy: 0.9882
Epoch 15, Loss: 0.0693, Accuracy: 0.9882
Epoch 16, Loss: 0.0689, Accuracy: 0.9882
Epoch 17, Loss: 0.0683, Accuracy: 0.9882
Epoch 18, Loss: 0.0680, Accuracy: 0.9882
Epoch 19, Loss: 0.0679, Accuracy: 0.9882
Epoch 20, Loss: 0.0677, Accuracy: 0.9882

📊 Classification Report:
              precision    recall  f1-score   support

    bug-free       0.99      1.00      1.00      2926
 insert-node       0.97      0.98      0.98       864


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [52]:
import json
import torch
import torch.nn.functional as F
from torch.nn import Linear, ModuleList
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data, DataLoader
from sklearn.metrics import classification_report, confusion_matrix

# Lookup tables that turn categorical (text) node attributes into integer codes.
# Both start empty and are filled by encode_categorical as new values are seen.
type_map = {}
value_map = {}

# Only two classes are used in this version: bug-free and buggy.
label_map = {'bug-free': 0, 'buggy': 1}

def encode_categorical(val, mapping):
    """Look up the integer code of ``val`` in ``mapping``, adding it if new.

    Unknown values are assigned the current size of ``mapping`` as their code
    and stored, so the first value seen gets 0, the next new one 1, and so on.
    """
    not_found = object()  # sentinel: distinguishes "absent" from any stored code
    code = mapping.get(val, not_found)
    if code is not_found:
        code = len(mapping)
        mapping[val] = code
    return code

def load_graph_from_json(json_path):
    """Load one graph from a JSON file into a PyG ``Data`` object (binary labels).

    The JSON document is expected to contain
      * ``nodes``: entries whose first four items are
        ``[node_id, type, value, label]``;
      * ``edges``: entries whose first two items are ``[src_id, dst_id]``
        (further items are ignored).

    Node labels are collapsed into two classes: ``'bug-free'`` keeps its own
    class and every other label (e.g. ``insert-node``, ``update``) is treated
    as ``'buggy'``.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        ``Data`` with ``x`` (float, one ``[type_code, value_code]`` row per
        node), ``edge_index`` (long, shape ``[2, num_edges]``) and ``y``
        (long, one class id per node).
    """
    # JSON is UTF-8 by specification; do not depend on the platform locale.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Take the class ids from label_map so they cannot drift from the names
    # used when reporting.
    clean_id = label_map['bug-free']
    buggy_id = label_map['buggy']

    x_list = []
    y_list = []
    id_map = {}  # original node id -> row index in x / y
    for idx, node in enumerate(data['nodes']):
        id_map[node[0]] = idx
        type_feat = encode_categorical(node[1], type_map)
        val_feat = encode_categorical(node[2], value_map)
        x_list.append([type_feat, val_feat])
        y_list.append(clean_id if node[3] == 'bug-free' else buggy_id)

    # reshape keeps x two-dimensional even when the graph has no nodes.
    x = torch.tensor(x_list, dtype=torch.float).reshape(-1, 2)
    y = torch.tensor(y_list, dtype=torch.long)

    sources, targets = [], []
    for edge in data['edges']:
        # Only the two endpoints are used; any extra item is ignored.
        src, dst = edge[0], edge[1]
        # Edges that reference an unknown node id are dropped.
        if src in id_map and dst in id_map:
            sources.append(id_map[src])
            targets.append(id_map[dst])

    # Built row-wise, the tensor already has shape [2, num_edges] and is
    # contiguous, so no transpose round-trip is needed.
    edge_index = torch.tensor([sources, targets], dtype=torch.long)

    return Data(x=x, edge_index=edge_index, y=y)

class GCN(torch.nn.Module):
    """Graph convolutional network: three ``GCNConv`` layers plus a linear head."""

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        # First layer maps the input width to the hidden width; the two
        # remaining layers keep the hidden width.
        layers = [GCNConv(in_channels, hidden_channels)]
        layers += [GCNConv(hidden_channels, hidden_channels) for _ in range(2)]
        self.convs = ModuleList(layers)
        self.classifier = Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Run ``x`` through each conv + ReLU, then through the classifier."""
        for gcn_layer in self.convs:
            activated = F.relu(gcn_layer(x, edge_index))
            x = activated
        scores = self.classifier(x)
        return scores

def train(model, loader, optimizer, device):
    """Train ``model`` for one pass over ``loader``.

    For every batch: move it to ``device``, compute the cross-entropy of the
    model output against ``batch.y``, back-propagate and step the optimizer.

    Returns:
        The accumulated batch loss divided by ``len(loader)``.
    """
    model.train()
    running = 0
    for data in loader:
        data = data.to(device)
        optimizer.zero_grad()
        step_loss = F.cross_entropy(model(data.x, data.edge_index), data.y)
        step_loss.backward()
        optimizer.step()
        running = running + step_loss.item()
    n_batches = len(loader)
    return running / n_batches

def test(model, loader, device):
    """Score ``model`` on every batch of ``loader`` with gradients disabled.

    Returns:
        A tuple ``(acc, y_true, y_pred)``: the fraction of entries of
        ``batch.y`` whose arg-max prediction matches, plus the flat lists of
        true labels and predictions collected over all batches.
    """
    model.eval()
    y_true, y_pred = [], []
    hits, seen = 0, 0
    with torch.no_grad():
        for data in loader:
            data = data.to(device)
            scores = model(data.x, data.edge_index)
            guess = scores.argmax(dim=1)
            y_true += data.y.tolist()
            y_pred += guess.tolist()
            hits += guess.eq(data.y).sum().item()
            seen += data.y.size(0)  # number of labelled entries in this batch
    return hits / seen, y_true, y_pred

def main(json_files, batch_size=1, epochs=20, hidden_dim=64, lr=0.005):
    """Train a binary (bug-free / buggy) GCN node classifier and print metrics.

    NOTE: the same loader is used for training and for evaluation, so the
    printed accuracy / report describe the training data, not held-out data.

    Args:
        json_files: Iterable of paths accepted by ``load_graph_from_json``.
        batch_size: Number of graphs per ``DataLoader`` batch.
        epochs: Number of passes over the data.
        hidden_dim: Width of the hidden GCN layers.
        lr: Learning rate for Adam.

    Raises:
        ValueError: If ``json_files`` yields no graph.
    """
    dataset = [load_graph_from_json(f) for f in json_files]
    if not dataset:
        raise ValueError("json_files must contain at least one graph file")
    loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

    in_dim = dataset[0].x.size(1)
    # Derive class names, ids and the output width from label_map instead of
    # repeating the two-class assumption as separate literals.
    class_names = list(label_map)
    class_ids = [label_map[name] for name in class_names]
    out_dim = len(class_names)

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = GCN(in_dim, hidden_dim, out_dim).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    y_true = y_pred = None
    for epoch in range(1, epochs + 1):
        loss = train(model, loader, optimizer, device)
        acc, y_true, y_pred = test(model, loader, device)
        print(f"Epoch {epoch}, Loss: {loss:.4f}, Accuracy: {acc:.4f}")

    if y_true is None:
        # epochs < 1: no evaluation ran in the loop, so evaluate the
        # untrained model once instead of failing on unbound names.
        _, y_true, y_pred = test(model, loader, device)

    # Passing labels explicitly keeps the report working when one class is
    # absent from the data; zero_division=0 reports 0 for a never-predicted
    # class without emitting warnings.
    print("\n\U0001F4CA Classification Report:")
    print(classification_report(
        y_true, y_pred,
        labels=class_ids, target_names=class_names, zero_division=0,
    ))
    print("\n\U0001F9EE Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred, labels=class_ids))

if __name__ == '__main__':
    # Graph files to train on; the entry below is an absolute local path.
    json_files = [
        "C:/Users/Leila/datasetpy/buggy/output_graph_with_synthetic.json",
        # More graph files can be added here
    ]
    main(json_files, batch_size=1, epochs=20, hidden_dim=64, lr=0.005)




Epoch 1, Loss: 0.8210, Accuracy: 0.7662
Epoch 2, Loss: 0.6101, Accuracy: 0.7536
Epoch 3, Loss: 0.5712, Accuracy: 0.7905
Epoch 4, Loss: 0.4851, Accuracy: 0.8178
Epoch 5, Loss: 0.3980, Accuracy: 0.7774
Epoch 6, Loss: 0.4168, Accuracy: 0.9217
Epoch 7, Loss: 0.3906, Accuracy: 0.9500
Epoch 8, Loss: 0.3320, Accuracy: 0.8209
Epoch 9, Loss: 0.3081, Accuracy: 0.8178
Epoch 10, Loss: 0.3148, Accuracy: 0.8162
Epoch 11, Loss: 0.3025, Accuracy: 0.8183
Epoch 12, Loss: 0.2758, Accuracy: 0.9309
Epoch 13, Loss: 0.2660, Accuracy: 0.9272
Epoch 14, Loss: 0.2671, Accuracy: 0.9442
Epoch 15, Loss: 0.2663, Accuracy: 0.9434
Epoch 16, Loss: 0.2444, Accuracy: 0.9419
Epoch 17, Loss: 0.2438, Accuracy: 0.9505
Epoch 18, Loss: 0.2472, Accuracy: 0.9531
Epoch 19, Loss: 0.2432, Accuracy: 0.9531
Epoch 20, Loss: 0.2306, Accuracy: 0.9301

📊 Classification Report:
              precision    recall  f1-score   support

    bug-free       0.97      0.94      0.95      2926
       buggy       0.81      0.91      0.86       893
