# In [None]:
import json
import torch
import torch.nn.functional as F
from torch.nn import Linear, ModuleList
from torch_geometric.nn import GCNConv
from torch_geometric.data import Data, DataLoader
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# Lookup tables that map textual node attributes to integer ids.
# Both start empty; encode_categorical() adds an entry the first time a
# value is seen.
type_map = dict()
value_map = dict()

# Only two classes: bug-free and buggy.
label_map = {
    'bug-free': 0,
    'buggy': 1,
}

def encode_categorical(val, mapping):
    """Return the integer id of ``val``, registering it in ``mapping`` if new.

    Ids are handed out in order of first appearance: an unseen value receives
    the current size of ``mapping`` as its id, and ``mapping`` is updated in
    place so that later calls with the same value return the same id.
    """
    # setdefault only stores the candidate id when ``val`` is absent; the
    # length is computed before the insertion, so the first value gets id 0.
    return mapping.setdefault(val, len(mapping))

def load_graph_from_json(json_path):
    """Load one graph from a JSON file into a PyTorch Geometric ``Data`` object.

    The file must contain a ``"nodes"`` list whose entries are indexed as
    ``[node_id, type, value, label]`` and an ``"edges"`` list whose entries
    unpack to exactly three items ``(src, dst, <ignored>)``.

    Args:
        json_path: Path of the JSON file to read.

    Returns:
        A ``Data`` object with

        * ``x`` - float tensor holding one ``[type_id, value_id]`` row per node,
        * ``y`` - long tensor of node labels (0 for ``'bug-free'``, 1 for any
          other label),
        * ``edge_index`` - long tensor of shape ``[2, num_edges]`` expressed
          in positional node indices.

    Side effects:
        Extends the module-level ``type_map`` / ``value_map`` with every type
        or value that has not been seen before.
    """
    # Decode explicitly as UTF-8 instead of relying on the platform default
    # encoding, which differs between operating systems.
    with open(json_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    x_list = []
    y_list = []
    id_map = {}  # original node id -> row index in x / y

    for idx, node in enumerate(data['nodes']):
        node_id = node[0]
        id_map[node_id] = idx
        type_feat = encode_categorical(node[1], type_map)
        val_feat = encode_categorical(node[2], value_map)

        # Every label other than 'bug-free' (e.g. insert-node, update) is
        # collapsed into the single "buggy" class.
        label = 0 if node[3] == 'bug-free' else 1

        x_list.append([type_feat, val_feat])
        y_list.append(label)

    # reshape keeps x two-dimensional ([0, 2]) even when the graph has no
    # nodes, so callers can always read the feature width from x.size(1).
    x = torch.tensor(x_list, dtype=torch.float).reshape(-1, 2)
    y = torch.tensor(y_list, dtype=torch.long)

    sources = []
    targets = []
    for edge in data['edges']:
        src, dst, _ = edge
        # Edges that reference a node id missing from "nodes" are skipped.
        if src in id_map and dst in id_map:
            sources.append(id_map[src])
            targets.append(id_map[dst])

    # Built directly in [2, num_edges] layout, so the tensor is already
    # contiguous; no transpose round trip is needed.
    edge_index = torch.tensor([sources, targets], dtype=torch.long)

    return Data(x=x, edge_index=edge_index, y=y)

class GCN(torch.nn.Module):
    """Two-layer graph convolutional network with a linear classification head.

    The input features pass through two ``GCNConv`` layers, each followed by a
    ReLU, and the resulting hidden representation is projected to
    ``out_channels`` scores by a ``Linear`` layer.
    """

    def __init__(self, in_channels, hidden_channels, out_channels):
        super().__init__()
        # Layers are created in the same order as they are applied.
        input_layer = GCNConv(in_channels, hidden_channels)
        hidden_layer = GCNConv(hidden_channels, hidden_channels)
        self.convs = ModuleList([input_layer, hidden_layer])
        self.classifier = Linear(hidden_channels, out_channels)

    def forward(self, x, edge_index):
        """Return raw (un-normalised) class scores, one row per row of ``x``."""
        hidden = x
        for layer in self.convs:
            hidden = F.relu(layer(hidden, edge_index))
        return self.classifier(hidden)

def train(model, loader, optimizer, device):
    """Run one optimisation pass over ``loader`` and return the mean batch loss.

    Args:
        model: Module called as ``model(x, edge_index)`` that returns class
            scores; it is switched to training mode here.
        loader: Iterable of batches exposing ``to(device)``, ``x``,
            ``edge_index`` and ``y``.
        optimizer: Optimizer that updates the model parameters.
        device: Device every batch is moved to before the forward pass.

    Returns:
        The cross-entropy loss averaged over the batches that were processed,
        or ``0.0`` when ``loader`` yields no batches.
    """
    model.train()
    total_loss = 0.0
    num_batches = 0
    for batch in loader:
        batch = batch.to(device)
        optimizer.zero_grad()
        out = model(batch.x, batch.edge_index)
        loss = F.cross_entropy(out, batch.y)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        num_batches += 1

    # Guard the average: dividing by the batch count of an empty loader would
    # raise ZeroDivisionError.
    if num_batches == 0:
        return 0.0
    return total_loss / num_batches

def test(model, loader, device):
    """Evaluate ``model`` on ``loader`` without computing gradients.

    Args:
        model: Module called as ``model(x, edge_index)`` that returns class
            scores; it is switched to evaluation mode here.
        loader: Iterable of batches exposing ``to(device)``, ``x``,
            ``edge_index`` and ``y``.
        device: Device every batch is moved to before the forward pass.

    Returns:
        A tuple ``(acc, y_true, y_pred)`` where ``acc`` is the fraction of
        correct predictions (``0.0`` when nothing was evaluated) and
        ``y_true`` / ``y_pred`` are flat Python lists of the target and
        predicted labels in evaluation order.
    """
    model.eval()
    y_true = []
    y_pred = []
    correct = 0
    total = 0
    with torch.no_grad():
        for batch in loader:
            batch = batch.to(device)
            out = model(batch.x, batch.edge_index)
            # The predicted class is the index of the highest score.
            pred = out.argmax(dim=1)
            y_true.extend(batch.y.tolist())
            y_pred.extend(pred.tolist())
            correct += pred.eq(batch.y).sum().item()
            total += batch.y.size(0)

    # Guard the ratio: an empty loader (or only empty batches) would
    # otherwise raise ZeroDivisionError.
    acc = correct / total if total else 0.0
    return acc, y_true, y_pred

def main(json_files, batch_size=1, epochs=30, hidden_dim=32, lr=0.005):
    """Train a GCN node classifier on the given graph files and print metrics.

    Args:
        json_files: Iterable of paths, each loaded with
            ``load_graph_from_json``.
        batch_size: Number of graphs per mini-batch.
        epochs: Number of training epochs.
        hidden_dim: Width of the hidden GCN layers.
        lr: Learning rate for the Adam optimizer.

    Raises:
        ValueError: If ``json_files`` contains no files.

    Prints the loss and test accuracy after every epoch, followed by a
    classification report and a confusion matrix for the test split.
    """
    dataset = [load_graph_from_json(f) for f in json_files]
    if not dataset:
        raise ValueError("json_files must contain at least one graph file")

    # Split the data into train/test. With a single graph there is nothing to
    # hold out, so the same graph is used for both and the reported metrics
    # are training metrics.
    if len(dataset) > 1:
        train_set, test_set = train_test_split(dataset, test_size=0.2, random_state=42)
    else:
        train_set = test_set = dataset

    train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True)
    test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False)

    in_dim = dataset[0].x.size(1)
    out_dim = 2  # binary classification

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = GCN(in_dim, hidden_dim, out_dim).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)

    for epoch in range(1, epochs + 1):
        loss = train(model, train_loader, optimizer, device)
        acc, _, _ = test(model, test_loader, device)
        print(f"Epoch {epoch}, Loss: {loss:.4f}, Accuracy: {acc:.4f}")

    # Pin the label set explicitly: without ``labels`` scikit-learn infers the
    # classes from the data and rejects the two target names whenever only
    # one class occurs in y_true / y_pred.
    class_ids = [0, 1]

    print("\n📊 Classification Report:")
    _, y_true, y_pred = test(model, test_loader, device)
    print(classification_report(y_true, y_pred, labels=class_ids,
                                target_names=['bug-free', 'buggy']))
    print("\n🧮 Confusion Matrix:")
    print(confusion_matrix(y_true, y_pred, labels=class_ids))

if __name__ == '__main__':
    # Graph files to train and evaluate on. With a single file, main() uses
    # the same graph for both the training and the test split.
    json_files = [
        "C:/Users/Leila/datasetpy/buggy/output_graph_with_synthetic.json",
        # Add more files here for a more realistic evaluation
    ]
    main(json_files)
