In [1]:
!pip install networkx matplotlib



In [11]:
import json
import os
import re

import networkx as nx
import numpy as np
from scipy.sparse import lil_matrix, save_npz

# ===== Step 1: Parse the Verilog file =====
# Port declaration: direction keyword (as a whole word) plus the rest of the line.
_PORT_DECL_RE = re.compile(r'(input|output)\b(.*)')
# Bus range such as "[11:0]"; its digits must not be mistaken for net names.
_BUS_RANGE_RE = re.compile(r'\[[^\]]*\]')
# Net-type keywords that may follow the direction, e.g. "input wire n0;".
_NET_TYPE_KEYWORDS = frozenset({'wire', 'reg', 'signed'})

# DFF instantiated with named ports in the fixed order RN, SN, CK, D, Q.
_DFF_RE = re.compile(
    r'^\s*dff\s+(\S+)\s*\(\.RN\(([^)]+)\),\s*\.SN\(([^)]+)\),'
    r'\s*\.CK\(([^)]+)\),\s*\.D\(([^)]+)\),\s*\.Q\(([^)]+)\)\);'
)
# Single-input primitives: <kw> <inst>(<out>, <in>);
_ONE_INPUT_RE = re.compile(r'^\s*(buf|not)\s+(\S+)\((\S+)\s*,\s*(\S+)\);')
# Two-input primitives: <kw> <inst>(<out>, <in1>, <in2>);
_TWO_INPUT_RE = re.compile(
    r'^\s*(or|nor|xor|and|nand)\s+(\S+)\((\S+)\s*,\s*(\S+)\s*,\s*(\S+)\);'
)


def parse_verilog(verilog_code, verbose=True):
    """Extract gates and primary ports from a gate-level Verilog netlist.

    The netlist is processed one line at a time, so every declaration or
    gate instantiation must fit on a single line.

    Args:
        verilog_code: Full text of the Verilog source.
        verbose: When true (the default), echo every processed line to
            stdout as ``Processing line: ...``.

    Returns:
        A tuple ``(gates, primary_inputs, primary_outputs)``.

        ``gates`` is a list of tuples whose first element is the upper-case
        gate type and whose second element is the instance name:

        * ``('BUF' | 'NOT', inst, out, in)``
        * ``('OR' | 'NOR' | 'XOR' | 'AND' | 'NAND', inst, out, in1, in2)``
        * ``('DFF', inst, RN, SN, CK, D, Q)`` (note: the output Q is last)

        ``primary_inputs`` / ``primary_outputs`` are lists of the names
        declared on ``input`` / ``output`` lines, in source order.
    """
    gates = []
    primary_inputs = []
    primary_outputs = []

    for raw_line in verilog_code.splitlines():
        line = raw_line.strip()
        if verbose:
            print(f"Processing line: {line}")

        port_match = _PORT_DECL_RE.match(line)
        if port_match:
            direction, rest = port_match.groups()
            # Drop trailing comments and bus ranges first; otherwise
            # "input [11:0] n3;" would yield "11" and "0" as port names.
            rest = _BUS_RANGE_RE.sub(' ', rest.split('//', 1)[0])
            names = [
                token
                for token in re.findall(r'\w+', rest)
                if token not in _NET_TYPE_KEYWORDS
            ]
            if direction == 'input':
                primary_inputs.extend(names)
            else:
                primary_outputs.extend(names)
            continue

        dff_match = _DFF_RE.match(line)
        if dff_match:
            gates.append(('DFF',) + dff_match.groups())
            continue

        # The keywords are mutually exclusive, so at most one pattern matches.
        gate_match = _ONE_INPUT_RE.match(line) or _TWO_INPUT_RE.match(line)
        if gate_match:
            keyword, *connections = gate_match.groups()
            gates.append((keyword.upper(), *connections))

    return gates, primary_inputs, primary_outputs

# ===== Step 2: Convert to infolist format =====
def gates_to_infolist(gates, trojan_gates=None):
    """Convert parsed gate tuples into infolist records.

    Args:
        gates: Iterable of gate tuples ``(type, instance, ...nets)``. For
            every type except ``'DFF'`` the first net is the output and the
            remaining nets are inputs. ``'DFF'`` tuples list their nets as
            RN, SN, CK, D, Q, so the output (Q) is the last element.
        trojan_gates: Optional collection of names to flag. A gate is
            flagged when its instance name, its output net, or any of its
            input nets appears in this collection.

    Returns:
        A list of 7-tuples
        ``(type, type, instance, instance, portnames, connnames, is_trojan)``
        where ``portnames`` is ``['Y', 'A1', 'A2', ...]`` and ``connnames``
        is the output net followed by the input nets.
    """
    # A set gives O(1) membership tests; None/empty means "nothing flagged".
    trojan_names = frozenset(trojan_gates) if trojan_gates else frozenset()

    infolist = []
    for gate in gates:
        gtype, instname = gate[0], gate[1]
        if gtype == 'DFF':
            # DFF nets arrive as RN, SN, CK, D, Q: the driven net is Q (last),
            # not RN (first). Treating RN as the output would wire the graph
            # edge from the flop to the reset net's consumers.
            output = gate[-1]
            inputs = list(gate[2:-1])
        else:
            output = gate[2]
            inputs = list(gate[3:])

        portnames = ['Y'] + [f'A{k + 1}' for k in range(len(inputs))]
        connnames = [output] + inputs

        is_trojan = not trojan_names.isdisjoint([instname, *connnames])
        infolist.append(
            (gtype, gtype, instname, instname, portnames, connnames, is_trojan)
        )
    return infolist

# ===== Step 3: Build the adjacency matrix & features =====
def build_lookup(infolist):
    """Index gates by the nets they consume.

    Returns a dict mapping each input net name to the list of infolist
    indices whose connection list (element 5) contains that net after the
    first entry. A gate that uses the same net on several inputs appears
    once per use.
    """
    consumers = {}
    for gate_idx, record in enumerate(infolist):
        # The first connection is the gate's output; the rest are its inputs.
        input_nets = record[5][1:]
        for net in input_nets:
            consumers.setdefault(net, []).append(gate_idx)
    return consumers

def build_graph_features(infolist, primary_inputs=None):
    """Build the adjacency matrix, node features and labels for a netlist.

    Node numbering: gates occupy indices ``0 .. len(infolist) - 1`` in
    infolist order, followed by one node per primary input.

    Args:
        infolist: Records as produced by ``gates_to_infolist``; element 0 is
            the gate type, element 5 the connection list (output first) and
            element 6 the trojan flag.
        primary_inputs: Optional list of primary-input net names. ``None``
            is treated as an empty list.

    Returns:
        A tuple ``(adj, feats, train_indices, class_map)``:

        * ``adj``: boolean ``lil_matrix``; ``adj[i, j]`` is set when node
          ``i`` drives an input of gate ``j``.
        * ``feats``: float array with one row per node: a one-hot gate type
          (columns ordered by sorted type name), then in-degree, then
          out-degree. Primary-input rows have an all-zero one-hot part.
        * ``train_indices``: every node index (all nodes are training nodes).
        * ``class_map``: node index -> 1 for trojan gates, 0 otherwise.
    """
    primary_inputs = [] if primary_inputs is None else list(primary_inputs)
    num_gates = len(infolist)
    numnodes = num_gates + len(primary_inputs)

    adj = lil_matrix((numnodes, numnodes), dtype=bool)
    train_indices = list(range(numnodes))  # every node is a training node

    gatelist = sorted({info[0] for info in infolist})
    gatelookup = {gtype: col for col, gtype in enumerate(gatelist)}

    # Feature layout: one-hot gate type + in-degree + out-degree.
    feats = np.zeros((numnodes, len(gatelist) + 2))

    lookup = build_lookup(infolist)
    class_map = {}

    for i, info in enumerate(infolist):
        feats[i, gatelookup[info[0]]] = 1

        # Connect this gate to every other gate that consumes its output net.
        output_wire = info[5][0]
        for j in lookup.get(output_wire, ()):
            if i != j:
                adj[i, j] = True
                feats[i, -1] += 1  # out-degree of the driver
                feats[j, -2] += 1  # in-degree of the consumer

        # Binary classification label: 1 for trojan, 0 for normal.
        class_map[i] = 1 if info[6] else 0

    for offset, pi in enumerate(primary_inputs):
        node = num_gates + offset
        # Label every primary input, including ones that feed no gate, so
        # that class_map covers all nodes.
        class_map[node] = 0
        # .get(): an input may be unused or referenced only by bit-select
        # (e.g. "n2[5]"), in which case it has no entry in the lookup.
        for consumer in lookup.get(pi, ()):
            adj[node, consumer] = True  # primary input to gate
            # NOTE(review): these edges are not counted in the in/out-degree
            # feature columns, matching the previous behaviour; confirm
            # whether that is intended.

    return adj, feats, train_indices, class_map

# ===== Step 4: Save in the format GraphSAGE expects =====
def save_graphsage_format(adj, feats, class_map, train_indices, out_dir="."):
    """Write the graph to disk as the set of files GraphSAGE loads.

    Files written inside ``out_dir``:

    * ``adj_full.npz`` and ``adj_train.npz``: the adjacency matrix in CSR
      form (identical, since every node is a training node).
    * ``feats.npy``: the node feature array.
    * ``class_map.json``: node index -> class label.
    * ``role.json``: ``{'tr': train_indices, 'va': [], 'te': []}``.

    Args:
        adj: Sparse adjacency matrix exposing ``.tocsr()``.
        feats: NumPy array of node features.
        class_map: JSON-serialisable mapping of node index to label.
        train_indices: JSON-serialisable list of training node indices.
        out_dir: Destination directory; created if missing. Defaults to the
            current working directory.
    """
    os.makedirs(out_dir, exist_ok=True)

    # Convert once and reuse for both adjacency files.
    adj_csr = adj.tocsr()
    save_npz(os.path.join(out_dir, "adj_full.npz"), adj_csr)
    save_npz(os.path.join(out_dir, "adj_train.npz"), adj_csr)  # simplification: same graph

    np.save(os.path.join(out_dir, "feats.npy"), feats, allow_pickle=False)

    with open(os.path.join(out_dir, "class_map.json"), "w") as f:
        json.dump(class_map, f)

    with open(os.path.join(out_dir, "role.json"), "w") as f:
        json.dump({'tr': train_indices, 'va': [], 'te': []}, f)

# ===== Main flow =====
def _read_trojan_gates(gt_trojan_filepath):
    """Return the trojan gate names listed in a ground-truth file.

    Expected format: a clean design contains the single line ``NO_TROJAN``.
    A trojaned design starts with ``TROJANED``, followed by a
    ``TROJAN_GATES`` header, one gate name per line, and a closing
    ``END_TROJAN_GATES`` line. Blank lines are ignored.
    """
    with open(gt_trojan_filepath, 'r') as f:
        lines = [raw.strip() for raw in f]

    if not lines or lines[0] != "TROJANED":
        return []

    trojan_gates = []
    for line in lines[1:]:
        if line == "END_TROJAN_GATES":
            break
        # Skip the section header and blank lines wherever they appear,
        # rather than assuming the header is exactly the second line.
        if line and line != "TROJAN_GATES":
            trojan_gates.append(line)
    return trojan_gates


def process_single_verilog(filepath, gt_trojan_filepath):
    """Convert one Verilog netlist plus its ground truth into GraphSAGE files.

    Reads and parses the netlist at ``filepath``, labels gates using the
    trojan list in ``gt_trojan_filepath``, builds the graph and writes the
    output files via ``save_graphsage_format``.

    Raises:
        FileNotFoundError: If either input file does not exist.
    """
    with open(filepath, 'r') as f:
        code = f.read()

    gates, primary_inputs, primary_outputs = parse_verilog(code)
    print(f"Parsed {len(gates)} gates, {len(primary_inputs)} primary inputs, {len(primary_outputs)} primary outputs.")

    trojan_gates = _read_trojan_gates(gt_trojan_filepath)
    infolist = gates_to_infolist(gates, trojan_gates)
    adj, feats, train_indices, class_map = build_graph_features(infolist, primary_inputs)
    save_graphsage_format(adj, feats, class_map, train_indices)

    print("✅ Graph feature files saved.")

# Script entry point: process one netlist together with its ground-truth file.
if __name__ == "__main__":
    process_single_verilog("test/design3.v", "result0.txt")  # change these to your own file paths


Processing line: module top(n0, n1, n2, n3, n4, n5);
Processing line: input n0, n1;
Processing line: input [127:0] n2;
Processing line: output [63:0] n3, n4, n5;
Processing line: wire n0, n1;
Processing line: wire [127:0] n2;
Processing line: wire [63:0] n3, n4, n5;
Processing line: wire [2:0] n6;
Processing line: wire [15:0] n7;
Processing line: wire [7:0] n8;
Processing line: wire n9, n10, n11, n12, n13, n14, n15, n16;
Processing line: wire n17, n18, n19, n20, n21, n22, n23, n24;
Processing line: wire n25, n26, n27, n28, n29, n30, n31, n32;
Processing line: wire n33, n34, n35, n36, n37, n38, n39, n40;
Processing line: wire n41, n42, n43, n44, n45, n46, n47, n48;
Processing line: wire n49, n50, n51, n52, n53, n54, n55, n56;
Processing line: wire n57, n58, n59, n60, n61, n62, n63, n64;
Processing line: wire n65, n66, n67, n68, n69, n70, n71, n72;
Processing line: wire n73, n74, n75, n76, n77, n78, n79, n80;
Processing line: wire n81, n82, n83, n84, n85, n86, n87, n88;
Processing line: 

FileNotFoundError: [Errno 2] No such file or directory: 'result0.txt'