In [1]:
!git clone https://github.com/lorettayao/Cad-contest.git


Cloning into 'Cad-contest'...
remote: Enumerating objects: 88, done.[K
remote: Counting objects: 100% (88/88), done.[K
remote: Compressing objects: 100% (53/53), done.[K
remote: Total 88 (delta 42), reused 74 (delta 34), pack-reused 0 (from 0)[K
Receiving objects: 100% (88/88), 1.83 MiB | 5.44 MiB/s, done.
Resolving deltas: 100% (42/42), done.


In [None]:
!pip install networkx matplotlib



In [None]:
import re
import numpy as np
import networkx as nx
import json
from scipy.sparse import lil_matrix, save_npz
from collections import deque

# ===== Step 0: BFS Level 計算 =====
# Loretta： BFS
def compute_bfxs_levels(adj, start_nodes):
    num_nodes = adj.shape[0]
    levels = [-1] * num_nodes  # -1 表示沒被訪問過
    queue = deque()

    for s in start_nodes:
        levels[s] = 0
        queue.append(s)

    while queue:
        node = queue.popleft()
        for neighbor in adj.rows[node]:  # lil_matrix 的 row 可直接抓 list of neighbors
            if levels[neighbor] == -1:
                levels[neighbor] = levels[node] + 1
                queue.append(neighbor)
    return levels

# ===== Step 1: 解析 Verilog 檔案 =====
def parse_verilog(verilog_code):
    gates = []
    primary_inputs = ["1'b1","1'b0"]
    primary_outputs = []
    for line in verilog_code.splitlines():
        line = line.strip()
        #print(f"Processing line: {line}")

        if line.startswith('input'):
            line = line.strip().rstrip(';')
            line = line[len('input'):].strip()

            vector_range = None

            # Check for vector declaration like [11:0]
            if line.startswith('['):
                closing_bracket_index = line.find(']')
                vector_range_str = line[1:closing_bracket_index]
                msb, lsb = map(int, vector_range_str.split(':'))
                vector_range = range(lsb, msb + 1) if lsb <= msb else range(lsb, msb - 1, -1)
                line = line[closing_bracket_index + 1:].strip()

            # Split remaining part into signal names
            signals = [name.strip() for name in line.split(',')]

            for sig in signals:
                if vector_range:
                    primary_inputs.extend([f"{sig}[{i}]" for i in vector_range])
                else:
                    primary_inputs.append(sig)

        # check if line starts with output, such as: output n6, n7;
        # or such as: output [11:0] n8;
        # then n6, n7, n8 are primary outputs
        if line.startswith('output'):
            line = line.strip().rstrip(';')
            line = line[len('output'):].strip()

            vector_range = None
            # Check for vector declaration like [11:0]
            if line.startswith('['):
                closing_bracket_index = line.find(']')
                vector_range_str = line[1:closing_bracket_index]
                msb, lsb = map(int, vector_range_str.split(':'))
                vector_range = range(lsb, msb + 1) if lsb <= msb else range(lsb, msb - 1, -1)
                line = line[closing_bracket_index + 1:].strip()
            # Split remaining part into signal names
            signals = [name.strip() for name in line.split(',')]
            for sig in signals:
                if vector_range:
                    primary_outputs.extend([f"{sig}[{i}]" for i in vector_range])
                else:
                    primary_outputs.append(sig)

        # 解析 BUF gate
        buf_match = re.match(r'^\s*buf\s+(\S+)\((\S+),\s*(\S+)\);', line)
        if buf_match:
            gates.append(('BUF', buf_match.group(1), buf_match.group(2), buf_match.group(3)))  # buf, output, input
            continue

        # 解析 DFF gate（具名端口的 DFF gate，處理 .RN, .SN 等）
        dff_match = re.match(r'^\s*dff\s+(\S+)\s*\(\.RN\(([^)]+)\),\s*\.SN\(([^)]+)\),\s*\.CK\(([^)]+)\),\s*\.D\(([^)]+)\),\s*\.Q\(([^)]+)\)\);', line)
        if dff_match:
            gates.append(('DFF', dff_match.group(1), dff_match.group(2), dff_match.group(3),
                          dff_match.group(4), dff_match.group(5), dff_match.group(6)))  # dff, RN, SN, CK, D, Q
            continue

        # 解析 OR gate (雙輸入 gate)
        or_match = re.match(r'^\s*or\s+(\S+)\((\S+)\s*,\s*(\S+)\s*,\s*(\S+)\);', line)
        if or_match:
            gates.append(('OR', or_match.group(1), or_match.group(2), or_match.group(3), or_match.group(4)))  # or, output, input1, input2
            continue

        # 解析 NOR gate (雙輸入 gate)
        nor_match = re.match(r'^\s*nor\s+(\S+)\((\S+)\s*,\s*(\S+)\s*,\s*(\S+)\);', line)
        if nor_match:
            gates.append(('NOR', nor_match.group(1), nor_match.group(2), nor_match.group(3), nor_match.group(4)))  # nor, output, input1, input2
            continue

        # 解析 NOT gate (單輸入 gate)
        not_match = re.match(r'^\s*not\s+(\S+)\((\S+)\s*,\s*(\S+)\);', line)
        if not_match:
            gates.append(('NOT', not_match.group(1), not_match.group(2), not_match.group(3)))  # not, output, input
            continue

        # 解析 XOR gate (雙輸入 gate)
        xor_match = re.match(r'^\s*xor\s+(\S+)\((\S+)\s*,\s*(\S+)\s*,\s*(\S+)\);', line)
        if xor_match:
            gates.append(('XOR', xor_match.group(1), xor_match.group(2), xor_match.group(3), xor_match.group(4)))  # xor, output, input1, input2
            continue

        # 解析 AND gate (雙輸入 gate)
        and_match = re.match(r'^\s*and\s+(\S+)\((\S+)\s*,\s*(\S+)\s*,\s*(\S+)\);', line)
        if and_match:
            gates.append(('AND', and_match.group(1), and_match.group(2), and_match.group(3), and_match.group(4)))  # and, output, input1, input2
            continue

        # 解析 NAND gate (雙輸入 gate)
        nand_match = re.match(r'^\s*nand\s+(\S+)\((\S+)\s*,\s*(\S+)\s*,\s*(\S+)\);', line)
        if nand_match:
            gates.append(('NAND', nand_match.group(1), nand_match.group(2), nand_match.group(3), nand_match.group(4)))  # nand, output, input1, input2
            continue

        # 解析 XNOR gate (雙輸入 gate)
        xnor_match = re.match(r'^\s*xnor\s+(\S+)\((\S+)\s*,\s*(\S+)\s*,\s*(\S+)\);', line)
        if xnor_match:
            gates.append(('XNOR', xnor_match.group(1), xnor_match.group(2), xnor_match.group(3), xnor_match.group(4)))  # xor, output, input1, input2
            continue
    #print(gates)
    return gates, primary_inputs, primary_outputs
# ===== Step 2: 轉換成 infolist 格式 =====
def gates_to_infolist(gates, trojan_gates=[]):
    infolist = []
    for g in gates:
        gtype = g[0]
        instname = g[1]
        output = g[2]
        inputs = list(g[3:])

        portnames = ['Y'] + [f'A{i+1}' for i in range(len(inputs))]
        connnames = [output] + inputs

        is_trojan = (instname in trojan_gates or output in trojan_gates or any(inp in trojan_gates for inp in inputs))
        infolist.append((
            gtype, gtype, instname, instname, portnames, connnames, is_trojan
        ))
        #print("connnames = ",connnames)
    return infolist

# ===== Step 3: 建立 adjacency matrix & features =====


def build_lookup(infolist):
    lookup = {}
    for i, info in enumerate(infolist):
        conns = info[5]  # connection names
        for conn in conns[1:]:  # skip output
            if conn not in lookup:
                lookup[conn] = []
            lookup[conn].append(i)
    return lookup

def build_graph_features(infolist, primary_inputs=None):
    numnodes = len(infolist) + len(primary_inputs)
    adj = lil_matrix((numnodes, numnodes), dtype=bool)
    class_map = {}
    train_indices = list(range(numnodes))  # 全部都當 train

    gatelist = sorted(list(set([x[0] for x in infolist])))
    gatelookup = {g: i for i, g in enumerate(gatelist)}

    # feature: one-hot + in degree + out degree + bfs detection?(Loretta :所以我把他變成+3喔)
    feats = np.zeros((numnodes, len(gatelist) + 3))
    gate_map={}
    lookup = build_lookup(infolist)

    for i, info in enumerate(infolist):
        gatetype = info[0]
        conns = info[5]
        feats[i][gatelookup[gatetype]] = 1

        # Loretta
        output_wire = conns[0]  # output wire

        if output_wire in lookup:
            for j in lookup[output_wire]:
                if i != j:
                    adj[i, j] = True
                    feats[i][-1] += 1  # out degree
                    feats[j][-2] += 1  # in degree

        class_map[i] = 1 if info[6] else 0
        gate_map[i] = info[2]

    for i, pi in enumerate(primary_inputs):
        if pi not in lookup:
            continue
        for j, lookup_pi in enumerate(lookup[pi]):
            adj[i+len(infolist), lookup_pi] = True # primary input to gate
            class_map[i+len(infolist)] = 0  # primary inputs are not trojan
            gate_map[i+len(infolist)] = pi
    # Loretta(BFS Level 計算)
    input_nodes = list(range(len(infolist), len(infolist) + len(primary_inputs)))
    bfs_levels = compute_bfs_levels(adj, input_nodes)

    for i in range(len(feats)):
        feats[i][-3] = bfs_levels[i] if bfs_levels[i] != -1 else 0  # 沒有訪問到的設為 0

    return adj, feats, train_indices, class_map, gate_map

# ===== Step 4: 儲存 GraphSAGE 所需格式 =====
def save_graphsage_format(adj, feats, class_map, train_indices, gate_map):
    save_npz("adj_full.npz", adj.tocsr())
    save_npz("adj_train.npz", adj.tocsr())  # 簡化處理：用一樣的

    np.save("feats.npy", feats, allow_pickle=False)

    with open("class_map.json", "w") as f:
        json.dump(class_map, f)

    with open("role.json", "w") as f:
        json.dump({'tr': train_indices, 'va': [], 'te': []}, f)

    with open("gate_map.json", "w") as f:  # <== 新增這段
        json.dump(gate_map, f)

# ===== 主流程 =====
def process_single_verilog(filepath, gt_trojan_filepath):
    with open(filepath, 'r') as f:
        code = f.read()

    gates, primary_inputs, primary_outputs = parse_verilog(code)
    print(f"Parsed {len(gates)} gates, {primary_inputs} primary inputs, {len(primary_outputs)} primary outputs.")
    # 若無trojan gates, txt只有一行: NO_TROJAN
    # 若有trojan gates, txt第一行是 "TROJANED", 第二行是 "TROJAN_GATES", 最後一行是 "END_TROJAN_GATES"
    trojan_gates = []
    with open(gt_trojan_filepath, 'r') as f:
        lines = [l.strip() for l in f]
        if lines and lines[0] == "TROJANED":
            for line in lines[2:]:
                if line == "END_TROJAN_GATES":
                    break
                trojan_gates.append(line)
        else:
            trojan_gates = []
    infolist = gates_to_infolist(gates, trojan_gates)
    adj, feats, train_indices, class_map, gate_map = build_graph_features(infolist, primary_inputs)
    save_graphsage_format(adj, feats, class_map, train_indices, gate_map)

    print("✅ Graph feature files saved.")

if __name__ == "__main__":
    process_single_verilog("test/design3.v", "result4.txt")  # 改成你的檔案


Parsed 1933 gates, ["1'b1", "1'b0", 'n0', 'n1', 'n2[0]', 'n2[1]', 'n2[2]', 'n2[3]', 'n2[4]', 'n2[5]', 'n2[6]', 'n2[7]', 'n2[8]', 'n2[9]', 'n2[10]', 'n2[11]', 'n2[12]', 'n2[13]', 'n2[14]', 'n2[15]', 'n2[16]', 'n2[17]', 'n2[18]', 'n2[19]', 'n2[20]', 'n2[21]', 'n2[22]', 'n2[23]', 'n2[24]', 'n2[25]', 'n2[26]', 'n2[27]', 'n2[28]', 'n2[29]', 'n2[30]', 'n2[31]', 'n2[32]', 'n2[33]', 'n2[34]', 'n2[35]', 'n2[36]', 'n2[37]', 'n2[38]', 'n2[39]', 'n2[40]', 'n2[41]', 'n2[42]', 'n2[43]', 'n2[44]', 'n2[45]', 'n2[46]', 'n2[47]', 'n2[48]', 'n2[49]', 'n2[50]', 'n2[51]', 'n2[52]', 'n2[53]', 'n2[54]', 'n2[55]', 'n2[56]', 'n2[57]', 'n2[58]', 'n2[59]', 'n2[60]', 'n2[61]', 'n2[62]', 'n2[63]', 'n2[64]', 'n2[65]', 'n2[66]', 'n2[67]', 'n2[68]', 'n2[69]', 'n2[70]', 'n2[71]', 'n2[72]', 'n2[73]', 'n2[74]', 'n2[75]', 'n2[76]', 'n2[77]', 'n2[78]', 'n2[79]', 'n2[80]', 'n2[81]', 'n2[82]', 'n2[83]', 'n2[84]', 'n2[85]', 'n2[86]', 'n2[87]', 'n2[88]', 'n2[89]', 'n2[90]', 'n2[91]', 'n2[92]', 'n2[93]', 'n2[94]', 'n2[95]', 'n

FileNotFoundError: [Errno 2] No such file or directory: 'result4.txt'

In [20]:
!git status


fatal: not a git repository (or any of the parent directories): .git
