<a href="https://colab.research.google.com/github/mangohehe/DeepGate2/blob/data-learning/notebooks/Data-Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
#@markdown Git clone DeepGate2 Repo
!git clone https://github.com/mangohehe/DeepGate2.git

In [None]:
#@markdown Run to mount Google Drive

from google.colab import drive
drive.mount('/content/drive')

%cd /content/DeepGate2/data/train
%pwd

In [None]:
#@markdown Run to unzip training data

%cd /content/DeepGate2/dataset
!tar -jxvf rawaig.tar.bz2

In [6]:
import os
from collections import defaultdict
import pandas as pd
# Import the data_table module
from google.colab import data_table
from collections import defaultdict, Counter
# Enable the DataTable formatter globally
data_table.enable_dataframe_formatter()

RAW_AIG_DIR = "/content/DeepGate2/dataset/rawaig" #@param

In [43]:
#@title utils

#@markdown def analyze_bench_files(bench_dir)

def analyze_bench_files(bench_dir):
    """Analyze .bench files in the specified directory and return a summary DataFrame."""

    # Initialize a defaultdict to store counts for each circuit
    circuit_data = defaultdict(lambda: {
        'Number of Files': 0, 'Inputs': 0, 'Outputs': 0, 'AND': 0, 'NOT': 0, 'OR': 0,
        'NAND': 0, 'NOR': 0, 'XOR': 0, 'XNOR': 0, 'Others': Counter(), 'Total Gates': 0
    })

    # Check if the directory exists before proceeding
    if not os.path.exists(bench_dir) or not os.path.isdir(bench_dir):
        print(f"Directory {bench_dir} not found or isn't a directory.")
        return pd.DataFrame()

    # Initialize a global counter for unknown instance types
    global_unknown_types = Counter()

    # Iterate through the files in the directory
    for filename in os.listdir(bench_dir):
        # Process only files ending with the .bench extension
        if filename.endswith(".bench"):
            circuit_name = filename.split('_')[0]
            circuit_data[circuit_name]['Number of Files'] += 1

            # Initialize counters for gates and pins
            inputs = outputs = and_gates = not_gates = or_gates = nand_gates = nor_gates = xor_gates = xnor_gates = 0
            unknown_types = Counter()

            # Open and parse the .bench file
            file_path = os.path.join(bench_dir, filename)
            try:
                with open(file_path, "r") as f:
                    for line in f:
                        line = line.strip()
                        if line.startswith("INPUT"):
                            inputs += 1
                        elif line.startswith("OUTPUT"):
                            outputs += 1
                        elif "= AND" in line:
                            and_gates += 1
                        elif "= NOT" in line:
                            not_gates += 1
                        elif "= OR" in line:
                            or_gates += 1
                        elif "= NAND" in line:
                            nand_gates += 1
                        elif "= NOR" in line:
                            nor_gates += 1
                        elif "= XOR" in line:
                            xor_gates += 1
                        elif "= XNOR" in line:
                            xnor_gates += 1
                        else:
                            if "=" in line:
                                gate_type = line.split('=')[1].strip().split()[0]
                                unknown_types[gate_type] += 1

                # Aggregate known gate counts
                circuit_data[circuit_name]['Inputs'] += inputs
                circuit_data[circuit_name]['Outputs'] += outputs
                circuit_data[circuit_name]['AND'] += and_gates
                circuit_data[circuit_name]['NOT'] += not_gates
                circuit_data[circuit_name]['OR'] += or_gates
                circuit_data[circuit_name]['NAND'] += nand_gates
                circuit_data[circuit_name]['NOR'] += nor_gates
                circuit_data[circuit_name]['XOR'] += xor_gates
                circuit_data[circuit_name]['XNOR'] += xnor_gates
                circuit_data[circuit_name]['Others'].update(unknown_types)

                # Sum all gates excluding inputs and outputs
                total_gates = (
                    and_gates + not_gates + or_gates + nand_gates +
                    nor_gates + xor_gates + xnor_gates + sum(unknown_types.values())
                )
                circuit_data[circuit_name]['Total Gates'] += total_gates

                # Update the global unknown types counter
                global_unknown_types.update(unknown_types)

            except IOError as e:
                print(f"Error reading file {filename}: {e}")

    # Convert to DataFrame and handle the Others column as a string
    df = pd.DataFrame.from_dict(circuit_data, orient='index')
    df['Others'] = df['Others'].apply(dict)

    # Add a total summary row
    df.loc['Total'] = df.sum(numeric_only=True)
    df.at['Total', 'Number of Files'] = df['Number of Files'].sum()
    df.at['Total', 'Others'] = dict(global_unknown_types)
    df.index.name = 'Circuit'

    # Log all unknown gate types globally
    print("Global unknown gate types across all circuits:", dict(global_unknown_types))

    return df

import os
from collections import defaultdict
import fnmatch

#@markdown def analyze_fanin_fanout(bench_dir, pattern="log2.*.bench")

def analyze_fanin_fanout(bench_dir, pattern="log2.*.bench"):
    """Analyze fan-in and fan-out for .bench files matching the given pattern."""
    fanin_counts = defaultdict(int)
    fanout_graph = defaultdict(list)

    # Check if the directory exists
    if not os.path.exists(bench_dir) or not os.path.isdir(bench_dir):
        print(f"Directory {bench_dir} not found or isn't a directory.")
        return None

    # Iterate through each file in the directory
    for filename in os.listdir(bench_dir):
        if fnmatch.fnmatch(filename, pattern):
            file_path = os.path.join(bench_dir, filename)

            try:
                with open(file_path, "r") as f:
                    for line in f:
                        line = line.strip()
                        if "=" in line:
                            # Parse the gate and its input connections
                            print(line)
                            gate, expression = line.split("=")
                            gate = gate.strip()
                            inputs = expression.strip().split(",")
                            fanin_counts[gate] = len(inputs)
                            for input_gate in inputs:
                                input_gate = input_gate.strip()
                                fanout_graph[input_gate].append(gate)

            except IOError as e:
                print(f"Error reading file {filename}: {e}")

    # Calculate fan-in and fan-out values
    fanin_values = list(fanin_counts.values())
    fanout_counts = {k: len(v) for k, v in fanout_graph.items()}
    fanout_values = list(fanout_counts.values())

    # Summary statistics
    avg_fanin = sum(fanin_values) / len(fanin_values) if fanin_values else 0
    avg_fanout = sum(fanout_values) / len(fanout_values) if fanout_values else 0

    print(f"Average Fan-In: {avg_fanin:.2f}")
    print(f"Average Fan-Out: {avg_fanout:.2f}")

    return {
        "fanin_counts": fanin_counts,
        "fanout_counts": fanout_counts,
        "avg_fanin": avg_fanin,
        "avg_fanout": avg_fanout
    }


import numpy as np
import pandas as pd

#@markdown def analyze_bench_files(bench_dir, pattern="log2.*.bench")

def load_circuit_data_to_df(graphs_file, labels_file):
    """Load and convert the circuit data into a DataFrame."""

    # Load graph data
    graph_data = np.load(graphs_file, allow_pickle=True)
    label_data = np.load(labels_file, allow_pickle=True)

    # Extract graph circuits and label data
    circuits = graph_data['circuits'].item()
    labels = label_data['labels'].item()

    # Create a list to store circuit information
    circuit_info = []

    # Iterate over circuits and gather details
    for circuit_name, graph_info in circuits.items():
        x = graph_info['x']  # Node feature matrix
        edge_index = graph_info['edge_index']  # Edge index matrix

        circuit_labels = labels[circuit_name]
        tt_pair_index = circuit_labels['tt_pair_index']  # Index pairs for truth table comparison
        tt_dis = circuit_labels['tt_dis']  # Truth table distances

        # Gather circuit data into a dictionary
        circuit_info.append({
            'Circuit Name': circuit_name,
            'Number of Nodes': x.shape[0],
            'Number of Edges': edge_index.shape[1],
            'Number of TT Pairs': tt_pair_index.shape[0],
            'Number of TT Distances': tt_dis.shape[0]
        })

    # Convert to a DataFrame
    df = pd.DataFrame(circuit_info)

    return df

In [40]:
# Call the function with the directory path
df = analyze_bench_files(RAW_AIG_DIR)
df = df.loc[:, (df != 0).any(axis=0)]

Global unknown gate types across all circuits: {}


In [41]:
df.describe()

Unnamed: 0,Number of Files,Inputs,Outputs,AND,NOT,Total Gates
count,92.0,92.0,92.0,92.0,92.0,92.0
mean,332.021739,12141.652174,2024.413043,33320.72,31365.78,64686.5
std,2123.040506,58956.327214,10005.952739,161467.3,151791.9,313075.0
min,1.0,5.0,1.0,6.0,7.0,13.0
25%,10.0,317.5,59.0,1035.0,1204.5,2270.5
50%,20.0,887.5,156.5,2542.5,2815.0,5387.0
75%,80.25,5797.5,728.25,13938.0,17989.0,33129.75
max,20364.0,558516.0,93123.0,1532753.0,1442826.0,2975579.0


In [42]:
df.head(20)

Unnamed: 0_level_0,Number of Files,Inputs,Outputs,AND,NOT,Others,Total Gates
Circuit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1
mem,653.0,47359.0,4144.0,78469.0,80196.0,{},158665.0
wbscope,1323.0,16365.0,20812.0,110800.0,53296.0,{},164096.0
vga,690.0,41223.0,1766.0,80256.0,65185.0,{},145441.0
multiplier,295.0,27597.0,1790.0,112717.0,139608.0,{},252325.0
b18,686.0,61615.0,6752.0,129655.0,113525.0,{},243180.0
b22,209.0,12892.0,1414.0,29516.0,38704.0,{},68220.0
ethernet,432.0,11114.0,6414.0,35414.0,18256.0,{},53670.0
b20,150.0,6516.0,1460.0,13515.0,19683.0,{},33198.0
ac97,36.0,1725.0,585.0,3051.0,3008.0,{},6059.0
wb,347.0,17148.0,2381.0,41506.0,40493.0,{},81999.0


In [31]:
# Example usage
fanin_fanout_data = analyze_fanin_fanout(RAW_AIG_DIR, pattern="c17_syn_*.bench")

fanin_fanout_data

c17_syn_000.bench
5 = NOT(11)
6 = NOT(12)
7 = NOT(13)
8 = NOT(14)
9 = NOT(17)
10 = AND(5, 9)
11 = AND(0, 1)
12 = AND(2, 5)
13 = AND(1, 3)
14 = AND(6, 7)
15 = NOT(2)
16 = NOT(4)
17 = AND(15, 16)
Average Fan-In: 1.46
Average Fan-Out: 1.00


{'fanin_counts': defaultdict(int,
             {'5': 1,
              '6': 1,
              '7': 1,
              '8': 1,
              '9': 1,
              '10': 2,
              '11': 2,
              '12': 2,
              '13': 2,
              '14': 2,
              '15': 1,
              '16': 1,
              '17': 2}),
 'fanout_counts': {'NOT(11)': 1,
  'NOT(12)': 1,
  'NOT(13)': 1,
  'NOT(14)': 1,
  'NOT(17)': 1,
  'AND(5': 1,
  '9)': 1,
  'AND(0': 1,
  '1)': 1,
  'AND(2': 1,
  '5)': 1,
  'AND(1': 1,
  '3)': 1,
  'AND(6': 1,
  '7)': 1,
  'NOT(2)': 1,
  'NOT(4)': 1,
  'AND(15': 1,
  '16)': 1},
 'avg_fanin': 1.4615384615384615,
 'avg_fanout': 1.0}

In [33]:
#@markdown Run to load processed data

import zipfile
import os

# Path to the zip file in Google Drive
zip_path = '/content/drive/My Drive/DeepGate2/train.zip'
train_data = '/content/DeepGate2/data/train'
train_data_dir = '/content/DeepGate2/data'

# Remove the folder if it exists
if os.path.exists(train_data):
    !rm -rf {train_data}

# Unzipping the file
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(train_data_dir)



In [6]:
#@markdown Run to load pre-trained model

%cd /content/DeepGate2/
%cp /content/drive/MyDrive/deepgate_exp_backup.zip /content/DeepGate2/
!unzip /content/DeepGate2/deepgate_exp_backup.zip

/content/DeepGate2
cp: cannot stat '/content/drive/MyDrive/deepgate_exp_backup.zip': No such file or directory
unzip:  cannot find or open /content/DeepGate2/deepgate_exp_backup.zip, /content/DeepGate2/deepgate_exp_backup.zip.zip or /content/DeepGate2/deepgate_exp_backup.zip.ZIP.


In [44]:
# Replace these file paths with the actual paths to your `.npz` files
graphs_file = '/content/DeepGate2/data/train/graphs.npz'
labels_file = '/content/DeepGate2/data/train/labels.npz'

# Load and analyze the graph and label data
circuit_df = load_circuit_data_to_df(graphs_file, labels_file)

In [48]:
circuit_df.head(50)

Unnamed: 0,Circuit Name,Number of Nodes,Number of Edges,Number of TT Pairs,Number of TT Distances
0,pci_syn_031,479,2,39,39
1,arbiter_syn_111,207,2,0,0
2,pci_syn_074,175,2,42,42
3,b19_C_528,685,2,59,59
4,b18_C_296,504,2,54,54
5,arbiter_syn_154,216,2,0,0
6,vga_lcd_syn_439,245,2,28,28
7,s1423_syn_000,343,2,21,21
8,wbscope_syn_856,150,2,219,219
9,DMA_syn_026,201,2,5,5


In [47]:
circuit_name_to_find = 'c17_syn_000' #@param
filtered_circuit = circuit_df[circuit_df['Circuit Name'] == circuit_name_to_find]
filtered_circuit

Unnamed: 0,Circuit Name,Number of Nodes,Number of Edges,Number of TT Pairs,Number of TT Distances
7023,c17_syn_000,18,2,0,0
