<a href="https://colab.research.google.com/github/mangohehe/DeepGate2/blob/data-learning/notebooks/Data-Learning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
#@markdown Git clone DeepGate2 Repo
!git clone https://github.com/mangohehe/DeepGate2.git

Cloning into 'DeepGate2'...
remote: Enumerating objects: 151, done.[K
remote: Counting objects: 100% (65/65), done.[K
remote: Compressing objects: 100% (52/52), done.[K
remote: Total 151 (delta 21), reused 32 (delta 10), pack-reused 86[K
Receiving objects: 100% (151/151), 52.52 MiB | 28.90 MiB/s, done.
Resolving deltas: 100% (37/37), done.


In [2]:
#@markdown Run to mount Google Drive

from google.colab import drive
drive.mount('/content/drive')

%cd /content/DeepGate2/data/train
%pwd

Mounted at /content/drive
/content/DeepGate2/data/train


'/content/DeepGate2/data/train'

In [3]:
#@markdown Run to unzip training data

%cd /content/DeepGate2/dataset
!tar -jxvf rawaig.tar.bz2

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
rawaig/b19_C_633.bench
rawaig/b19_C_634.bench
rawaig/b19_C_635.bench
rawaig/b19_C_636.bench
rawaig/b19_C_637.bench
rawaig/b19_C_638.bench
rawaig/b19_C_639.bench
rawaig/b19_C_640.bench
rawaig/b19_C_641.bench
rawaig/b19_C_642.bench
rawaig/b19_C_643.bench
rawaig/b21_opt_C_053.bench
rawaig/b21_opt_C_054.bench
rawaig/b21_opt_C_055.bench
rawaig/b21_opt_C_056.bench
rawaig/b21_opt_C_057.bench
rawaig/b21_opt_C_058.bench
rawaig/b21_opt_C_059.bench
rawaig/b22_C_091.bench
rawaig/b22_C_092.bench
rawaig/b22_C_093.bench
rawaig/b22_C_094.bench
rawaig/b22_C_095.bench
rawaig/b22_C_096.bench
rawaig/b22_C_097.bench
rawaig/b22_C_098.bench
rawaig/b22_C_099.bench
rawaig/b22_C_100.bench
rawaig/b22_C_101.bench
rawaig/b22_C_102.bench
rawaig/b22_C_103.bench
rawaig/b22_C_104.bench
rawaig/b22_C_105.bench
rawaig/b22_C_106.bench
rawaig/b22_opt_C_071.bench
rawaig/b22_opt_C_072.bench
rawaig/b22_opt_C_073.bench
rawaig/b22_opt_C_074.bench
rawaig/b22_opt_C_

In [29]:
import os
from collections import defaultdict
import pandas as pd
# Import the data_table module
from google.colab import data_table
from collections import defaultdict, Counter
# Enable the DataTable formatter globally
data_table.enable_dataframe_formatter()

RAW_AIG_DIR = "/content/DeepGate2/dataset/rawaig" #@param

In [32]:
#@markdown def analyze_bench_files(bench_dir)

def analyze_bench_files(bench_dir):
    """Analyze .bench files in the specified directory and return a summary DataFrame."""

    # Initialize a defaultdict to store counts for each circuit
    circuit_data = defaultdict(lambda: {
        'Number of Files': 0, 'Inputs': 0, 'Outputs': 0, 'AND': 0, 'NOT': 0, 'OR': 0,
        'NAND': 0, 'NOR': 0, 'XOR': 0, 'XNOR': 0, 'Others': Counter(), 'Total Gates': 0
    })

    # Check if the directory exists before proceeding
    if not os.path.exists(bench_dir) or not os.path.isdir(bench_dir):
        print(f"Directory {bench_dir} not found or isn't a directory.")
        return pd.DataFrame()

    # Initialize a global counter for unknown instance types
    global_unknown_types = Counter()

    # Iterate through the files in the directory
    for filename in os.listdir(bench_dir):
        # Process only files ending with the .bench extension
        if filename.endswith(".bench"):
            circuit_name = filename.split('_')[0]
            circuit_data[circuit_name]['Number of Files'] += 1

            # Initialize counters for gates and pins
            inputs = outputs = and_gates = not_gates = or_gates = nand_gates = nor_gates = xor_gates = xnor_gates = 0
            unknown_types = Counter()

            # Open and parse the .bench file
            file_path = os.path.join(bench_dir, filename)
            try:
                with open(file_path, "r") as f:
                    for line in f:
                        line = line.strip()
                        if line.startswith("INPUT"):
                            inputs += 1
                        elif line.startswith("OUTPUT"):
                            outputs += 1
                        elif "= AND" in line:
                            and_gates += 1
                        elif "= NOT" in line:
                            not_gates += 1
                        elif "= OR" in line:
                            or_gates += 1
                        elif "= NAND" in line:
                            nand_gates += 1
                        elif "= NOR" in line:
                            nor_gates += 1
                        elif "= XOR" in line:
                            xor_gates += 1
                        elif "= XNOR" in line:
                            xnor_gates += 1
                        else:
                            if "=" in line:
                                gate_type = line.split('=')[1].strip().split()[0]
                                unknown_types[gate_type] += 1

                # Aggregate known gate counts
                circuit_data[circuit_name]['Inputs'] += inputs
                circuit_data[circuit_name]['Outputs'] += outputs
                circuit_data[circuit_name]['AND'] += and_gates
                circuit_data[circuit_name]['NOT'] += not_gates
                circuit_data[circuit_name]['OR'] += or_gates
                circuit_data[circuit_name]['NAND'] += nand_gates
                circuit_data[circuit_name]['NOR'] += nor_gates
                circuit_data[circuit_name]['XOR'] += xor_gates
                circuit_data[circuit_name]['XNOR'] += xnor_gates
                circuit_data[circuit_name]['Others'].update(unknown_types)

                # Sum all gates excluding inputs and outputs
                total_gates = (
                    and_gates + not_gates + or_gates + nand_gates +
                    nor_gates + xor_gates + xnor_gates + sum(unknown_types.values())
                )
                circuit_data[circuit_name]['Total Gates'] += total_gates

                # Update the global unknown types counter
                global_unknown_types.update(unknown_types)

            except IOError as e:
                print(f"Error reading file {filename}: {e}")

    # Convert to DataFrame and handle the Others column as a string
    df = pd.DataFrame.from_dict(circuit_data, orient='index')
    df['Others'] = df['Others'].apply(dict)

    # Add a total summary row
    df.loc['Total'] = df.sum(numeric_only=True)
    df.at['Total', 'Number of Files'] = df['Number of Files'].sum()
    df.at['Total', 'Others'] = dict(global_unknown_types)
    df.index.name = 'Circuit'

    # Log all unknown gate types globally
    print("Global unknown gate types across all circuits:", dict(global_unknown_types))

    return df

In [None]:
# Call the function with the directory path
df = analyze_bench_files(RAW_AIG_DIR)

In [34]:
df.describe()

Unnamed: 0,Number of Files,Inputs,Outputs,AND,NOT,OR,NAND,NOR,XOR,XNOR,Total Gates
count,92.0,92.0,92.0,92.0,92.0,92.0,92.0,92.0,92.0,92.0,92.0
mean,332.021739,12141.652174,2024.413043,33320.72,31365.78,0.0,0.0,0.0,0.0,0.0,64686.5
std,2123.040506,58956.327214,10005.952739,161467.3,151791.9,0.0,0.0,0.0,0.0,0.0,313075.0
min,1.0,5.0,1.0,6.0,7.0,0.0,0.0,0.0,0.0,0.0,13.0
25%,10.0,317.5,59.0,1035.0,1204.5,0.0,0.0,0.0,0.0,0.0,2270.5
50%,20.0,887.5,156.5,2542.5,2815.0,0.0,0.0,0.0,0.0,0.0,5387.0
75%,80.25,5797.5,728.25,13938.0,17989.0,0.0,0.0,0.0,0.0,0.0,33129.75
max,20364.0,558516.0,93123.0,1532753.0,1442826.0,0.0,0.0,0.0,0.0,0.0,2975579.0


In [35]:
df

Unnamed: 0_level_0,Number of Files,Inputs,Outputs,AND,NOT,OR,NAND,NOR,XOR,XNOR,Others,Total Gates
Circuit,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
mem,653.0,47359.0,4144.0,78469.0,80196.0,0.0,0.0,0.0,0.0,0.0,{},158665.0
wbscope,1323.0,16365.0,20812.0,110800.0,53296.0,0.0,0.0,0.0,0.0,0.0,{},164096.0
vga,690.0,41223.0,1766.0,80256.0,65185.0,0.0,0.0,0.0,0.0,0.0,{},145441.0
multiplier,295.0,27597.0,1790.0,112717.0,139608.0,0.0,0.0,0.0,0.0,0.0,{},252325.0
b18,686.0,61615.0,6752.0,129655.0,113525.0,0.0,0.0,0.0,0.0,0.0,{},243180.0
...,...,...,...,...,...,...,...,...,...,...,...,...
b02,2.0,8.0,8.0,34.0,52.0,0.0,0.0,0.0,0.0,0.0,{},86.0
c17,1.0,5.0,2.0,6.0,7.0,0.0,0.0,0.0,0.0,0.0,{},13.0
square,1.0,58.0,1.0,105.0,130.0,0.0,0.0,0.0,0.0,0.0,{},235.0
b01,2.0,10.0,8.0,69.0,86.0,0.0,0.0,0.0,0.0,0.0,{},155.0


In [40]:
# Take c17_syn_000.bench file as an example
!cat {RAW_AIG_DIR}/c17_syn_000.bench

# 5 inputs
# 2 outputs

INPUT(0)
INPUT(1)
INPUT(2)
INPUT(3)
INPUT(4)

OUTPUT(8)
OUTPUT(10)

5 = NOT(11)
6 = NOT(12)
7 = NOT(13)
8 = NOT(14)
9 = NOT(17)
10 = AND(5, 9)
11 = AND(0, 1)
12 = AND(2, 5)
13 = AND(1, 3)
14 = AND(6, 7)
15 = NOT(2)
16 = NOT(4)
17 = AND(15, 16)



In [None]:
df.head()

In [5]:
#@markdown Run to load processed data

import zipfile
import os

# Path to the zip file in Google Drive
zip_path = '/content/drive/My Drive/DeepGate2/train.zip'
train_data = '/content/DeepGate2/data/train'
train_data_dir = '/content/DeepGate2/data'

# Remove the folder if it exists
if os.path.exists(train_data):
    !rm -rf {train_data}

# Unzipping the file
with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(train_data_dir)



In [6]:
#@markdown Run to load pre-trained model

%cd /content/DeepGate2/
%cp /content/drive/MyDrive/deepgate_exp_backup.zip /content/DeepGate2/
!unzip /content/DeepGate2/deepgate_exp_backup.zip

/content/DeepGate2
cp: cannot stat '/content/drive/MyDrive/deepgate_exp_backup.zip': No such file or directory
unzip:  cannot find or open /content/DeepGate2/deepgate_exp_backup.zip, /content/DeepGate2/deepgate_exp_backup.zip.zip or /content/DeepGate2/deepgate_exp_backup.zip.ZIP.


In [7]:
%cd /content/DeepGate2/

/content/DeepGate2


In [8]:
#@markdown Run to install conda and Create deepgate2 environment

!pip install -q condacolab
import condacolab
condacolab.install()
!conda create -n deepgate2 python=3.8.10
!source activate deepgate2 && conda info

⏬ Downloading https://github.com/conda-forge/miniforge/releases/download/23.11.0-0/Mambaforge-23.11.0-0-Linux-x86_64.sh...
📦 Installing...
📌 Adjusting configuration...
🩹 Patching environment...
⏲ Done in 0:00:13
🔁 Restarting kernel...
Channels:
 - conda-forge
Platform: linux-64
Collecting package metadata (repodata.json): - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - \ | / - 