No OR gates yet

In [17]:
import re
import csv
from pathlib import Path
from typing import List, Tuple, Optional
import pandas as pd

circuits_hex_list = [
"0x000D",
"0x0239",
"0x0304",
"0x040B",
"0x0575",
"0x057A",
"0x0643",
"0x0760",
"0x09AF",
"0x0F42",
"0x1038",
"0x1048",
"0x10C9",
"0x1284",
"0x1323",
"0x13CE",
"0x1714",
"0x1858",
"0x1A60",
"0x1AC6",
"0x1CBF",
"0x1D95",
"0x1FDE",
"0x226B",
"0x22C6",
"0x23A7",
"0x240F",
"0x2A38",
"0x2A56",
"0x2FC7",
"0x3060",
"0x30CE",
"0x32AA",
"0x35C3",
"0x36DC",
"0x3812",
"0x3A17",
"0x3B31",
"0x3B60",
"0x3B68",
"0x409B",
"0x41A2",
"0x41B2",
"0x429B",
"0x4724",
"0x47FD",
"0x48C1",
"0x4A32",
"0x4BF8",
"0x5215",
"0x53AF",
"0x53D7",
"0x599A",
"0x5AAD",
"0x5B30",
"0x5DA9",
"0x5F01",
"0x5FE2",
"0x616A",
"0x648B",
"0x6572",
"0x680A",
"0x6847",
"0x699D",
"0x6F2A",
"0x7096",
"0x70EC",
"0x7176",
"0x822B",
"0x850E",
"0x8F63",
"0x914C",
"0x918A",
"0x93AC",
"0x9591",
"0x96F7",
"0x9917",
"0x9BF5",
"0x9F8A",
"0xA2DA",
"0xA7B2",
"0xA960",
"0xB744",
"0xB8AD",
"0xBC16",
"0xBCA3",
"0xBDF1",
"0xBEE9",
"0xBF36",
"0xC248",
"0xC4B2",
"0xC766",
"0xCB82",
"0xCBD6",
"0xCE97",
"0xD319",
"0xD326",
"0xD477",
"0xD4E4",
"0xD550",
"0xDA80",
"0xDBFA",
"0xE605",
"0xE677",
"0xE93A",
"0xECF1",
"0xEFEB",
"0xF43F",
"0xF4E7",
"0xF5A4",
"0xFC79"]

# Patterns
HEX_RE = re.compile(r'0x[0-9A-Fa-f]{4}')
# Matches: num GATES in file.v netlist: 123
GATES_LINE_RE = re.compile(
    r'num\s+GATES\s+in\s+([^\s:]+)\s+netlist:\s*(\d+)',
    re.IGNORECASE
)


def first_hex_from_text(text: str) -> Optional[str]:
    m = HEX_RE.search(text)
    return m.group(0).upper() if m else None

def extract_gates_line(text: str) -> Tuple[Optional[str], Optional[int]]:
    """
    Returns (netlist_file, n_gates) from a line like:
      num GATES in file.v netlist: n
    """
    m = GATES_LINE_RE.search(text)
    if not m:
        return None, None
    netlist_file = m.group(1)
    try:
        n = int(m.group(2))
    except ValueError:
        n = None
    return netlist_file, n

def parse_file(p: Path) -> Tuple[Optional[str], Optional[str], Optional[int]]:
    """
    Returns (hex_in_file, netlist_file, n_gates)
    """
    try:
        text = p.read_text(errors="ignore")
    except Exception:
        return None, None, None
    hex_in_file = first_hex_from_text(text)
    netlist_file, n_gates = extract_gates_line(text)
    return (hex_in_file.upper() if hex_in_file else None, netlist_file, n_gates)

def find_files_for_hex(root: Path, circuit_hex: str,
                       fallback_extensions=(".log",".txt",".out",".json",".ucf",".csv")) -> list[Path]:
    """
    1) Try filename match (fast).
    2) If none found, fall back to scanning CONTENT (limited extensions)
       and keep files whose FIRST hex equals circuit_hex.
    """
    # 1) Filename-based
    fname_hits = sorted([p for p in root.rglob("*")
                         if p.is_file() and circuit_hex.lower() in p.name.lower()])
    fname_hits.sort(key=lambda p: (0 if p.suffix.lower()==".log" else 1, str(p).lower()))
    if fname_hits:
        return fname_hits

    # 2) Content-based fallback
    hits = []
    for p in root.rglob("*"):
        if not p.is_file():
            continue
        if p.suffix.lower() not in fallback_extensions:
            continue
        try:
            txt = p.read_text(errors="ignore")
        except Exception:
            continue
        first_hex = first_hex_from_text(txt)
        if first_hex and first_hex.upper() == circuit_hex.upper():
            hits.append(p)

    hits.sort(key=lambda p: (0 if p.suffix.lower()==".log" else 1, str(p).lower()))
    return hits

def run_extractor(root_folder: str, output_csv: str = "circuit_gates_summary.csv") -> pd.DataFrame:
    root = Path(root_folder)
    out_csv = Path(output_csv)

    rows = []
    for idx, hx in enumerate(circuits_hex_list):
        files = find_files_for_hex(root, hx)
        if not files:
            rows.append({
                "order_index": idx,
                "circuit_hex_from_list": hx,
                "circuit_hex_from_file": None,
                "netlist_file": None,
                "gates_n": None,
                "gates_needed": None,   # kept for backward compatibility
                "source_file": None,
                "status": "file_not_found",
            })
            continue

        found_any = False
        for f in files:
            hex_in, netlist_file, n_g = parse_file(f)
            if (hex_in is not None) or (netlist_file is not None) or (n_g is not None):
                rows.append({
                    "order_index": idx,
                    "circuit_hex_from_list": hx,
                    "circuit_hex_from_file": hex_in,
                    "netlist_file": netlist_file,
                    "gates_n": n_g,
                    "gates_needed": n_g,  # same value, legacy name
                    "source_file": str(f),
                    "status": "ok" if (hex_in is not None and netlist_file is not None and n_g is not None) else "partial",
                })
                found_any = True
                break
        if not found_any:
            rows.append({
                "order_index": idx,
                "circuit_hex_from_list": hx,
                "circuit_hex_from_file": None,
                "netlist_file": None,
                "gates_n": None,
                "gates_needed": None,
                "source_file": str(files[0]),
                "status": "parse_failed",
            })

    out_csv.parent.mkdir(parents=True, exist_ok=True)
    with out_csv.open("w", newline="") as fp:
        writer = csv.DictWriter(fp, fieldnames=[
            "order_index","circuit_hex_from_list","circuit_hex_from_file",
            "netlist_file","gates_n","gates_needed","source_file","status"
        ])
        writer.writeheader()
        writer.writerows(rows)

    return pd.DataFrame(rows)




In [18]:
# Edit these two paths as needed:
ROOT_FOLDER = "/home/gridsan/spalacios/Designing complex biological circuits with deep neural networks/dgd/data/Cello_2_1_designs/"                 # e.g., "/path/to/your/logs"
OUTPUT_CSV  = "/home/gridsan/spalacios/Designing complex biological circuits with deep neural networks/dgd/data/Cello_2_1_designs/circuit_gates_summary.csv"

df = run_extractor(ROOT_FOLDER, OUTPUT_CSV)  # uses the functions you already pasted
print(f"Wrote {len(df)} rows to {OUTPUT_CSV}")

# Quick sanity checks
print("\nStatus counts:")
print(df["status"].value_counts(dropna=False))

print("\nSample of parsed rows (status == 'ok'):")
display(df[df["status"] == "ok"].head(20))

print("\nRows that need attention (not ok):")
display(df[df["status"] != "ok"].head(20))


Wrote 111 rows to /home/gridsan/spalacios/Designing complex biological circuits with deep neural networks/dgd/data/Cello_2_1_designs/circuit_gates_summary.csv

Status counts:
status
ok    111
Name: count, dtype: int64

Sample of parsed rows (status == 'ok'):


Unnamed: 0,order_index,circuit_hex_from_list,circuit_hex_from_file,netlist_file,gates_n,gates_needed,source_file,status
0,0,0x000D,0X000D,0x000D_V2.v,7,7,/home/gridsan/spalacios/Designing complex biol...,ok
1,1,0x0239,0X0239,0x0239_V2.v,12,12,/home/gridsan/spalacios/Designing complex biol...,ok
2,2,0x0304,0X0304,0x0304_V2.v,9,9,/home/gridsan/spalacios/Designing complex biol...,ok
3,3,0x040B,0X040B,0x040B_V2.v,10,10,/home/gridsan/spalacios/Designing complex biol...,ok
4,4,0x0575,0X0575,0x0575_V2.v,5,5,/home/gridsan/spalacios/Designing complex biol...,ok
5,5,0x057A,0X057A,0x057A_V2.v,14,14,/home/gridsan/spalacios/Designing complex biol...,ok
6,6,0x0643,0X0643,0x0643_V2.v,13,13,/home/gridsan/spalacios/Designing complex biol...,ok
7,7,0x0760,0X0760,0x0760_V2.v,11,11,/home/gridsan/spalacios/Designing complex biol...,ok
8,8,0x09AF,0X09AF,0x09AF_V2.v,10,10,/home/gridsan/spalacios/Designing complex biol...,ok
9,9,0x0F42,0X0F42,0x0F42_V2.v,13,13,/home/gridsan/spalacios/Designing complex biol...,ok



Rows that need attention (not ok):


Unnamed: 0,order_index,circuit_hex_from_list,circuit_hex_from_file,netlist_file,gates_n,gates_needed,source_file,status
