: 

In [None]:
import pandas as pd
import json
from pathlib import Path

# Directory that every file name below is joined onto.  The path is relative,
# so it resolves against the current working directory.
base = Path("digests_project/outputs/eda_units/may-aug_combo3_soft")

# CSV file names.  The edge files are grouped by naming pattern in private
# helper lists; `csv_files` is the public list and keeps the load order.
_edge_csvs = [
    "edges_backbone.csv",
    "edges_bridges.csv",
    "edges_default.csv",
    "edges_niche.csv",
    "edges_audit.csv",
]
_edge_topk_csvs = [
    "edges_backbone_topk.csv",
    "edges_bridges_topk.csv",
    "edges_default_topk.csv",
    "edges_niche_topk.csv",
]
csv_files = [
    "co_tag_pairs.csv",
    "doc_tag_long.csv",
    *_edge_csvs,
    *_edge_topk_csvs,
    "tag_communities.csv",
]

# JSON file names, in load order.
json_files = ["gates.json", "quantiles.json", "counts.json", "index.json"]

# --- Load all CSVs into dict of DataFrames ---
# `dfs` maps file name -> DataFrame for every CSV that could be read.
# Loading is best-effort: a missing or unreadable file is reported and left
# out of `dfs` instead of aborting the run.
dfs = {}
for fname in csv_files:
    fpath = base / fname
    try:
        dfs[fname] = pd.read_csv(fpath)
    except FileNotFoundError:
        # Say so explicitly: a silent skip makes a wrong `base` (or working
        # directory) indistinguishable from "there was nothing to load".
        print(f"⚠️  Missing file, skipped: {fpath}")
    except Exception as e:
        # Deliberately broad: any other read/parse failure is reported and
        # the loop moves on to the next file.
        print(f"⚠️  Could not read {fname}: {e}")

# --- Load JSONs ---
# `jdata` maps file name -> parsed JSON value for every file that could be
# read.  Loading is best-effort: a missing or unreadable file is reported and
# left out of `jdata` instead of aborting the run.
jdata = {}
for fname in json_files:
    fpath = base / fname
    try:
        with open(fpath, "r", encoding="utf-8") as f:
            jdata[fname] = json.load(f)
    except FileNotFoundError:
        # Say so explicitly: a silent skip makes a wrong `base` (or working
        # directory) indistinguishable from "there was nothing to load".
        print(f"⚠️  Missing file, skipped: {fpath}")
    except Exception as e:
        # Deliberately broad: decode/parse/permission failures are reported
        # and the loop moves on to the next file.
        print(f"⚠️  Could not read {fname}: {e}")

# --- Basic exploration per DataFrame ---
def _summarize_frame(label, frame):
    """Print a quick overview of *frame* under a ``=== label ===`` banner.

    The overview is, in order: shape, column names, the first three rows,
    dtypes, non-null counts, distinct-value counts, and for every column its
    five most frequent values (missing values are counted too).
    """
    print(f"\n=== {label} ===")
    print("Shape:", frame.shape)
    print("Columns:", list(frame.columns))
    print(frame.head(3))
    # Heading and value go through one print call; sep="\n" puts the value
    # on the line after its heading.
    print("\nColumn types:", frame.dtypes, sep="\n")
    print("\nNon-null counts:", frame.count(), sep="\n")
    print("\nUnique counts:", frame.nunique(), sep="\n")
    for column in frame.columns:
        top_values = frame[column].value_counts(dropna=False).head(5)
        print(f"\nTop 5 values for {column}:", top_values, sep="\n")


for name, df in dfs.items():
    _summarize_frame(name, df)

# --- Exploration for JSONs ---
# Dicts are listed one "key: value" line per top-level entry; any other JSON
# value is printed whole, preceded by its Python type.
for name, obj in jdata.items():
    print(f"\n=== {name} ===")
    if not isinstance(obj, dict):
        print(type(obj), obj)
        continue
    for key, value in obj.items():
        print(f"{key}: {value}")
