In [1]:
# Cell 1: Setup & Import
%load_ext autoreload
%autoreload 2

import sys
import pickle
import numpy as np
from pathlib import Path

# 1. Add the project root to sys.path so we can import from 'src'.
# The notebook normally lives in 'notebooks/', so the parent of the working
# directory is tried first (the original behaviour). If 'src/' is not there
# (e.g. Jupyter was started from the project root or from a nested folder),
# the working directory itself and then the remaining ancestors are searched.
# When no candidate contains 'src/', fall back to the parent of the working
# directory so the result matches the original assumption.
current_dir = Path.cwd()
_root_candidates = [current_dir.parent, current_dir, *list(current_dir.parents)[1:]]
project_root = next(
    (candidate for candidate in _root_candidates if (candidate / "src").is_dir()),
    current_dir.parent,
)
# Guard against duplicate entries when the cell is re-run in the same kernel.
if str(project_root) not in sys.path:
    sys.path.append(str(project_root))

# 2. Import the standard processing function from your library
from src.processing import process_graphs
from src.config import RAW_DATA_PATH

# Cell 2: Load & Process Data
print(f"Loading raw data from: {RAW_DATA_PATH}")
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only point RAW_DATA_PATH at a dataset you created or otherwise trust.
with open(RAW_DATA_PATH, mode='rb') as raw_file:
    data = pickle.load(raw_file)

# The loaded object is indexed by 'graphs' and 'metadata'; both are handed to
# process_graphs, which (per the original author's notes) performs:
#   - discretization of the Minkowski values (M4-M12)
#   - relabeling of noisy graphs as "Disordered"
#   - extraction of 30 random subgraphs per simulation
# NOTE(review): the extraction is described as random and no seed is set in
# this cell -- confirm whether process_graphs seeds its own RNG if the
# subgraph set needs to be reproducible across runs.
print("Processing graphs (this may take 1-2 minutes)...")
raw_graphs, raw_metadata = data['graphs'], data['metadata']
subgraphs, labels = process_graphs(raw_graphs, raw_metadata)

print(f"✅ Done! You have {len(subgraphs)} subgraphs ready for experimentation.")

Loading raw data from: /home/npkamath/553Project/553ProjectGraphKernels/data/raw/crystal_graphs_dataset.pkl
Processing graphs (this may take 1-2 minutes)...
Step 1: Collecting features from all graphs...
Step 2: Discretizing features into 20 bins...
Step 3: Relabeling and extracting subgraphs...
   > Extraction complete. Created 396 subgraphs.
✅ Done! You have 396 subgraphs ready for experimentation.
