In [5]:
# Quick shape/structure summary for a large dict pickle
import json
import math
import os
import pathlib
import pickle
import random
from collections import Counter
from itertools import islice
from typing import Any, Iterable

import numpy as np
import pandas as pd

def safe_pickle_load(path: str):
    """Open the file at ``path`` in binary mode and return the unpickled object.

    WARNING: unpickling can execute arbitrary code embedded in the file.
    Despite the name, this performs no validation -- only load files you trust.
    """
    with open(path, mode="rb") as handle:
        payload = pickle.load(handle)
    return payload

# NOTE(security): this unpickles a local file; pickle can run arbitrary code
# during deserialization, so "all_trajs.pkl" must come from a trusted source.
d = safe_pickle_load("all_trajs.pkl")

if not isinstance(d, dict):
    # Nothing below applies to non-dict payloads; just report what was loaded.
    print(f"Top-level type: {type(d).__name__}")
else:
    print("Top-level type: dict")
    print(f"Top-level keys: {len(d)}")

    # Sample some keys and show value types/sizes
    num_samples = 50
    # islice stops after num_samples keys (in the dict's iteration order), so
    # the full key list of a large dict is never materialized just to slice it.
    sample_keys = list(islice(d, num_samples))
    print("\nSample keys:", sample_keys)

    def value_shape(v):
        """Return a short description of ``v``'s type and size.

        The result is one of:
          * ``"DataFrame shape=(rows, cols)"`` for a pandas DataFrame,
          * ``"Series len=N"`` for a pandas Series,
          * ``"ndarray shape=(...)"`` for a numpy array,
          * ``"dict keys=N"`` for a dict,
          * ``"<TypeName> len=N"`` for any other object that supports ``len()``,
          * ``"<TypeName>"`` when ``len()`` is not supported.
        """
        # Import each library independently so a missing pandas does not also
        # disable numpy detection (and vice versa). The broad catch is
        # deliberate: this is a best-effort preview helper and a broken
        # install should not abort the summary.
        try:
            import numpy as np
        except Exception:
            np = None
        try:
            import pandas as pd
        except Exception:
            pd = None

        # Guard on the local bindings made above; the imports bind local names,
        # so module globals say nothing about whether they succeeded.
        if pd is not None:
            if isinstance(v, pd.DataFrame):
                return f"DataFrame shape={tuple(v.shape)}"
            if isinstance(v, pd.Series):
                return f"Series len={len(v)}"
        if np is not None and isinstance(v, np.ndarray):
            return f"ndarray shape={tuple(v.shape)}"
        if isinstance(v, dict):
            return f"dict keys={len(v)}"
        # Lists, tuples, sets, strings and any other sized object share one
        # format; objects without a usable len() fall back to the bare type name.
        try:
            return f"{type(v).__name__} len={len(v)}"
        except Exception:
            return type(v).__name__

    # One line per sampled key: the key's repr followed by value_shape()'s
    # description of the value stored under it.
    print(f"\nValue preview for first {num_samples} keys:")
    for key in sample_keys:
        print(f"  - {key!r}: {value_shape(d[key])}")
    
    # --- ADDED: print nested list lengths (bounded) ---
    def nested_lengths_list_of_lists(v, max_outer=50, max_inner=50):
        """
        Summarize the sizes of a nested list structure, bounded in both levels.

        Returns None when v is not a list. Otherwise returns a 3-tuple:
          - len(v);
          - for each of the first max_outer elements of v, its len() (None if
            the element has no usable len());
          - for each of those same elements, the len() of each of its first
            max_inner children if the element is a list (None per child without
            a usable len()), or None if the element is not a list.
        """
        def _size(obj):
            # len() if the object supports it, otherwise None.
            try:
                return len(obj)
            except Exception:
                return None

        if not isinstance(v, list):
            return None

        head = v[:max_outer]
        outer_lengths = [_size(item) for item in head]
        inner_lengths = [
            [_size(child) for child in item[:max_inner]]
            if isinstance(item, list)
            else None
            for item in head
        ]
        return len(v), outer_lengths, inner_lengths

    # Report nested list sizes for each sampled value. The helper returns None
    # for anything that is not a list, so that return value is the single
    # "not a list" check (the former inner `summary is None` branch could never
    # run because the value had already been confirmed to be a list).
    print(f"\nNested lengths for first {num_samples} keys (bounded):")
    for k in sample_keys:
        v = d[k]
        summary = nested_lengths_list_of_lists(v, max_outer=10, max_inner=50)
        if summary is None:
            print(f"  - {k!r}: type={type(v).__name__} (skipping nested-lengths)")
            continue
        outer_len, outer_lengths, inner_lengths = summary
        print(f"  - {k!r}: outer_len={outer_len}, outer_lengths_sample={outer_lengths}")
        # print inner_lengths for the same sampled outer elements (bounded)
        for i, il in enumerate(inner_lengths):
            print(f"      outer[{i}] inner_lengths_sample={il}")
    # --- END ADDED ---

    # Bounded scan over the dict's values: tally value type names and collect
    # len() of every sized, non-string value among the first max_scan entries.
    max_scan = 10000
    type_counts = Counter()
    length_samples = []
    for position, value in enumerate(d.values()):
        if position >= max_scan:
            break
        type_counts[type(value).__name__] += 1
        try:
            is_sized = hasattr(value, "__len__") and not isinstance(value, (str, bytes))
            if is_sized:
                length_samples.append(len(value))
        except Exception:
            # Best effort: a value whose length cannot be taken is simply not sampled.
            pass

    scanned = min(len(d), max_scan)
    print(f"\nValue type distribution (first {scanned} items):")
    for type_name, count in type_counts.most_common():
        print(f"  {type_name}: {count}")

    if length_samples:
        import statistics as stats
        print("\nNested length stats (sampled):")
        sample_count = len(length_samples)
        shortest = min(length_samples)
        p50 = int(stats.median(length_samples))
        # The 9th decile cut point is only reported with at least 10 samples.
        if sample_count >= 10:
            p90 = int(stats.quantiles(length_samples, n=10)[8])
        else:
            p90 = 'n/a'
        longest = max(length_samples)
        print(f"  count={sample_count} min={shortest} p50={p50} p90={p90} max={longest}")

Top-level type: dict
Top-level keys: 50

Sample keys: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49]

Value preview for first 50 keys:
  - 0: list len=1013
  - 1: list len=797
  - 2: list len=998
  - 3: list len=667
  - 4: list len=940
  - 5: list len=1350
  - 6: list len=899
  - 7: list len=1038
  - 8: list len=954
  - 9: list len=616
  - 10: list len=1181
  - 11: list len=893
  - 12: list len=980
  - 13: list len=808
  - 14: list len=774
  - 15: list len=992
  - 16: list len=996
  - 17: list len=1083
  - 18: list len=626
  - 19: list len=1156
  - 20: list len=960
  - 21: list len=1084
  - 22: list len=955
  - 23: list len=922
  - 24: list len=462
  - 25: list len=779
  - 26: list len=959
  - 27: list len=795
  - 28: list len=918
  - 29: list len=741
  - 30: list len=855
  - 31: list len=815
  - 32: list len=862
  - 33: list len=898
  - 34: li