In [21]:
import os
# DEBUG is assigned before any tinygrad module is imported. The verbose scheduling lines,
# per-kernel timings and generated Metal source in the cell outputs are consistent with this level.
# NOTE(review): presumably tinygrad reads DEBUG at import time, so this line must stay above
# the tinygrad imports — confirm before reordering.
os.environ["DEBUG"] = "5"
from tinygrad import Tensor, dtypes
from tinygrad import nn
from tinygrad.nn.state import load_state_dict, safe_load
from tinygrad.helpers import fetch
from tinygrad.uop.ops import Ops, buffers  # buffers is a UOp→Buffer cache

def is_realized(t: Tensor) -> bool:
    """Report whether the tensor's top-level UOp is backed by an entry in `buffers`.

    Args:
        t: the tensor to inspect; only `t.uop` is examined.

    Returns:
        True when `t.uop` is a BUFFER that is a key of `buffers`, or a BUFFER_VIEW
        whose first source is a key of `buffers`. False for every other op.

    NOTE(review): only `t.uop` itself is checked, so a tensor whose top-level op is
    anything else reports False even if it sits on top of a buffer — confirm whether
    that is the intended meaning of "realized" here.
    """
    node = t.uop
    kind = node.op
    if kind == Ops.BUFFER:
        # The tensor's uop is itself the buffer to look up.
        owner = node
    elif kind == Ops.BUFFER_VIEW:
        # For a view, look up its first source (treated as the base BUFFER).
        owner = node.src[0]
    else:
        return False
    return owner in buffers

In [15]:
class FakeModel:
    """Tiny stand-in model used to exercise `load_state_dict`.

    It holds a single 128x128 linear layer under the attribute `a`; the state
    dict built for it in this notebook uses the keys "a.weight" and "a.bias".
    """

    def __init__(self):
        width = 128  # same size for input and output features
        self.a = nn.Linear(width, width)

# One float32 tensor per FakeModel parameter name. Each is created with Tensor.empty
# and realized immediately, in the same order as before (weight, then bias).
fake_state_dict = {
    key: Tensor.empty(shape, dtype=dtypes.float32).realize()
    for key, shape in (("a.weight", (128, 128)), ("a.bias", 128))
}

scheduled 16 kernels in 12.56 ms
*** METAL     34 r_128_32_4_4096_4                            arg  1 mem  0.00 GB tm     51.54us/     1.07ms (     0.87 GFLOPS    1.3|1.3     GB/s) ['randn']
*** METAL     35 E_n3                                         arg  1 mem  0.00 GB tm      7.00us/     1.08ms (     0.00 GFLOPS    0.0|0.0     GB/s) ['uniform']
*** METAL     36 E_n4                                         arg  2 mem  0.00 GB tm      6.38us/     1.09ms (     0.00 GFLOPS    0.0|0.0     GB/s) ['uniform']
#include <metal_stdlib>
using namespace metal;
kernel void E_n7(device unsigned int* data0_1, device unsigned int* data1_1, uint3 gid [[threadgroup_position_in_grid]], uint3 lid [[thread_position_in_threadgroup]]) {
  unsigned int val0 = (*(data1_1+0));
  *(data0_1+0) = (val0+16384u);
}
[32m*** METAL     37[0m E_[90mn7[0m                                         arg  2 mem  0

In [16]:
# Build a fresh FakeModel and load the prepared tensors from fake_state_dict into it.
model = FakeModel()
# The return value is bound to `_` so the cell does not display it.
# NOTE(review): `old=True` does not look like a parameter of upstream `load_state_dict` —
# presumably this relies on a locally modified tinygrad; confirm.
_ = load_state_dict(model, fake_state_dict, old=True)

ram used:  0.00 GB, a.bias                                            : 100%|█| 


loaded weights in   2.67 ms, 0.00 GB loaded at 0.00 GB/s


In [17]:
# Location of the safetensors checkpoint to inspect.
weights_url = 'https://huggingface.co/stabilityai/stable-diffusion-2-1/resolve/main/v2-1_768-ema-pruned.safetensors'
# Fetch the checkpoint, naming the local copy after the last path component of the URL.
weights_fn = fetch(
    weights_url,
    os.path.basename(weights_url),  # weights_url is already a str, so no str() wrapper is needed
)
# Parse the safetensors file into a mapping indexed by tensor name.
sd_state_dict = safe_load(weights_fn)

opened device DISK:/Users/marc/Library/Caches/tinygrad/downloads/v2-1_768-ema-pruned.safetensors from pid:5077
[32m*** DISK:/U   56[0m [33mview        8 @ 0         [0m                   arg  2 mem  0.00 GB 
[32m*** DISK:/U   57[0m [33mview   167566 @ 8         [0m                   arg  2 mem  0.00 GB 


In [23]:
# Probe one entry of the loaded checkpoint: does is_realized() consider it buffer-backed?
is_realized(sd_state_dict["alphas_cumprod"])

False