fusion tests from test_opt #4357

Merged · 4 commits · May 1, 2024
117 changes: 0 additions & 117 deletions test/external/external_test_opt.py
@@ -5,7 +5,6 @@

from tinygrad import nn, GlobalCounters, Tensor, Device
from tinygrad.helpers import getenv
from tinygrad.nn import optim
from tinygrad.nn.state import get_parameters
from tinygrad.engine.realize import capturing

@@ -165,103 +164,6 @@ def test_muladd(self):
d.realize()
np.testing.assert_allclose(d.numpy(), na*nb+nc, rtol=1e-5, atol=1e-7)

def test_fold_reduce_elementwise(self):
Collaborator (Author) commented: in test_schedule "fold" is "fuse" for consistency.

img = Tensor.ones(32).contiguous()
addme = Tensor.ones(1)
with CLCache() as cache:
ret = img.sum() + addme
ret.realize()
assert cache.count == 1, "optimizer didn't fold reduce/elementwise"
assert ret.item() == 33

def test_fold_batchnorm(self):
with Tensor.train():
img = Tensor.ones(1,32,4,4).contiguous()
bn = nn.BatchNorm2d(32, track_running_stats=False)
with CLCache() as cache:
img_bn = bn(img).realize()
print(img_bn)
assert cache.count == 3, f"optimizer didn't fold batchnorm, got {cache.count}"

def test_fold_conv_sgd(self):
with Tensor.train():
img = Tensor.ones(2,3,4,4)
c1 = nn.Conv2d(3,32,3)
opt = optim.SGD(get_parameters(c1))
with CLCache() as cache:
opt.zero_grad()
c1(img).relu().sum().backward()
opt.step()
assert cache.count == 5, f"optimizer didn't fold conv-backward SGD, got {cache.count}"

def test_fold_conv_adam(self):
with Tensor.train():
img = Tensor.ones(2,3,4,4)
c1 = nn.Conv2d(3,32,3)
opt = optim.Adam(get_parameters(c1), lr=1e-4)
with CLCache(allowed=10):
opt.zero_grad()
c1(img).relu().sum().backward()
opt.step()

def test_fold_2convs_adam(self):
with Tensor.train():
img = Tensor.ones(2,3,64,64)
c1 = nn.Conv2d(3,16,3,bias=False)
c2 = nn.Conv2d(16,32,3,bias=False)
opt = optim.Adam(get_parameters([c1, c2]), lr=1e-4)
with CLCache(allowed=13):
opt.zero_grad()
c2(c1(img).relu()).relu().sum().backward()
opt.step()

def test_fold_2convs_sgd(self):
with Tensor.train():
img = Tensor.ones(2,3,64,64)
c1 = nn.Conv2d(3,16,3,bias=False)
c2 = nn.Conv2d(16,32,3,bias=False)
opt = optim.SGD(get_parameters([c1, c2]))
with CLCache(allowed=8):
opt.zero_grad()
c2(c1(img).relu()).relu().sum().backward()
opt.step()

def test_fold_2convs_sgd_nesterov_momentum_wd(self):
with Tensor.train():
img = Tensor.ones(2,3,64,64)
c1 = nn.Conv2d(3,16,3,bias=False)
c2 = nn.Conv2d(16,32,3,bias=False)
opt = optim.SGD(get_parameters([c1, c2]), nesterov=True, momentum=0.9, weight_decay=0.1)
with CLCache(allowed=10):
opt.zero_grad()
c2(c1(img).relu()).relu().sum().backward()
opt.step()

def test_fold_4convs_sgd(self):
with Tensor.train():
img = Tensor.ones(2,3,64,64)
c1 = nn.Conv2d(3,4,3,bias=False)
c2 = nn.Conv2d(4,8,3,bias=False)
c3 = nn.Conv2d(8,16,3,bias=False)
c4 = nn.Conv2d(16,32,3,bias=False)
opt = optim.SGD(get_parameters([c1, c2, c3, c4]))
with CLCache(allowed=18):
opt.zero_grad()
c4(c3(c2(c1(img).relu()).relu()).relu()).relu().sum().backward()
opt.step()

def test_fold_conv_batchnorm_sgd(self):
with Tensor.train():
img = Tensor.ones(1,3,4,4)
c1 = nn.Conv2d(3,32,3)
bn = nn.BatchNorm2d(32, track_running_stats=False)
opt = optim.SGD(get_parameters([c1, bn]))
with CLCache(allowed=16): # this is too high
img_bn = bn(c1(img)).elu().sum()
opt.zero_grad()
img_bn.backward()
opt.step()

def test_fold_conv_batchnorm_notrain(self):
img = Tensor.ones(1,3,8,8)
c1 = nn.Conv2d(3,32,3)
@@ -272,16 +174,6 @@ def test_fold_conv_batchnorm_notrain(self):
bn(c1(img)).relu().realize()
assert cache.count == 1, f"optimizer didn't fold conv-batchnorm at test time, got {cache.count}"

def test_fold_conv_batchnorm(self):
with Tensor.train():
img = Tensor.ones(1,3,8,8)
c1 = nn.Conv2d(3,32,3)
bn = nn.BatchNorm2d(32, track_running_stats=False)
with CLCache() as cache:
img_conv = bn(c1(img)).relu().realize()
print(img_conv)
assert cache.count == 4, f"optimizer didn't fold conv-batchnorm, got {cache.count}"

def test_fold_conv_elu(self):
img = Tensor.ones(1,4,8,8)
c1 = nn.Conv2d(4, 4, kernel_size=3)
@@ -300,15 +192,6 @@ def test_fold_conv_relu(self):
print(img_conv)
assert cache.count == 2, "optimizer didn't fold conv/relu"

def test_fold_conv_relu_nobias(self):
img = Tensor.ones(1,4,8,8)
c1 = nn.Conv2d(4, 4, kernel_size=3, bias=False)
c2 = nn.Conv2d(4, 4, kernel_size=3, bias=False)
with CLCache() as cache:
img_conv = img.sequential([c1, Tensor.relu, c2, Tensor.relu]).realize()
print(img_conv)
assert cache.count == 2, "optimizer didn't fold conv/relu"

def test_permute_was_pushed(self):
a = Tensor.randn(16, 16, 16)
with CLCache(2):
87 changes: 87 additions & 0 deletions test/test_schedule.py
@@ -190,6 +190,26 @@ def test_fold_batchnorm(self):
out = bn(img)
check_schedule(out, 3)

def test_fold_conv_batchnorm(self):
with Tensor.train():
img = Tensor.empty(1,3,8,8)
c1 = nn.Conv2d(3,32,3)
bn = nn.BatchNorm2d(32, track_running_stats=False)
out = bn(c1(img)).relu()
check_schedule(out, 4, [c1.weight, c1.bias])

def test_fold_conv_batchnorm_sgd(self):
with Tensor.train():
img = Tensor.ones(1,3,4,4)
c1 = nn.Conv2d(3,32,3)
bn = nn.BatchNorm2d(32, track_running_stats=False)
opt = nn.optim.SGD(nn.state.get_parameters([c1, bn]))
img_bn = bn(c1(img)).elu().sum()
opt.zero_grad()
img_bn.backward()
# this is too high
check_schedule(opt.schedule_step(), 18)

def test_fold_conv_relu(self):
c1 = nn.Conv2d(3,16,3)

@@ -198,6 +218,13 @@ def test_fold_conv_relu(self):
out = c1(img).relu()
check_schedule(out, 1, [c1.weight, c1.bias])

def test_fold_conv_relu_nobias(self):
img = Tensor.ones(1,4,8,8)
c1 = nn.Conv2d(4, 4, kernel_size=3, bias=False)
c2 = nn.Conv2d(4, 4, kernel_size=3, bias=False)
out = img.sequential([c1, Tensor.relu, c2, Tensor.relu])
check_schedule(out, 2, [c1.weight, c2.weight, img])

def test_fold_conv_elu(self):
c1 = nn.Conv2d(3,16,3)

@@ -556,6 +583,66 @@ def test_adam_step_fusion(self):
layer(x).relu().sum().backward()
check_schedule(opt.schedule_step(), 14)

def test_adam_conv_fuse(self):
with Tensor.train():
img = Tensor.empty(2,3,4,4)
c1 = nn.Conv2d(3,32,3)
opt = nn.optim.Adam(nn.state.get_parameters(c1), lr=1e-4)
opt.zero_grad()
c1(img).relu().sum().backward()
check_schedule(opt.schedule_step(), 14)

def test_adam_2convs_fuse(self):
with Tensor.train():
img = Tensor.empty(2,3,4,4)
c1 = nn.Conv2d(3,16,3,bias=False)
c2 = nn.Conv2d(16,32,3,bias=False)
opt = nn.optim.Adam(nn.state.get_parameters([c1, c2]), lr=1e-4)
opt.zero_grad()
c2(c1(img).relu()).relu().sum().backward()
check_schedule(opt.schedule_step(), 15)

def test_sgd_conv_fuse(self):
with Tensor.train():
img = Tensor.empty(2,3,4,4)
c1 = nn.Conv2d(3,32,3)
opt = nn.optim.SGD(nn.state.get_parameters(c1))
opt.zero_grad()
c1(img).relu().sum().backward()
check_schedule(opt.schedule_step(), 7)

def test_sgd_2convs_fuse(self):
with Tensor.train():
img = Tensor.empty(2,3,4,4)
c1 = nn.Conv2d(3,16,3,bias=False)
c2 = nn.Conv2d(16,32,3,bias=False)
opt = nn.optim.SGD(nn.state.get_parameters([c1, c2]))
opt.zero_grad()
c2(c1(img).relu()).relu().sum().backward()
check_schedule(opt.schedule_step(), 7)

def test_fold_2convs_sgd_nesterov_momentum_wd(self):
with Tensor.train():
img = Tensor.empty(2,3,4,4)
c1 = nn.Conv2d(3,16,3,bias=False)
c2 = nn.Conv2d(16,32,3,bias=False)
opt = nn.optim.SGD(nn.state.get_parameters([c1, c2]), nesterov=True, momentum=0.9, weight_decay=0.1)
opt.zero_grad()
c2(c1(img).relu()).relu().sum().backward()
check_schedule(opt.schedule_step(), 9)

def test_sgd_4convs_fuse(self):
with Tensor.train():
img = Tensor.empty(2,3,64,64)
c1 = nn.Conv2d(3,4,3,bias=False)
c2 = nn.Conv2d(4,8,3,bias=False)
c3 = nn.Conv2d(8,16,3,bias=False)
c4 = nn.Conv2d(16,32,3,bias=False)
opt = nn.optim.SGD(nn.state.get_parameters([c1, c2, c3, c4]))
opt.zero_grad()
c4(c3(c2(c1(img).relu()).relu()).relu()).relu().sum().backward()
check_schedule(opt.schedule_step(), 22)

@unittest.skipUnless(is_dtype_supported(dtypes.half), "need half")
def test_prefer_half_buffer(self):
x = Tensor.ones(4).contiguous().realize()
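For reference, the migrated tests replace the backend-specific CLCache kernel counting with the schedule-level check_schedule helper already used throughout test/test_schedule.py. Below is a minimal sketch of that pattern, reusing only calls that appear in this diff; the import path for check_schedule and running it outside pytest are assumptions, not part of this PR.

```python
# Sketch only: assumes execution from the tinygrad repo root so that the
# check_schedule helper defined near the top of test/test_schedule.py is importable.
from tinygrad import Tensor, nn
from test.test_schedule import check_schedule  # assumed import path

with Tensor.train():
  img = Tensor.empty(1, 3, 8, 8)
  c1 = nn.Conv2d(3, 32, 3)
  bn = nn.BatchNorm2d(32, track_running_stats=False)
  out = bn(c1(img)).relu()
  # The old style counted kernels launched through CLCache on the GPU backend;
  # the migrated style asserts on the generated schedule itself: 4 kernels expected,
  # with the conv weight and bias left as realized inputs.
  check_schedule(out, 4, [c1.weight, c1.bias])
```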