Skip to content

Commit

Permalink
Update the T2I-Adapter unit tests to run with the standard number of UNet down blocks so that all T2I-Adapter down blocks get exercised.
Browse files Browse the repository at this point in the history
  • Loading branch information
RyanJDick committed Oct 17, 2023
1 parent 2def4b4 commit 6d4a060
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 36 deletions.
47 changes: 27 additions & 20 deletions tests/pipelines/stable_diffusion/test_stable_diffusion_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,23 +57,24 @@ class AdapterTests:
def get_dummy_components(self, adapter_type):
torch.manual_seed(0)
unet = UNet2DConditionModel(
block_out_channels=(32, 64),
block_out_channels=(32, 32, 32, 64),
layers_per_block=2,
sample_size=32,
in_channels=4,
out_channels=4,
down_block_types=("CrossAttnDownBlock2D", "DownBlock2D"),
up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
# Test with all 4 down blocks to ensure that the T2I-Adapter downscaling gets fully exercised in the tests.
down_block_types=("CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D", "DownBlock2D"),
up_block_types=("UpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "CrossAttnUpBlock2D"),
cross_attention_dim=32,
)
scheduler = PNDMScheduler(skip_prk_steps=True)
torch.manual_seed(0)
vae = AutoencoderKL(
block_out_channels=[32, 64],
block_out_channels=[32, 32, 32, 64],
in_channels=3,
out_channels=3,
down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D"],
up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D"],
latent_channels=4,
)
torch.manual_seed(0)
Expand All @@ -96,26 +97,26 @@ def get_dummy_components(self, adapter_type):
if adapter_type == "full_adapter" or adapter_type == "light_adapter":
adapter = T2IAdapter(
in_channels=3,
channels=[32, 64],
channels=[32, 32, 32, 64],
num_res_blocks=2,
downscale_factor=2,
downscale_factor=8,
adapter_type=adapter_type,
)
elif adapter_type == "multi_adapter":
adapter = MultiAdapter(
[
T2IAdapter(
in_channels=3,
channels=[32, 64],
channels=[32, 32, 32, 64],
num_res_blocks=2,
downscale_factor=2,
downscale_factor=8,
adapter_type="full_adapter",
),
T2IAdapter(
in_channels=3,
channels=[32, 64],
channels=[32, 32, 32, 64],
num_res_blocks=2,
downscale_factor=2,
downscale_factor=8,
adapter_type="full_adapter",
),
]
Expand All @@ -137,7 +138,7 @@ def get_dummy_components(self, adapter_type):
}
return components

def get_dummy_inputs(self, device, seed=0, height=64, width=64, num_images=1):
def get_dummy_inputs(self, device, seed=0, height=128, width=128, num_images=1):
if num_images == 1:
image = floats_tensor((1, 3, height, width), rng=random.Random(seed)).to(device)
else:
Expand Down Expand Up @@ -188,8 +189,10 @@ def test_stable_diffusion_adapter_default_case(self):
image = sd_pipe(**inputs).images
image_slice = image[0, -3:, -3:, -1]

assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.4858, 0.5500, 0.4278, 0.4669, 0.6184, 0.4322, 0.5010, 0.5033, 0.4746])
assert image.shape == (1, 128, 128, 3)
expected_slice = np.array(
[0.27978146, 0.36439905, 0.3206715, 0.29253614, 0.36390454, 0.3165658, 0.4384598, 0.43083128, 0.38120443]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3


Expand All @@ -208,16 +211,18 @@ def test_stable_diffusion_adapter_default_case(self):
image = sd_pipe(**inputs).images
image_slice = image[0, -3:, -3:, -1]

assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.4965, 0.5548, 0.4330, 0.4771, 0.6226, 0.4382, 0.5037, 0.5071, 0.4782])
assert image.shape == (1, 128, 128, 3)
expected_slice = np.array(
[0.27612323, 0.3632322, 0.31936637, 0.2896598, 0.3640582, 0.31550938, 0.438073, 0.43133593, 0.38108835]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3


class StableDiffusionMultiAdapterPipelineFastTests(AdapterTests, PipelineTesterMixin, unittest.TestCase):
def get_dummy_components(self):
return super().get_dummy_components("multi_adapter")

def get_dummy_inputs(self, device, height=64, width=64, seed=0):
def get_dummy_inputs(self, device, height=128, width=128, seed=0):
inputs = super().get_dummy_inputs(device, seed, height=height, width=width, num_images=2)
inputs["adapter_conditioning_scale"] = [0.5, 0.5]
return inputs
Expand All @@ -233,8 +238,10 @@ def test_stable_diffusion_adapter_default_case(self):
image = sd_pipe(**inputs).images
image_slice = image[0, -3:, -3:, -1]

assert image.shape == (1, 64, 64, 3)
expected_slice = np.array([0.4902, 0.5539, 0.4317, 0.4682, 0.6190, 0.4351, 0.5018, 0.5046, 0.4772])
assert image.shape == (1, 128, 128, 3)
expected_slice = np.array(
[0.27745497, 0.36333847, 0.32152444, 0.29158655, 0.3639205, 0.31645983, 0.4382596, 0.43064785, 0.3810274]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3

def test_inference_batch_consistent(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,19 +48,20 @@ class StableDiffusionXLAdapterPipelineFastTests(PipelineTesterMixin, unittest.Te
def get_dummy_components(self, adapter_type="full_adapter_xl"):
torch.manual_seed(0)
unet = UNet2DConditionModel(
block_out_channels=(32, 64),
block_out_channels=(32, 32, 64),
layers_per_block=2,
sample_size=32,
in_channels=4,
out_channels=4,
down_block_types=("DownBlock2D", "CrossAttnDownBlock2D"),
up_block_types=("CrossAttnUpBlock2D", "UpBlock2D"),
# Test with all 3 down blocks to ensure that the T2I-Adapter downscaling gets fully exercised in the tests.
down_block_types=("DownBlock2D", "CrossAttnDownBlock2D", "CrossAttnDownBlock2D"),
up_block_types=("CrossAttnUpBlock2D", "CrossAttnUpBlock2D", "UpBlock2D"),
# SD2-specific config below
attention_head_dim=(2, 4),
attention_head_dim=2,
use_linear_projection=True,
addition_embed_type="text_time",
addition_time_embed_dim=8,
transformer_layers_per_block=(1, 2),
transformer_layers_per_block=1,
projection_class_embeddings_input_dim=80, # 6 * 8 + 32
cross_attention_dim=64,
)
Expand All @@ -73,11 +74,11 @@ def get_dummy_components(self, adapter_type="full_adapter_xl"):
)
torch.manual_seed(0)
vae = AutoencoderKL(
block_out_channels=[32, 64],
block_out_channels=[32, 32, 32, 64],
in_channels=3,
out_channels=3,
down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D"],
up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D"],
down_block_types=["DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D", "DownEncoderBlock2D"],
up_block_types=["UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D", "UpDecoderBlock2D"],
latent_channels=4,
sample_size=128,
)
Expand All @@ -104,26 +105,26 @@ def get_dummy_components(self, adapter_type="full_adapter_xl"):
if adapter_type == "full_adapter_xl":
adapter = T2IAdapter(
in_channels=3,
channels=[32, 64],
channels=[32, 32, 64],
num_res_blocks=2,
downscale_factor=4,
downscale_factor=16,
adapter_type=adapter_type,
)
elif adapter_type == "multi_adapter":
adapter = MultiAdapter(
[
T2IAdapter(
in_channels=3,
channels=[32, 64],
channels=[32, 32, 64],
num_res_blocks=2,
downscale_factor=4,
downscale_factor=16,
adapter_type="full_adapter_xl",
),
T2IAdapter(
in_channels=3,
channels=[32, 64],
channels=[32, 32, 64],
num_res_blocks=2,
downscale_factor=4,
downscale_factor=16,
adapter_type="full_adapter_xl",
),
]
Expand Down Expand Up @@ -182,7 +183,7 @@ def test_stable_diffusion_adapter_default_case(self):

assert image.shape == (1, 64, 64, 3)
expected_slice = np.array(
[0.5752919, 0.6022097, 0.4728038, 0.49861962, 0.57084894, 0.4644975, 0.5193715, 0.5133664, 0.4729858]
[0.54066867, 0.5574119, 0.47273698, 0.62617165, 0.576637, 0.50059223, 0.5860178, 0.5637511, 0.5143911]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3

Expand Down Expand Up @@ -242,7 +243,7 @@ def test_stable_diffusion_adapter_default_case(self):

assert image.shape == (1, 64, 64, 3)
expected_slice = np.array(
[0.5813032, 0.60995954, 0.47563356, 0.5056669, 0.57199144, 0.4631841, 0.5176794, 0.51252556, 0.47183886]
[0.54532427, 0.5617021, 0.46638602, 0.6338569, 0.5826053, 0.5019801, 0.59624064, 0.5663944, 0.5151665]
)
assert np.abs(image_slice.flatten() - expected_slice).max() < 5e-3

Expand Down

0 comments on commit 6d4a060

Please sign in to comment.