Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -326,9 +326,10 @@ arguments:
--skip-layers LAYERS Layers to skip for SLG steps: (default: [7,8,9])
--skip-layer-start START SLG enabling point: (default: 0.01)
--skip-layer-end END SLG disabling point: (default: 0.2)
--scheduler {discrete, karras, exponential, ays, gits, smoothstep} Denoiser sigma scheduler (default: discrete)
--scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)
--sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}
sampling method (default: "euler" for Flux/SD3/Wan, "euler_a" otherwise)
--timestep-shift N shift timestep for NitroFusion models, default: 0, recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant
--steps STEPS number of sample steps (default: 20)
--high-noise-cfg-scale SCALE (high noise) unconditional guidance scale: (default: 7.0)
--high-noise-img-cfg-scale SCALE (high noise) image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)
Expand All @@ -339,7 +340,7 @@ arguments:
--high-noise-skip-layers LAYERS (high noise) Layers to skip for SLG steps: (default: [7,8,9])
--high-noise-skip-layer-start (high noise) SLG enabling point: (default: 0.01)
--high-noise-skip-layer-end END (high noise) SLG disabling point: (default: 0.2)
--high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep} Denoiser sigma scheduler (default: discrete)
--high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)
--high-noise-sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}
(high noise) sampling method (default: "euler_a")
--high-noise-steps STEPS (high noise) number of sample steps (default: -1 = auto)
Expand All @@ -352,7 +353,7 @@ arguments:
--rng {std_default, cuda} RNG (default: cuda)
-s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)
-b, --batch-count COUNT number of images to generate
--clip-skip N ignore last_dot_pos layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
--clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)
<= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x
--vae-tiling process vae in tiles to reduce memory usage
--vae-tile-size [X]x[Y] tile size for vae tiling (default: 32x32)
Expand Down
48 changes: 48 additions & 0 deletions denoiser.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,25 @@ struct GITSSchedule : SigmaSchedule {
}
};

// Sigma schedule that picks timesteps on a linear grid running from
// TIMESTEPS - 1 down to 0 and converts each picked timestep to a sigma.
struct SGMUniformSchedule : SigmaSchedule {
    // Builds the sigma sequence for `n` sampling steps.
    //
    //   n               - number of sampling steps.
    //   sigma_min_in    - not used by this schedule.
    //   sigma_max_in    - not used by this schedule.
    //   t_to_sigma_func - callback converting a timestep to its sigma.
    //
    // Returns n + 1 values: one sigma per step followed by a terminating 0.0f.
    // For n == 0 the result is the single value {0.0f}.
    std::vector<float> get_sigmas(uint32_t n, float sigma_min_in, float sigma_max_in, t_to_sigma_t t_to_sigma_func) override {
        if (n == 0) {
            return {0.0f};
        }

        std::vector<float> result;
        result.reserve(n + 1);

        // Request n + 1 grid points between the highest timestep and 0.
        // NOTE(review): assumes linear_space returns at least n + 1 entries — confirm against its definition.
        const float t_max = static_cast<float>(TIMESTEPS - 1);
        const float t_min = 0.0f;
        const std::vector<float> timesteps = linear_space(t_max, t_min, n + 1);

        // Only the first n grid points are converted; the last grid point is
        // replaced by the literal 0.0f appended below.
        // The index is unsigned to match `n` and avoid a signed/unsigned comparison.
        for (uint32_t i = 0; i < n; ++i) {
            result.push_back(t_to_sigma_func(timesteps[i]));
        }
        result.push_back(0.0f);
        return result;
    }
};

struct KarrasSchedule : SigmaSchedule {
std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) {
// These *COULD* be function arguments here,
Expand All @@ -251,6 +270,35 @@ struct KarrasSchedule : SigmaSchedule {
}
};

// Sigma schedule that steps down through the timestep range in equal strides
// of TIMESTEPS / n, starting from the highest timestep.
struct SimpleSchedule : SigmaSchedule {
    // Builds the sigma sequence for `n` sampling steps.
    //
    //   n          - number of sampling steps.
    //   sigma_min  - not used by this schedule.
    //   sigma_max  - not used by this schedule.
    //   t_to_sigma - callback converting a timestep to its sigma.
    //
    // Returns n + 1 values: one sigma per step followed by a terminating 0.0f.
    // For n == 0 the result is an empty vector.
    std::vector<float> get_sigmas(uint32_t n, float sigma_min, float sigma_max, t_to_sigma_t t_to_sigma) override {
        std::vector<float> sigmas;

        if (n != 0) {
            sigmas.reserve(n + 1);

            const int total_timesteps = TIMESTEPS;
            const float stride        = static_cast<float>(total_timesteps) / static_cast<float>(n);

            uint32_t step = 0;
            while (step < n) {
                // Distance (in timesteps, truncated) already covered from the top of the range.
                const int covered = static_cast<int>(static_cast<float>(step) * stride);
                const int index   = total_timesteps - 1 - covered;

                // Guard against a negative index; fall back to timestep 0.
                const int clamped_index = (index < 0) ? 0 : index;

                sigmas.push_back(t_to_sigma(static_cast<float>(clamped_index)));
                ++step;
            }

            sigmas.push_back(0.0f);
        }

        return sigmas;
    }
};

// Close to Beta Schedule, but incredibly simple in code.
struct SmoothStepSchedule : SigmaSchedule {
static constexpr float smoothstep(float x) {
Expand Down
13 changes: 10 additions & 3 deletions examples/cli/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -248,9 +248,10 @@ void print_usage(int argc, const char* argv[]) {
printf(" --skip-layers LAYERS Layers to skip for SLG steps: (default: [7,8,9])\n");
printf(" --skip-layer-start START SLG enabling point: (default: 0.01)\n");
printf(" --skip-layer-end END SLG disabling point: (default: 0.2)\n");
printf(" --scheduler {discrete, karras, exponential, ays, gits, smoothstep} Denoiser sigma scheduler (default: discrete)\n");
printf(" --scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)\n");
printf(" --sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
printf(" sampling method (default: \"euler\" for Flux/SD3/Wan, \"euler_a\" otherwise)\n");
printf(" --timestep-shift N shift timestep for NitroFusion models, default: 0, recommended N for NitroSD-Realism around 250 and 500 for NitroSD-Vibrant\n");
printf(" --steps STEPS number of sample steps (default: 20)\n");
printf(" --high-noise-cfg-scale SCALE (high noise) unconditional guidance scale: (default: 7.0)\n");
printf(" --high-noise-img-cfg-scale SCALE (high noise) image guidance scale for inpaint or instruct-pix2pix models: (default: same as --cfg-scale)\n");
Expand All @@ -261,7 +262,7 @@ void print_usage(int argc, const char* argv[]) {
printf(" --high-noise-skip-layers LAYERS (high noise) Layers to skip for SLG steps: (default: [7,8,9])\n");
printf(" --high-noise-skip-layer-start (high noise) SLG enabling point: (default: 0.01)\n");
printf(" --high-noise-skip-layer-end END (high noise) SLG disabling point: (default: 0.2)\n");
printf(" --high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep} Denoiser sigma scheduler (default: discrete)\n");
printf(" --high-noise-scheduler {discrete, karras, exponential, ays, gits, smoothstep, sgm_uniform, simple} Denoiser sigma scheduler (default: discrete)\n");
printf(" --high-noise-sampling-method {euler, euler_a, heun, dpm2, dpm++2s_a, dpm++2m, dpm++2mv2, ipndm, ipndm_v, lcm, ddim_trailing, tcd}\n");
printf(" (high noise) sampling method (default: \"euler_a\")\n");
printf(" --high-noise-steps STEPS (high noise) number of sample steps (default: -1 = auto)\n");
Expand All @@ -274,7 +275,7 @@ void print_usage(int argc, const char* argv[]) {
printf(" --rng {std_default, cuda} RNG (default: cuda)\n");
printf(" -s SEED, --seed SEED RNG seed (default: 42, use random seed for < 0)\n");
printf(" -b, --batch-count COUNT number of images to generate\n");
printf(" --clip-skip N ignore last_dot_pos layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
printf(" --clip-skip N ignore last layers of CLIP network; 1 ignores none, 2 ignores one layer (default: -1)\n");
printf(" <= 0 represents unspecified, will be 1 for SD1.x, 2 for SD2.x\n");
printf(" --vae-tiling process vae in tiles to reduce memory usage\n");
printf(" --vae-tile-size [X]x[Y] tile size for vae tiling (default: 32x32)\n");
Expand Down Expand Up @@ -520,6 +521,7 @@ void parse_args(int argc, const char** argv, SDParams& params) {
{"", "--chroma-t5-mask-pad", "", &params.chroma_t5_mask_pad},
{"", "--video-frames", "", &params.video_frames},
{"", "--fps", "", &params.fps},
{"", "--timestep-shift", "", &params.sample_params.shifted_timestep},
};

options.float_options = {
Expand Down Expand Up @@ -875,6 +877,11 @@ void parse_args(int argc, const char** argv, SDParams& params) {
exit(1);
}

if (params.sample_params.shifted_timestep < 0 || params.sample_params.shifted_timestep > 1000) {
fprintf(stderr, "error: timestep-shift must be between 0 and 1000\n");
exit(1);
}

if (params.upscale_repeats < 1) {
fprintf(stderr, "error: upscale multiplier must be at least 1\n");
exit(1);
Expand Down
54 changes: 51 additions & 3 deletions stable-diffusion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -747,6 +747,16 @@ class StableDiffusionGGML {
denoiser->scheduler = std::make_shared<GITSSchedule>();
denoiser->scheduler->version = version;
break;
case SGM_UNIFORM:
LOG_INFO("Running with SGM Uniform schedule");
denoiser->scheduler = std::make_shared<SGMUniformSchedule>();
denoiser->scheduler->version = version;
break;
case SIMPLE:
LOG_INFO("Running with Simple schedule");
denoiser->scheduler = std::make_shared<SimpleSchedule>();
denoiser->scheduler->version = version;
break;
case SMOOTHSTEP:
LOG_INFO("Running with SmoothStep scheduler");
denoiser->scheduler = std::make_shared<SmoothStepSchedule>();
Expand Down Expand Up @@ -1033,6 +1043,7 @@ class StableDiffusionGGML {
float control_strength,
sd_guidance_params_t guidance,
float eta,
int shifted_timestep,
sample_method_t method,
const std::vector<float>& sigmas,
int start_merge_step,
Expand All @@ -1042,6 +1053,10 @@ class StableDiffusionGGML {
ggml_tensor* denoise_mask = NULL,
ggml_tensor* vace_context = NULL,
float vace_strength = 1.f) {
if (shifted_timestep > 0 && !sd_version_is_sdxl(version)) {
LOG_WARN("timestep shifting is only supported for SDXL models!");
shifted_timestep = 0;
}
std::vector<int> skip_layers(guidance.slg.layers, guidance.slg.layers + guidance.slg.layer_count);

float cfg_scale = guidance.txt_cfg;
Expand Down Expand Up @@ -1102,7 +1117,17 @@ class StableDiffusionGGML {
float c_in = scaling[2];

float t = denoiser->sigma_to_t(sigma);
std::vector<float> timesteps_vec(1, t); // [N, ]
std::vector<float> timesteps_vec;
if (shifted_timestep > 0 && sd_version_is_sdxl(version)) {
float shifted_t_float = t * (float(shifted_timestep) / float(TIMESTEPS));
int64_t shifted_t = static_cast<int64_t>(roundf(shifted_t_float));
shifted_t = std::max((int64_t)0, std::min((int64_t)(TIMESTEPS - 1), shifted_t));
LOG_DEBUG("shifting timestep from %.2f to %" PRId64 " (sigma: %.4f)", t, shifted_t, sigma);
timesteps_vec.assign(1, (float)shifted_t);
} else {
timesteps_vec.assign(1, t);
}

timesteps_vec = process_timesteps(timesteps_vec, init_latent, denoise_mask);
auto timesteps = vector_to_ggml_tensor(work_ctx, timesteps_vec);
std::vector<float> guidance_vec(1, guidance.distilled_guidance);
Expand Down Expand Up @@ -1200,6 +1225,19 @@ class StableDiffusionGGML {
float* vec_input = (float*)input->data;
float* positive_data = (float*)out_cond->data;
int ne_elements = (int)ggml_nelements(denoised);

if (shifted_timestep > 0 && sd_version_is_sdxl(version)) {
int64_t shifted_t_idx = static_cast<int64_t>(roundf(timesteps_vec[0]));
float shifted_sigma = denoiser->t_to_sigma((float)shifted_t_idx);
std::vector<float> shifted_scaling = denoiser->get_scalings(shifted_sigma);
float shifted_c_skip = shifted_scaling[0];
float shifted_c_out = shifted_scaling[1];
float shifted_c_in = shifted_scaling[2];

c_skip = shifted_c_skip * c_in / shifted_c_in;
c_out = shifted_c_out;
}

for (int i = 0; i < ne_elements; i++) {
float latent_result = positive_data[i];
if (has_unconditioned) {
Expand All @@ -1222,6 +1260,7 @@ class StableDiffusionGGML {
// denoised = (v * c_out + input * c_skip) or (input + eps * c_out)
vec_denoised[i] = latent_result * c_out + vec_input[i] * c_skip;
}

int64_t t1 = ggml_time_us();
if (step > 0) {
pretty_progress(step, (int)steps, (t1 - t0) / 1000000.f);
Expand Down Expand Up @@ -1588,6 +1627,8 @@ const char* schedule_to_str[] = {
"exponential",
"ays",
"gits",
"sgm_uniform",
"simple",
"smoothstep",
};

Expand Down Expand Up @@ -1720,7 +1761,8 @@ char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) {
"scheduler: %s, "
"sample_method: %s, "
"sample_steps: %d, "
"eta: %.2f)",
"eta: %.2f, "
"shifted_timestep: %d)",
sample_params->guidance.txt_cfg,
sample_params->guidance.img_cfg,
sample_params->guidance.distilled_guidance,
Expand All @@ -1731,7 +1773,8 @@ char* sd_sample_params_to_str(const sd_sample_params_t* sample_params) {
sd_schedule_name(sample_params->scheduler),
sd_sample_method_name(sample_params->sample_method),
sample_params->sample_steps,
sample_params->eta);
sample_params->eta,
sample_params->shifted_timestep);

return buf;
}
Expand Down Expand Up @@ -1863,6 +1906,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
int clip_skip,
sd_guidance_params_t guidance,
float eta,
int shifted_timestep,
int width,
int height,
enum sample_method_t sample_method,
Expand Down Expand Up @@ -2101,6 +2145,7 @@ sd_image_t* generate_image_internal(sd_ctx_t* sd_ctx,
control_strength,
guidance,
eta,
shifted_timestep,
sample_method,
sigmas,
start_merge_step,
Expand Down Expand Up @@ -2394,6 +2439,7 @@ sd_image_t* generate_image(sd_ctx_t* sd_ctx, const sd_img_gen_params_t* sd_img_g
sd_img_gen_params->clip_skip,
sd_img_gen_params->sample_params.guidance,
sd_img_gen_params->sample_params.eta,
sd_img_gen_params->sample_params.shifted_timestep,
width,
height,
sample_method,
Expand Down Expand Up @@ -2734,6 +2780,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
0,
sd_vid_gen_params->high_noise_sample_params.guidance,
sd_vid_gen_params->high_noise_sample_params.eta,
sd_vid_gen_params->high_noise_sample_params.shifted_timestep,
sd_vid_gen_params->high_noise_sample_params.sample_method,
high_noise_sigmas,
-1,
Expand Down Expand Up @@ -2769,6 +2816,7 @@ SD_API sd_image_t* generate_video(sd_ctx_t* sd_ctx, const sd_vid_gen_params_t* s
0,
sd_vid_gen_params->sample_params.guidance,
sd_vid_gen_params->sample_params.eta,
sd_vid_gen_params->sample_params.shifted_timestep,
sd_vid_gen_params->sample_params.sample_method,
sigmas,
-1,
Expand Down
3 changes: 3 additions & 0 deletions stable-diffusion.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ enum scheduler_t {
EXPONENTIAL,
AYS,
GITS,
SGM_UNIFORM,
SIMPLE,
SMOOTHSTEP,
SCHEDULE_COUNT
};
Expand Down Expand Up @@ -183,6 +185,7 @@ typedef struct {
enum sample_method_t sample_method;
int sample_steps;
float eta;
int shifted_timestep;
} sd_sample_params_t;

typedef struct {
Expand Down
Loading