Big refactor of SD runner and added image generator

ostris · Aug 3, 2023 · 66c6f0f · 66c6f0f
1 parent 75ec5d9
commit 66c6f0f
Show file tree

Hide file tree

Showing 16 changed files with 928 additions and 435 deletions.
diff --git a/README.md b/README.md
@@ -42,6 +42,16 @@ here so far.
 
 ---
 
+### Batch Image Generation
+
+An image generator that can take prompts from a config file or from a txt file and generate them to a
+folder. I mainly needed this for an SDXL test I am doing but added some polish to it so it can be used
+for general batch image generation.
+It all runs off a config file, which you can find an example of in `config/examples/generate.example.yaml`.
+More info is in the comments in the example.
+
+---
+
 ### LoRA (lierla), LoCON (LyCORIS) extractor
 
 It is based on the extractor in the [LyCORIS](https://github.com/KohakuBlueleaf/LyCORIS) tool, but adding some QOL features
@@ -143,6 +153,11 @@ Just went in and out. It is much worse on smaller faces than shown here.
 
 ## Change Log
 
+#### 2021-08-03
+Another big refactor to make SD more modular.
+
+Made batch image generation script
+
 #### 2021-08-01
 Major changes and update. New LoRA rescale tool, look above for details. Added better metadata so
 Automatic1111 knows what the base model is. Added some experiments and a ton of updates. This thing is still unstable

diff --git a/config/examples/generate.example.yaml b/config/examples/generate.example.yaml
@@ -0,0 +1,60 @@
+---
+
+job: generate # tells the runner what to do
+config:
+  name: "generate" # this is not really used anywhere currently but required by runner
+  process:
+    # process 1
+    - type: to_folder  # process images to a folder
+      output_folder: "output/gen"
+      device: cuda:0 # cpu, cuda:0, etc
+      generate:
+        # these are your defaults you can override most of them with flags
+        sampler: "ddpm" # ignored for now (ddpm is always used); sampler selection will be added later
+        width: 1024
+        height: 1024
+        neg: "cartoon, fake, drawing, illustration, cgi, animated, anime"
+        seed: -1 # -1 is random
+        guidance_scale: 7
+        sample_steps: 20
+        ext: ".png" # .png, .jpg, .jpeg, .webp
+
+        # here are the flags you can use for prompts. Always start with
+        # your prompt first, then add these flags after. You can use as many
+        # as you like, e.g.
+        # photo of a baseball --n painting, ugly --w 1024 --h 1024 --seed 42 --cfg 7 --steps 20
+        # we will try to support all sd-scripts flags where we can
+
+        # FROM SD-SCRIPTS
+        # --n Treat everything until the next option as a negative prompt.
+        # --w Specify the width of the generated image.
+        # --h Specify the height of the generated image.
+        # --d Specify the seed for the generated image.
+        # --l Specify the CFG scale for the generated image.
+        # --s Specify the number of steps during generation.
+
+        # OURS and some QOL additions
+        # --p2 Prompt for the second text encoder (SDXL only)
+        # --n2 Negative prompt for the second text encoder (SDXL only)
+        # --gr Specify the guidance rescale for the generated image (SDXL only)
+        # --seed Specify the seed for the generated image same as --d
+        # --cfg Specify the CFG scale for the generated image same as --l
+        # --steps Specify the number of steps during generation same as --s
+
+        prompt_file: false # if true a txt file will be created next to images with prompt strings used
+        # prompts can also be a path to a text file with one prompt per line
+        # prompts: "/path/to/prompts.txt"
+        prompts:
+          - "photo of batman"
+          - "photo of superman"
+          - "photo of spiderman"
+          - "photo of a superhero --n batman superman spiderman"
+
+      model:
+        # huggingface name, path relative to the project root, or absolute path to .safetensors or .ckpt
+        #      name_or_path: "runwayml/stable-diffusion-v1-5"
+        name_or_path: "/mnt/Models/stable-diffusion/models/stable-diffusion/Ostris/Ostris_Real_v1.safetensors"
+        is_v2: false  # for v2 models
+        is_v_pred: false # for v-prediction models (most v2 models)
+        is_xl: false  # for SDXL models
+        dtype: bf16
diff --git a/config/examples/train_slider.example.yml b/config/examples/train_slider.example.yml
@@ -57,7 +57,8 @@ config:
         # bf16 works best if your GPU supports it (modern)
         dtype: bf16  # fp32, bf16, fp16
         # if you have it, use it. It is faster and better
-        xformers: true
+        # torch 2.0 doesn't need xformers anymore; only use it if you have a lower version
+#        xformers: true
         # I don't recommend using unless you are trying to make a darker lora. Then do 0.1 MAX
         # although, the way we train sliders is comparative, so it probably won't work anyway
         noise_offset: 0.0

diff --git a/jobs/GenerateJob.py b/jobs/GenerateJob.py
@@ -0,0 +1,32 @@
+from jobs import BaseJob
+from collections import OrderedDict
+from typing import List
+from jobs.process import GenerateProcess
+from toolkit.paths import REPOS_ROOT
+
+import sys
+
+sys.path.append(REPOS_ROOT)
+
+# Lookup table handed to BaseJob.load_processes() by GenerateJob.__init__.
+# Keys match the `type` field of a process entry in the job config; values are
+# process class names (presumably resolved to classes by load_processes —
+# TODO confirm against BaseJob).
+process_dict = {
+    'to_folder': 'GenerateProcess',
+}
+
+
+class GenerateJob(BaseJob):
+    """Job that runs the image generation processes listed in its config.
+
+    On construction it reads the ``device`` setting and loads the processes
+    named in ``process_dict``; ``run`` then executes them one after another.
+    """
+    # processes populated by load_processes(); run() calls .run() on each
+    process: List[GenerateProcess]
+
+    def __init__(self, config: OrderedDict):
+        """Initialise the job from ``config`` and load its processes.
+
+        :param config: job configuration passed through to ``BaseJob``.
+        """
+        super().__init__(config)
+        # falls back to 'cpu' when the config has no 'device' entry
+        self.device = self.get_conf('device', 'cpu')
+
+        # loads the processes from the config
+        self.load_processes(process_dict)
+
+    def run(self):
+        """Run the base job hook, report the process count, then run each process in order."""
+        super().run()
+        print("")
+        # pluralise "process" unless there is exactly one
+        print(f"Running  {len(self.process)} process{'' if len(self.process) == 1 else 'es'}")
+
+        for process in self.process:
+            process.run()
diff --git a/jobs/__init__.py b/jobs/__init__.py
@@ -3,3 +3,4 @@
 from .TrainJob import TrainJob
 from .MergeJob import MergeJob
 from .ModJob import ModJob
+from .GenerateJob import GenerateJob
diff --git a/jobs/process/BaseProcess.py b/jobs/process/BaseProcess.py
@@ -1,10 +1,9 @@
 import copy
 import json
 from collections import OrderedDict
-from typing import ForwardRef
 
 
-class BaseProcess:
+class BaseProcess(object):
     meta: OrderedDict
 
     def __init__(