In [None]:
%%capture
# Install diffusers from the GitHub main branch. A later cell runs an example
# training script from a clone of the same repository, so presumably the library
# and the script are meant to come from the same revision.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install --quiet git+https://github.com/huggingface/diffusers.git
# Remaining dependencies; diffusers itself is already installed by the line above.
%pip install --quiet transformers gradio accelerate bitsandbytes datasets

In [None]:
# Identifier of the base Stable Diffusion checkpoint. It is stored as an environment
# variable and read back from the environment when the pipeline is loaded below.
%env MODEL_NAME=stabilityai/stable-diffusion-2-1

In [None]:
# Import the required libraries
from diffusers import StableDiffusionPipeline
from transformers import CLIPTextModel, CLIPTokenizer
import matplotlib.pyplot as plt
import torch
import os

# Menampilkan versi dan detail torch
!pip show torch

# Memuat tokenizer dan model teks dari CLIP (transformers)
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
text_model = CLIPTextModel.from_pretrained("openai/clip-vit-base-patch32")

# Helper that turns a text prompt into CLIP text embeddings
def get_text_embeddings(prompt):
    """Encode ``prompt`` with the module-level ``tokenizer`` and ``text_model``.

    Args:
        prompt: A single prompt string, or a list of prompt strings that are
            encoded together as one batch.

    Returns:
        The ``last_hidden_state`` attribute of the ``text_model`` output for the
        tokenized prompt(s).
    """
    # padding=True lets prompts of different lengths be stacked into one tensor;
    # truncation=True cuts over-long prompts to the tokenizer's maximum length
    # instead of feeding the text model more positions than it supports.
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
    # Inference only, so no autograd graph is recorded.
    with torch.no_grad():
        text_embeddings = text_model(**inputs).last_hidden_state
    return text_embeddings

# Load the Stable Diffusion pipeline in half precision and move it to the GPU.
# os.environ[...] raises KeyError right here if MODEL_NAME was never set, instead
# of handing None to from_pretrained.
pipe = StableDiffusionPipeline.from_pretrained(os.environ['MODEL_NAME'], torch_dtype=torch.float16)
pipe = pipe.to("cuda")

# Prompt used to generate the image
prompt = "A man with a mustache and a beard in green armor"

# Optional: compute standalone CLIP embeddings for the prompt. They are not passed
# to the pipeline call below, which receives the raw prompt string.
text_embeddings = get_text_embeddings(prompt)

# Seed the sampler so that re-running this cell reproduces the same image.
generator = torch.Generator(device="cuda").manual_seed(0)

# Generate an image from the prompt with the pipeline
image = pipe(prompt, generator=generator).images[0]

# Display the generated image
print("[PROMPT]:", prompt)
plt.imshow(image)
plt.axis('off')
plt.show()

In [None]:
# First step of flushing GPU memory ahead of training: drop the notebook's
# references to the pipeline and to the sample image.
del pipe, image

In [None]:
import gc
# Second step of the GPU-memory flush: run Python's garbage collector first, then
# ask PyTorch to empty its CUDA cache.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Clone the dataset repository into the working directory.
# NOTE(review): the training cell passes the dataset identifier through
# --dataset_name rather than this local path, so this clone may be unnecessary - confirm.
!git clone https://huggingface.co/datasets/nikkoyudha/dynasty_warriors_characters
# Clone diffusers to get examples/text_to_image/train_text_to_image.py, which the
# training cell launches.
!git clone https://github.com/huggingface/diffusers.git

In [None]:
%%capture
# Training dependencies, installed in a single command. The notebook's first cell
# installs the same packages, so this is normally a no-op; it is kept so the
# training section can be run on its own.
# %pip (rather than !pip) installs into the environment of the running kernel.
%pip install --quiet accelerate datasets bitsandbytes

In [None]:
%%capture
# Environment variables consumed by the training command in the next cell.
# Base checkpoint to fine-tune (same value as set near the top of the notebook).
%env MODEL_NAME=stabilityai/stable-diffusion-2-1
# Dataset identifier, passed to the training script as --dataset_name.
%env dataset_name=nikkoyudha/dynasty_warriors_characters
# No need to train the model for long to see meaningful results.
# NOTE(review): despite its name, this value is passed to --max_train_steps in the
# training cell, so it is a step count, not an epoch count.
%env max_training_epochs = 1250

In [None]:
# Launch the text-to-image example training script from the cloned diffusers repository.
# $MODEL_NAME, $dataset_name and $max_training_epochs are the environment variables
# set in the previous cell; $max_training_epochs is passed as --max_train_steps.
# The output directory is "test-1-nikko"; the inference cell below loads the
# pipeline from that path.
# The --use_8bit_adam flag is crucial to be able to train on the T4 GPU which has only 15GB of memory
!accelerate launch diffusers/examples/text_to_image/train_text_to_image.py \
  --pretrained_model_name_or_path=$MODEL_NAME \
  --dataset_name=$dataset_name \
  --use_ema \
  --use_8bit_adam \
  --resolution=512 --center_crop --random_flip \
  --train_batch_size=1 \
  --gradient_accumulation_steps=4 \
  --gradient_checkpointing \
  --mixed_precision="fp16" \
  --max_train_steps=$max_training_epochs \
  --learning_rate=1e-05 \
  --max_grad_norm=1 \
  --lr_scheduler="constant" --lr_warmup_steps=0 \
  --output_dir="test-1-nikko"

In [None]:
# Menampilkan versi dan detail torch
!pip show torch

# Memuat tokenizer dan model teks dari CLIP (transformers)
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
text_model = CLIPTextModel.from_pretrained("openai/clip-vit-base-patch32")

# Helper that turns a text prompt into CLIP text embeddings.
# NOTE: this re-defines the helper of the same name from the first inference cell.
def get_text_embeddings(prompt):
    """Encode ``prompt`` with the module-level ``tokenizer`` and ``text_model``.

    Args:
        prompt: A single prompt string, or a list of prompt strings that are
            encoded together as one batch.

    Returns:
        The ``last_hidden_state`` attribute of the ``text_model`` output for the
        tokenized prompt(s).
    """
    # padding=True lets prompts of different lengths be stacked into one tensor;
    # truncation=True cuts over-long prompts to the tokenizer's maximum length
    # instead of feeding the text model more positions than it supports.
    inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True)
    # Inference only, so no autograd graph is recorded.
    with torch.no_grad():
        text_embeddings = text_model(**inputs).last_hidden_state
    return text_embeddings

# Load the fine-tuned pipeline from the training output directory in half
# precision and move it to the GPU.
pipe = StableDiffusionPipeline.from_pretrained('test-1-nikko', torch_dtype=torch.float16)
pipe = pipe.to("cuda")
prompt = "A man with a mustache and a beard in green armor"

# Optional: compute standalone CLIP embeddings for the prompt. They are not passed
# to the pipeline call below, which receives the raw prompt string.
text_embeddings = get_text_embeddings(prompt)

# Seed the sampler so that re-running this cell reproduces the same image.
generator = torch.Generator(device="cuda").manual_seed(0)

# Generate an image from the prompt with the pipeline
image = pipe(prompt, generator=generator).images[0]

# Display the generated image
print("[PROMPT]:", prompt)
plt.imshow(image)
plt.axis('off')
plt.show()