In [None]:
import sys
# Put the parent directory on the import path so modules that live one level
# above this notebook can be imported (presumably where `config`, imported in
# a later cell, lives — confirm against the repo layout).
sys.path.append('..')

In [None]:
local_dir = "./00000/" #@param

import glob
import os

from PIL import Image

from config import image_grid

# Glob pattern for every PNG directly inside `local_dir`. Built with
# os.path.join so it works whether or not `local_dir` carries a "./" prefix,
# a trailing slash, or several path components (the previous
# `local_dir.split('/')[1]` only handled the exact "./name/" shape).
img_paths = os.path.join(local_dir, "*.png")
print(img_paths)

# glob returns files in filesystem order; sort so the preview is reproducible.
imgs = [Image.open(path) for path in sorted(glob.glob(img_paths))]
assert imgs, f"no PNG images found matching {img_paths!r}"

num_imgs_to_preview = 5
preview_imgs = imgs[:num_imgs_to_preview]
# Use the real number of preview images as the column count so a folder with
# fewer than `num_imgs_to_preview` images still yields a consistent 1-row grid
# (assumes image_grid(imgs, rows, cols), as the original call suggests).
image_grid(preview_imgs, 1, len(preview_imgs))

In [None]:
import requests
from transformers import AutoProcessor, BlipForConditionalGeneration
import torch

2024-09-16 15:19:24.151501: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


#### Loading the BLIP processor and captioning model

In [None]:
# Run on the GPU when torch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

# The processor and the captioning model come from the same BLIP checkpoint.
blip_processor = AutoProcessor.from_pretrained(
    "Salesforce/blip-image-captioning-base"
)
# NOTE(review): weights are loaded in float16 regardless of `device`; half
# precision on CPU may be slow or unsupported depending on the torch build —
# confirm if this notebook is expected to run without a GPU.
blip_model = BlipForConditionalGeneration.from_pretrained(
    "Salesforce/blip-image-captioning-base",
    torch_dtype=torch.float16,
)
blip_model = blip_model.to(device)

In [None]:
def caption_images(input_image, max_length=50):
    """Generate a BLIP caption for an image.

    Despite the plural name, only one caption is returned: the first entry of
    the decoded batch.

    Args:
        input_image: Image handed to ``blip_processor`` as ``images=``
            (the notebook passes a single PIL image).
        max_length: Forwarded to ``blip_model.generate`` as ``max_length``.
            Defaults to 50, the value that was previously hard-coded.

    Returns:
        str: The first decoded caption, with special tokens skipped.
    """
    # Move/cast the inputs to wherever the model actually lives instead of
    # repeating a hard-coded device and float16 here; this keeps the function
    # correct if the loading cell changes device or precision.
    inputs = blip_processor(images=input_image, return_tensors="pt").to(
        blip_model.device, blip_model.dtype
    )

    generated_ids = blip_model.generate(
        pixel_values=inputs.pixel_values, max_length=max_length
    )
    captions = blip_processor.batch_decode(generated_ids, skip_special_tokens=True)
    return captions[0]

In [None]:
import glob
import json
import os

from PIL import Image

# Every caption starts with the shared instance prompt, followed by the
# BLIP-generated description of the individual image.
caption_prefix = "a photo of TOK traffic sign, 00000 with "

# os.path.join works with or without a trailing slash on `local_dir`;
# sorting makes the order of entries in metadata.jsonl reproducible.
image_paths = sorted(glob.glob(os.path.join(local_dir, "*.png")))
assert image_paths, f"no PNG images found in {local_dir!r}"

metadata_path = os.path.join(local_dir, "metadata.jsonl")
with open(metadata_path, "w", encoding="utf-8") as outfile:
    for path in image_paths:
        # Open one image at a time and close it right after captioning so
        # file handles are not held for the whole dataset.
        with Image.open(path) as img:
            # Keep only the first line of the generated caption.
            caption = caption_prefix + caption_images(img).split("\n")[0]
        # One JSON object per line: image file name (no directory) + prompt.
        entry = {"file_name": os.path.basename(path), "prompt": caption}
        json.dump(entry, outfile)
        outfile.write("\n")

a photo of TOK traffic sign, 00000 with a red and white triangle
a photo of TOK traffic sign, 00000 with a sign that is on the side of a road
a photo of TOK traffic sign, 00000 with a triangle shaped object
a photo of TOK traffic sign, 00000 with a traffic sign on a street with a building in the background
a photo of TOK traffic sign, 00000 with a red triangle sign
a photo of TOK traffic sign, 00000 with a sign on a pole
a photo of TOK traffic sign, 00000 with a triangle with a car on it
a photo of TOK traffic sign, 00000 with a sign that is on a pole
a photo of TOK traffic sign, 00000 with a sign with a triangle on it
a photo of TOK traffic sign, 00000 with a triangle shaped sign
a photo of TOK traffic sign, 00000 with a white sign on a pole
a photo of TOK traffic sign, 00000 with a triangle shaped sign
a photo of TOK traffic sign, 00000 with a triangle sign on a road with a car in the background
a photo of TOK traffic sign, 00000 with a triangle shaped object
a photo of TOK traffic s

In [None]:
import locale

# Force the reported preferred encoding to UTF-8 (presumably a workaround for
# hosted runtimes whose locale is not UTF-8, which breaks `!` shell commands).
# The replacement keeps the stdlib signature `getpreferredencoding(do_setlocale=True)`
# so callers that pass the flag, e.g. `locale.getpreferredencoding(False)`,
# do not raise TypeError.
locale.getpreferredencoding = lambda do_setlocale=True: "UTF-8"

# Create Accelerate's default configuration non-interactively so the
# `accelerate launch` training cell further down has a config to pick up.
!accelerate config default

In [None]:
import os

import dotenv
from huggingface_hub import login

# Pull variables from a local .env file (if present) into the environment,
# then read the Hugging Face token from there. TOKEN is None when the
# variable is not set.
dotenv.load_dotenv()
TOKEN = os.environ.get("HUGGINGFACE_TOKEN")

# Authenticate this session against the Hugging Face Hub.
login(token=TOKEN)


In [None]:
# Confirm the token was loaded WITHOUT echoing the secret itself: a bare
# `TOKEN` expression would store the credential in the notebook's saved output.
"HUGGINGFACE_TOKEN is set" if TOKEN else "HUGGINGFACE_TOKEN is missing"

In [None]:
from huggingface_hub import notebook_login
# Interactive alternative to the token-based `login(token=TOKEN)` cell above:
# uncomment to enter the Hugging Face token through a notebook prompt instead
# of reading it from the environment.
# notebook_login()

In [None]:
import wandb

# Relies on `os` (and the .env loading) from the Hugging Face login cell.
# KEY is None when WANDB_API_KEY is not set in the environment.
KEY = os.environ.get("WANDB_API_KEY")
wandb.login(key=KEY)

In [None]:
!accelerate launch train_dreambooth_lora_flux.py \
  --pretrained_model_name_or_path="black-forest-labs/FLUX.1-dev" \
  --instance_data_dir="00000" \
  --output_dir="LoRA_flux" \
  --mixed_precision="bf16" \
  --instance_prompt="a photo of sks dog" \
  --resolution=512 \
  --train_batch_size=1 \
  --guidance_scale=1 \
  --gradient_accumulation_steps=4 \
  --optimizer="prodigy" \
  --learning_rate=1. \
  --report_to="wandb" \
  --lr_scheduler="constant" \
  --lr_warmup_steps=0 \
  --max_train_steps=50 \
  --validation_prompt="A photo of sks dog in a bucket" \
  --validation_epochs=25 \
  --seed="0" \
  # --push_to_hub

### Saving the model to a repository on the Hugging Face Hub

In [None]:
from huggingface_hub import whoami

# `output_dir` doubles as the Hub repository name and as the local folder that
# is uploaded later, so it must match the --output_dir given to the training
# command.
output_dir = "00000_LoRA_sd3" #@param

# whoami() with no argument uses the token cached by the earlier
# `login(token=TOKEN)` call. The `token` parameter expects a token string
# (or bool/None), not a filesystem path, so the hard-coded
# "/root/.cache/huggingface/" directory is no longer passed.
username = whoami()["name"]
repo_id = f"{username}/{output_dir}"
print(f"Your model is available at https://huggingface.co/{repo_id}")

In [None]:
# NOTE(review): `diffuserss` (double "s") is assumed to be a project-local
# module, and `train_flux` is called here with model-card style arguments —
# confirm both names are the intended ones.
from diffuserss import train_flux
from huggingface_hub import upload_folder, create_repo

# Create the Hub repository if it does not exist yet (exist_ok=True) and keep
# the repo id reported back by the Hub.
created_repo = create_repo(repo_id, exist_ok=True)
repo_id = created_repo.repo_id

# Change the params below according to your training arguments.
card_params = dict(
    repo_id=repo_id,
    images=[],
    base_model="black-forest-labs/FLUX.1-dev",
    train_text_encoder=False,
    instance_prompt="a photo of TOK traffic sign, 00000",
    validation_prompt=None,
    repo_folder=output_dir,
    use_dora=False,
)
train_flux(**card_params)

# Push the local output folder to the Hub, skipping files and folders that
# match the intermediate step_* / epoch_* patterns.
upload_params = dict(
    repo_id=repo_id,
    folder_path=output_dir,
    commit_message="End of training",
    ignore_patterns=["step_*", "epoch_*"],
)
upload_folder(**upload_params)

In [None]:
from IPython.display import display, Markdown

# Render a short Markdown message that links to the uploaded model's Hub page.
link_to_model = f"https://huggingface.co/{repo_id}"
message_template = "### Your model has finished training.\nAccess it here: {}"
finished_message = message_template.format(link_to_model)
display(Markdown(finished_message))