In [13]:
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image
import os

# Load the image captioning model and processor
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

# Directory containing images
image_dir = '/kaggle/input/cartoon-dataset-new/cartoon_dataset/'

# Get all image files in the directory
image_files = [f for f in os.listdir(image_dir) if os.path.isfile(os.path.join(image_dir, f))]

# Load and process images
for image_file in image_files:
    try:
        # Construct the full path to the image
        image_path = os.path.join(image_dir, image_file)
        image = Image.open(image_path)
        inputs = processor(images=image, return_tensors="pt")

        # Generate image caption
        out = model.generate(**inputs)
        caption = processor.decode(out[0], skip_special_tokens=True)

        print(image_file, "Generated caption:", caption)
    except Exception as e:
        print(f"Error processing {image_file}: {e}")


6015056.jpg Generated caption: a girl looking at her reflection in the mirror
6016425.jpg Generated caption: a woman in a black and red outfit
6016555.jpg Generated caption: a girl in a blue dress holding an ice cream cone
6014121.jpg Generated caption: a girl in a black outfit with a black bow
6014813.jpg Generated caption: a girl with long brown hair and a blue dress
6016836.jpg Generated caption: a girl with red hair and horns is posing in the air
6015916.jpg Generated caption: a girl with blue hair and a black hat holding a large ax
6015406.jpg Generated caption: a girl with long hair and a cat ears
6016340.jpg Generated caption: a girl with long hair and a white shirt, wearing a purple vest and black shorts
6016835.jpg Generated caption: a woman in a black and white outfit with a sword
6015377.jpg Generated caption: a girl in a school uniform is standing in a hallway
6014072.jpg Generated caption: a girl in a school uniform drinking a cup
6015058.jpg Generated caption: a cat with 

In [14]:
from transformers import BlipProcessor, BlipForConditionalGeneration
from PIL import Image

# Load the image captioning model and processor
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base")

def generate_image_caption(image_path: str) -> str:
    try:
        # Open the image
        image = Image.open(image_path)
        inputs = processor(images=image, return_tensors="pt")

        # Generate image caption with adjusted parameters
        out = model.generate(**inputs, max_length=150, num_beams=10, early_stopping=True, length_penalty=1.2)
        caption = processor.decode(out[0], skip_special_tokens=True)

        return caption
    except Exception as e:
        return f"Error processing {image_path}: {e}"

# Example usage
image_path = '/kaggle/input/cartoon-dataset-new/cartoon_dataset/6015545.jpg'
caption = generate_image_caption(image_path)
print("Generated caption:", caption)

Generated caption: an anime girl with blue hair and a hat


Calling the DeepDanBooru model:  
Model Source:https://huggingface.co/skytnt/deepdanbooru_onnx

In [15]:
! pip install onnxruntime

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [16]:
import cv2
import numpy as np
import onnxruntime as rt
from huggingface_hub import hf_hub_download

# Download the ONNX model
tagger_model_path = "/kaggle/input/deepdanbooru/deepdanbooru.onnx"

# Create an ONNX Runtime session
tagger_model = rt.InferenceSession(tagger_model_path, providers=['CUDAExecutionProvider', 'CPUExecutionProvider'])
tagger_model_meta = tagger_model.get_modelmeta().custom_metadata_map
tagger_tags = eval(tagger_model_meta['tags'])

def tagger_predict(image, score_threshold):
    s = 512
    h, w = image.shape[:-1]
    h, w = (s, int(s * w / h)) if h > w else (int(s * h / w), s)
    ph, pw = s - h, s - w
    image = cv2.resize(image, (w, h), interpolation=cv2.INTER_AREA)
    image = cv2.copyMakeBorder(image, ph // 2, ph - ph // 2, pw // 2, pw - pw // 2, cv2.BORDER_REPLICATE)
    image = image.astype(np.float32) / 255
    image = image[np.newaxis, :]  # Adding batch dimension
    probs = tagger_model.run(None, {"input_1": image})[0][0]
    probs = probs.astype(np.float32)
    res = ""
    for prob, label in zip(probs.tolist(), tagger_tags):
        if prob < score_threshold:
            continue
        res = res + ',' + label
    return res

# Read and process the image
img = cv2.imread('/kaggle/input/cartoon-dataset-new/cartoon_dataset/6015545.jpg')
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Perform prediction and print results
tags = tagger_predict(img, 0.1)
print(tags)


,1boy,1girl,alternate_costume,animal_ears,animal_hat,aran_sweater,bangs,bare_shoulders,beige_sweater,beret,black_bow,black_headwear,black_legwear,blue_bow,blue_eyes,blue_hair,blush,bow,bowtie,brown_headwear,brown_legwear,brown_sweater,cardigan,closed_mouth,collared_shirt,cowboy_shot,eyebrows_visible_through_hair,fake_animal_ears,garter_straps,hair_between_eyes,hair_bow,hair_ornament,hair_scrunchie,hat,long_hair,long_sleeves,looking_at_viewer,low_ponytail,low_twintails,male_focus,miniskirt,off_shoulder,otoko_no_ko,plaid,plaid_skirt,pleated_skirt,pom_pom_(clothes),ponytail,red_bow,red_bowtie,red_ribbon,red_skirt,ribbed_sweater,ribbon,scrunchie,shirt,sidelocks,simple_background,skirt,sleeves_past_fingers,sleeves_past_wrists,solo,star_(symbol),star_in_eye,sweater,thighhighs,thighs,twintails,very_long_hair,virtual_youtuber,white_background,white_shirt,white_sweater,zettai_ryouiki,hoshimachi_suisei,rating:safe


Call the deepdanbooru-onnx library:  
Reference: https://github.com/chinoll/deepdanbooru_onnx

In [17]:
! pip install deepdanbooru-onnx

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [18]:
from deepdanbooru_onnx import DeepDanbooru
from PIL import Image

# Initialize the DeepDanbooru model
danbooru = DeepDanbooru(threshold=0.1)

# Load the image
img = Image.open('/kaggle/input/sd-pic1/2.jpg')

# Process the image and get the tags with their scores
results = danbooru(img)
print(results)


{'1girl': 0.99454045, '3d': 0.14912981, 'artist_name': 0.18400699, 'bag': 0.16623747, 'bare_shoulders': 0.5645459, 'beach': 0.7505821, 'belt': 0.1776819, 'black_eyes': 0.23530883, 'black_hair': 0.3795284, 'blouse': 0.10615876, 'blue_eyes': 0.14631477, 'blue_sky': 0.1007711, 'blurry': 0.9993719, 'blurry_background': 0.9957928, 'blurry_foreground': 0.5013927, 'bokeh': 0.36695987, 'bra_strap': 0.5022479, 'breasts': 0.37781027, 'brown_eyes': 0.20224959, 'brown_hair': 0.41634795, 'buttons': 0.5361794, 'closed_mouth': 0.13623005, 'cloudy_sky': 0.103054136, 'collarbone': 0.26827747, 'contrapposto': 0.12131804, 'cosplay_photo': 0.12530622, 'cowboy_shot': 0.41487285, 'crop_top': 0.43415436, 'dappled_sunlight': 0.10887304, 'day': 0.5453065, 'denim': 0.9373488, 'denim_shorts': 0.728266, 'denim_skirt': 0.30981198, 'depth_of_field': 0.99547046, 'earrings': 0.9028425, 'eyelashes': 0.13505292, 'forehead': 0.31342736, 'grass': 0.10821122, 'handbag': 0.12803611, 'hoop_earrings': 0.14707333, 'jewelry': 

调用原作者模型：https://github.com/KichangKim/DeepDanbooru/releases/tag/v3-20211112-sgd-e28

参考：https://colab.research.google.com/github/Skylion007/StyleGAN-notebooks/blob/main/StyleGAN_of_Anime_Sliders_by_Skyli0n.ipynb#scrollTo=H7T7nNrOmvXu

In [19]:
import numpy as np
import tensorflow as tf
import cv2
import json

PATH = '/kaggle/input/deepdanbooru-model'
# Load the pre-trained model
modelDeepdanbooru = tf.keras.models.load_model(PATH + '/model-resnet_custom_v3.h5', compile=True)


# Function to load tags from a text file
def load_tags(tags_path):
    with open(tags_path, 'r') as tags_stream:
        tags = [tag.strip() for tag in tags_stream if tag.strip()]
    return tags

# Load tags into a numpy array
tags = np.asarray(load_tags(PATH + '/tags.txt'))

# Image preprocessing size
DD_INPUT_SIZE = 512

# Preprocess image function
def preprocess_image(image_path):
    # Read and resize image
    image = cv2.imread(image_path)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (DD_INPUT_SIZE, DD_INPUT_SIZE), interpolation=cv2.INTER_AREA)

    # Normalize the image
    image = image.astype(np.float32) / 255.0

    # Expand dimensions to match model input shape
    image = np.expand_dims(image, 0)
    return image


# Predict tags for a single image
def predict_tags(image_path, model, tags, threshold=0.5):
    # Preprocess the image
    image = preprocess_image(image_path)

    # Predict
    prediction = model.predict(image)[0]

    # Filter tags based on threshold
    predicted_tags = [tag for tag, prob in zip(tags, prediction) if prob > threshold]
    tags_string = ', '.join(predicted_tags)  # Convert list of tags to a comma-separated string

    return tags_string


# Example usage
image_path = '/kaggle/input/cartoon-dataset-new/cartoon_dataset/6015545.jpg' # Path to your image file
predicted_tags = predict_tags(image_path, modelDeepdanbooru, tags, 0.1)

# Print or save the predicted tags
print(predicted_tags)

# Optionally, you can save the predictions to a JSON file
# with open('/kaggle/working/predictions.json', 'w') as f:
#     json.dump({"file_name": image_path, "tags": predicted_tags}, f, ensure_ascii=False, indent=4)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 6s/step
1boy, 1girl, alternate_costume, animal_ears, aran_sweater, bangs, beret, black_bow, black_headwear, black_legwear, blue_bow, blue_eyes, blue_hair, blush, bow, bowtie, brown_legwear, brown_sweater, cardigan, closed_mouth, collared_shirt, dress, eyebrows_visible_through_hair, garter_belt, garter_straps, hair_between_eyes, hair_bow, hair_ornament, hair_ribbon, hat, long_hair, long_sleeves, looking_at_viewer, low-tied_long_hair, low_twintails, male_focus, off_shoulder, otoko_no_ko, plaid, plaid_dress, plaid_headwear, plaid_skirt, pleated_skirt, red_bow, red_bowtie, red_ribbon, red_skirt, ribbon, shirt, sidelocks, simple_background, sitting, skirt, sleeves_past_fingers, sleeves_past_wrists, solo, star_(symbol), star_hair_ornament, star_in_eye, star_print, sweater, thighhighs, thighs, twintails, very_long_hair, virtual_youtuber, white_background, white_shirt, hoshimachi_suisei, rating:safe


In [20]:
def analyze_image(image_path: str) -> dict:
    # 示例使用
    caption = generate_image_caption(image_path)
    predicted_tags = predict_tags(image_path, modelDeepdanbooru, tags, 0.1)
    return {
        "tags": predicted_tags,
        "caption": caption
    }

# Example usage
image_path = '/kaggle/input/cartoon-dataset-new/cartoon_dataset/6015545.jpg'  # Path to your image file
result = analyze_image(image_path)

# Print or save the result
print("Blip:", result["tags"])
print("Deepdanbooru:", result["caption"])

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 835ms/step
Blip: 1boy, 1girl, alternate_costume, animal_ears, aran_sweater, bangs, beret, black_bow, black_headwear, black_legwear, blue_bow, blue_eyes, blue_hair, blush, bow, bowtie, brown_legwear, brown_sweater, cardigan, closed_mouth, collared_shirt, dress, eyebrows_visible_through_hair, garter_belt, garter_straps, hair_between_eyes, hair_bow, hair_ornament, hair_ribbon, hat, long_hair, long_sleeves, looking_at_viewer, low-tied_long_hair, low_twintails, male_focus, off_shoulder, otoko_no_ko, plaid, plaid_dress, plaid_headwear, plaid_skirt, pleated_skirt, red_bow, red_bowtie, red_ribbon, red_skirt, ribbon, shirt, sidelocks, simple_background, sitting, skirt, sleeves_past_fingers, sleeves_past_wrists, solo, star_(symbol), star_hair_ornament, star_in_eye, star_print, sweater, thighhighs, thighs, twintails, very_long_hair, virtual_youtuber, white_background, white_shirt, hoshimachi_suisei, rating:safe
Deepdanbooru: an anime g