In [8]:
from transformers import CLIPProcessor, CLIPModel
import torch
from PIL import Image

# Check if GPU is available
device = "cuda" if torch.cuda.is_available() else "cpu"
model_name="openai/clip-vit-base-patch32"

# Load the model and processor and move the model to GPU
model = CLIPModel.from_pretrained(model_name).to(device)
processor = CLIPProcessor.from_pretrained(model_name)

# Load your image and text
image = Image.open("Untitled.jpg")  # Replace with your image path
text = "dog"  # Replace with the text description

# Process inputs and move to GPU
inputs = processor(text=[text], images=image, return_tensors="pt", padding=True)
inputs = {k: v.to(device) for k, v in inputs.items()}  # Move inputs to GPU

# Get embeddings
with torch.no_grad():
    image_features = model.get_image_features(inputs["pixel_values"])
    text_features = model.get_text_features(inputs["input_ids"])

# Normalize features
image_features = image_features / image_features.norm(p=2, dim=-1, keepdim=True)
text_features = text_features / text_features.norm(p=2, dim=-1, keepdim=True)

# Calculate cosine similarity
similarity = (image_features @ text_features.T).item()
print("Similarity:", similarity)


Similarity: 0.2682859003543854


In [4]:
!pip install transformers torch



In [9]:
from transformers import CLIPProcessor, CLIPModel
import torch
from PIL import Image

# Load the model and processor
model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

# Load your image
image = Image.open("Untitled.jpg")  # Replace with your image path

# Process the image for the model
inputs = processor(images=image, return_tensors="pt")

# Get image embeddings
with torch.no_grad():
    image_embeddings = model.get_image_features(inputs["pixel_values"])

# Normalize embeddings if required
normalized_image_embeddings = image_embeddings / image_embeddings.norm(p=2, dim=-1, keepdim=True)

# Print or use the image embeddings
print("Image Embeddings:", normalized_image_embeddings)


Image Embeddings: tensor([[ 3.1283e-02,  5.2002e-03,  3.3402e-03, -5.5785e-03,  8.5385e-03,
         -6.9483e-02,  2.1888e-02,  3.6923e-02, -2.6185e-02, -8.1141e-03,
         -3.3083e-02, -1.4145e-02,  3.5007e-02,  3.9772e-02,  5.5885e-02,
          1.5132e-02,  4.8115e-02, -1.4221e-02,  1.7044e-02, -1.1784e-02,
         -5.3425e-02,  3.4998e-03,  1.8184e-02, -4.4850e-02, -2.7819e-02,
         -4.2605e-03,  3.0013e-02,  4.0829e-03,  1.0472e-02, -1.5174e-02,
          1.5670e-02,  1.2465e-02, -2.8104e-02,  4.5190e-04,  8.1788e-03,
          4.4739e-02,  2.7242e-02,  2.4861e-02, -2.3063e-02,  5.6737e-02,
         -5.1856e-02,  2.2394e-02, -1.2018e-02, -4.7851e-03,  2.3800e-02,
          5.3726e-02, -2.5420e-02, -1.8690e-03,  1.6551e-02,  2.1403e-02,
          1.1667e-02,  3.2170e-02, -9.3857e-03, -3.0882e-02,  3.9441e-02,
          2.2059e-03, -5.5880e-03,  1.9732e-03, -1.8575e-02, -1.7405e-03,
          1.3367e-01,  1.9937e-02,  2.9486e-02,  2.1308e-02, -2.3493e-02,
         -2.2152e-02

In [13]:
image_embeddings

tensor([[ 3.5544e-01,  5.9084e-02,  3.7951e-02, -6.3382e-02,  9.7014e-02,
         -7.8946e-01,  2.4868e-01,  4.1952e-01, -2.9752e-01, -9.2192e-02,
         -3.7589e-01, -1.6071e-01,  3.9774e-01,  4.5189e-01,  6.3496e-01,
          1.7193e-01,  5.4668e-01, -1.6158e-01,  1.9366e-01, -1.3388e-01,
         -6.0702e-01,  3.9764e-02,  2.0660e-01, -5.0958e-01, -3.1608e-01,
         -4.8407e-02,  3.4101e-01,  4.6389e-02,  1.1899e-01, -1.7241e-01,
          1.7805e-01,  1.4163e-01, -3.1932e-01,  5.1344e-03,  9.2927e-02,
          5.0832e-01,  3.0952e-01,  2.8247e-01, -2.6204e-01,  6.4464e-01,
         -5.8918e-01,  2.5444e-01, -1.3655e-01, -5.4368e-02,  2.7041e-01,
          6.1043e-01, -2.8882e-01, -2.1235e-02,  1.8805e-01,  2.4318e-01,
          1.3256e-01,  3.6551e-01, -1.0664e-01, -3.5088e-01,  4.4812e-01,
          2.5063e-02, -6.3491e-02,  2.2419e-02, -2.1105e-01, -1.9775e-02,
          1.5188e+00,  2.2652e-01,  3.3502e-01,  2.4210e-01, -2.6692e-01,
         -2.5169e-01, -2.2722e-01,  5.

In [17]:
!pip install einops

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


Collecting einops
  Downloading einops-0.8.0-py3-none-any.whl.metadata (12 kB)
Downloading einops-0.8.0-py3-none-any.whl (43 kB)
Installing collected packages: einops
Successfully installed einops-0.8.0


In [18]:
from ip_adapter import IPAdapterXL

In [30]:
!pip install huggingface_hub

huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)




In [31]:
from transformers import HFModelHub
model = HFModelHub.from_pretrained("h94/IP-Adapter", subfolder="sdxl_models")
model.save_pretrained("path/to/IP-Adapter")


ImportError: cannot import name 'HFModelHub' from 'transformers' (/home/ptummal3/.conda/envs/clip/lib/python3.9/site-packages/transformers/__init__.py)

In [34]:

def ipadapter_text2image(self, text, image=None):
        self.text2img.load_ip_adapter(
            "h94/IP-Adapter", subfolder="sdxl_models", weight_name="ip-adapter_sdxl.bin"
        )
        self.text2img.to(self.device)
        image = self.text2img(
            prompt=text, ip_adapter_image=image, num_inference_steps=50
        ).images[0]
        self.text2img.to("cpu")
        self.text2img.unload_ip_adapter()

        return image

In [42]:
from utils import Utils
text="Laughing face of a girl"
utils = Utils(device="cuda")
image=utils.text2image(text)
image

Loading pipeline components...: 100%|██████████| 5/5 [00:00<00:00,  6.66it/s]
You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .
  0%|          | 0/50 [00:00<?, ?it/s]


TypeError: argument of type 'NoneType' is not iterable

In [15]:
from utils import Utils
from diffusers import DiffusionPipeline
utils_class = Utils()

text_prompt ="single dog, hat, red, polka dot, dog wearing hat"
output = utils_class.text2image(text_prompt)
output.show()

# text2img: DiffusionPipeline = (
#     DiffusionPipeline.from_pretrained(
#         "stabilityai/stable-diffusion-xl-base-1.0",
#         torch_dtype=torch.float16,
#         use_safetensors=True,
#         variant="fp16",
#         )
# )

# text2img.to("cuda")
# image = text2img(text_prompt, num_inference_steps=50).images[0]
# text2img.to("cpu")

# image.show()

Loading pipeline components...: 100%|██████████| 7/7 [00:00<00:00,  7.87it/s]
100%|██████████| 50/50 [00:04<00:00, 10.11it/s]
Pipelines loaded with `dtype=torch.float16` cannot run with `cpu` device. It is not recommended to move them to `cpu` as running them will fail. Please make sure to use an accelerator to run the pipeline in inference, due to the lack of support for`float16` operations on this device in PyTorch. Please, remove the `torch_dtype=torch.float16` argument, or use another device for inference.
Pipelines loaded with `dtype=torch.float16` cannot run with `cpu` device. It is not recommended to move them to `cpu` as running them will fail. Please make sure to use an accelerator to run the pipeline in inference, due to the lack of support for`float16` operations on this device in PyTorch. Please, remove the `torch_dtype=torch.float16` argument, or use another device for inference.
Pipelines loaded with `dtype=torch.float16` cannot run with `cpu` device. It is not recommende

In [5]:
from utils import Utils
from PIL import Image

# Initialize the Utils class, setting device to 'cuda' for GPU
utils = Utils(device="cuda")

# Load the image from file
image_path = "dog3.jpg"
image = Image.open(image_path)

# Define a sample text prompt
text_prompt = "cute red hat with white dots pattern"

# Generate an image using IP-Adapter with the text prompt
generated_image = utils.ipadapter_text2image(text=text_prompt,image=image)

# Display the generated image
generated_image.show()  # This opens the image in a default viewer

# Optionally, save the generated image
generated_image.save("ip_adapter_generated_image.png")


Loading pipeline components...: 100%|██████████| 7/7 [00:00<00:00,  7.93it/s]
100%|██████████| 100/100 [00:10<00:00,  9.65it/s]
Pipelines loaded with `dtype=torch.float16` cannot run with `cpu` device. It is not recommended to move them to `cpu` as running them will fail. Please make sure to use an accelerator to run the pipeline in inference, due to the lack of support for`float16` operations on this device in PyTorch. Please, remove the `torch_dtype=torch.float16` argument, or use another device for inference.
Pipelines loaded with `dtype=torch.float16` cannot run with `cpu` device. It is not recommended to move them to `cpu` as running them will fail. Please make sure to use an accelerator to run the pipeline in inference, due to the lack of support for`float16` operations on this device in PyTorch. Please, remove the `torch_dtype=torch.float16` argument, or use another device for inference.
Pipelines loaded with `dtype=torch.float16` cannot run with `cpu` device. It is not recommen