In [1]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from PIL import Image
import os
import json

In [2]:
import numpy as np
import random

# Single seed shared by every RNG this notebook touches, so the value only
# has to be changed in one place.
SEED = 42

torch.manual_seed(SEED)
np.random.seed(SEED)
random.seed(SEED)

# Request deterministic cuDNN kernels and disable the cuDNN auto-tuner
# (see the PyTorch reproducibility notes for the trade-offs).
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
# Load model directly
from transformers import AutoImageProcessor, AutoModel, AutoModelForImageClassification
from huggingface_hub import hf_hub_download

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Smoke test: run the pre-trained checkpoint on two synthetic images to check
# that the processor -> model -> logits pipeline works end to end.
model_name = "google/vit-base-patch16-224-in21k"
processor = AutoImageProcessor.from_pretrained(model_name)
# AutoModelForImageClassification (not the bare AutoModel) is needed because
# the code below reads `outputs.logits`.
# NOTE(review): confirm this checkpoint ships a trained classification head;
# if transformers warns about newly initialized classifier weights on load,
# the predicted indices below are not meaningful.
model = AutoModelForImageClassification.from_pretrained(model_name)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Set the model to evaluation mode
model.eval()

# Create simple dummy images at the processor's expected resolution.
# PIL takes the size as (width, height).
img_size = (processor.size["width"], processor.size["height"])
dummy_image_path_1 = "dummy_image_1.png"
dummy_image_path_2 = "dummy_image_2.png"
dummy_image_paths = (dummy_image_path_1, dummy_image_path_2)

try:
    dummy_image_1 = Image.new('RGB', img_size, color='black')
    dummy_image_1.save(dummy_image_path_1)

    dummy_image_2 = Image.new('RGB', img_size, color='white')
    dummy_image_2.save(dummy_image_path_2)

    # Load the dummy images back from disk, as real inputs would be loaded.
    images = []
    for dummy_path in dummy_image_paths:
        with Image.open(dummy_path) as dummy_img:
            # convert() returns an independent in-memory copy, so the file can
            # be closed (and deleted below) right away.
            images.append(dummy_img.convert("RGB"))
finally:
    # Clean up dummy image files even if saving or loading failed part-way.
    for dummy_path in dummy_image_paths:
        if os.path.exists(dummy_path):
            os.remove(dummy_path)

# Preprocess the dummy images
inputs = processor(images=images, return_tensors="pt")

# Move inputs to the device
pixel_values = inputs.pixel_values.to(device)

# Make predictions
with torch.no_grad():
    outputs = model(pixel_values=pixel_values)
    logits = outputs.logits

# Get the predicted class indices (one per input image)
predicted_class_idx = logits.argmax(-1).tolist()

# Print the predicted class indices
print("Predicted class indices :", predicted_class_idx)

# Note: To get human-readable class labels, you would need a mapping from
# the class indices to the actual class names. This mapping depends on the
# dataset the model was trained on or your custom dataset.

In [None]:
def load_images_from_folder(folder):
    """Load every image file found directly inside ``folder`` as an RGB image.

    Files are selected by extension (.png, .jpg, .jpeg, .bmp; case-insensitive)
    and visited in sorted filename order, so the returned list has a
    reproducible order (``os.listdir`` alone gives an arbitrary one).
    Sub-folders are not searched.

    Args:
        folder: Path of the directory to scan.

    Returns:
        A list of RGB images, one per file that was read successfully. Files
        that fail to load are reported with a printed message and skipped.

    Raises:
        OSError: If ``folder`` cannot be listed (e.g. it does not exist).
    """
    image_extensions = ('.png', '.jpg', '.jpeg', '.bmp')
    images = []
    for filename in sorted(os.listdir(folder)):
        if not filename.lower().endswith(image_extensions):
            continue
        img_path = os.path.join(folder, filename)
        try:
            # The context manager closes the file handle promptly; convert()
            # returns an independent copy that stays valid afterwards.
            with Image.open(img_path) as img:
                images.append(img.convert("RGB"))
        except Exception as e:
            # Best effort: report the unreadable file and keep going.
            print(f"Image load failed: {img_path} ({e})")
    return images

In [6]:
# Load and preprocess the real test images from disk
# NOTE(review): test_img_path is an absolute, machine-specific path; consider
# making it relative to a configurable data directory.
train_img_path = './data/train'
test_img_path = '/workspace/image-finetuners/testdata/'

images = load_images_from_folder(test_img_path)
if not images:
    # Fail here with a clear message instead of an obscure error from the
    # processor when it is handed an empty batch.
    raise FileNotFoundError(f"No loadable images found in {test_img_path}")
inputs = processor(images=images, return_tensors="pt")

# Move inputs to the device
pixel_values = inputs.pixel_values.to(device)
print(f'Img size for test image is {pixel_values.shape}')

# Make predictions (no gradients needed for inference)
with torch.no_grad():
    outputs = model(pixel_values=pixel_values)
    print(outputs)
    logits = outputs.logits

# Get the predicted class indices (one entry per loaded image)
predicted_class_idx = logits.argmax(-1).tolist()
print(f"Predicted index: {predicted_class_idx}")


Img size for test image is torch.Size([1, 3, 224, 224])
ImageClassifierOutput(loss=None, logits=tensor([[ 0.1262,  0.0477,  0.1916,  ..., -0.1811,  0.0176, -0.0642]],
       device='cuda:0'), hidden_states=None, attentions=None)
Predicted index: [3243]


In [7]:
# Download the index -> class-name mapping and translate the predictions.
repo_id = "huggingface/label-files"
filename = "imagenet-22k-id2label.json"
label_file_path = hf_hub_download(repo_id, filename, repo_type="dataset")
# Use a context manager so the file handle is closed deterministically.
with open(label_file_path, "r", encoding="utf-8") as label_file:
    id2label = json.load(label_file)
# JSON object keys are strings; convert them to ints to match the indices.
id2label = {int(k): v for k, v in id2label.items()}

# One label per predicted index; `label` stays the first prediction's label.
predicted_labels = [id2label[idx] for idx in predicted_class_idx]
label = predicted_labels[0]
for predicted_label in predicted_labels:
    print(f"Predicted label: {predicted_label}")

Predicted label: pony
