In [7]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from transformers import ViTFeatureExtractor, TFViTForImageClassification

# Define constants
NUM_CLASSES = 4  # Number of classes (correct, partially correct, incorrect, none)
IMG_WIDTH, IMG_HEIGHT = 150, 150  # Input image dimensions
BATCH_SIZE = 32
EPOCHS = 15

# Load and preprocess your dataset here

# Load ViT Feature Extractor
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')

# Load the pre-trained Vision Transformer model
base_model = TFViTForImageClassification.from_pretrained('google/vit-base-patch16-224')

# Add custom classification head
inputs = tf.keras.Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3))
input_ids = feature_extractor(images=inputs)['input_ids']
outputs = base_model(input_ids)['logits']
outputs = Flatten()(outputs)
outputs = Dense(1024, activation='relu')(outputs)
outputs = Dense(512, activation='relu')(outputs)
outputs = Dense(256, activation='relu')(outputs)
outputs = Dense(128, activation='relu')(outputs)
outputs = Dense(NUM_CLASSES, activation='softmax')(outputs)

model = Model(inputs=inputs, outputs=outputs)

# Compile the model
model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

# Create data generators for training and validation
TRAINING_DIR = r"C:/Users/Lenovo/Desktop/koi bhi/train/"
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

train_generator = train_datagen.flow_from_directory(
    TRAINING_DIR,
    target_size=(IMG_WIDTH, IMG_HEIGHT),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

VALIDATION_DIR = r"C:/Users/Lenovo/Desktop/koi bhi/test/"
validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1.0 / 255)

validation_generator = validation_datagen.flow_from_directory(
    VALIDATION_DIR,
    target_size=(IMG_WIDTH, IMG_HEIGHT),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# Train the model
model.fit(
    train_generator,
    steps_per_epoch=train_generator.n // BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=validation_generator,
    validation_steps=validation_generator.n // BATCH_SIZE
)

# Save the model for later use
model.save('vision_transformer_image_classifier.h5')


All model checkpoint layers were used when initializing TFViTForImageClassification.

All the layers of TFViTForImageClassification were initialized from the model checkpoint at google/vit-base-patch16-224.
If your task is similar to the task the model of the checkpoint was trained on, you can already use TFViTForImageClassification for predictions without further training.


ValueError: Invalid image type. Expected either PIL.Image.Image, numpy.ndarray, torch.Tensor, tf.Tensor or jax.ndarray, but got <class 'keras.engine.keras_tensor.KerasTensor'>.

In [5]:
pip install pillow numpy


Note: you may need to restart the kernel to use updated packages.



[notice] A new release of pip is available: 23.1.2 -> 23.2.1
[notice] To update, run: python.exe -m pip install --upgrade pip


In [9]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from transformers import ViTFeatureExtractor, TFViTForImageClassification
from PIL import Image
import numpy as np

# Define constants
NUM_CLASSES = 4  # Number of classes (correct, partially correct, incorrect, none)
IMG_WIDTH, IMG_HEIGHT = 150, 150  # Input image dimensions
BATCH_SIZE = 32
EPOCHS = 15

# Load and preprocess your dataset here
# Replace the following with your image loading and preprocessing code
# Example: Load a single image and preprocess it
image_path = "path/to/your/image.jpg"
image = Image.open(image_path).resize((IMG_WIDTH, IMG_HEIGHT))
image = np.array(image) / 255.0
image = np.expand_dims(image, axis=0)

# Load ViT Feature Extractor
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')

# Load the pre-trained Vision Transformer model
base_model = TFViTForImageClassification.from_pretrained('google/vit-base-patch16-224')

# Add custom classification head
inputs = tf.keras.Input(shape=(IMG_WIDTH, IMG_HEIGHT, 3))
input_ids = feature_extractor(images=inputs)['pixel_values']
outputs = base_model(input_ids)['logits']
outputs = Flatten()(outputs)
outputs = Dense(1024, activation='relu')(outputs)
outputs = Dense(512, activation='relu')(outputs)
outputs = Dense(256, activation='relu')(outputs)
outputs = Dense(128, activation='relu')(outputs)
outputs = Dense(NUM_CLASSES, activation='softmax')(outputs)

model = Model(inputs=inputs, outputs=outputs)

# Compile the model
model.compile(optimizer=Adam(learning_rate=1e-4), loss='categorical_crossentropy', metrics=['accuracy'])

# Create data generators for training and validation
TRAINING_DIR = r"C:/Users/Lenovo/Desktop/koi bhi/train/"
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1.0 / 255,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest'
)

train_generator = train_datagen.flow_from_directory(
    TRAINING_DIR,
    target_size=(IMG_WIDTH, IMG_HEIGHT),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

VALIDATION_DIR = r"C:/Users/Lenovo/Desktop/koi bhi/test/"
validation_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale=1.0 / 255)

validation_generator = validation_datagen.flow_from_directory(
    VALIDATION_DIR,
    target_size=(IMG_WIDTH, IMG_HEIGHT),
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# Train the model
model.fit(
    train_generator,
    steps_per_epoch=train_generator.n // BATCH_SIZE,
    epochs=EPOCHS,
    validation_data=validation_generator,
    validation_steps=validation_generator.n // BATCH_SIZE
)

# Save the model for later use
model.save('vision_transformer_image_classifier.h5')


FileNotFoundError: [Errno 2] No such file or directory: 'path/to/your/image.jpg'