In [4]:
import os
import pandas as pd
import numpy as np
import cv2
import time
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.models import Model

# Define paths
base_dir = '/Users/namigabbasov/Desktop/Projects-Fall-2024/RetinoVision/aptos2019'
train_images_dir = os.path.join(base_dir, 'train_images')
train_csv_path = os.path.join(base_dir, 'train.csv')

# Load train data
train_df = pd.read_csv(train_csv_path)

# Define image size and batch size
img_size = (224, 224)  # Adjust as needed
batch_size = 32

# Prepare paths for images and labels
train_image_paths = [os.path.join(train_images_dir, img_id + '.png') for img_id in train_df['id_code']]
train_labels = train_df['diagnosis'].values

print(f'Number of image paths: {len(train_image_paths)}')
print(f'Number of labels: {len(train_labels)}')

# Load and preprocess images
images = []
valid_image_paths = []
valid_labels = []

for img_path in train_image_paths:
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)  # Load in color mode
    if img is None:
        print(f'Warning: Unable to load image at {img_path}')
        continue
    img = cv2.resize(img, img_size)
    images.append(img)
    valid_image_paths.append(img_path)

train_images = np.array(images)

print(f'Number of loaded images: {len(train_images)}')

# Ensure the number of images and labels match
for img_path in valid_image_paths:
    img_id = os.path.basename(img_path).replace('.png', '')
    label = train_df[train_df['id_code'] == img_id]['diagnosis'].values
    if len(label) > 0:
        valid_labels.append(label[0])

valid_labels = np.array(valid_labels)
print(f'Number of valid labels: {len(valid_labels)}')

if len(train_images) != len(valid_labels):
    raise ValueError("Number of images and labels do not match.")

train_labels = to_categorical(valid_labels, num_classes=5)  # Assuming 5 classes

# Split data into training and validation sets
if len(train_images) == 0:
    raise ValueError("No images loaded. Please check the image loading process.")

X_train, X_val, y_train, y_val = train_test_split(train_images, train_labels, test_size=0.2, random_state=42)

# Define the Vision Transformer model
# We will use a pre-trained ResNet50 model as a feature extractor and add a transformer block on top

def build_vit_model(input_shape, num_classes):
    base_model = ResNet50(weights='imagenet', include_top=False, input_shape=input_shape)
    x = base_model.output
    x = tf.keras.layers.GlobalAveragePooling2D()(x)
    x = tf.keras.layers.Dense(1024, activation='relu')(x)
    x = tf.keras.layers.Dropout(0.5)(x)
    x = tf.keras.layers.Dense(512, activation='relu')(x)
    x = tf.keras.layers.Dropout(0.5)(x)
    outputs = tf.keras.layers.Dense(num_classes, activation='softmax')(x)

    model = tf.keras.models.Model(inputs=base_model.input, outputs=outputs)
    
    return model

input_shape = (224, 224, 3)  # Adjust according to your image size
num_classes = 5  # Adjust according to your number of classes
model = build_vit_model(input_shape, num_classes)

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Data augmentation
datagen = ImageDataGenerator(
    rescale=1./255,
    horizontal_flip=True,
    rotation_range=20,
    zoom_range=0.2
)

train_generator = datagen.flow(X_train, y_train, batch_size=batch_size)
validation_generator = datagen.flow(X_val, y_val, batch_size=batch_size)

# Start timing
start_time = time.time()

# Train the model
history = model.fit(
    train_generator,
    validation_data=validation_generator,
    epochs=10,
    steps_per_epoch=len(X_train) // batch_size,
    validation_steps=len(X_val) // batch_size
)

# End timing
end_time = time.time()
training_time = end_time - start_time

print(f'Training Time: {training_time:.2f} seconds')

# Evaluate the model
val_loss, val_accuracy = model.evaluate(validation_generator)
print(f'Validation Loss: {val_loss}')
print(f'Validation Accuracy: {val_accuracy}')

Number of image paths: 3662
Number of labels: 3662
Number of loaded images: 3662
Number of valid labels: 3662
Epoch 1/10


  self._warn_if_super_not_called()


[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2265s[0m 18s/step - accuracy: 0.5707 - loss: 1.4279 - val_accuracy: 0.2884 - val_loss: 1.5766
Epoch 2/10
[1m 1/91[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m2:40[0m 2s/step - accuracy: 0.7188 - loss: 0.8750

2024-08-16 19:12:12.388670: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 16517436341915555996
2024-08-16 19:12:12.388715: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 1023710590009942896
2024-08-16 19:12:12.388729: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 3617137819146429952
2024-08-16 19:12:12.388770: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 1928235218619671845
2024-08-16 19:12:12.388778: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 15273211693760563067
2024-08-16 19:12:12.388781: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 4718153843718256752
2024-08-16 19:12:12.388791: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv i

[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 10ms/step - accuracy: 0.7188 - loss: 0.8750 - val_accuracy: 0.3448 - val_loss: 1.9252
Epoch 3/10


2024-08-16 19:12:13.231847: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 12469563592596787809
2024-08-16 19:12:13.231866: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 16130566948409180849


[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m98s[0m 1s/step - accuracy: 0.7080 - loss: 0.8784 - val_accuracy: 0.4872 - val_loss: 7.8636
Epoch 4/10
[1m 1/91[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m58s[0m 653ms/step - accuracy: 0.6875 - loss: 0.9117

2024-08-16 19:13:51.718330: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 10178216074200159868
2024-08-16 19:13:51.718382: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 12283044994036596881
2024-08-16 19:13:51.718404: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 1928235218619671845
2024-08-16 19:13:51.718410: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 16732315370501539575
2024-08-16 19:13:51.718415: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 15273211693760563067
2024-08-16 19:13:51.718419: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 6429083829193672831
2024-08-16 19:13:51.718431: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv

[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step - accuracy: 0.6875 - loss: 0.9117 - val_accuracy: 0.2759 - val_loss: 11.2810
Epoch 5/10


2024-08-16 19:13:52.075496: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 12469563592596787809


[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5648s[0m 63s/step - accuracy: 0.7283 - loss: 0.8160 - val_accuracy: 0.4787 - val_loss: 1.3922
Epoch 6/10
[1m 1/91[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m59s[0m 660ms/step - accuracy: 0.6250 - loss: 0.9612

2024-08-16 20:48:01.152565: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 4817506564306370826
2024-08-16 20:48:01.152719: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 13487111161854558962
2024-08-16 20:48:01.152735: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 4158857771934157780
2024-08-16 20:48:01.152738: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 6292479197216309637
2024-08-16 20:48:01.152776: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 4075706177225360109
2024-08-16 20:48:01.152781: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 18103955758421609784
2024-08-16 20:48:01.152787: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv i

[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.6250 - loss: 0.9612 - val_accuracy: 0.4828 - val_loss: 1.3618
Epoch 7/10


2024-08-16 20:48:01.467262: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 12469563592596787809
2024-08-16 20:48:01.467298: I tensorflow/core/framework/local_rendezvous.cc:404] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
	 [[{{node IteratorGetNext}}]]
	 [[ArgMax_1/_14]]


[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10989s[0m 122s/step - accuracy: 0.7349 - loss: 0.7453 - val_accuracy: 0.4773 - val_loss: 1.3719
Epoch 8/10
[1m 1/91[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m10:14:01[0m 409s/step - accuracy: 0.7812 - loss: 0.6789

2024-08-16 23:58:00.364742: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 10814109855687033432
2024-08-16 23:58:00.364939: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 1023710590009942896
2024-08-16 23:58:00.364972: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 10178216074200159868
2024-08-16 23:58:00.365052: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 17041075530294405106
2024-08-16 23:58:00.365071: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 17796631074175829027
2024-08-16 23:58:00.365099: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 1928235218619671845
2024-08-16 23:58:00.365106: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv

[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m410s[0m 5ms/step - accuracy: 0.7812 - loss: 0.6789 - val_accuracy: 0.5172 - val_loss: 1.3460
Epoch 9/10


2024-08-16 23:58:00.724347: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 12469563592596787809


[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17934s[0m 199s/step - accuracy: 0.7306 - loss: 0.7339 - val_accuracy: 0.4801 - val_loss: 1.3766
Epoch 10/10
[1m 1/91[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m78:45:14[0m 3150s/step - accuracy: 0.9062 - loss: 0.4663

2024-08-17 05:49:24.687582: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 1551085133562822958
2024-08-17 05:49:24.687631: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 2438844615682679210
2024-08-17 05:49:24.687648: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 4817506564306370826
2024-08-17 05:49:24.687669: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 1232448977213116294
2024-08-17 05:49:24.687678: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 12366993295070871743
2024-08-17 05:49:24.687685: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 2671951878610379053
2024-08-17 05:49:24.687698: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv it

[1m91/91[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3151s[0m 4ms/step - accuracy: 0.9062 - loss: 0.4663 - val_accuracy: 0.4483 - val_loss: 1.3436


2024-08-17 05:49:24.994786: I tensorflow/core/framework/local_rendezvous.cc:423] Local rendezvous recv item cancelled. Key hash: 12469563592596787809


Training Time: 40499.98 seconds
[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 203ms/step - accuracy: 0.4957 - loss: 1.3394
Validation Loss: 1.3765743970870972
Validation Accuracy: 0.4788540303707123


In [3]:
import os
import pandas as pd
import numpy as np
import cv2
import time
import tensorflow as tf
from transformers import ViTFeatureExtractor, ViTForImageClassification, TrainingArguments, Trainer
from sklearn.model_selection import train_test_split
from tensorflow.keras.utils import to_categorical

# Define paths
base_dir = '/Users/namigabbasov/Desktop/Projects-Fall-2024/RetinoVision/aptos2019'
train_images_dir = os.path.join(base_dir, 'train_images')
train_csv_path = os.path.join(base_dir, 'train.csv')

# Load train data
train_df = pd.read_csv(train_csv_path)

# Define image size
img_size = (224, 224)  # ViT typically uses 224x224 images
batch_size = 32

# Prepare paths for images and labels
train_image_paths = [os.path.join(train_images_dir, img_id + '.png') for img_id in train_df['id_code']]
train_labels = train_df['diagnosis'].values

print(f'Number of image paths: {len(train_image_paths)}')
print(f'Number of labels: {len(train_labels)}')

# Load and preprocess images
images = []
labels = []

for img_path in train_image_paths:
    img = cv2.imread(img_path, cv2.IMREAD_COLOR)  # Load in color mode
    if img is None:
        print(f'Warning: Unable to load image at {img_path}')
        continue
    img = cv2.resize(img, img_size)
    images.append(img)
    img_id = os.path.basename(img_path).replace('.png', '')
    label = train_df[train_df['id_code'] == img_id]['diagnosis'].values
    if len(label) > 0:
        labels.append(label[0])

train_images = np.array(images)
train_labels = np.array(labels)

print(f'Number of loaded images: {len(train_images)}')
print(f'Number of valid labels: {len(train_labels)}')

if len(train_images) != len(train_labels):
    raise ValueError("Number of images and labels do not match.")

train_labels = to_categorical(train_labels, num_classes=5)  # Assuming 5 classes

# Split data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(train_images, train_labels, test_size=0.2, random_state=42)

# Load pre-trained ViT model and feature extractor
feature_extractor = ViTFeatureExtractor(size=img_size[0], rescale=True)
model = ViTForImageClassification.from_pretrained("google/vit-base-patch16-224-in21k", num_labels=5)

# Preprocess data
def preprocess_data(images, labels):
    encodings = feature_extractor(images=images, return_tensors="tf")
    return encodings, labels

train_encodings, train_labels = preprocess_data(X_train, y_train)
val_encodings, val_labels = preprocess_data(X_val, y_val)

# Create TensorFlow Dataset
def create_tf_dataset(encodings, labels, batch_size):
    dataset = tf.data.Dataset.from_tensor_slices((dict(encodings), labels))
    dataset = dataset.batch(batch_size)
    return dataset

train_dataset = create_tf_dataset(train_encodings, train_labels, batch_size)
val_dataset = create_tf_dataset(val_encodings, val_labels, batch_size)

# Define training arguments
training_args = TrainingArguments(
    per_device_train_batch_size=batch_size,
    per_device_eval_batch_size=batch_size,
    num_train_epochs=3,
    evaluation_strategy="epoch",
    logging_dir='./logs',
)

# Initialize Trainer
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=train_dataset,
    eval_dataset=val_dataset,
)

# Start timing
start_time = time.time()

# Train the model
trainer.train()

# End timing
end_time = time.time()
training_time = end_time - start_time

print(f'Training Time: {training_time:.2f} seconds')

# Evaluate the model
eval_results = trainer.evaluate()
print(f'Validation Loss: {eval_results["eval_loss"]}')
print(f'Validation Accuracy: {eval_results["eval_accuracy"]}')


Number of image paths: 3662
Number of labels: 3662
Number of loaded images: 3662
Number of valid labels: 3662


Some weights of ViTForImageClassification were not initialized from the model checkpoint at google/vit-base-patch16-224-in21k and are newly initialized: ['classifier.bias', 'classifier.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


TypeError: 'bool' object is not callable