In [1]:
import os
# Set in the first cell so it is in the environment before the heavy imports
# of the next cell run.
# NOTE(review): this flag is the usual workaround for the "duplicate OpenMP
# runtime" abort when several native libraries are loaded into one process;
# presumably needed because torch and tensorflow are imported together — confirm.
os.environ['KMP_DUPLICATE_LIB_OK'] = 'True'

In [2]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import LabelEncoder
import torch
import tensorflow as tf
from PIL import Image
import pandas as pd
from tensorflow.keras import layers
from torch import nn
from tensorflow.keras.models import Sequential
from transformers import DeiTFeatureExtractor, DeiTModel
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import ParameterGrid
from tensorflow.keras import regularizers

In [3]:
# Label files are read without a header row and flattened to 1-D arrays.
train_labels_path = r"C:\Users\User\Downloads\Project Task 1\train_labels.txt"
val_labels_path = r"C:\Users\User\Downloads\Project Task 1\val_labels.txt"
train_labels = pd.read_csv(train_labels_path, header=None).values.flatten()
val_labels = pd.read_csv(val_labels_path, header=None).values.flatten()

image_folder_train = r"C:\Users\User\Downloads\Project Task 1\train_data"
image_folder_val = r"C:\Users\User\Downloads\Project Task 1\val_data"
# os.listdir returns entries in arbitrary order, so sort the listing to make
# the image order deterministic across machines and runs.
# NOTE(review): this assumes the rows of the label files follow sorted
# filename order — confirm against how the label files were produced.
train_image_paths = [os.path.join(image_folder_train, img) for img in sorted(os.listdir(image_folder_train))]
val_image_paths = [os.path.join(image_folder_val, img) for img in sorted(os.listdir(image_folder_val))]

# Fail fast on a count mismatch (e.g. a stray non-image file in the folder)
# instead of discovering it only after the slow feature-extraction step.
if len(train_image_paths) != len(train_labels):
    raise ValueError(
        f"train split: {len(train_image_paths)} files but {len(train_labels)} labels"
    )
if len(val_image_paths) != len(val_labels):
    raise ValueError(
        f"val split: {len(val_image_paths)} files but {len(val_labels)} labels"
    )

In [4]:
# The preprocessing object and the backbone are loaded from the same checkpoint,
# so the identifier is spelled out once.
DEIT_CHECKPOINT = "facebook/deit-base-distilled-patch16-224"
feature_extractor = DeiTFeatureExtractor.from_pretrained(DEIT_CHECKPOINT)
deit_model = DeiTModel.from_pretrained(DEIT_CHECKPOINT)

Some weights of DeiTModel were not initialized from the model checkpoint at facebook/deit-base-distilled-patch16-224 and are newly initialized: ['deit.pooler.dense.bias', 'deit.pooler.dense.weight']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [5]:
# Sanity check: show the smallest and largest raw label of each split.
train_lo, train_hi = train_labels.min(), train_labels.max()
val_lo, val_hi = val_labels.min(), val_labels.max()
print("Train labels range:", train_lo, "-", train_hi)
print("Validation labels range:", val_lo, "-", val_hi)


Train labels range: 1 - 60
Validation labels range: 1 - 60


In [6]:
# Number of distinct labels present in the training split; used as the width
# of the classifier's output layer.
num_classes = np.unique(train_labels).size
num_classes

60

In [7]:
# Fit the label mapping on the training labels only, then apply that same
# mapping to both splits. The encoded arrays replace the raw label arrays.
encoder = LabelEncoder().fit(train_labels)
train_labels = encoder.transform(train_labels)
val_labels = encoder.transform(val_labels)

In [8]:
def load_and_preprocess_images(image_paths):
    """Extract one DeiT feature vector per image.

    Each image is opened, converted to RGB, preprocessed with the module-level
    ``feature_extractor`` and passed through ``deit_model`` without gradient
    tracking. The hidden state at token position 0 is kept as the image's
    feature vector.

    Args:
        image_paths: Sequence of image file paths.

    Returns:
        np.ndarray with one row per input path. For empty input the result
        has shape ``(0, hidden_size)`` so that ``.shape[1]`` stays valid.
    """
    features_per_image = []
    for path in image_paths:
        # The context manager closes the underlying file deterministically;
        # convert() returns a new in-memory image that outlives the `with`.
        with Image.open(path) as raw_img:
            img = raw_img.convert("RGB")
        inputs = feature_extractor(images=img, return_tensors="pt")

        with torch.no_grad():
            outputs = deit_model(**inputs)
            token0_features = outputs.last_hidden_state[:, 0, :].numpy()
        # Drop the batch axis (one image per forward pass).
        features_per_image.append(token0_features.squeeze(axis=0))

    if not features_per_image:
        # np.array([]) would be 1-D; keep the result 2-D for callers.
        return np.empty((0, deit_model.config.hidden_size), dtype=np.float32)
    return np.array(features_per_image)

In [9]:
# Run the feature extractor over both splits, training split first.
train_features, val_features = (
    load_and_preprocess_images(split_paths)
    for split_paths in (train_image_paths, val_image_paths)
)

In [10]:
# Candidate values for each hyper-parameter of the search space.
learning_rates = [1e-5, 5e-5, 1e-4, 5e-4]
batch_sizes = [16, 32, 64]
l2_values = [1e-4, 1e-3, 5e-3]
# Plural like its siblings, so this list no longer shares a name with the
# scalar `dropout_rate` chosen in a later cell.
dropout_rates = [0.3, 0.4, 0.5, 0.6]
patience_values = [15, 20, 25]

# Keys are the singular hyper-parameter names; values are the candidate lists.
param_grid = {
    'learning_rate': learning_rates,
    'batch_size': batch_sizes,
    'l2_value': l2_values,
    'dropout_rate': dropout_rates,
    'patience': patience_values
}
# Iterable over every combination of the candidate values.
grid = ParameterGrid(param_grid)
param_grid

{'learning_rate': [1e-05, 5e-05, 0.0001, 0.0005],
 'batch_size': [16, 32, 64],
 'l2_value': [0.0001, 0.001, 0.005],
 'dropout_rate': [0.3, 0.4, 0.5, 0.6],
 'patience': [15, 20, 25]}

In [11]:
# Hyper-parameter values used for the training run below.
learning_rate, batch_size = 5e-5, 32    # optimizer step size, samples per batch
l2_value, dropout_rate = 1e-4, 0.3      # L2 penalty factor, base dropout rate
patience = 25                           # early-stopping patience in epochs

In [None]:
# Length of one feature vector; it fixes the input size of the network.
feature_dim = train_features.shape[1]


def _dense_block(units, drop_rate):
    """Return a [Dense(relu, L2), BatchNormalization, Dropout] stage as a list."""
    return [
        layers.Dense(units, activation='relu', kernel_regularizer=regularizers.l2(l2_value)),
        layers.BatchNormalization(),
        layers.Dropout(drop_rate),
    ]


model = Sequential([
    layers.Input(shape=(feature_dim,)),
    # Add a trailing channel axis so Conv1D can run along the feature vector.
    layers.Reshape((feature_dim, 1)),
    layers.Conv1D(8, 2, activation='relu', padding='same', kernel_regularizer=regularizers.l2(l2_value)),
    layers.MaxPooling1D(2),
    layers.Flatten(),
    # Three dense stages of shrinking width; dropout grows by 0.1 per stage.
    *_dense_block(256, dropout_rate),
    *_dense_block(128, dropout_rate + 0.1),
    *_dense_block(64, dropout_rate + 0.2),
    # One softmax output per class.
    layers.Dense(num_classes, activation='softmax'),
])

optimizer = Adam(learning_rate=learning_rate)
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Stop once validation accuracy has not improved for `patience` epochs and
# roll the model back to its best weights.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=patience,
    restore_best_weights=True,
)

history = model.fit(
    train_features,
    train_labels,
    validation_data=(val_features, val_labels),
    epochs=500,
    batch_size=batch_size,
    callbacks=[early_stopping],
    verbose=1,
)

model.summary()

# Per-epoch curves recorded by fit(); reused by the plotting cell.
epoch_log = history.history
train_losses = epoch_log['loss']
val_losses = epoch_log['val_loss']
train_accuracies = epoch_log['accuracy']
val_accuracies = epoch_log['val_accuracy']

# Highest validation accuracy reached in any epoch.
val_accuracy = max(epoch_log['val_accuracy'])
best_accuracy = val_accuracy

Epoch 1/500
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 15ms/step - accuracy: 0.0158 - loss: 5.0744 - val_accuracy: 0.0400 - val_loss: 4.1111
Epoch 2/500
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.0424 - loss: 4.5917 - val_accuracy: 0.0767 - val_loss: 3.9232
Epoch 3/500
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.0610 - loss: 4.3257 - val_accuracy: 0.1583 - val_loss: 3.6290
Epoch 4/500
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.0859 - loss: 4.1026 - val_accuracy: 0.2450 - val_loss: 3.3197
Epoch 5/500
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.1051 - loss: 3.9011 - val_accuracy: 0.3367 - val_loss: 3.0678
Epoch 6/500
[1m94/94[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 11ms/step - accuracy: 0.1364 - loss: 3.6960 - val_accuracy: 0.3933 - val_loss: 2.8976
Epoch 7/500
[1m94/94[0m [

In [None]:
# Keep the output file name in one place so the log message always matches
# the file that was actually written.
weights_path = 'model_weights.weights.h5'
model.save_weights(weights_path)
print(f"Model weights saved to '{weights_path}'")

In [None]:
def _plot_train_vs_val(train_values, val_values, metric):
    """Plot a per-epoch training curve against its validation curve.

    Args:
        train_values: Per-epoch values measured on the training data.
        val_values: Per-epoch values measured on the validation data.
        metric: Display name used for the y-axis, legend and title.
    """
    plt.figure(figsize=(12, 6))
    # Epochs are numbered from 1 on the x-axis.
    plt.plot(np.arange(1, len(train_values) + 1), train_values, label=f'Train {metric}')
    plt.plot(np.arange(1, len(val_values) + 1), val_values, label=f'Val {metric}')
    plt.xlabel('Epoch')
    plt.ylabel(metric)
    plt.title(f'Train {metric} vs Val {metric}')
    plt.legend()
    plt.show()


# Plotting training and validation losses
_plot_train_vs_val(train_losses, val_losses, 'Loss')

# Plotting training and validation accuracy
_plot_train_vs_val(train_accuracies, val_accuracies, 'Accuracy')

# `fine_tuned_params` was never defined, so this cell raised NameError on a
# fresh kernel; collect the hyper-parameters that were used for training.
fine_tuned_params = {
    'learning_rate': learning_rate,
    'batch_size': batch_size,
    'l2_value': l2_value,
    'dropout_rate': dropout_rate,
    'patience': patience,
}

# Print the best accuracy achieved with the fine-tuned parameters
print("Fine-tuned Parameters:", fine_tuned_params)
print("Best Validation Accuracy:", best_accuracy)


In [None]:
def load_and_preprocess_single_image(image_path):
    """Extract the DeiT feature vector of a single image.

    Delegates to ``load_and_preprocess_images`` so that single-image inference
    goes through exactly the same preprocessing and feature selection as the
    data the classifier was trained on.

    Args:
        image_path: Path of the image file.

    Returns:
        np.ndarray with a leading batch axis of length 1, ready to be passed
        to ``model.predict``.
    """
    return load_and_preprocess_images([image_path])

In [None]:
test_image_path = r"C:\Users\User\Downloads\Project Task 1\val_data\8_image_03427.jpg"
test_image_features = load_and_preprocess_single_image(test_image_path)

# One row of class probabilities per input image.
prediction = model.predict(test_image_features)

# Position of the most probable class in the encoder's class order.
predicted_class = np.argmax(prediction, axis=1)

# The classifier was trained on LabelEncoder-encoded targets, so the argmax
# is an encoded index; map it back to the label values used in the label files.
predicted_label = encoder.inverse_transform(predicted_class)

# Print predicted class
print(f"Predicted class for the test image: {predicted_label[0]}")
