In [None]:
# NOTE(review): nothing in the visible notebook imports the standalone `efficientnet`
# package (the model below comes from tensorflow.keras.applications) - confirm this
# install is still required.
!pip install efficientnet

In [2]:
import os

# Select the Keras backend through the environment; this cell runs ahead of the
# cell that imports keras.
os.environ.update({'KERAS_BACKEND': 'tensorflow'})

In [3]:
import keras
print(keras.__version__)

3.8.0


In [4]:
import pandas as pd
import numpy as np

In [None]:
# downloading the dataset
# dataset was originally downloaded from a private source - link removed for privacy/compliance

In [19]:
# unzipping the dataset
!unzip /content/data.zip > /dev/null 2>&1

In [20]:
# loading the training and validation splits from the same image directory
from keras.utils import image_dataset_from_directory

# Keyword arguments common to both splits; only `subset` differs between the
# two calls, so the split fraction and seed are guaranteed to stay in sync.
_split_kwargs = dict(
    directory='/content/train',
    labels='inferred',
    label_mode='categorical',
    color_mode='rgb',
    batch_size=32,
    image_size=(224, 224),
    validation_split=0.1,
    seed=123,
)

train_dataset = image_dataset_from_directory(subset='training', **_split_kwargs)

# Keep the folder names in the loader's order: predicted indices are mapped back
# to painter ids through this list at prediction time.
class_names = train_dataset.class_names
print('Class names:', class_names)

val_dataset = image_dataset_from_directory(subset='validation', **_split_kwargs)

Found 8829 files belonging to 11 classes.
Using 7947 files for training.
Class names: ['0', '1', '10', '2', '3', '4', '5', '6', '7', '8', '9']
Found 8829 files belonging to 11 classes.
Using 882 files for validation.


In [21]:
# loading the unlabeled test images; shuffling is turned off so the batches keep
# a fixed file order
test_dataset = image_dataset_from_directory(
    directory='/content/test',
    labels=None,
    shuffle=False,
    color_mode='rgb',
    image_size=(224, 224),
    batch_size=32,
)

Found 1201 files.


In [22]:
from google.colab import drive

# Mount Google Drive; the test labels CSV is read from a Drive path further down.
_DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(_DRIVE_MOUNT_POINT)

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [9]:
# Ground-truth labels for the test images, stored on the mounted Drive.
_Y_TEST_CSV_PATH = '/content/drive/My Drive/Colab Notebooks/DL/9.project2/image_categorizer/data/y_test.csv'

y_test = pd.read_csv(_Y_TEST_CSV_PATH)
y_test

Unnamed: 0,image_name,cat_id
0,-56lhw2AKjYI0Hnt.jpg,0
1,-6OdHXCBItIArPyk.jpg,2
2,-7241lsvPiVpNVFV.jpg,9
3,-8-0wltLEZBDTM5M.jpg,9
4,-GcrzANWUmrjk2tb.jpg,0
...,...,...
1196,zNIgOhLGUPCyp7vl.jpg,2
1197,zhzjJeI8FwQDZQCE.jpg,1
1198,zizDow_ExDVnz9QS.jpg,2
1199,znpJGKcXxvmOU_nK.jpg,0


In [10]:
# applying EfficientNet preprocessing to the image half of every (images, labels) batch
from tensorflow.keras.applications.efficientnet import preprocess_input


def _preprocess_batch(images, labels):
  """Run `preprocess_input` on the images and pass the labels through untouched."""
  return preprocess_input(images), labels


train_dataset = train_dataset.map(_preprocess_batch)
val_dataset = val_dataset.map(_preprocess_batch)

In [12]:
# `Dataset.map` returns a new dataset object that no longer carries the
# `file_paths` attribute attached by `image_dataset_from_directory`. A later cell
# reads `test_dataset.file_paths` to recover the image names, so the attribute is
# saved before mapping and re-attached afterwards; this keeps the notebook
# runnable top-to-bottom on a fresh kernel.
_test_file_paths = test_dataset.file_paths
test_dataset = test_dataset.map(lambda x: preprocess_input(x))
test_dataset.file_paths = _test_file_paths

In [23]:
# building an image classification model using EfficientNetB0 as a frozen feature extractor
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras import layers, models

# One softmax unit per class folder discovered by the training loader, rather
# than a hard-coded count that would silently go stale if the dataset changes.
num_classes = len(class_names)

# ImageNet-pretrained backbone without its classification head; the input size
# matches the image_size used by the dataset loaders.
base_model = EfficientNetB0(
    weights='imagenet',
    input_shape=(224, 224, 3),
    include_top=False,
)

# Freeze the backbone so only the new head below is trained.
base_model.trainable = False

# Classification head: pooled features -> hidden layer -> dropout -> class probabilities.
x = base_model.output
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(512, activation='relu')(x)
x = layers.Dropout(0.5)(x)
outputs = layers.Dense(num_classes, activation='softmax')(x)

model = models.Model(inputs=base_model.input, outputs=outputs)

In [24]:
# compiling the model: Adam with a small learning rate, categorical cross-entropy
# to match the one-hot ('categorical') labels produced by the loaders
from tensorflow.keras import optimizers

_adam = optimizers.Adam(learning_rate=1e-4)

model.compile(optimizer=_adam, loss='categorical_crossentropy', metrics=['accuracy'])

In [25]:
# training the model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau

# Use the ModelCheckpoint imported above (it was previously imported but unused,
# with the callback reached through the separate `keras` module instead).
# The best model by validation loss is written to "model.keras".
checkpoint_cb = ModelCheckpoint("model.keras", save_best_only=True, monitor="val_loss")

# NOTE(review): early stopping tracks val_accuracy while the checkpoint and the
# LR schedule track val_loss, so the weights restored into `model` and the file
# saved on disk can come from different epochs - confirm this is intended.
early_stopping = EarlyStopping(monitor='val_accuracy', patience=5, restore_best_weights=True)

# Cut the learning rate by 5x after 3 epochs without val_loss improvement, never below 1e-5.
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=3, min_lr=0.00001)

callbacks = [checkpoint_cb, early_stopping, reduce_lr]

# Upper bound on epochs; early stopping may end training sooner.
EPOCHS = 30
history = model.fit(
    train_dataset,
    validation_data=val_dataset,
    epochs=EPOCHS,
    callbacks=callbacks
)

Epoch 1/30
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m59s[0m 154ms/step - accuracy: 0.4672 - loss: 1.6126 - val_accuracy: 0.8265 - val_loss: 0.6164 - learning_rate: 1.0000e-04
Epoch 2/30
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 60ms/step - accuracy: 0.8033 - loss: 0.6521 - val_accuracy: 0.8469 - val_loss: 0.4955 - learning_rate: 1.0000e-04
Epoch 3/30
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 60ms/step - accuracy: 0.8303 - loss: 0.5221 - val_accuracy: 0.8560 - val_loss: 0.4474 - learning_rate: 1.0000e-04
Epoch 4/30
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m20s[0m 60ms/step - accuracy: 0.8526 - loss: 0.4623 - val_accuracy: 0.8583 - val_loss: 0.4266 - learning_rate: 1.0000e-04
Epoch 5/30
[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 65ms/step - accuracy: 0.8603 - loss: 0.4245 - val_accuracy: 0.8651 - val_loss: 0.4095 - learning_rate: 1.0000e-04
Epoch 6/30
[1m249/249[0m [32m━━━━━━━

In [None]:
# loading the best saved model and evaluating it on training and validation datasets
# The checkpoint callback in the training cell writes to "model.keras", so that
# is the file to load here (the previous "best_model.keras" is never created).
best_model = keras.models.load_model("model.keras")

train_loss, train_accuracy = best_model.evaluate(train_dataset)
print(f"Training dataset - Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f}")

val_loss, val_accuracy = best_model.evaluate(val_dataset)
print(f"Validation dataset - Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

In [26]:
# plotting training and validation curves for a given metric using Plotly
import plotly.express as px

def display_curves(history, metric):
  """Plot the per-epoch training and validation values of one metric.

  Args:
    history: object whose `.history` mapping holds one list under `metric` and
      one under `'val_' + metric` (here, the value returned by `model.fit`).
    metric: name of the metric to plot: 'accuracy' or 'loss'.
  """
  val_metric = 'val_' + metric
  curves = pd.DataFrame({
      metric: history.history[metric],
      val_metric: history.history[val_metric],
  })
  # Epochs are shown 1-based on the x axis.
  epoch_numbers = curves.index + 1
  fig = px.line(curves, x=epoch_numbers, y=[metric, val_metric])
  fig.update_layout(xaxis_title='Epochs', yaxis_title=metric)
  fig.show()

In [27]:
display_curves(history, 'loss')

In [28]:
display_curves(history, 'accuracy')

In [46]:
# scoring the in-memory model on the training split; evaluate() yields the loss
# followed by the compiled accuracy metric
train_metrics = model.evaluate(train_dataset)
train_loss, train_accuracy = train_metrics
print(f"Training dataset - Loss: {train_loss:.4f}, Accuracy: {train_accuracy:.4f}")

[1m249/249[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 52ms/step - accuracy: 0.9673 - loss: 0.1286
Training dataset - Loss: 0.1258, Accuracy: 0.9689


In [47]:
# scoring the in-memory model on the validation split; evaluate() yields the loss
# followed by the compiled accuracy metric
val_metrics = model.evaluate(val_dataset)
val_loss, val_accuracy = val_metrics
print(f"Validation dataset - Loss: {val_loss:.4f}, Accuracy: {val_accuracy:.4f}")

[1m28/28[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 53ms/step - accuracy: 0.8748 - loss: 0.3444
Validation dataset - Loss: 0.3574, Accuracy: 0.8787


In [48]:
# generating predictions from the test dataset and mapping them to class labels
class_probabilities = model.predict(test_dataset)

# Position of the highest-scoring class for every image.
predicted_indices = class_probabilities.argmax(axis=1)

# class_names holds the folder names as strings in the loader's order (not numeric
# order), so each index is translated to its folder name and then to an integer id.
predictions = [int(class_names[class_index]) for class_index in predicted_indices]

[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 102ms/step


In [49]:
# stripping the directory part from every test file path to get the bare image names
import os

image_names = list(map(os.path.basename, test_dataset.file_paths))

In [50]:
# assembling the test predictions: one row per image with its predicted category id
y_test_pred = pd.DataFrame({
    'image_name': image_names,
    'cat_id': predictions,
})
y_test_pred

Unnamed: 0,image_name,cat_id
0,-56lhw2AKjYI0Hnt.jpg,0
1,-6OdHXCBItIArPyk.jpg,2
2,-7241lsvPiVpNVFV.jpg,9
3,-8-0wltLEZBDTM5M.jpg,9
4,-GcrzANWUmrjk2tb.jpg,8
...,...,...
1196,zNIgOhLGUPCyp7vl.jpg,4
1197,zhzjJeI8FwQDZQCE.jpg,1
1198,zizDow_ExDVnz9QS.jpg,2
1199,znpJGKcXxvmOU_nK.jpg,0


In [52]:
# calculating accuracy score between true and predicted category IDs on the test set
from sklearn.metrics import accuracy_score

# Pair each prediction with the label of the *same file* by joining on image_name,
# instead of trusting that both tables happen to be in the same row order.
# validate='one_to_one' makes pandas raise if an image name is duplicated on either side.
scored = y_test[['image_name', 'cat_id']].merge(
    y_test_pred[['image_name', 'cat_id']],
    on='image_name',
    how='inner',
    suffixes=('_true', '_pred'),
    validate='one_to_one',
)

# Every labelled image must have a prediction and vice versa; otherwise the
# reported accuracy would silently cover only part of the test set.
if len(scored) != len(y_test) or len(scored) != len(y_test_pred):
    raise ValueError(
        f"Labels and predictions do not cover the same images: "
        f"{len(y_test)} labels, {len(y_test_pred)} predictions, {len(scored)} matched."
    )

accuracy = accuracy_score(scored['cat_id_true'], scored['cat_id_pred'])
print(f"Accuracy on Test Set: {accuracy * 100:.2f}%")

Accuracy on Test Set: 86.84%
