In [2]:
#system libraries
import os
import sys
import platform
from glob import glob
from pathlib import Path
import gc

#common libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

#image preprocessing
import cv2

#tensorflow
import tensorflow as tf
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.layers import (Conv2D, MaxPooling2D, Dense, Input, Flatten,
                                    Dropout, UpSampling2D, GlobalAveragePooling2D)
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam, SGD

#sklearn
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.datasets import load_files

#balancing dataset
from imblearn.over_sampling import SMOTE


2023-08-31 12:24:59.486385: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [None]:
print(f'Python platform: {platform.platform()}')
print(f'Tensorflow version: {tf.__version__}')
#print(f"Keras version {keras.__version__}")
print()
print(f'System version: {sys.version}')
# print(f'Panda version: {pd.__version__}')
# print(f'Scikit-learn: {sk.__version__}')
gpu = len(tf.config.list_physical_devices("gpu")) > 0
print("GPU is ", "available" if gpu else "NOT AVAILABLE")

In [3]:
train_dir = os.path.join('/OriginalDataset', 'training_set')
test_dir = os.path.join('/OriginalDataset', 'testing_set')

In [4]:
def load_as_dataframe(path, target_shape=(224, 224)):
    data = load_files(path)
    face_files = np.array(data['filenames'])
    face_targets = np.array(data['target'])

    images, labels = [], []
    for file, target in zip(face_files, face_targets):
        img = cv2.imread(file)
        if img is not None:
            img_resized = cv2.resize(img, target_shape)
            images.append(img_resized)
            labels.append(target)
        else:
            print(f"Failed to load image at path: {file}")

    df = pd.DataFrame({
        'images': images,
        'faceshape': labels
    })

    return df

In [5]:
# Load datasets
df_train = load_as_dataframe(train_dir)
df_test = load_as_dataframe(test_dir)

Failed to load image at path: /content/drive/MyDrive/OriginalDataset/training_set/Oblong/oblong (724).jpg
Failed to load image at path: /content/drive/MyDrive/OriginalDataset/testing_set/Round/desktop.ini


In [6]:
# Prepare data for SMOTE
X_images = np.array(df_train['images'].tolist()).reshape(len(df_train), -1)
y_matched = df_train['faceshape'].values

In [8]:
del df_train
gc.collect()

In [9]:
X_images_test = np.array(df_test['images'].tolist()).reshape(len(df_test), -1)
y_matched_test = df_test['faceshape'].values

In [10]:
del df_test
import gc
gc.collect()

0

In [11]:
# SMOTE
sm = SMOTE(random_state=42)
X_smote_train, y_smote_train = sm.fit_resample(X_images, y_matched)
X_smote_test, y_smote_test = sm.fit_resample(X_images_test, y_matched_test)

In [None]:
print(X_images.shape)
print(y_matched.shape)

In [None]:
print(df_train['faceshape'].unique())

In [None]:
print(df_train['faceshape'].isna().sum())

In [None]:
del X_images, y_matched
gc.collect()

In [None]:
image_to_display = df_train["images"].iloc[0]

# Display the image
plt.imshow(image_to_display[..., ::-1])  # Convert BGR to RGB format for displaying
plt.axis('off')  # To hide axis values
plt.show()

In [None]:
face_type = [item[52:-1] for item in sorted(glob('/content/drive/MyDrive/OriginalDataset/training_set/*/'))]
face_type

In [None]:
df_train.head()

In [None]:
df_train.groupby('faceshape').count()

In [None]:
df_test.groupby('faceshape').count()

In [None]:
def check_image_dimensions(df):
    unique_shapes = set()
    for idx, row in df.iterrows():
        img = cv2.imread(row['files'])
        if img is not None:
            unique_shapes.add(img.shape)
        else:
            print(f"Failed to load image at path: {row['files']}")
    return unique_shapes

print(check_image_dimensions(df_train))

In [None]:
y_smotedf = pd.DataFrame(data=y_smote, columns=['faceshape'])
faceshapes_df = y_smotedf.groupby('faceshape').size().reset_index(name='count')
print(faceshapes_df)

In [None]:
fig, ax = plt.subplots()

bar_labels = ['red', 'blue', 'green', 'orange','pink' ]
bar_colors = ['tab:red', 'tab:blue', 'tab:green', 'tab:orange', 'tab:pink']

ax.bar(faceshapes_df['faceshape'], faceshapes_df['count'], label=bar_labels, color=bar_colors)

ax.set_ylabel('fruit supply')
ax.set_title('Fruit supply by kind and color')
ax.legend(title='Fruit color')

## EDA

In [13]:
X_smote_i = X_smote.reshape((-1, *(224,224), 3))
X_smote_images = X_smote_i / 255.0
y_smote_categorical = tf.keras.utils.to_categorical(y_smote, num_classes=5)

In [15]:
del X_smote, y_smote, X_smote_i
import gc
gc.collect()

NameError: ignored

In [13]:
BATCH_SIZE = 32
IMG_SIZE = (224, 224)

In [14]:
train_dataset = tf.data.Dataset.from_tensor_slices((X_smote_images, y_smote_categorical))
train_dataset = train_dataset.cache()
train_dataset = train_dataset.shuffle(1024).batch(BATCH_SIZE).prefetch(tf.data.experimental.AUTOTUNE)


In [24]:
train_dataset = tf.keras.utils.image_dataset_from_directory(train_dir,
                                                            shuffle=True,
                                                            label_mode= 'categorical',
                                                            batch_size=BATCH_SIZE,
                                                            image_size=IMG_SIZE)

Found 1411 files belonging to 5 classes.


In [25]:
test_dataset = tf.keras.utils.image_dataset_from_directory(test_dir,
                                                           shuffle=True,
                                                           label_mode= 'categorical',
                                                           validation_split = 0.33,
                                                           subset = "training",
                                                           batch_size=BATCH_SIZE,
                                                           image_size=IMG_SIZE,
                                                           seed = 1)

Found 1000 files belonging to 5 classes.
Using 670 files for training.


In [51]:
val_dataset = tf.keras.utils.image_dataset_from_directory(test_dir,
                                                          shuffle=True,
                                                          label_mode= 'categorical',
                                                          validation_split = 0.33,
                                                          subset = "validation",
                                                          batch_size=BATCH_SIZE,
                                                          image_size=IMG_SIZE,
                                                          seed = 1)

Found 1000 files belonging to 5 classes.
Using 330 files for validation.


In [52]:
AUTOTUNE = tf.data.AUTOTUNE

train_dataset = train_dataset.prefetch(buffer_size=AUTOTUNE)
val_dataset = val_dataset.prefetch(buffer_size=AUTOTUNE)
test_dataset = test_dataset.prefetch(buffer_size=AUTOTUNE)

In [19]:
def recall_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    possible_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    recall = true_positives / (possible_positives + K.epsilon())
    return recall

def precision_m(y_true, y_pred):
    true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))
    precision = true_positives / (predicted_positives + K.epsilon())
    return precision

def f1_m(y_true, y_pred):
    precision = precision_m(y_true, y_pred)
    recall = recall_m(y_true, y_pred)
    return 2*((precision*recall)/(precision+recall+K.epsilon()))

In [20]:
def create_model():
  inputs = Input(shape=(224, 224, 3))
  x1 = data_augmentation(inputs)
  x2 = preprocess_input(x1)
  base_model_layer = base_model(x2, training=False)
  pooling_layer = GlobalAveragePooling2D()(base_model_layer)
  dropout_layer = Dropout(0.1)(pooling_layer)
  Layer_1 = Dense(512, activation='relu')(dropout_layer)
  outputs = Dense(5, activation='softmax')(Layer_1)
  model = Model(inputs, outputs)
  return model

In [21]:
data_augmentation = tf.keras.Sequential([
  tf.keras.layers.RandomFlip('horizontal'),
  tf.keras.layers.RandomRotation(0.2),
  tf.keras.layers.RandomZoom(0.2),
])

In [22]:
preprocess_input = tf.keras.applications.mobilenet_v2.preprocess_input

In [23]:
preprocess_input = tf.keras.layers.Rescaling(1./127.5, offset=-1)

## MOBILENETV2 model feature extraction

In [18]:
IMG_SHAPE = IMG_SIZE + (3,)
base_model = tf.keras.applications.MobileNetV2(input_shape=IMG_SHAPE,
                                               include_top=False,
                                               weights='imagenet')

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224_no_top.h5


In [39]:
image_batch, label_batch = next(iter(train_dataset))
feature_batch = base_model(image_batch)
print(feature_batch.shape)

(32, 7, 7, 1280)


In [None]:
label_batch

In [None]:
image_batch

In [24]:
base_model.trainable = False

In [43]:
base_model.summary()

Model: "mobilenetv2_1.00_224"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 224, 224, 3  0           []                               
                                )]                                                                
                                                                                                  
 Conv1 (Conv2D)                 (None, 112, 112, 32  864         ['input_1[0][0]']                
                                )                                                                 
                                                                                                  
 bn_Conv1 (BatchNormalization)  (None, 112, 112, 32  128         ['Conv1[0][0]']                  
                                )                                              

In [44]:
inputs = Input(shape=(224, 224, 3))
x1 = data_augmentation(inputs)
x2 = preprocess_input(x1)
base_model_layer = base_model(x2, training=False)
pooling_layer = GlobalAveragePooling2D()(base_model_layer)
dropout_layer = Dropout(0.1)(pooling_layer)
Layer_1 = Dense(512, activation='relu')(dropout_layer)
outputs = Dense(5, activation='softmax')(Layer_1)
model = Model(inputs, outputs)

In [60]:
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss=tf.keras.losses.CategoricalCrossentropy(),
              metrics=['accuracy',f1_m])

In [61]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_2 (InputLayer)        [(None, 224, 224, 3)]     0         
                                                                 
 sequential (Sequential)     (None, 224, 224, 3)       0         
                                                                 
 rescaling (Rescaling)       (None, 224, 224, 3)       0         
                                                                 
 mobilenetv2_1.00_224 (Funct  (None, 7, 7, 1280)       2257984   
 ional)                                                          
                                                                 
 global_average_pooling2d (G  (None, 1280)             0         
 lobalAveragePooling2D)                                          
                                                                 
 dropout (Dropout)           (None, 1280)              0     

In [47]:
len(model.trainable_variables)

4

In [58]:
type(val_dataset)

tensorflow.python.data.ops.prefetch_op._PrefetchDataset

In [57]:
type(model)

keras.engine.functional.Functional

In [63]:
initial_epochs = 10

loss, accuracy, f1_m0 = model.evaluate(val_dataset)



In [65]:
print("initial loss: {:.2f}".format(loss))
print("initial accuracy: {:.2f}".format(accuracy))
print("initial f1 score: {:.2f}".format(f1_m0))

initial loss: 1.87
initial accuracy: 0.21
initial f1 score: 0.04


In [66]:
checkpointer_init = ModelCheckpoint(filepath='/content/drive/MyDrive/Colab Notebooks/CNN_mobileNetV2_model1_trial2.hdf5', verbose=1, save_best_only=True)
history_init = model.fit(train_dataset,
                    epochs=initial_epochs,
                    validation_data=val_dataset,
                    callbacks = [checkpointer_init])

Epoch 1/10

InvalidArgumentError: ignored