In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as img
import numpy as np
import os
import pandas as pd
import pickle
import tensorflow as tf
import tensorflow.keras.backend as K
from tensorflow.keras import regularizers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten,Conv2D
from tensorflow.keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, GlobalAveragePooling2D, AveragePooling2D, BatchNormalization
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, CSVLogger
from tensorflow.keras.optimizers import SGD,Adam
from tensorflow.keras.regularizers import l2
from tensorflow import keras
import numpy as np
from sklearn.metrics import classification_report,confusion_matrix
from tensorflow.keras.applications import MobileNetV2
from sklearn.metrics import precision_score, recall_score, f1_score
from tensorflow.keras import metrics
import seaborn as sns
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau
from keras.callbacks import Callback
from datetime import datetime

In [None]:
# Mount Google Drive into the Colab runtime so files stored under
# /content/drive can be read and written by the cells below.
from google.colab import drive

DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

# Data Processing

In [None]:
!unzip /content/drive/MyDrive/ifood-2019-fgvc6.zip

In [None]:
# --- Experiment configuration -------------------------------------------------

# Number of output classes of the classifier.
num_classes = 251

# Image dimensions (in pixels) used when loading images and building the model.
width = 224
height = 224

# Number of images per batch produced by the data generators.
batch_size = 32

# Dataset directories inside the extracted archive.
test_dir = '/content/ifood-2019-fgvc6/test_set'
val_dir = '/content/ifood-2019-fgvc6/organized_val_set'
train_dir = '/content/ifood-2019-fgvc6/organized_train_set'

In [None]:
# Delete training images that are listed in train_incorrect.csv.
#
# The CSV is read without a header row and only its first column is used.
# NOTE(review): every entry is treated as a bare file name that may sit in any
# sub-directory of train_dir; entries containing a directory component will not
# match. The summary printed at the end makes such a mismatch visible.
csv_path = '/content/drive/MyDrive/train_incorrect.csv'

# dtype=str keeps purely numeric file names from being parsed as numbers.
incorrect_images_df = pd.read_csv(csv_path, header=None, dtype=str)
# Drop empty rows so no NaN reaches os.path.join.
incorrect_images = incorrect_images_df[0].dropna().tolist()

names_to_remove = set(incorrect_images)
removed_count = 0

for root, dirs, files in os.walk(train_dir):  # Traverse through all class subdirectories
    # os.walk already lists the files of each directory, so a set intersection
    # replaces one os.path.exists() call per (directory, listed name) pair.
    for img_file in sorted(names_to_remove.intersection(files)):
        os.remove(os.path.join(root, img_file))
        removed_count += 1
        print(f"Removed {img_file} from {root}")

print(f"Removed {removed_count} file(s); {len(names_to_remove)} distinct name(s) listed in {csv_path}")

In [None]:
# Training-time preprocessing and augmentation: pixel values are rescaled by
# 1/255 and each image is randomly sheared, rotated, shifted, zoomed and
# horizontally flipped.
#
# validation_split was removed on purpose: it only has an effect when
# flow_from_directory() is called with subset='training'/'validation', which
# this notebook never does (validation images are read from their own
# directory). Keeping it suggested a 30 % hold-out that was never applied.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.2,
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
)

# Validation images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=1. / 255)

In [None]:
# Build the directory iterators that feed the model.
#
# target_size is documented as (height, width); the original passed
# (width, height), which only worked because both values are equal.
train_set = train_datagen.flow_from_directory(
    train_dir,
    target_size=(height, width),
    batch_size=batch_size,
    class_mode='categorical',
)

# shuffle=False keeps the validation images in a fixed order so that
# val_set.classes lines up with the rows returned by model.predict(val_set).
val_set = validation_datagen.flow_from_directory(
    val_dir,
    target_size=(height, width),
    batch_size=batch_size,
    shuffle=False,
    class_mode='categorical',
)

# Both iterators derive class indices from their own sub-directory names;
# training and validation labels are only comparable if the mappings agree.
assert train_set.class_indices == val_set.class_indices, (
    "train_dir and val_dir do not contain the same class sub-directories"
)

In [None]:
# Walk the immediate children of train_dir once, keeping every sub-directory
# (one per class) together with the number of entries it contains.
class_dirs = []
class_counts = []
for entry in os.listdir(train_dir):
    class_path = os.path.join(train_dir, entry)
    if not os.path.isdir(class_path):
        continue
    class_dirs.append(entry)
    class_counts.append(len(os.listdir(class_path)))

# Histogram: how many classes have a given number of images.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(class_counts, bins=20, color='skyblue', edgecolor='black')
ax.set_title('Frequency Distribution of Image Counts per Class', fontsize=16)
ax.set_xlabel('Number of Images per Class', fontsize=12)
ax.set_ylabel('Frequency of Classes', fontsize=12)
fig.tight_layout()

# Render the chart
plt.show()

In [None]:
from sklearn.utils import class_weight

# Integer class index of every image in the training set.
class_labels = train_set.classes

# Class indices that actually occur in the training data (sorted, unique).
present_classes = np.unique(class_labels)

# 'balanced' weights are inversely proportional to each class's frequency.
class_weights = class_weight.compute_class_weight(class_weight='balanced',
                                                  classes=present_classes,
                                                  y=class_labels)

# Keras expects a {class_index: weight} dict. Key each weight by the class index
# it was computed for rather than by its position in the array: with
# dict(enumerate(...)) a class missing from the training data would silently
# shift every following weight onto the wrong class.
class_weights_dict = {
    int(cls): float(weight) for cls, weight in zip(present_classes, class_weights)
}

print("Class Weights:", class_weights_dict)

# Model Callbacks

In [None]:
# Folder on Google Drive that receives the best-model checkpoint.
checkpoint_dir = '/content/drive/MyDrive/Pre-Trained-Tests'
os.makedirs(checkpoint_dir, exist_ok=True)

# A per-run timestamp keeps checkpoint file names unique.
timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
best_checkpoint_path = os.path.join(checkpoint_dir, f'DenseNet201_Best_{timestamp}.keras')

# Save the full model (not only the weights) whenever val_loss reaches a new minimum.
save_best_model = ModelCheckpoint(
    filepath=best_checkpoint_path,
    monitor='val_loss',
    mode='min',
    save_best_only=True,
    save_weights_only=False,
    verbose=1,
)

# Stop after 10 epochs without a val_loss improvement and restore the best weights.
stop_when_stalled = EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=10,
    restore_best_weights=True,
    verbose=1,
)

# After 3 epochs without a val_loss improvement, multiply the learning rate by
# 0.2 (i.e. cut it to one fifth of its current value).
shrink_learning_rate = ReduceLROnPlateau(
    monitor='val_loss',
    mode='min',
    factor=0.2,
    patience=3,
    verbose=1,
)

# Callbacks
Callbacks = [save_best_model, stop_when_stalled, shrink_learning_rate]

In [None]:
class SaveHistory(Callback):
    """Callback that appends per-epoch metrics to a history dict and pickles it.

    If a file already exists at ``save_path`` its contents are loaded and
    extended, so the history of a resumed run continues where it left off.

    Args:
        save_path: Path of the pickle file the history is written to after
            every epoch.

    Attributes:
        history: Dict mapping each name in ``TRACKED_METRICS`` to a list with
            one entry per recorded epoch (``None`` when the metric was not
            present in that epoch's logs).
    """

    # Metric names copied from the epoch logs into the history.
    TRACKED_METRICS = (
        'accuracy', 'val_accuracy',
        'loss', 'val_loss',
        'precision', 'val_precision',
        'recall', 'val_recall',
    )

    def __init__(self, save_path):
        super().__init__()

        self.save_path = save_path

        if os.path.exists(self.save_path):
            # NOTE(security): pickle.load can execute arbitrary code; only point
            # save_path at files this notebook wrote itself.
            with open(self.save_path, 'rb') as f:
                self.history = pickle.load(f)
        else:
            self.history = {}

        # Guarantee a list for every tracked metric, including when an older
        # history file lacks some of the keys.
        for name in self.TRACKED_METRICS:
            self.history.setdefault(name, [])

    def on_epoch_end(self, epoch, logs=None):
        """Record this epoch's metrics and rewrite the pickle file.

        Args:
            epoch: Zero-based epoch index (printed one-based).
            logs: Dict of metric values for the epoch; ``None`` is treated as
                an empty dict.
        """
        logs = logs or {}

        for name in self.TRACKED_METRICS:
            self.history[name].append(logs.get(name))

        # os.makedirs('') raises, so only create a directory when save_path
        # actually has a directory component.
        target_dir = os.path.dirname(self.save_path)
        if target_dir:
            os.makedirs(target_dir, exist_ok=True)

        with open(self.save_path, 'wb') as f:
            pickle.dump(self.history, f)

        print(f"Epoch {epoch+1} history saved to {self.save_path}")

# Timestamp for the history file name. It lives in its own variable so that the
# `timestamp` used for the checkpoint file name is not overwritten with a
# differently formatted value.
history_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

# Write the history into checkpoint_dir, next to the model checkpoints. The
# original hard-coded '.../Pre-Trained_Tests/', which differs from
# checkpoint_dir ('.../Pre-Trained-Tests') by a single character and scattered
# one run's artifacts over two Drive folders.
history_save_path = os.path.join(checkpoint_dir, f'DenseNet201_Training_{history_timestamp}.pkl')

# Create the callback
history_callback = SaveHistory(save_path=history_save_path)


# DenseNet201 Model Testing

In [None]:
# DenseNet201 backbone with ImageNet weights and without its classification top.
# The variable keeps the name `resnet` because the following cells refer to it
# by that name.
# input_shape is (height, width, channels); the original passed
# (width, width, 3), which ignored `height` and only worked because both
# dimensions are equal.
resnet = tf.keras.applications.DenseNet201(weights='imagenet',
                                           include_top=False,
                                           input_shape=(height, width, 3))

# Freeze the first 150 layers; the remaining layers stay trainable.
for layer in resnet.layers[:150]:
    layer.trainable = False

In [None]:
# Classification head on top of the backbone output:
# global average pooling -> 300-unit ReLU layer -> 50 % dropout -> softmax.
pooled_features = GlobalAveragePooling2D()(resnet.output)
hidden = Dense(300, activation='relu')(pooled_features)
hidden = Dropout(0.5)(hidden)
output = Dense(num_classes, activation='softmax')(hidden)

# Full network: backbone input through to the class probabilities.
model = Model(inputs=resnet.input, outputs=output)

In [None]:
# Print a summary of the assembled model.
model.summary()

In [None]:
# Compile with the Adam optimizer and categorical cross-entropy; accuracy,
# precision and recall are tracked under the names that SaveHistory records.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy', metrics.Precision(name='precision'), metrics.Recall(name='recall')])

# `callbacks` is documented as a flat list of Callback instances, so the
# Callbacks list is unpacked instead of being nested inside another list.
history = model.fit(train_set,
                    validation_data=val_set,
                    epochs=20,
                    verbose=1,
                    class_weight=class_weights_dict,
                    callbacks=[*Callbacks, history_callback])

In [None]:
# Disabled snippet: the code below is wrapped in a bare string literal, so none
# of it runs. It reloads a saved model and continues training for 30 epochs.
# NOTE(review): the load_model path points at the Drive root rather than at a
# model file and must be filled in before this snippet is re-enabled.
'''
loaded_model = tf.keras.models.load_model('/content/drive/MyDrive/')

history = loaded_model.fit(train_set,
                      validation_data=val_set,
                      epochs=30,
                      verbose=1,
                      class_weight=class_weights_dict,
                      callbacks=[Callbacks, history_callback])
'''

# Model Results Evaluation

In [None]:
# Locate the newest best-model checkpoint in checkpoint_dir. The original path
# contained a literal, unformatted '{}' and pointed at a different folder than
# the one ModelCheckpoint writes to, so it could never match a saved file.
# The timestamp in the file name (YYYYmmdd-HHMMSS) sorts chronologically.
checkpoint_files = sorted(
    name for name in os.listdir(checkpoint_dir)
    if name.startswith('DenseNet201_Best_') and name.endswith('.keras')
)
if not checkpoint_files:
    raise FileNotFoundError(
        f"No DenseNet201_Best_*.keras checkpoint found in {checkpoint_dir}"
    )
best_model_path = os.path.join(checkpoint_dir, checkpoint_files[-1])
print("Loading model from:", best_model_path)

loaded_model = tf.keras.models.load_model(best_model_path)

# val_set was created with shuffle=False, so val_set.classes is in the same
# order as the prediction rows.
y_true = val_set.classes
prediction = loaded_model.predict(val_set)
y_pred = np.argmax(prediction, axis=1)

# class_list.txt: one "<numeric id> <label>" pair per line.
label_map = {}
with open('/content/ifood-2019-fgvc6/class_list.txt', 'r') as f:
    for line in f:
        # maxsplit=1 tolerates labels containing spaces; blank or malformed
        # lines are skipped instead of raising on unpacking.
        parts = line.strip().split(maxsplit=1)
        if len(parts) != 2:
            continue
        numerical_label, actual_label = parts
        label_map[int(numerical_label)] = actual_label

# The generator assigns class indices from the sorted sub-directory names, so
# an index is NOT necessarily the numeric id used in class_list.txt (as strings,
# '10' sorts before '2'). Translate index -> directory name first.
index_to_dir = {index: dir_name for dir_name, index in val_set.class_indices.items()}


def _readable_label(class_index):
    """Return the display label for a generator class index.

    NOTE(review): assumes a purely numeric directory name is an id from
    class_list.txt; any other directory name is used as the label itself.
    """
    dir_name = index_to_dir[int(class_index)]
    if dir_name.isdigit():
        return label_map.get(int(dir_name), dir_name)
    return dir_name


y_true_mapped = [_readable_label(label) for label in y_true]
y_pred_mapped = [_readable_label(label) for label in y_pred]


print("Sample of y_true:", y_true_mapped[:10])
print("Sample of y_pred:", y_pred_mapped[:10])

# Generate the classification report
report = classification_report(y_true_mapped, y_pred_mapped, zero_division=1)
print(report)


In [None]:
# Evaluate the loaded model on the validation set. The values come back in the
# order loss, accuracy, precision, recall (the metrics passed to compile()).
# They are labelled "Validation" because val_set - the data also monitored by
# the checkpoint and early-stopping callbacks - is evaluated here, not a
# separate test set.
result = loaded_model.evaluate(val_set)
print("\nValidation Loss:", result[0])
print("Validation Accuracy:", result[1])
print("Validation Precision:", result[2])
print("Validation Recall:", result[3])

In [None]:
# Directory (class) name for every class index. The mapping is inverted
# explicitly instead of relying on dict key order, and stored under its own
# name so the `class_labels` array built for the class weights is not replaced.
class_names = {index: name for name, index in val_set.class_indices.items()}

# Take the next batch from the validation iterator and predict it.
test_images, test_labels = next(val_set)
predictions = loaded_model.predict(test_images)

# Show at most 20 images; a batch can hold fewer (small batch_size, or the
# final partial batch), which made a fixed range(20) raise IndexError.
num_to_show = min(20, len(test_images))

for i in range(num_to_show):
    predicted_label = class_names[int(np.argmax(predictions[i]))]
    true_label = class_names[int(np.argmax(test_labels[i]))]

    plt.figure(figsize=(2, 2))
    plt.imshow(test_images[i])
    plt.title(f"Predicted Label: {predicted_label}\nTrue Label: {true_label}")
    plt.axis('off')
    plt.show()