In [2]:
import os
import shutil
import random
from tqdm import tqdm

def prepare_food101_dataset(source_dir, target_dir, split_ratio=0.5):
    """
    Prepare Food-101 dataset by selecting random subset of images.
    
    Parameters:
    source_dir: Original Food-101 dataset directory path
    target_dir: Target directory where the subset will be created
    split_ratio: Ratio of images to select (default: 0.5 for 50%)
    """
    
    # Create main directories
    train_dir = os.path.join(target_dir, 'train')
    test_dir = os.path.join(target_dir, 'test')
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)
    
    # Get list of all food classes, filtering out hidden files
    images_dir = os.path.join(source_dir, 'images')
    food_classes = [f for f in os.listdir(images_dir) 
                   if not f.startswith('.') and os.path.isdir(os.path.join(images_dir, f))]
    
    print(f"Processing {len(food_classes)} food classes...")
    
    # Process each food class
    for food_class in tqdm(food_classes):
        # Create directories for this class
        os.makedirs(os.path.join(train_dir, food_class), exist_ok=True)
        os.makedirs(os.path.join(test_dir, food_class), exist_ok=True)
        
        # Get all images for this class, filtering out hidden files
        source_class_dir = os.path.join(source_dir, 'images', food_class)
        all_images = [f for f in os.listdir(source_class_dir) 
                     if not f.startswith('.') and f.endswith('.jpg')]
        
        # Calculate number of images for training
        num_train = int(len(all_images) * split_ratio)
        
        # Randomly select images for training
        train_images = random.sample(all_images, num_train)
        # Remaining images will be for testing
        test_images = list(set(all_images) - set(train_images))
        
        # Copy training images
        for img in train_images:
            src = os.path.join(source_class_dir, img)
            dst = os.path.join(train_dir, food_class, img)
            shutil.copy2(src, dst)
            
        # Copy testing images
        for img in test_images:
            src = os.path.join(source_class_dir, img)
            dst = os.path.join(test_dir, food_class, img)
            shutil.copy2(src, dst)
    
    print("\nDataset preparation completed!")
    print(f"Created dataset with {len(food_classes)} classes")
    print(f"Target directory: {target_dir}")

# Example usage:
if __name__ == "__main__":
    # Adjust these paths according to your setup
    SOURCE_DIR = "/kaggle/input/food-101/food-101/food-101"  # Original Food-101 dataset directory
    TARGET_DIR = "/kaggle/working"  # Where to create the new dataset
    
    prepare_food101_dataset(SOURCE_DIR, TARGET_DIR, split_ratio=0.5)

Processing 101 food classes...


100%|██████████| 101/101 [08:26<00:00,  5.01s/it]


Dataset preparation completed!
Created dataset with 101 classes
Target directory: /kaggle/working





In [1]:
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import os
import gc

# Memory optimization: Clear any existing models/memory
tf.keras.backend.clear_session()
gc.collect()

# Constants - Reduced batch size and image size for memory optimization
IMG_SIZE = (160, 160)  # Reduced from 224x224
BATCH_SIZE = 16       # Reduced from 32
EPOCHS = 15
FINE_TUNE_EPOCHS = 10

# Dataset paths
train_dir = "/kaggle/working/train"
test_dir = "/kaggle/working/test"

# Get number of classes
num_classes = len(os.listdir(train_dir))
print(f"Number of classes detected: {num_classes}")

def prepare_dataset():
    print("Loading training dataset...")
    train_ds = tf.keras.utils.image_dataset_from_directory(
        train_dir,
        validation_split=0.2,
        subset="training",
        seed=123,
        image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        label_mode='categorical'
    )

    print("Loading validation dataset...")
    val_ds = tf.keras.utils.image_dataset_from_directory(
        train_dir,
        validation_split=0.2,
        subset="validation",
        seed=123,
        image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        label_mode='categorical'
    )

    print("Loading test dataset...")
    test_ds = tf.keras.utils.image_dataset_from_directory(
        test_dir,
        image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        label_mode='categorical',
        shuffle=False
    )

    # Memory optimization: Use dataset.map to resize images on the fly
    resize_and_rescale = tf.keras.Sequential([
        layers.Rescaling(1./255)
    ])

    train_ds = train_ds.map(lambda x, y: (resize_and_rescale(x), y),
                           num_parallel_calls=tf.data.AUTOTUNE)
    val_ds = val_ds.map(lambda x, y: (resize_and_rescale(x), y),
                        num_parallel_calls=tf.data.AUTOTUNE)
    test_ds = test_ds.map(lambda x, y: (resize_and_rescale(x), y),
                         num_parallel_calls=tf.data.AUTOTUNE)

    # Memory optimization: Configure prefetch
    AUTOTUNE = tf.data.AUTOTUNE
    train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
    val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)
    test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

    return train_ds, val_ds, test_ds

def create_model(num_classes):
    print("Creating MobileNetV2 model...") # Changed from EfficientNetB0 to MobileNetV2
    base_model = tf.keras.applications.MobileNetV2(
        input_shape=IMG_SIZE + (3,),
        include_top=False,
        weights='imagenet'
    )
    base_model.trainable = False

    inputs = layers.Input(shape=IMG_SIZE + (3,))
    x = base_model(inputs, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.2)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)
    
    return tf.keras.Model(inputs, outputs)

class TrainingMonitor(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        # Memory optimization: Clear memory after each epoch
        gc.collect()
        print(f"\nEpoch {epoch+1} completed")
        print(f"Training Accuracy: {logs['accuracy']:.4f}")
        print(f"Validation Accuracy: {logs['val_accuracy']:.4f}")

def main():
    # Memory optimization: Limit GPU memory growth
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as e:
            print(e)

    print("Starting the training process...")
    train_ds, val_ds, test_ds = prepare_dataset()
    
    print("Creating and compiling model...")
    model = create_model(num_classes)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # Show model summary
    model.summary()

    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(
            filepath='/kaggle/working/best_model.keras',
            save_best_only=True,
            monitor='val_accuracy'
        ),
        tf.keras.callbacks.EarlyStopping(
            monitor='val_accuracy',
            patience=3,
            restore_best_weights=True
        ),
        TrainingMonitor()
    ]

    print("\nStarting initial training phase...")
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=EPOCHS,
        callbacks=callbacks
    )

    # Memory optimization: Clear some memory before fine-tuning
    gc.collect()

    print("\nStarting fine-tuning phase...")
    base_model = model.layers[1]
    base_model.trainable = True
    for layer in base_model.layers[:-30]:
        layer.trainable = False

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    history_fine = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=EPOCHS + FINE_TUNE_EPOCHS,
        initial_epoch=EPOCHS,
        callbacks=callbacks
    )

    print("\nEvaluating final model...")
    test_loss, test_accuracy = model.evaluate(test_ds)
    print(f"\nFinal Test Accuracy: {test_accuracy:.4f}")
    
    print("\nSaving final model...")
    model.save('/kaggle/working/food_classification_model_final.keras')
    print("Training completed successfully!")

if __name__ == "__main__":
    main()

Number of classes detected: 101
Starting the training process...
Loading training dataset...
Found 50500 files belonging to 101 classes.
Using 40400 files for training.
Loading validation dataset...
Found 50500 files belonging to 101 classes.
Using 10100 files for validation.
Loading test dataset...
Found 50500 files belonging to 101 classes.
Creating and compiling model...
Creating MobileNetV2 model...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/mobilenet_v2/mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_160_no_top.h5
[1m9406464/9406464[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step



Starting initial training phase...
Epoch 1/15


I0000 00:00:1734279776.448390     427 service.cc:145] XLA service 0x7888d003b620 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
I0000 00:00:1734279776.448447     427 service.cc:153]   StreamExecutor device (0): Tesla T4, Compute Capability 7.5
I0000 00:00:1734279776.448455     427 service.cc:153]   StreamExecutor device (1): Tesla T4, Compute Capability 7.5


[1m  12/2525[0m [37m━━━━━━━━━━━━━━━━━━━━[0m [1m25s[0m 10ms/step - accuracy: 0.0130 - loss: 5.4066     

I0000 00:00:1734279781.863646     427 device_compiler.h:188] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.


[1m2523/2525[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 17ms/step - accuracy: 0.3120 - loss: 3.0197
Epoch 1 completed
Training Accuracy: 0.3912
Validation Accuracy: 0.4748
[1m2525/2525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m67s[0m 23ms/step - accuracy: 0.3121 - loss: 3.0191 - val_accuracy: 0.4748 - val_loss: 2.1433
Epoch 2/15
[1m2524/2525[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms/step - accuracy: 0.5123 - loss: 1.9327
Epoch 2 completed
Training Accuracy: 0.5090
Validation Accuracy: 0.4926
[1m2525/2525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 15ms/step - accuracy: 0.5123 - loss: 1.9327 - val_accuracy: 0.4926 - val_loss: 2.1098
Epoch 3/15
[1m2523/2525[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 12ms/step - accuracy: 0.5496 - loss: 1.7694
Epoch 3 completed
Training Accuracy: 0.5467
Validation Accuracy: 0.4892
[1m2525/2525[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 15ms/step - accuracy: 0.5496 - loss: 1.7694 - v

In [3]:
import tensorflow as tf
import numpy as np
from tensorflow.keras import layers
import matplotlib.pyplot as plt
import os
import gc

# Memory optimization: Clear any existing models/memory
tf.keras.backend.clear_session()
gc.collect()

# Constants - Reduced batch size and image size for memory optimization
IMG_SIZE = (160, 160)  # Reduced from 224x224
BATCH_SIZE = 32       # Reduced from 32
EPOCHS = 20
FINE_TUNE_EPOCHS = 20

# Dataset paths
train_dir = "/kaggle/working/train"
test_dir = "/kaggle/working/test"

# Get number of classes
num_classes = len(os.listdir(train_dir))
print(f"Number of classes detected: {num_classes}")

def prepare_dataset():
    print("Loading training dataset...")
    train_ds = tf.keras.utils.image_dataset_from_directory(
        train_dir,
        validation_split=0.2,
        subset="training",
        seed=123,
        image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        label_mode='categorical'
    )

    print("Loading validation dataset...")
    val_ds = tf.keras.utils.image_dataset_from_directory(
        train_dir,
        validation_split=0.2,
        subset="validation",
        seed=123,
        image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        label_mode='categorical'
    )

    print("Loading test dataset...")
    test_ds = tf.keras.utils.image_dataset_from_directory(
        test_dir,
        image_size=IMG_SIZE,
        batch_size=BATCH_SIZE,
        label_mode='categorical',
        shuffle=False
    )

    # Memory optimization: Use dataset.map to resize images on the fly
    resize_and_rescale = tf.keras.Sequential([
        layers.Rescaling(1./255)
    ])

    train_ds = train_ds.map(lambda x, y: (resize_and_rescale(x), y),
                           num_parallel_calls=tf.data.AUTOTUNE)
    val_ds = val_ds.map(lambda x, y: (resize_and_rescale(x), y),
                        num_parallel_calls=tf.data.AUTOTUNE)
    test_ds = test_ds.map(lambda x, y: (resize_and_rescale(x), y),
                         num_parallel_calls=tf.data.AUTOTUNE)

    # Memory optimization: Configure prefetch
    AUTOTUNE = tf.data.AUTOTUNE
    train_ds = train_ds.prefetch(buffer_size=AUTOTUNE)
    val_ds = val_ds.prefetch(buffer_size=AUTOTUNE)
    test_ds = test_ds.prefetch(buffer_size=AUTOTUNE)

    return train_ds, val_ds, test_ds

def create_model(num_classes):
    print("Creating MobileNetV2 model...") # Changed from EfficientNetB0 to MobileNetV2
    base_model = tf.keras.applications.MobileNetV2(
        input_shape=IMG_SIZE + (3,),
        include_top=False,
        weights='imagenet'
    )
    base_model.trainable = False

    inputs = layers.Input(shape=IMG_SIZE + (3,))
    x = base_model(inputs, training=False)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.2)(x)
    outputs = layers.Dense(num_classes, activation="softmax")(x)
    
    return tf.keras.Model(inputs, outputs)

class TrainingMonitor(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        # Memory optimization: Clear memory after each epoch
        gc.collect()
        print(f"\nEpoch {epoch+1} completed")
        print(f"Training Accuracy: {logs['accuracy']:.4f}")
        print(f"Validation Accuracy: {logs['val_accuracy']:.4f}")

def main():
    # Memory optimization: Limit GPU memory growth
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
        except RuntimeError as e:
            print(e)

    print("Starting the training process...")
    train_ds, val_ds, test_ds = prepare_dataset()
    
    print("Creating and compiling model...")
    model = create_model(num_classes)
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    # Show model summary
    model.summary()

    callbacks = [
        tf.keras.callbacks.ModelCheckpoint(
            filepath='/kaggle/working/best_model.keras',
            save_best_only=True,
            monitor='val_accuracy'
        ),
        tf.keras.callbacks.EarlyStopping(
            monitor='val_accuracy',
            patience=3,
            restore_best_weights=True
        ),
        TrainingMonitor()
    ]

    print("\nStarting initial training phase...")
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=EPOCHS,
        callbacks=callbacks
    )

    # Memory optimization: Clear some memory before fine-tuning
    gc.collect()

    print("\nStarting fine-tuning phase...")
    base_model = model.layers[1]
    base_model.trainable = True
    for layer in base_model.layers[:-30]:
        layer.trainable = False

    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-5),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    history_fine = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=EPOCHS + FINE_TUNE_EPOCHS,
        initial_epoch=EPOCHS,
        callbacks=callbacks
    )

    print("\nEvaluating final model...")
    test_loss, test_accuracy = model.evaluate(test_ds)
    print(f"\nFinal Test Accuracy: {test_accuracy:.4f}")
    
    print("\nSaving final model...")
    model.save('/kaggle/working/food_classification_model_final2.keras')
    print("Training completed successfully!")

if __name__ == "__main__":
    main()

Number of classes detected: 101
Starting the training process...
Loading training dataset...
Found 50500 files belonging to 101 classes.
Using 40400 files for training.
Loading validation dataset...
Found 50500 files belonging to 101 classes.
Using 10100 files for validation.
Loading test dataset...
Found 50500 files belonging to 101 classes.
Creating and compiling model...
Creating MobileNetV2 model...



Starting initial training phase...
Epoch 1/20
[1m1263/1263[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 24ms/step - accuracy: 0.2932 - loss: 3.0921
Epoch 1 completed
Training Accuracy: 0.3822
Validation Accuracy: 0.4762
[1m1263/1263[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m51s[0m 34ms/step - accuracy: 0.2933 - loss: 3.0917 - val_accuracy: 0.4762 - val_loss: 2.0970
Epoch 2/20
[1m1263/1263[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 22ms/step - accuracy: 0.5183 - loss: 1.9104
Epoch 2 completed
Training Accuracy: 0.5170
Validation Accuracy: 0.4944
[1m1263/1263[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 28ms/step - accuracy: 0.5183 - loss: 1.9104 - val_accuracy: 0.4944 - val_loss: 2.0408
Epoch 3/20
[1m1262/1263[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 22ms/step - accuracy: 0.5566 - loss: 1.7189
Epoch 3 completed
Training Accuracy: 0.5527
Validation Accuracy: 0.4990
[1m1263/1263[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 