## Image Loading and Preprocessing:

1. Load images from dataset.
2. Preprocess images by resizing, normalizing, and handling imbalanced data to improve model performance and generalization.


In [14]:
# Environment helpers, kept commented out so they do not run when the notebook is executed.
# Uncomment to install SciPy into the environment or to list the installed packages.
# !pip install scipy
# !pip list

Package                      Version
---------------------------- -----------
absl-py                      1.4.0
anyio                        3.7.1
argon2-cffi                  21.3.0
argon2-cffi-bindings         21.2.0
arrow                        1.2.3
asttokens                    2.2.1
astunparse                   1.6.3
async-lru                    2.0.4
attrs                        23.1.0
Babel                        2.12.1
backcall                     0.2.0
beautifulsoup4               4.12.2
bleach                       6.0.0
cachetools                   5.3.1
certifi                      2023.7.22
cffi                         1.15.1
charset-normalizer           3.2.0
colorama                     0.4.6
comm                         0.1.4
contourpy                    1.1.0
cycler                       0.11.0
debugpy                      1.6.7.post1
decorator                    5.1.1
defusedxml                   0.7.1
executing                    1.2.0
fastjsonschema               2

In [1]:
import os
from PIL import Image
import numpy as np
import tensorflow as tf
import keras
%matplotlib inline
import matplotlib.pyplot as plt
import sys
sys.executable
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import scipy


In [3]:
# Display the path of the Python interpreter that is running this kernel.
sys.executable

'C:\\Users\\ASUS\\OneDrive - Singapore Management University\\Documents\\GitHub\\Fresh-to-Death-Data-Science-Project\\venv\\Scripts\\python.exe'

In [4]:
# Dataset locations, relative to the notebook's working directory
train_path = "data/train"
val_path = "data/validation"

# Options shared by both splits: fixed seed, resize to 224x224, 32 images per batch
loader_options = dict(seed=2509, image_size=(224, 224), batch_size=32)

# Training split (shuffle behaviour left at the loader's default)
train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    train_path,
    **loader_options,
)

# Validation split: shuffling is turned off so batches come back in a fixed order
val_dataset = tf.keras.preprocessing.image_dataset_from_directory(
    val_path,
    shuffle=False,
    **loader_options,
)

Found 3115 files belonging to 36 classes.
Found 351 files belonging to 36 classes.


In [5]:
# Take the class names from the dataset itself rather than from os.listdir():
# the loader's list is what the integer labels index into, so label i always
# maps to class_names[i] even if the training directory contains stray files
# (e.g. Thumbs.db / .DS_Store) that a raw directory listing would pick up.
class_names = train_dataset.class_names

# Tally how many training images carry each label, one batch at a time
per_class_totals = np.zeros(len(class_names), dtype=np.int64)
for _, labels in train_dataset:
    # minlength keeps the histogram aligned with class_names even when a
    # batch happens not to contain the highest-numbered classes
    per_class_totals += np.bincount(labels.numpy(), minlength=len(class_names))

# Map class name -> image count (plain Python ints, in class_names order)
label_counts = {
    class_name: int(total)
    for class_name, total in zip(class_names, per_class_totals)
}

# Print the class names and label counts
for class_name, count in label_counts.items():
    print("Class {}: {} images".format(class_name, count))


Class apple: 68 images
Class banana: 75 images
Class beetroot: 88 images
Class bell pepper: 90 images
Class cabbage: 92 images
Class capsicum: 89 images
Class carrot: 82 images
Class cauliflower: 79 images
Class chilli pepper: 87 images
Class corn: 87 images
Class cucumber: 94 images
Class eggplant: 84 images
Class garlic: 92 images
Class ginger: 68 images
Class grapes: 100 images
Class jalepeno: 88 images
Class kiwi: 88 images
Class lemon: 82 images
Class lettuce: 97 images
Class mango: 86 images
Class onion: 94 images
Class orange: 69 images
Class paprika: 83 images
Class pear: 89 images
Class peas: 100 images
Class pineapple: 99 images
Class pomegranate: 79 images
Class potato: 77 images
Class raddish: 81 images
Class soy beans: 97 images
Class spinach: 97 images
Class sweetcorn: 91 images
Class sweetpotato: 69 images
Class tomato: 92 images
Class turnip: 98 images
Class watermelon: 84 images


In [None]:
# Random geometric augmentations applied to the training images
augmentation_generator = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    zoom_range=0.1,
    fill_mode='nearest'
)

# Build exactly one augmented copy of every training batch.
# Pixel values are not rescaled here; normalization is done in a later cell.
# NOTE: all augmented batches are held in memory in this list.
augmented_data = []

for images, labels in train_dataset:
    # flow() loops over its input forever, so iterating it with a `for` loop
    # never terminates; take a single batch with next() instead.
    # Labels are passed in and shuffle is disabled so every augmented image
    # stays paired with its own label (flow() shuffles by default).
    batch_iterator = augmentation_generator.flow(
        images.numpy(),
        labels.numpy(),
        batch_size=images.shape[0],
        shuffle=False,
    )
    augmented_images, augmented_labels = next(batch_iterator)
    augmented_data.append((augmented_images, augmented_labels))

# Wrap the augmented batches in a dataset whose element spec matches train_dataset
augmented_dataset = tf.data.Dataset.from_generator(
    lambda: iter(augmented_data),
    output_signature=(
        tf.TensorSpec(shape=(None, 224, 224, 3), dtype=tf.float32),
        tf.TensorSpec(shape=(None,), dtype=tf.int32)
    )
)

# Combine original and augmented data
combined_data = train_dataset.concatenate(augmented_dataset)

# Shuffle the combined data.
# A generator-backed dataset has unknown cardinality, so len(combined_data)
# raises TypeError; count the batches explicitly instead (one augmented batch
# was created per original batch). Elements are whole batches, so this
# shuffles the order of batches, not of individual images.
num_batches = 2 * len(augmented_data)
combined_data = combined_data.shuffle(buffer_size=num_batches)


In [None]:
# Normalize pixel values by dividing every image batch by 255; labels pass through unchanged.
def _scale_pixels(image_batch, label_batch):
    """Return the image batch divided by 255 together with its untouched labels."""
    return image_batch / 255, label_batch


train_Normalize = combined_data.map(_scale_pixels)
# NOTE: val_dataset is rebound to its scaled version, so re-running this cell
# without reloading the data divides the validation images by 255 again.
val_dataset = val_dataset.map(_scale_pixels)


## Apply the YOLO (You Only Look Once) model to the normalized and balanced data

1. Load images from dataset.
2. Preprocess images by resizing, normalizing, and handling imbalanced data to improve model performance and generalization.
