In [1]:
# Importing libraries for data handling and visualization
import os  # To handle file and directory operations
import numpy as np  # For numerical computations
import pandas as pd  # For handling tabular data (if applicable)

# Importing libraries for deep learning and image preprocessing
import tensorflow as tf  # TensorFlow for building and training deep learning models
from tensorflow.keras.preprocessing.image import ImageDataGenerator  # To preprocess image data

# Importing libraries for visualization
import matplotlib.pyplot as plt  # For plotting and visualizing images


In [3]:
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# --- Configuration -----------------------------------------------------------
# Root folder of the extracted dataset. It must contain 'train', 'test', and
# 'validation' sub-directories, each holding one sub-folder per class.
# Set the DEEPDIVE_DATA_DIR environment variable to point at another location;
# when it is unset, the original local path is used.
extracted_folder = os.environ.get(
    "DEEPDIVE_DATA_DIR", "/Users/shwetkumar/Documents/ML/Deepdive"
)

IMAGE_SIZE = (224, 224)  # (height, width) every image is resized to
BATCH_SIZE = 32  # Number of images yielded per batch
SEED = 42  # Fixes the shuffle order of the training generator across runs

# Define paths for train, test, and validation directories
train_dir = os.path.join(extracted_folder, 'train')
test_dir = os.path.join(extracted_folder, 'test')
validation_dir = os.path.join(extracted_folder, 'validation')

# Print the directory paths for verification
print(f"Train Directory: {train_dir}")
print(f"Test Directory: {test_dir}")
print(f"Validation Directory: {validation_dir}")

# Verify the required directories up front and report exactly which ones are
# missing. `isdir` (rather than `exists`) also rejects a regular file that
# happens to carry one of the expected names.
missing_dirs = [
    path for path in (train_dir, test_dir, validation_dir)
    if not os.path.isdir(path)
]
if missing_dirs:
    raise FileNotFoundError(
        "One or more required directories (train, test, validation) are missing! "
        f"Not found: {', '.join(missing_dirs)}"
    )

# Create ImageDataGenerator instances for training and evaluation datasets.
# `rescale` multiplies every pixel value by 1/255 (maps 8-bit values to [0, 1]).
train_datagen = ImageDataGenerator(rescale=1.0 / 255)
test_datagen = ImageDataGenerator(rescale=1.0 / 255)

# Train Generator: loads and preprocesses images from the training directory.
# Shuffling stays enabled (the default); the seed makes the order reproducible.
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',  # One-hot labels for multi-class classification
    seed=SEED
)

# Validation Generator: loads and preprocesses images from the validation
# directory. Shuffling is disabled so that the order of yielded samples matches
# `validation_generator.classes` / `.filenames`.
validation_generator = test_datagen.flow_from_directory(
    validation_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

# Test Generator: loads and preprocesses images from the test directory.
# Shuffling is disabled for the same reason: with the default `shuffle=True`,
# predictions from `model.predict(test_generator)` would not line up with
# `test_generator.classes`, silently corrupting confusion matrices and
# per-class metrics.
test_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

# Confirm successful loading
print("Data generators created successfully!")


Train Directory: /Users/shwetkumar/Documents/ML/Deepdive/train
Test Directory: /Users/shwetkumar/Documents/ML/Deepdive/test
Validation Directory: /Users/shwetkumar/Documents/ML/Deepdive/validation
Found 2667 images belonging to 33 classes.
Found 667 images belonging to 33 classes.
Found 660 images belonging to 33 classes.
Data generators created successfully!
