In [1]:
import os
import cv2
import numpy as np

def _to_bgr_uint8(img):
    """Coerce a decoded image array to an 8-bit, 3-channel layout.

    Args:
        img: NumPy array as returned by ``cv2.imread``.

    Returns:
        The converted ``uint8`` array of shape (H, W, 3), or ``None`` when the
        pixel type or channel count is not one this helper knows how to convert.
    """
    if img.dtype == np.uint16:
        # 65535 // 257 == 255, so the full 16-bit range maps onto 0..255.
        img = (img // 257).astype(np.uint8)
    elif img.dtype != np.uint8:
        return None

    if img.ndim == 2:
        # Single-channel image: replicate it into three identical channels.
        return cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    if img.ndim != 3:
        return None

    channels = img.shape[2]
    if channels == 3:
        return img
    if channels == 4:
        # Drop the alpha channel.
        return cv2.cvtColor(img, cv2.COLOR_BGRA2BGR)
    if channels == 1:
        return cv2.cvtColor(np.ascontiguousarray(img[:, :, 0]), cv2.COLOR_GRAY2BGR)
    return None


def load_images_from_directories(path_fire, path_smoke, path_no_fire_smoke, IMG_SIZE=224):
    """Load and resize every image found in three class directories.

    Files are visited in sorted name order so that the returned lists have the
    same order on every run and platform (``os.listdir`` gives no ordering
    guarantee). Files whose extension is not a known image extension are
    reported and skipped, as are files OpenCV cannot decode.

    Args:
        path_fire: Directory whose images receive label 1.
        path_smoke: Directory whose images receive label 2.
        path_no_fire_smoke: Directory whose images receive label 0.
        IMG_SIZE: Edge length in pixels; each image is resized to
            ``IMG_SIZE x IMG_SIZE``.

    Returns:
        A tuple ``(images, labels)`` of two parallel lists: ``uint8`` arrays of
        shape ``(IMG_SIZE, IMG_SIZE, 3)`` and their integer labels. Channels
        are left in the order OpenCV decodes them; no RGB conversion is done.
    """
    images = []
    labels = []
    image_extensions = {'.jpg', '.jpeg', '.png', '.tif', '.tiff'}

    for path, label in [(path_fire, 1), (path_smoke, 2), (path_no_fire_smoke, 0)]:
        print(f"Processing directory: {path}")
        for file in sorted(os.listdir(path)):
            # splitext() only reports a real extension, so an entry that is
            # literally named "png" or "jpg" is not mistaken for an image.
            extension = os.path.splitext(file)[1].lower()
            if extension not in image_extensions:
                print(f"Skipped file (not an image): {file}")
                continue

            img_path = os.path.join(path, file)
            img = cv2.imread(img_path, cv2.IMREAD_UNCHANGED)  # Use IMREAD_UNCHANGED for .tif images
            if img is None:
                print(f"Failed to read image: {img_path}")
                continue

            # IMREAD_UNCHANGED keeps the file's own bit depth and channel
            # count; normalise them so every returned array can be stacked.
            img = _to_bgr_uint8(img)
            if img is None:
                print(f"Skipped image (unsupported pixel format): {img_path}")
                continue

            images.append(cv2.resize(img, (IMG_SIZE, IMG_SIZE)))
            labels.append(label)

    return images, labels


In [2]:
# Class folders of the training split; edit these to point at your copy of the dataset.
path_fire, path_smoke, path_no_fire_smoke = (
    'datasets/FOREST_FIRE_SMOKE_AND_NON_FIRE_DATASET/train/fire/',
    'datasets/FOREST_FIRE_SMOKE_AND_NON_FIRE_DATASET/train/Smoke/',
    'datasets/FOREST_FIRE_SMOKE_AND_NON_FIRE_DATASET/train/non fire/',
)


In [3]:
# Edge length in pixels: every loaded image is resized to IMG_SIZE x IMG_SIZE (224x224).
IMG_SIZE = 224
images, labels = load_images_from_directories(
    path_fire,
    path_smoke,
    path_no_fire_smoke,
    IMG_SIZE=IMG_SIZE,
)

Processing directory: datasets/FOREST_FIRE_SMOKE_AND_NON_FIRE_DATASET/train/fire/
Skipped file (not an image): .DS_Store
Skipped file (not an image): Fire (3).gif


Invalid SOS parameters for sequential JPEG


Failed to read image: datasets/FOREST_FIRE_SMOKE_AND_NON_FIRE_DATASET/train/fire/Fire (4156).jpg




Skipped file (not an image): Fire (1).gif
Processing directory: datasets/FOREST_FIRE_SMOKE_AND_NON_FIRE_DATASET/train/Smoke/
Skipped file (not an image): .DS_Store
Processing directory: datasets/FOREST_FIRE_SMOKE_AND_NON_FIRE_DATASET/train/non fire/
Skipped file (not an image): .DS_Store
Failed to read image: datasets/FOREST_FIRE_SMOKE_AND_NON_FIRE_DATASET/train/non fire/Non_Fire (307).png
Failed to read image: datasets/FOREST_FIRE_SMOKE_AND_NON_FIRE_DATASET/train/non fire/Non_Fire (6758).jpg


In [4]:
# Turn the Python lists returned by the loader into NumPy arrays.
images, labels = np.array(images), np.array(labels)

# Cast to float32 and divide by 255 so pixel values fall between 0 and 1
# (assumes the loaded images are 8-bit).
images = np.divide(images.astype(np.float32), 255.0)

# Show the resulting shapes as a quick sanity check.
print(f"Images shape: {images.shape}")
print(f"Labels shape: {labels.shape}")

Images shape: (32395, 224, 224, 3)
Labels shape: (32395,)


In [5]:

from sklearn.model_selection import train_test_split

# Hold out 20% of the samples for validation (80% training).
# stratify=labels keeps the per-class proportions the same in both subsets, so
# validation metrics are not skewed by an unlucky draw of one class.
# random_state pins the shuffle so the split is repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    images,
    labels,
    test_size=0.2,
    random_state=42,
    stratify=labels,
)

# Every sample must land in exactly one subset, with a label alongside it.
assert len(X_train) + len(X_val) == len(images)
assert len(X_train) == len(y_train) and len(X_val) == len(y_val)

# Print shapes of the resulting sets
print(f"Training data shape: {X_train.shape}")
print(f"Validation data shape: {X_val.shape}")
print(f"Training labels shape: {y_train.shape}")
print(f"Validation labels shape: {y_val.shape}")


Training data shape: (25916, 224, 224, 3)
Validation data shape: (6479, 224, 224, 3)
Training labels shape: (25916,)
Validation labels shape: (6479,)


In [8]:
def _as_uint8_pixels(pixels):
    """Return ``pixels`` as a uint8 array suitable for compact storage.

    The image arrays were divided by 255 earlier in this notebook, so they hold
    float32 values between 0 and 1. Casting those straight to uint8 truncates
    every value below 1.0 to 0 and destroys the images. Floating-point input is
    therefore scaled back to the 0..255 range, rounded and clipped before the
    cast. Non-float input is cast as-is, so re-running this cell on arrays that
    are already uint8 leaves their values unchanged.

    Args:
        pixels: NumPy array of pixel data.

    Returns:
        A ``uint8`` array with the same shape as ``pixels``.
    """
    if not np.issubdtype(pixels.dtype, np.floating):
        return pixels.astype(np.uint8)

    scaled = pixels * 255.0
    # Work in place on the temporary to avoid further full-size copies.
    np.rint(scaled, out=scaled)
    np.clip(scaled, 0.0, 255.0, out=scaled)
    return scaled.astype(np.uint8)


X_train = _as_uint8_pixels(X_train)
y_train = y_train.astype(np.uint8)  # class ids are small non-negative integers
X_val = _as_uint8_pixels(X_val)
y_val = y_val.astype(np.uint8)

In [None]:
# Write each split to its own compressed .npz archive; inside every archive the
# arrays are stored under the keys "images" and "labels".
for archive_path, split_images, split_labels in (
    ('train_data.npz', X_train, y_train),
    ('val_data.npz', X_val, y_val),
):
    np.savez_compressed(archive_path, images=split_images, labels=split_labels)