In [1]:
import os
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

In [2]:
# Define image dimensions and other parameters
image_size = (150, 150)
input_shape = (150, 150, 3)  # 3 channels for RGB images
batch_size = 64
epochs = 15
num_classes = 4 
data_dir = 'D:/Palak/MIT/Semester 6/Mini Project/CNN model/Preprocessed dataset'

In [3]:
# Load dataset and split into training and validation sets
def load_and_split_dataset(data_dir, test_size=0.2):
    # Load dataset using image_dataset_from_directory
    dataset = image_dataset_from_directory(
        data_dir,
        labels='inferred',  # Automatically infer labels from subdirectory names
        label_mode='int',
        image_size=image_size,
        batch_size=batch_size,
        shuffle=True
    )
     # Split the dataset into training and validation sets
    train_dataset, val_dataset = train_test_split(
        dataset, test_size=test_size, 
        random_state=42, 
        shuffle=True
    )
    return train_dataset, val_dataset

In [4]:
# Data augmentation for training data
def augment_data(train_dataset):
    # Create an instance of ImageDataGenerator
    data_augmentor = ImageDataGenerator(
        rotation_range=20,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.2,
        zoom_range=0.2,
        horizontal_flip=True,
        fill_mode='nearest'
    )
    
    # Apply augmentation to training data
    augmented_train_dataset = data_augmentor.flow_from_directory(
        data_dir,
        target_size=image_size,
        batch_size=batch_size,
        class_mode='sparse',
        subset='training'
    )
    
    return augmented_train_dataset


In [5]:
# Normalize images in the dataset
def normalize_dataset(dataset):
    # Normalize pixel values to range [0, 1]
    normalized_dataset = dataset.map(lambda x, y: (x / 255.0, y))
    return normalized_dataset

In [6]:
# Main script
def main():
    # Load and split dataset
    train_dataset, val_dataset = load_and_split_dataset(data_dir, image_size, test_size = 0.2)
    
    # Augment data for training
    augmented_train_dataset = augment_data(train_dataset)
    
    # Normalize training and validation datasets
    normalized_train_dataset = normalize_dataset(augmented_train_dataset)
    normalized_val_dataset = normalize_dataset(val_dataset)
    
    # At this point, normalized_train_dataset and normalized_val_dataset are ready to be used for training your CNN model
    print("Data pre-processing completed.")
    
    # Further steps: Build your CNN model and train it using the pre-processed datasets
    # Use normalized_train_dataset for training and normalized_val_dataset for validation

In [7]:
# Load and preprocess data using ImageDataGenerator
train_datagen = ImageDataGenerator(rescale=1.0/255,
                                   shear_range=0.2,
                                   zoom_range=0.2,
                                   horizontal_flip=True)
test_datagen = ImageDataGenerator(rescale=1.0/255)

In [8]:
train_generator = train_datagen.flow_from_directory(
    'blood cancer dataset',
    target_size=(150, 150),
    batch_size=batch_size,
    class_mode='categorical')



Found 19369 images belonging to 4 classes.


In [9]:
validation_generator = test_datagen.flow_from_directory(
    'blood cancer dataset',
    target_size=(150, 150),
    batch_size=batch_size,
    class_mode='categorical')

Found 19369 images belonging to 4 classes.


In [10]:
# Build CNN model
model = Sequential([
    Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Conv2D(128, (3, 3), activation='relu'),
    MaxPooling2D(pool_size=(2, 2)),
    Flatten(),
    Dense(128, activation='relu'),
    Dropout(0.5),
    Dense(num_classes, activation='softmax')  # Output layer for classification
])

  super().__init__(


In [11]:
# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])


In [None]:
# Train the model 
history = model.fit( 
    train_generator, 
    steps_per_epoch=train_generator.samples // batch_size, 
    epochs=epochs, 
    validation_data=validation_generator, 
    validation_steps=validation_generator.samples // batch_size 
) 

Epoch 1/15


  self._warn_if_super_not_called()


[1m122/302[0m [32m━━━━━━━━[0m[37m━━━━━━━━━━━━[0m [1m6:33[0m 2s/step - accuracy: 0.7890 - loss: 0.5551

In [14]:
# Evaluate the model
score = model.evaluate(validation_generator)
print("Test Loss:", score[0])
print("Test Accuracy:", score[1])

[1m528/528[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m195s[0m 370ms/step - accuracy: 0.9997 - loss: 0.0040
Test Loss: 0.0023880978114902973
Test Accuracy: 0.9996446967124939
