In [None]:
# Mount Google Drive at /content/drive; later cells read the dataset from
# /content/drive/MyDrive/Multi/Train and /content/drive/MyDrive/Multi/Test.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# Data splitting
Each class folder is split 70:30 into a training set and a test set.



In [None]:
import os
import shutil
import random

# Define the source directory containing the folders
source_dir = 'Images'

# Define the destination directories for train and test sets
train_dir = 'Train'
test_dir = 'Test'

# List of folders inside the source directory
folders = ['ALL', 'AML', 'CML', 'CLL', 'Healthy']


def split_dataset(source_dir, train_dir, test_dir, folders, train_fraction=0.7, seed=42):
    """Move the files of each class folder into train/test class sub-folders.

    For every name in ``folders`` the regular files directly inside
    ``source_dir/<name>`` are sorted, shuffled with a seeded generator and
    moved: the first ``int(train_fraction * n)`` go to ``train_dir/<name>``
    and the remainder to ``test_dir/<name>``.

    Args:
        source_dir: Directory that contains one sub-folder per class.
        train_dir: Destination root for the training files.
        test_dir: Destination root for the test files.
        folders: Class sub-folder names to process.
        train_fraction: Fraction of each class moved to the training set.
        seed: Seed for the shuffle. The same seed and the same file names
            always give the same split; pass ``None`` for a random split.

    Returns:
        dict mapping each class name to ``(num_train, num_test)``.

    Raises:
        FileNotFoundError: If a class folder is missing under ``source_dir``.
    """
    # A private generator keeps the split reproducible without touching the
    # global `random` state used elsewhere.
    rng = random.Random(seed)

    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(test_dir, exist_ok=True)

    counts = {}
    for folder in folders:
        class_source = os.path.join(source_dir, folder)
        # os.listdir() order is arbitrary, so sort before shuffling; skip
        # sub-directories so they are not counted (or moved) as samples.
        files = sorted(
            name for name in os.listdir(class_source)
            if os.path.isfile(os.path.join(class_source, name))
        )
        rng.shuffle(files)

        num_train = int(train_fraction * len(files))
        subsets = ((train_dir, files[:num_train]), (test_dir, files[num_train:]))
        for dest_root, subset in subsets:
            if not subset:
                # Nothing to move: do not create an empty class folder.
                continue
            class_dest = os.path.join(dest_root, folder)
            os.makedirs(class_dest, exist_ok=True)  # once per class, not per file
            for filename in subset:
                shutil.move(
                    os.path.join(class_source, filename),
                    os.path.join(class_dest, filename),
                )
        counts[folder] = (num_train, len(files) - num_train)
    return counts


# Only split when the raw image folder is present, so the notebook can be
# re-run from a fresh kernel after the data has already been split and moved.
if os.path.isdir(source_dir):
    split_dataset(source_dir, train_dir, test_dir, folders)
    print("Files split into train and test sets successfully!")
else:
    print(f"Source directory '{source_dir}' not found; nothing was split.")

# Data Preprocessing

Each preprocessed array has shape (number of images, height, width, 3), where the last axis holds the 3 colour channels (RGB).

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import cv2, os
import numpy as np
from tensorflow.keras.preprocessing.image import img_to_array
from sklearn.preprocessing import LabelEncoder

def preprocess_images(images_dir, image_size=(299, 299), augment=False):
    """Load every image under ``images_dir`` into memory as normalised arrays.

    ``images_dir`` must contain one sub-directory per class. Each image is
    read with OpenCV, converted from OpenCV's BGR channel order to RGB (the
    order used by the Keras image generators that feed the model), resized,
    and scaled to float32 values in [0, 1].

    Args:
        images_dir: Directory with one sub-directory per class.
        image_size: Target size passed to ``cv2.resize`` as ``dsize``,
            i.e. ``(width, height)``.
        augment: Accepted for backward compatibility; currently unused, no
            augmentation is applied.

    Returns:
        ``(X, y)`` where ``X`` is a float32 array of shape
        ``(n_images, height, width, 3)`` and ``y`` is an int64 array of class
        indices. A class index is the position of the class directory name in
        sorted order, which matches the alphanumeric class ordering used by
        Keras ``flow_from_directory`` even if a class has no readable images.
        Both arrays are empty when no images are found.
    """
    valid_extensions = ('.jpg', '.jpeg', '.png')

    # Sorted so that class indices and sample order are deterministic
    # (os.listdir() returns entries in arbitrary order).
    class_dirs = sorted(
        d for d in os.listdir(images_dir)
        if os.path.isdir(os.path.join(images_dir, d))
    )

    X = []
    y = []
    for class_index, class_dir in enumerate(class_dirs):
        class_path = os.path.join(images_dir, class_dir)

        # Case-insensitive match so files such as 'IMG_1.JPG' are not dropped.
        image_files = sorted(
            f for f in os.listdir(class_path)
            if f.lower().endswith(valid_extensions)
        )

        for filename in image_files:
            image_path = os.path.join(class_path, filename)
            img = cv2.imread(image_path)
            if img is None:
                # cv2.imread signals an unreadable/corrupt file by returning
                # None instead of raising; skip it rather than crash in resize.
                print(f"Warning: could not read image, skipping: {image_path}")
                continue
            # OpenCV decodes to BGR; the network is trained on RGB input.
            img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
            img = cv2.resize(img, image_size)
            # Already a float32 ndarray after this step, so no further
            # array conversion is needed.
            X.append(img.astype(np.float32) / 255.0)
            y.append(class_index)

    X = np.array(X)
    y = np.array(y, dtype=np.int64)

    return X, y

# Locations of the train and test image folders on the mounted Drive
train_dir = '/content/drive/MyDrive/Multi/Train'
test_dir = '/content/drive/MyDrive/Multi/Test'

# Load both splits fully into memory. No augmentation is applied to either
# split: `augment` is left at its default for both calls.
X_train, y_train = preprocess_images(train_dir)
X_test, y_test = preprocess_images(test_dir)

# Report the shape of every array produced above
loaded_arrays = (
    ("X_train", X_train),
    ("y_train", y_train),
    ("X_test", X_test),
    ("y_test", y_test),
)
for array_name, array in loaded_arrays:
    print(f"Shape of {array_name}:", array.shape)


Shape of X_train: (2463, 299, 299, 3)
Shape of y_train: (2463,)
Shape of X_test: (1059, 299, 299, 3)
Shape of y_test: (1059,)


# Build and compile the model

In [None]:
import tensorflow as tf
from tensorflow.keras.applications import InceptionV3
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import regularizers


# Define batch size
batch_size = 32

# Load InceptionV3 with ImageNet weights and without its original
# classification head, so a new head can be attached below.
base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(299, 299, 3))


# Add custom classification layers on top of the convolutional base
x = base_model.output
x = GlobalAveragePooling2D()(x)
x = Dropout(0.5)(x)  # Dropout for regularization
x = Dense(128, activation='relu', kernel_regularizer=regularizers.l2(0.001))(x)  # L2-regularized hidden layer
predictions = Dense(5, activation='softmax')(x)  # One softmax output per class (5 classes)

# Create the final model
model = Model(inputs=base_model.input, outputs=predictions)

# Freeze every layer of the base model except its last five, which stay
# trainable for fine-tuning together with the new head.
for layer in base_model.layers[:-5]:
    layer.trainable = False

# Compile with a low learning rate. The rate must be passed as
# `learning_rate`: the old `lr` alias is deprecated and is ignored or rejected
# by current Keras optimizers, so the intended 1e-4 would not be applied.
model.compile(optimizer=Adam(learning_rate=0.0001), loss='categorical_crossentropy', metrics=['accuracy'])

#model.summary()

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Locations of the train and test image folders on the mounted Drive
train_dir = '/content/drive/MyDrive/Multi/Train'
test_dir = '/content/drive/MyDrive/Multi/Test'

# Number of images yielded per batch
batch_size = 32

# Both generators only multiply pixel values by 1/255; no augmentation
train_datagen = ImageDataGenerator(rescale=1./255)
test_datagen = ImageDataGenerator(rescale=1./255)

# Settings shared by the train and test flows: images are resized to
# 299x299 and labels are produced as one-hot ('categorical') vectors.
flow_options = {
    'target_size': (299, 299),
    'batch_size': batch_size,
    'class_mode': 'categorical',
}

# Stream batches from the class sub-folders of each directory
train_generator = train_datagen.flow_from_directory(train_dir, **flow_options)
test_generator = test_datagen.flow_from_directory(test_dir, **flow_options)


Found 2463 images belonging to 5 classes.
Found 1059 images belonging to 5 classes.


# Model Training

In [None]:
# Train on the training generator; the test generator is passed as the
# validation data, so its metrics are reported as val_* in `history`.
num_epochs = 5  # number of passes over the training data
history = model.fit(
    x=train_generator,
    validation_data=test_generator,
    epochs=num_epochs,
)

In [None]:
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Predict class probabilities for the in-memory test images, then take the
# index of the highest-scoring class for each image.
y_pred = model.predict(X_test)
y_pred_classes = y_pred.argmax(axis=1)

# Overall accuracy
accuracy = accuracy_score(y_test, y_pred_classes)

# Precision, recall and F1, each averaged over classes with
# average='weighted'
precision, recall, f1 = (
    metric(y_test, y_pred_classes, average='weighted')
    for metric in (precision_score, recall_score, f1_score)
)

report_rows = (
    ("Accuracy:", accuracy),
    ("Precision:", precision),
    ("Recall:", recall),
    ("F1 Score:", f1),
)
for row_label, row_value in report_rows:
    print(row_label, row_value)


In [None]:
# Save the trained `model` to the file 'my_model.h5' (path is relative to the
# current working directory).
model.save('my_model.h5')


In [None]:
# Score the trained model on the test generator; evaluate() yields the loss
# followed by the compiled metric (accuracy).
test_loss, test_acc = model.evaluate(test_generator)
print(f'Test accuracy: {test_acc}')


In [None]:
import matplotlib.pyplot as plt

# Pull the per-epoch curves recorded by model.fit()
loss = history.history['loss']
val_loss = history.history['val_loss']
accuracy = history.history['accuracy']
val_accuracy = history.history['val_accuracy']
epochs = range(1, len(loss) + 1)

# One figure per metric: training curve in blue, testing curve in red.
# Each row: (training values, testing values, training label, testing label,
#            figure title, y-axis label)
curve_specs = (
    (loss, val_loss, 'Training loss', 'Testing loss',
     'Training and Testing Loss', 'Loss'),
    (accuracy, val_accuracy, 'Training accuracy', 'Testing accuracy',
     'Training and Testing Accuracy', 'Accuracy'),
)
for train_values, test_values, train_label, test_label, title, y_label in curve_specs:
    plt.figure(figsize=(10, 5))
    plt.plot(epochs, train_values, 'b', label=train_label)
    plt.plot(epochs, test_values, 'r', label=test_label)
    plt.title(title)
    plt.xlabel('Epochs')
    plt.ylabel(y_label)
    plt.legend()
    plt.grid(True)
    plt.show()
