<a href="https://colab.research.google.com/github/specM7/DSGP_Group_33_Brain_Tumor_Predictor/blob/Pituitary-Adenomas-Malindu-2425440/DSGP_preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import os
import numpy as np
import random
from PIL import Image, ImageEnhance
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Flatten, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.applications import VGG16
from sklearn.utils import shuffle
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
import matplotlib.pyplot as plt

In [None]:
# Mount Google Drive at /content/drive; the training data used later in this
# notebook is read from /content/drive/MyDrive/Training.
# NOTE(review): the recorded output of this cell is
# "MessageError: credential propagation was unsuccessful", i.e. the mount did
# not succeed on the saved run -- re-run and complete the authorization prompt.
from google.colab import drive
drive.mount('/content/drive')

MessageError: Error: credential propagation was unsuccessful

In [None]:
# Directory holding the training data, laid out as train_dir/<class>/<image>
train_dir = '/content/drive/MyDrive/Training'
# Define image size for VGG16
IMAGE_SIZE = 224


# Load and shuffle the train data
train_paths = []
train_labels = []
for label in os.listdir(train_dir):
    label_dir = os.path.join(train_dir, label)
    # Only sub-directories are class folders. A stray file placed directly in
    # train_dir (e.g. 'Tr-pi_1403.jpg') previously made os.listdir() raise
    # NotADirectoryError, so such entries are skipped here.
    if not os.path.isdir(label_dir):
        continue
    for image in os.listdir(label_dir):
        train_paths.append(os.path.join(label_dir, image))
        train_labels.append(label)

# Shuffle paths and labels together so each path keeps its own label
train_paths, train_labels = shuffle(train_paths, train_labels)

NotADirectoryError: [Errno 20] Not a directory: '/content/drive/MyDrive/Training/Tr-pi_1403.jpg'

In [None]:
import random
import matplotlib.pyplot as plt
from PIL import Image
import os

# Select random indices for up to 10 images. random.sample raises ValueError
# when asked for more items than exist, so cap the request at the dataset size.
random_indices = random.sample(range(len(train_paths)), min(10, len(train_paths)))

# Create a figure to display images in 2 rows
fig, axes = plt.subplots(2, 5, figsize=(15, 8))
axes = axes.ravel()

for i, idx in enumerate(random_indices):
    # Load image; the context manager closes the underlying file handle,
    # while resize() returns an independent in-memory copy to display.
    img_path = train_paths[idx]
    with Image.open(img_path) as raw_img:
        img = raw_img.resize((224, 224))  # Resize to consistent size

    # Display image
    axes[i].imshow(img)
    axes[i].axis('off')  # Hide axis
    # Show the class label as the subplot title
    axes[i].set_title(f"Label: {train_labels[idx]}", fontsize=10)

# Blank out grid cells that received no image (fewer than 10 samples)
for unused_ax in axes[len(random_indices):]:
    unused_ax.axis('off')

plt.tight_layout()
plt.show()

In [None]:
# Image Augmentation function
def augment_image(image):
    """Randomly jitter brightness and contrast, then scale pixels to [0, 1].

    Args:
        image: Image data accepted by ``np.uint8(...)`` (a PIL image or an
            array of pixel values).

    Returns:
        A float NumPy array holding the augmented pixels divided by 255.
    """
    # Draw both factors up front, brightness first, so the order of calls
    # into the random generator stays brightness -> contrast.
    brightness_factor = random.uniform(0.8, 1.2)
    contrast_factor = random.uniform(0.8, 1.2)

    pil_image = Image.fromarray(np.uint8(image))
    brightened = ImageEnhance.Brightness(pil_image).enhance(brightness_factor)
    contrasted = ImageEnhance.Contrast(brightened).enhance(contrast_factor)

    # Normalize pixel values to [0, 1]
    return np.array(contrasted) / 255.0

# Load images and apply augmentation
def open_images(paths, image_size=244):
    """Load, resize and augment the images at ``paths`` into one batch array.

    Args:
        paths: Iterable of image file paths.
        image_size: Side length (pixels) each image is resized to. The default
            of 244 preserves this function's previous hard-coded value.
            NOTE(review): the module-level IMAGE_SIZE is 224; 244 looks like a
            typo, but create_vgg16_feature_extractor is built with the same
            244, so both must be changed together.

    Returns:
        Array of shape ``(len(paths), image_size, image_size, 3)`` with the
        values produced by ``augment_image`` (random brightness/contrast,
        scaled to [0, 1]). An empty ``paths`` yields an empty array with that
        same trailing shape instead of a shapeless ``(0,)`` array.
    """
    images = [
        augment_image(load_img(path, target_size=(image_size, image_size)))
        for path in paths
    ]
    if not images:
        # Keep the batch rank stable so downstream shape handling still works.
        return np.empty((0, image_size, image_size, 3))
    return np.array(images)

# Encoding labels (convert label names to integers)
def encode_label(labels, class_names=None):
    """Map label names to integer class indices.

    Args:
        labels: Iterable of label names (class folder names).
        class_names: Optional ordered sequence of class names; a label's code
            is its first position in this sequence. When omitted, the entries
            of ``os.listdir(train_dir)`` are used, as before. That ordering is
            whatever the filesystem returns (not sorted), and predict_images
            decodes with the same listing, so the two must stay in step.

    Returns:
        NumPy array of integer codes, one per input label.

    Raises:
        ValueError: If a label is not present in ``class_names``.
    """
    if class_names is None:
        class_names = os.listdir(train_dir)

    # Build the lookup once instead of a linear list.index() scan per label.
    # setdefault keeps the first position of a repeated name, like list.index.
    index_of = {}
    for position, name in enumerate(class_names):
        index_of.setdefault(name, position)

    encoded = []
    for label in labels:
        if label not in index_of:
            raise ValueError(
                f"Unknown label {label!r}; expected one of {list(class_names)}"
            )
        encoded.append(index_of[label])
    return np.array(encoded)

# Data generator for batching
def datagen(paths, labels, batch_size=12, epochs=1):
    """Yield ``(images, encoded_labels)`` batches for ``epochs`` passes.

    Args:
        paths: Sequence of image file paths.
        labels: Sequence of label names aligned with ``paths``.
        batch_size: Number of samples per yielded batch; the last batch of a
            pass may be smaller.
        epochs: Number of full passes over ``paths``.

    Yields:
        Tuples ``(batch_images, batch_labels)`` where ``batch_images`` comes
        from ``open_images`` (freshly augmented on every pass) and
        ``batch_labels`` is the matching slice of integer label codes.
    """
    # Encode every label once up front: encode_label lists the training
    # directory on each call, so doing it per batch repeated that I/O.
    encoded_labels = encode_label(labels)

    for _ in range(epochs):
        for start in range(0, len(paths), batch_size):
            stop = start + batch_size
            batch_images = open_images(paths[start:stop])  # Open and augment images
            yield batch_images, encoded_labels[start:stop]

In [None]:
# Load VGG16 model without top layers for feature extraction
def create_vgg16_feature_extractor(image_size=244):
    """Build a frozen, ImageNet-initialised VGG16 convolutional base.

    Args:
        image_size: Height and width of the expected RGB input. Defaults to
            244, the value previously hard-coded here.
            NOTE(review): the module-level IMAGE_SIZE is 224; 244 looks like a
            typo, but open_images also loads images at 244, so the two values
            must be changed together.

    Returns:
        The VGG16 model without its classification head (``include_top=False``)
        and with every layer marked non-trainable.
    """
    vgg_base = VGG16(weights='imagenet',
                     include_top=False,
                     input_shape=(image_size, image_size, 3))

    # Freeze all layers: the network is used only as a fixed feature extractor
    for layer in vgg_base.layers:
        layer.trainable = False

    return vgg_base

# Feature extraction function
def extract_features(paths, labels, vgg_model, batch_size=32):
    """Run images through ``vgg_model`` and return flattened features + labels.

    Args:
        paths: Sequence of image file paths.
        labels: Sequence of label names aligned one-to-one with ``paths``.
        vgg_model: Model exposing ``predict(batch, verbose=0)``.
        batch_size: Number of images loaded and predicted at a time.

    Returns:
        Tuple ``(features, encoded_labels)``: ``features`` is a 2-D array with
        one flattened feature row per image, ``encoded_labels`` the integer
        codes from ``encode_label`` in the same order.

    Raises:
        ValueError: If ``paths`` is empty or its length differs from
            ``labels`` (previously a mismatch silently produced misaligned
            outputs, and empty input failed inside ``np.vstack``).
    """
    if len(paths) != len(labels):
        raise ValueError(
            f"paths and labels must have the same length "
            f"(got {len(paths)} and {len(labels)})"
        )
    if len(paths) == 0:
        raise ValueError("paths is empty; there is nothing to extract features from")

    # Encode all labels first; with matching lengths this array is already in
    # row order, so it does not need to be rebuilt batch by batch.
    encoded_labels = encode_label(labels)

    all_features = []
    # Process in batches to avoid memory issues
    for i in range(0, len(paths), batch_size):
        # Load and preprocess images
        batch_images = open_images(paths[i:i + batch_size])

        # Extract features using VGG16 and flatten each sample to one row
        features = vgg_model.predict(batch_images, verbose=0)
        all_features.append(features.reshape(features.shape[0], -1))

    # Combine all features
    return np.vstack(all_features), encoded_labels

In [None]:
# Create and train the hybrid model
def create_hybrid_model():
    """Train a Random Forest on VGG16 features of the training images.

    Reads the module-level ``train_paths`` and ``train_labels``.

    Returns:
        Tuple ``(vgg_model, rf_classifier)``: the frozen VGG16 feature
        extractor and the Random Forest fitted on its features.
    """
    # Frozen convolutional base used purely for feature extraction
    feature_extractor = create_vgg16_feature_extractor()

    # Turn every training image into a flat feature vector
    features, targets = extract_features(train_paths, train_labels, feature_extractor)

    print(f"Training features shape: {features.shape}")
    print(f"Training labels shape: {targets.shape}")

    # Random Forest settings; n_jobs=-1 uses all available cores
    forest_settings = {
        'n_estimators': 100,
        'max_depth': 20,
        'random_state': 42,
        'n_jobs': -1,
    }
    forest = RandomForestClassifier(**forest_settings)
    forest.fit(features, targets)

    return feature_extractor, forest

# ====Using VGG and sequential models : 1st prototype====
#     # Model architecture
# IMAGE_SIZE = 128  # Image size (adjust based on your requirements)
# base_model = VGG16(input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3), include_top=False, weights='imagenet')

# # Freeze all layers of the VGG16 base model
# for layer in base_model.layers:
#     layer.trainable = False

# # Set the last few layers of the VGG16 base model to be trainable
# base_model.layers[-2].trainable = True
# base_model.layers[-3].trainable = True
# base_model.layers[-4].trainable = True

# # Build the final model
# model = Sequential()
# model.add(Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3)))  # Input layer
# model.add(base_model)  # Add VGG16 base model
# model.add(Flatten())  # Flatten the output of the base model
# model.add(Dropout(0.3))  # Dropout layer for regularization
# model.add(Dense(128, activation='relu'))  # Dense layer with ReLU activation
# model.add(Dropout(0.2))  # Dropout layer for regularization
# model.add(Dense(len(os.listdir(train_dir)), activation='softmax'))  # Output layer with softmax activation

# # Compile the model
# model.compile(optimizer=Adam(learning_rate=0.0001),
#               loss='sparse_categorical_crossentropy',
#               metrics=['sparse_categorical_accuracy'])

# # Parameters
# batch_size = 20
# steps = int(len(train_paths) / batch_size)  # Steps per epoch
# epochs = 5

# # Train the model
# history = model.fit(datagen(train_paths, train_labels, batch_size=batch_size, epochs=epochs),
#                     epochs=epochs, steps_per_epoch=steps)

In [None]:
# Function to make predictions
def predict_images(paths, vgg_model, rf_classifier, batch_size=32):
    """Predict class names and class probabilities for the images at ``paths``.

    Args:
        paths: Sequence of image file paths.
        vgg_model: Feature extractor exposing ``predict(batch, verbose=0)``.
        rf_classifier: Fitted classifier exposing ``predict_proba`` and
            ``classes_`` (e.g. the Random Forest from create_hybrid_model).
        batch_size: Number of images processed at a time.

    Returns:
        Tuple ``(predictions, probabilities)``: a list of predicted label
        names and a 2-D array with one probability row per image. Empty
        ``paths`` returns ``([], array of shape (0, n_classes))`` instead of
        failing inside ``np.vstack``.

    NOTE(review): images are loaded through ``open_images``, which applies the
    random brightness/contrast of ``augment_image``, so repeated calls on the
    same paths can give slightly different results.
    """
    if len(paths) == 0:
        return [], np.empty((0, len(rf_classifier.classes_)))

    # Label names for decoding; must be the same listing encode_label used
    unique_labels = os.listdir(train_dir)

    all_predictions = []
    all_probabilities = []

    for i in range(0, len(paths), batch_size):
        # Load and preprocess images
        batch_images = open_images(paths[i:i + batch_size])

        # Extract features using VGG16
        features = vgg_model.predict(batch_images, verbose=0)
        features_flat = features.reshape(features.shape[0], -1)

        # Evaluate the forest once: for a single-output forest, predict() is
        # defined as classes_[argmax(predict_proba)], so derive the class from
        # the probabilities instead of running every tree a second time.
        batch_probabilities = rf_classifier.predict_proba(features_flat)
        batch_predictions = rf_classifier.classes_[
            np.argmax(batch_probabilities, axis=1)
        ]

        # Decode predictions back to label names
        all_predictions.extend(unique_labels[pred] for pred in batch_predictions)
        all_probabilities.append(batch_probabilities)

    return all_predictions, np.vstack(all_probabilities)
