# In [None]:
# Import necessary libraries
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from PIL import Image
from sklearn.model_selection import train_test_split
from sklearn.metrics import classification_report, confusion_matrix

import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.utils import to_categorical

# For encryption
from cryptography.fernet import Fernet
import io

# Mount Google Drive
# Colab-only step: the dataset paths below live under the mount point.
from google.colab import drive
drive.mount('/content/drive')

# Set paths
# Each directory is expected to contain one sub-directory per class
# (see process_in_batches, which lists them to derive the class names).
train_dir = '/content/drive/MyDrive/mini_proj/Training'
test_dir = '/content/drive/MyDrive/mini_proj/Testing'

# Define parameters
IMG_SIZE = 150  # Reduced from 224 to save memory
BATCH_SIZE = 16  # Reduced from 32 to save memory
EPOCHS = 20  # Reduced from 30
NUM_CLASSES = 4  # glioma, meningioma, notumor, pituitary

# Generate encryption key
# A fresh Fernet key is created on every run and is only held in memory here;
# nothing in this script persists it, so data encrypted with it can only be
# decrypted within the same session.
key = Fernet.generate_key()
cipher_suite = Fernet(key)

# Walk a class-per-folder dataset and apply a per-file function in batches
def process_in_batches(directory, process_func, batch_size=100):
    """Apply ``process_func`` to every file of a class-per-folder dataset.

    ``directory`` is expected to contain one sub-directory per class. Only
    real, non-hidden sub-directories are treated as classes, and only regular,
    non-hidden files inside them are processed, so stray files or notebook
    checkpoint folders (e.g. ``.ipynb_checkpoints``) neither become bogus
    classes nor crash the walk. Classes and files are visited in sorted order
    so the output is deterministic across runs and file systems.

    Args:
        directory: Path of the dataset root.
        process_func: Callable taking a file path and returning the processed
            item (for example the encrypted bytes of the file).
        batch_size: Number of files handled per inner batch; must be >= 1.

    Returns:
        A tuple ``(all_data, all_labels, class_names)`` where ``all_data``
        holds the processed items, ``all_labels`` the class-directory name of
        each item (same order), and ``class_names`` the sorted class names.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError(f"batch_size must be >= 1, got {batch_size}")

    all_data = []
    all_labels = []

    # Keep only visible sub-directories: anything else is not a class folder.
    class_names = sorted(
        entry for entry in os.listdir(directory)
        if not entry.startswith('.')
        and os.path.isdir(os.path.join(directory, entry))
    )

    for class_name in class_names:
        class_dir = os.path.join(directory, class_name)
        # Sorted for reproducibility; nested folders and hidden files skipped.
        image_files = sorted(
            name for name in os.listdir(class_dir)
            if not name.startswith('.')
            and os.path.isfile(os.path.join(class_dir, name))
        )

        # Process in batches to save memory
        for start in range(0, len(image_files), batch_size):
            batch_data = [
                process_func(os.path.join(class_dir, img_name))
                for img_name in image_files[start:start + batch_size]
            ]
            all_data.extend(batch_data)
            all_labels.extend([class_name] * len(batch_data))

    return all_data, all_labels, class_names

# Encrypt the raw bytes of an image file
def encrypt_image(image_path):
    """Read the file at ``image_path`` in binary mode and return its contents
    encrypted with the module-level ``cipher_suite``."""
    with open(image_path, 'rb') as source:
        plaintext = source.read()
    return cipher_suite.encrypt(plaintext)

# Turn an encrypted payload back into a PIL image
def decrypt_image(encrypted_data):
    """Decrypt ``encrypted_data`` with the module-level ``cipher_suite`` and
    open the resulting bytes as a PIL image from an in-memory buffer."""
    plaintext = cipher_suite.decrypt(encrypted_data)
    buffer = io.BytesIO(plaintext)
    return Image.open(buffer)

# Process dataset in memory-efficient way
# Each *_encrypted list holds the return values of encrypt_image (one per
# file); each *_labels list holds the matching class-directory names.
print("Processing training data in batches...")
# class_names is taken from the training directory and reused for reporting.
train_encrypted, train_labels, class_names = process_in_batches(train_dir, encrypt_image)
print("Processing testing data in batches...")
# The class list derived from the testing directory is discarded here.
test_encrypted, test_labels, _ = process_in_batches(test_dir, encrypt_image)

# Memory-efficient generator for decrypted images
class DecryptedImageGenerator(tf.keras.utils.Sequence):
    """Keras ``Sequence`` that decrypts and preprocesses images one batch at a time.

    Only the encrypted payloads are kept in memory; each batch is decrypted,
    converted to RGB, resized to ``img_size`` x ``img_size`` and scaled to
    ``[0, 1]`` when it is requested.

    Args:
        encrypted_images: Sequence of encrypted image payloads accepted by
            ``decrypt_image``.
        labels: Sequence of class labels, aligned with ``encrypted_images``.
        batch_size: Number of samples per batch.
        img_size: Side length (pixels) of the square output images.
        num_classes: Width of the one-hot label vectors.
        shuffle: Whether sample order is reshuffled in ``on_epoch_end``.
        class_names: Optional ordered list of class names that fixes the
            label -> index mapping. When omitted, the mapping is built from
            the sorted distinct values in ``labels`` (the previous behavior).
            Passing the same list to every generator keeps indices consistent
            even if one split lacks a class.
        **kwargs: Forwarded to the base-class constructor (e.g. the worker
            options of newer Keras releases). NOTE(review): older base classes
            that define no ``__init__`` accept no extra options.

    Raises:
        ValueError: If there are more classes than ``num_classes``, or if
            ``class_names`` is given and ``labels`` contains a value that is
            not listed in it.
    """

    def __init__(self, encrypted_images, labels, batch_size, img_size, num_classes,
                 shuffle=True, class_names=None, **kwargs):
        # Newer Keras versions expect the base class to be initialised.
        super().__init__(**kwargs)
        self.encrypted_images = encrypted_images
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.num_classes = num_classes
        self.shuffle = shuffle

        if class_names is None:
            ordered_classes = sorted(set(labels))
        else:
            ordered_classes = list(class_names)
            unknown = set(labels) - set(ordered_classes)
            if unknown:
                raise ValueError(
                    f"labels contain classes missing from class_names: {sorted(unknown)}"
                )
        # Fail early instead of with an IndexError in the middle of a batch.
        if len(ordered_classes) > num_classes:
            raise ValueError(
                f"found {len(ordered_classes)} classes but num_classes={num_classes}"
            )

        self.label_to_num = {class_name: i for i, class_name in enumerate(ordered_classes)}
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch (the last one may be short)."""
        return int(np.ceil(len(self.encrypted_images) / self.batch_size))

    def __getitem__(self, index):
        """Return batch ``index`` as ``(X, y)``.

        ``X`` is a float32 array of shape ``(n, img_size, img_size, 3)`` with
        values scaled by 1/255; ``y`` is the float32 one-hot label matrix of
        shape ``(n, num_classes)``.
        """
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch_encrypted = [self.encrypted_images[i] for i in batch_indices]
        batch_labels = [self.labels[i] for i in batch_indices]

        X = np.zeros((len(batch_encrypted), self.img_size, self.img_size, 3), dtype=np.float32)
        y = np.zeros((len(batch_encrypted), self.num_classes), dtype=np.float32)

        for i, (encrypted_img, label) in enumerate(zip(batch_encrypted, batch_labels)):
            # Decrypt and process image
            img = decrypt_image(encrypted_img)
            if img.mode != 'RGB':
                # Force three channels so the array matches X's last axis.
                img = img.convert('RGB')
            img = img.resize((self.img_size, self.img_size))
            img_array = np.array(img) / 255.0
            X[i] = img_array

            # Process label
            y[i, self.label_to_num[label]] = 1

        return X, y

    def on_epoch_end(self):
        """Reset the sample order, reshuffling it when ``shuffle`` is set."""
        self.indices = np.arange(len(self.encrypted_images))
        if self.shuffle:
            np.random.shuffle(self.indices)

# Create generators
# Training batches are reshuffled each epoch (shuffle defaults to True).
train_generator = DecryptedImageGenerator(
    train_encrypted, train_labels,
    batch_size=BATCH_SIZE,
    img_size=IMG_SIZE,
    num_classes=NUM_CLASSES
)

# shuffle=False keeps test batches in the original sample order, which the
# evaluation code relies on to align predictions with true labels.
test_generator = DecryptedImageGenerator(
    test_encrypted, test_labels,
    batch_size=BATCH_SIZE,
    img_size=IMG_SIZE,
    num_classes=NUM_CLASSES,
    shuffle=False
)

# Build a more memory-efficient model
def create_lightweight_model(img_size=None, num_classes=None, learning_rate=0.0001):
    """Build and compile a small CNN classifier for square RGB images.

    The network is three Conv2D/MaxPooling2D stages (16, 32, 64 filters)
    followed by a 64-unit dense layer with dropout and a softmax output.

    Args:
        img_size: Side length of the input images. Defaults to the
            module-level ``IMG_SIZE`` (looked up at call time).
        num_classes: Number of output classes. Defaults to the module-level
            ``NUM_CLASSES`` (looked up at call time).
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        The compiled Keras ``Sequential`` model (categorical cross-entropy
        loss, accuracy metric).
    """
    if img_size is None:
        img_size = IMG_SIZE
    if num_classes is None:
        num_classes = NUM_CLASSES

    model = Sequential([
        # Explicit Input object instead of the deprecated `input_shape=`
        # argument on the first layer.
        tf.keras.Input(shape=(img_size, img_size, 3)),

        Conv2D(16, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),

        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),

        Conv2D(64, (3, 3), activation='relu'),
        MaxPooling2D(2, 2),

        Flatten(),

        Dense(64, activation='relu'),
        Dropout(0.3),

        Dense(num_classes, activation='softmax')
    ])

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

model = create_lightweight_model()
model.summary()

# Callbacks
# EarlyStopping: stop once val_loss has not improved for 5 epochs and restore
# the weights of the best epoch.
# ReduceLROnPlateau: multiply the learning rate by 0.2 after 2 epochs without
# val_loss improvement, never going below 1e-5.
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=2, min_lr=0.00001)

# Train the model
# NOTE(review): validation_data is the test generator, which is also the data
# evaluated after training. Early stopping and LR scheduling are therefore
# tuned on the same samples that are later reported as test accuracy; consider
# holding out part of the training data for validation instead.
history = model.fit(
    train_generator,
    steps_per_epoch=len(train_generator),
    epochs=EPOCHS,
    validation_data=test_generator,
    validation_steps=len(test_generator),
    callbacks=[early_stopping, reduce_lr]
)

# Evaluate the model
test_loss, test_acc = model.evaluate(test_generator)
print(f"Test Accuracy: {test_acc*100:.2f}%")

# Predictions
y_pred = model.predict(test_generator)
y_pred_classes = np.argmax(y_pred, axis=1)

# Get true labels
# Read them straight from the generator's label list, in the order given by
# its index array, instead of iterating every batch: fetching batches would
# decrypt and resize the whole test set a second time just to take the argmax
# of the one-hot labels, which equals label_to_num[label].
y_true = np.array(
    [test_generator.label_to_num[test_generator.labels[i]] for i in test_generator.indices]
)

# Classification report
print("Classification Report:")
print(classification_report(y_true, y_pred_classes, target_names=class_names))

# Confusion matrix
def plot_confusion_matrix(y_true, y_pred, classes):
    """Show an annotated heatmap of the confusion matrix of ``y_true`` versus
    ``y_pred``, using ``classes`` as tick labels on both axes."""
    matrix = confusion_matrix(y_true, y_pred)

    plt.figure(figsize=(8, 6))
    axes = sns.heatmap(
        matrix,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=classes,
        yticklabels=classes,
    )
    axes.set_title('Confusion Matrix')
    axes.set_xlabel('Predicted Label')
    axes.set_ylabel('True Label')
    plt.show()

# Draw the confusion matrix for the test-set predictions computed above.
plot_confusion_matrix(y_true, y_pred_classes, class_names)

# Plot training history
def plot_history(history):
    """Plot training and validation curves side by side: accuracy in the left
    panel, loss in the right one. ``history`` is the object returned by
    ``model.fit``; its ``history`` dict is read for the four series."""
    # (train key, validation key, train legend, validation legend, title, y label)
    panels = (
        ('accuracy', 'val_accuracy', 'Training Accuracy', 'Validation Accuracy',
         'Accuracy over Epochs', 'Accuracy'),
        ('loss', 'val_loss', 'Training Loss', 'Validation Loss',
         'Loss over Epochs', 'Loss'),
    )

    plt.figure(figsize=(12, 4))

    for position, panel in enumerate(panels, start=1):
        train_key, val_key, train_legend, val_legend, title, y_label = panel
        plt.subplot(1, 2, position)
        plt.plot(history.history[train_key], label=train_legend)
        plt.plot(history.history[val_key], label=val_legend)
        plt.title(title)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.legend()

    plt.show()

# Show the accuracy/loss curves recorded by model.fit.
plot_history(history)

# Save actual vs predicted labels (sample 100 for memory)
def save_sample_results(generator, model, classes, filename='actual_vs_predicted_sample.csv',
                        sample_size=100):
    """Predict a random sample of items from ``generator`` and save them to CSV.

    Indices are drawn without replacement from the items that actually exist,
    so the short final batch can no longer shrink the sample. Each batch that
    contains sampled items is loaded once and predicted in a single call,
    rather than loading a batch and calling ``predict`` per item.

    Args:
        generator: Batch source supporting ``len()``, integer indexing that
            returns ``(X, y)`` with one-hot ``y``, and a ``batch_size``
            attribute.
        model: Object whose ``predict(X, verbose=0)`` returns one score
            vector per row of ``X``.
        classes: Sequence mapping a class index to its display name.
        filename: Path of the CSV file to write.
        sample_size: Maximum number of items to sample.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Image Index``,
        ``Actual Label``, ``Predicted Label`` and ``Correct``, one row per
        sampled item, in sampling order.
    """
    columns = ['Image Index', 'Actual Label', 'Predicted Label', 'Correct']
    batch_size = generator.batch_size
    num_batches = len(generator)

    total_items = 0
    if num_batches > 0:
        # Only the last batch may be short; load it once to get the real count.
        last_X, _ = generator[num_batches - 1]
        total_items = (num_batches - 1) * batch_size + len(last_X)

    sampled = []
    if total_items > 0 and sample_size > 0:
        drawn = np.random.choice(total_items, min(sample_size, total_items), replace=False)
        sampled = [int(idx) for idx in drawn]

    # Group sampled items by batch so each batch is fetched only once.
    by_batch = {}
    for idx in sampled:
        by_batch.setdefault(idx // batch_size, []).append(idx)

    rows_by_index = {}
    for batch_idx, item_indices in by_batch.items():
        X, y = generator[batch_idx]
        offsets = [idx % batch_size for idx in item_indices]
        preds = model.predict(X[offsets], verbose=0)

        for row, (idx, offset) in enumerate(zip(item_indices, offsets)):
            true_label = classes[int(np.argmax(y[offset]))]
            pred_label = classes[int(np.argmax(preds[row]))]
            rows_by_index[idx] = {
                'Image Index': idx,
                'Actual Label': true_label,
                'Predicted Label': pred_label,
                'Correct': true_label == pred_label
            }

    # Emit rows in the order they were sampled; keep the header even if empty.
    results = [rows_by_index[idx] for idx in sampled]
    df = pd.DataFrame(results, columns=columns)
    df.to_csv(filename, index=False)
    print(f"Saved sample results to {filename}")
    return df

# Write a random sample of test-set predictions to the default CSV file.
results_df = save_sample_results(test_generator, model, class_names)

# Visualize sample predictions
def visualize_sample_predictions(generator, model, classes, num_samples=8):
    """Show a grid of randomly chosen images with their true and predicted labels.

    For each sample a random batch is drawn (with replacement) and one random
    image is taken from it. Titles are green when the prediction matches the
    true label and red otherwise. The grid is at least 3x3 and grows as needed
    so that any ``num_samples`` fits.

    Args:
        generator: Batch source supporting ``len()`` and integer indexing that
            returns ``(X, y)`` with one-hot ``y``.
        model: Object whose ``predict`` returns class scores for a batch.
        classes: Sequence mapping a class index to its display name.
        num_samples: Number of images to display.
    """
    num_batches = len(generator)
    if num_batches == 0 or num_samples <= 0:
        # Nothing to draw; avoid sampling from an empty range.
        print("No samples available to visualize.")
        return

    # Smallest square grid (never below 3x3) that can hold every sample.
    grid = max(3, int(np.ceil(np.sqrt(num_samples))))

    plt.figure(figsize=(12, 12))
    sample_indices = np.random.choice(range(num_batches), num_samples)

    for i, idx in enumerate(sample_indices):
        X, y = generator[idx]
        img_idx = np.random.randint(0, len(X))
        img = X[img_idx]
        true_label = classes[np.argmax(y[img_idx])]
        # The model expects a batch axis, so wrap the single image.
        pred = model.predict(np.expand_dims(img, axis=0), verbose=0)
        pred_label = classes[np.argmax(pred)]

        plt.subplot(grid, grid, i + 1)
        plt.imshow(img)
        title_color = 'green' if true_label == pred_label else 'red'
        plt.title(f"True: {true_label}\nPred: {pred_label}", color=title_color)
        plt.axis('off')

    plt.tight_layout()
    plt.show()

# Show a grid of randomly chosen test images with true vs. predicted labels.
visualize_sample_predictions(test_generator, model, class_names)

# Save the model
# NOTE(review): the .h5 extension selects the legacy HDF5 format; newer Keras
# releases recommend the native `.keras` format — confirm which one downstream
# consumers expect before changing it.
model.save('brain_tumor_classifier_light.h5')
print("Model saved as brain_tumor_classifier_light.h5")