In [1]:
# Notebook-wide configuration shared by the data-loading and training cells
IMAGE_SIZE = (150, 150)  # target_size every image is resized to by the generators
BATCH_SIZE = 32          # batch_size handed to flow_from_directory
SHUFFLE = True           # shuffle flag handed to flow_from_directory
EPOCHS = 20              # epochs argument of fit(); early stopping may end training sooner

# Data Loading

In [2]:
# Silence every Python warning for the remainder of the session
import warnings
warnings.filterwarnings(action="ignore")

In [3]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Random geometric augmentations, applied to training images only
augmentation_options = dict(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    fill_mode='nearest',
)

# Training pipeline: multiply pixel values by 1/255, then augment
train_datagen = ImageDataGenerator(rescale=1./255, **augmentation_options)

# Testing pipeline: same rescaling, no augmentation
test_datagen = ImageDataGenerator(rescale=1./255)

2024-06-29 06:53:03.812682: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-29 06:53:03.812805: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-29 06:53:03.946993: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


In [4]:
# Data paths
train_dir = '/kaggle/input/brain-tumor-mri-dataset/Training'
test_dir = '/kaggle/input/brain-tumor-mri-dataset/Testing'

# Training data: augmented batches, shuffled according to SHUFFLE
train_generator = train_datagen.flow_from_directory(
    train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=SHUFFLE
)

# Testing data, used as the validation set during training.
# Kept in directory order (shuffle=False): shuffling does not change the
# validation metrics, but a fixed order keeps batches reproducible and keeps
# predictions aligned with `validation_generator.classes` for later evaluation.
validation_generator = test_datagen.flow_from_directory(
    test_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical',
    shuffle=False
)

Found 5712 images belonging to 4 classes.
Found 1311 images belonging to 4 classes.


In [5]:
import numpy as np
from tensorflow.keras.utils import Sequence

class MultiInputDataGenerator(Sequence):
    """Adapt a single-input batch generator to a two-input model.

    Every batch ``(batch_x, batch_y)`` produced by the wrapped generator is
    re-emitted as ``((batch_x, batch_x), batch_y)`` so that the same image
    batch is fed to both inputs of the model.

    Args:
        generator: Batch source supporting ``len()``, integer indexing that
            returns ``(batch_x, batch_y)``, and ``on_epoch_end()``.
        batch_size: Stored on the instance for reference only; the actual
            batching is done by the wrapped generator.
        **kwargs: Forwarded unchanged to the ``Sequence`` base constructor.
    """

    def __init__(self, generator, batch_size, **kwargs):
        # Initialise the Keras base class first so any state it manages is
        # set up; newer Keras releases warn when this call is missing.
        super().__init__(**kwargs)
        self.generator = generator
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of batches per epoch of the wrapped generator."""
        return len(self.generator)

    def __getitem__(self, index):
        """Return batch ``index`` with the inputs duplicated for both branches."""
        batch_x, batch_y = self.generator[index]
        return (batch_x, batch_x), batch_y

    def on_epoch_end(self):
        """Delegate the end-of-epoch hook to the wrapped generator."""
        self.generator.on_epoch_end()

# Wrap both directory iterators so each batch is delivered to the two model inputs
train_multi_input_gen, val_multi_input_gen = (
    MultiInputDataGenerator(source, batch_size=BATCH_SIZE)
    for source in (train_generator, validation_generator)
)

# Data Analysis

In [6]:
import os
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Function to count files in each class directory
def count_files(directory):
    """Count the entries inside each immediate sub-directory of ``directory``.

    Args:
        directory: Path whose sub-directories are treated as classes.

    Returns:
        dict mapping each sub-directory name to the number of entries it
        contains, with keys inserted in sorted name order.
    """
    class_counts = {}
    # Sorted so that different dataset splits list their classes in the same order
    for class_dir in sorted(os.listdir(directory)):
        class_path = os.path.join(directory, class_dir)
        # Stray top-level files (e.g. a README or .DS_Store) are not classes;
        # calling os.listdir on them would raise NotADirectoryError.
        if not os.path.isdir(class_path):
            continue
        class_counts[class_dir] = len(os.listdir(class_path))
    return class_counts

# Tally the files per class for both dataset splits
train_counts, test_counts = count_files(train_dir), count_files(test_dir)

# One pie chart per split, laid out side by side
fig = make_subplots(
    rows=1,
    cols=2,
    specs=[[{'type':'domain'}, {'type':'domain'}]],
    subplot_titles=['Training Data Class Distribution', 'Testing Data Class Distribution'],
)

for column, (split_name, counts) in enumerate(
    (("Training", train_counts), ("Testing", test_counts)), start=1
):
    pie = go.Pie(labels=list(counts.keys()), values=list(counts.values()), name=split_name)
    fig.add_trace(pie, 1, column)

fig.show()

In [7]:
import pandas as pd
from PIL import Image
import matplotlib.pyplot as plt

# Function to get image sizes
def get_image_sizes(directory):
    """Collect the ``size`` of every image stored under ``directory``.

    ``directory`` is expected to contain one sub-directory per class, each
    holding image files that ``Image.open`` can read.

    Args:
        directory: Root path containing the class sub-directories.

    Returns:
        list with one ``img.size`` value per image, visited in sorted
        class-then-file-name order.
    """
    sizes = []
    for class_dir in sorted(os.listdir(directory)):
        class_path = os.path.join(directory, class_dir)
        # Stray top-level files are not class folders; listing them would
        # raise NotADirectoryError, so skip them.
        if not os.path.isdir(class_path):
            continue
        for img_name in sorted(os.listdir(class_path)):
            img_path = os.path.join(class_path, img_name)
            # The context manager closes the file as soon as the size is read
            with Image.open(img_path) as img:
                sizes.append(img.size)
    return sizes

# Measure every image in the training split, then in the testing split
train_sizes, test_sizes = (
    get_image_sizes(split_dir) for split_dir in (train_dir, test_dir)
)

In [8]:
import plotly.express as px

# Tabulate the collected sizes so Plotly can address them by column name
size_columns = ['Width', 'Height']
train_sizes_df = pd.DataFrame(train_sizes, columns=size_columns)
test_sizes_df = pd.DataFrame(test_sizes, columns=size_columns)

# Width-vs-height scatter for the training split
fig_train = px.scatter(
    train_sizes_df, x='Width', y='Height', title='Training Data Image Sizes'
).update_layout(width=800)
fig_train.show()

# Width-vs-height scatter for the testing split
fig_test = px.scatter(
    test_sizes_df, x='Width', y='Height', title='Testing Data Image Sizes'
).update_layout(width=800)
fig_test.show()

In [None]:
# Function to calculate image brightness
def calculate_brightness(image):
    """Return the mean of all pixel values of ``image``.

    The image is converted to a NumPy array and averaged over every element
    (all rows, columns and, when present, channels).
    """
    pixels = np.array(image)
    return pixels.mean()

# Analyze brightness: one mean pixel value per training image
train_brightness = []
for class_dir in os.listdir(train_dir):
    class_path = os.path.join(train_dir, class_dir)
    for img_name in os.listdir(class_path):
        img_path = os.path.join(class_path, img_name)
        with Image.open(img_path) as img:
            train_brightness.append(calculate_brightness(img))

# Plot brightness distribution.
# matplotlib is used for the histogram because seaborn is not imported
# anywhere in this notebook, so an `sns.` call would raise NameError on a
# fresh kernel. Bin edges are chosen automatically from the data.
plt.figure(figsize=(10, 6))
plt.hist(train_brightness, bins='auto')
plt.title('Brightness Distribution in Training Data')
plt.xlabel('Brightness')
plt.ylabel('Frequency')
plt.show()

In [None]:
# Check normalization: pull one batch and inspect its shape and pixel range
batch = next(train_generator)
print(f'Batch shape: {batch[0].shape}')
print(f'Min pixel value: {batch[0].min()}')
print(f'Max pixel value: {batch[0].max()}')

# Ensure labels are encoded correctly
print(f'Class indices: {train_generator.class_indices}')

# Get file paths and labels (the label is the name of the class sub-directory)
train_file_paths = []
train_labels = []
for class_dir in os.listdir(train_dir):
    class_path = os.path.join(train_dir, class_dir)
    for img_name in os.listdir(class_path):
        train_file_paths.append(os.path.join(class_path, img_name))
        train_labels.append(class_dir)

# Class weights
from sklearn.utils.class_weight import compute_class_weight

class_names = np.unique(train_labels)
balanced_weights = compute_class_weight(
    'balanced',
    classes=class_names,
    y=train_labels
)
# Key every weight by the integer index the generator assigned to that class
# name, rather than by its position in np.unique's output: fit() looks the
# weights up by encoded label, so the mapping must follow class_indices.
class_weights = {
    train_generator.class_indices[str(name)]: float(weight)
    for name, weight in zip(class_names, balanced_weights)
}
print(f'Class weights: {class_weights}')

# Modeling

## a) Model Designing

In [None]:
from tensorflow.keras.applications import VGG16, InceptionV3
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Dropout, Flatten, concatenate, BatchNormalization, GlobalAveragePooling2D

def create_pretrained_model_1(input_shape):
    """Build the VGG16 feature branch of the hybrid model.

    Args:
        input_shape: Shape tuple forwarded to ``VGG16(input_shape=...)``.

    Returns:
        Tuple ``(input_tensor, features)``: the backbone's input and the
        output of the pooling -> Dense(256, relu) -> Dropout(0.5) head
        stacked on top of the backbone.
    """
    backbone = VGG16(weights='imagenet', include_top=False, input_shape=input_shape)

    # The first 10 layers stay trainable; every later layer is frozen.
    # NOTE(review): fine-tuning usually freezes the early layers and trains
    # the late ones - confirm that this split is intended.
    for position, layer in enumerate(backbone.layers):
        layer.trainable = position < 10

    features = GlobalAveragePooling2D()(backbone.output)
    features = Dense(256, activation='relu')(features)
    features = Dropout(0.5)(features)

    return backbone.input, features

def create_pretrained_model_2(input_shape):
    """Build the InceptionV3 feature branch of the hybrid model.

    Args:
        input_shape: Shape tuple forwarded to ``InceptionV3(input_shape=...)``.

    Returns:
        Tuple ``(input_tensor, features)``: the backbone's input and the
        output of the pooling -> Dense(256, relu) -> Dropout(0.5) head
        stacked on top of the backbone.
    """
    backbone = InceptionV3(weights='imagenet', include_top=False, input_shape=input_shape)

    # The first 10 layers stay trainable; every later layer is frozen.
    # NOTE(review): fine-tuning usually freezes the early layers and trains
    # the late ones - confirm that this split is intended.
    for position, layer in enumerate(backbone.layers):
        layer.trainable = position < 10

    pooled = GlobalAveragePooling2D()(backbone.output)
    hidden = Dense(256, activation='relu')(pooled)
    features = Dropout(0.5)(hidden)
    return backbone.input, features

def create_hybrid_model(input_shape, num_classes=4):
    """Assemble the two-branch classifier from both pretrained backbones.

    The feature vectors returned by ``create_pretrained_model_1`` and
    ``create_pretrained_model_2`` are concatenated and passed through a
    Dense(256, relu) -> Dropout(0.5) -> Dense(num_classes, softmax) head.

    Args:
        input_shape: Shape tuple handed to both branch builders.
        num_classes: Number of units in the final softmax layer. Defaults to
            4, the value that was previously hard-coded.

    Returns:
        A ``Model`` taking the two branch inputs as a list (branch 1 first,
        then branch 2) and producing the softmax output.
    """
    input_1, output_1 = create_pretrained_model_1(input_shape)
    input_2, output_2 = create_pretrained_model_2(input_shape)

    # Fuse the per-branch feature vectors before classification
    combined = concatenate([output_1, output_2])
    z = Dense(256, activation='relu')(combined)
    z = Dropout(0.5)(z)
    z = Dense(num_classes, activation='softmax')(z)

    return Model(inputs=[input_1, input_2], outputs=z)

# Derive the model input shape from IMAGE_SIZE (the target_size used by the
# data generators) plus 3 channels, so the model and the generators cannot
# silently drift apart when IMAGE_SIZE is changed.
input_shape = (*IMAGE_SIZE, 3)
hybrid_model = create_hybrid_model(input_shape)
hybrid_model.summary()

In [None]:
from tensorflow.keras.utils import plot_model
plot_model(hybrid_model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

# Display the plot.
# IPython's Image class is imported under an alias so it does not rebind the
# name `Image`, which this notebook already uses for PIL's Image module
# (`Image.open` in the data-analysis cells would break if they were re-run).
from IPython.display import Image as IPyImage
IPyImage(filename='model_plot.png')

In [None]:
# Configure training: Adam optimizer, categorical cross-entropy loss, accuracy metric
hybrid_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

## b) Model Training

In [None]:
# Callbacks
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

# Write best_model.keras only when the validation loss improves
checkpoint = ModelCheckpoint(
    'best_model.keras',
    monitor='val_loss',
    mode='min',
    save_best_only=True,
)

# Stop after 3 epochs without a better validation loss and restore the best weights
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

callbacks = [checkpoint, early_stopping]

In [None]:
# Train the model on the dual-input generators, validating on the testing
# split and weighting the loss per class
history = hybrid_model.fit(
    train_multi_input_gen,
    epochs=EPOCHS,
    validation_data=val_multi_input_gen,
    callbacks=callbacks,
    class_weight=class_weights,
)

In [None]:
# Persist the model as it stands after training (a separate file from the
# best_model.keras checkpoint written during fit)
final_model_path = "trained_model.keras"
hybrid_model.save(final_model_path)