### Brain tumor detection: CNN baseline

In [None]:
import os
import urllib.request
import zipfile

import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
from tensorflow.keras.optimizers import SGD

In [None]:
# Download and extract the dataset (distributed as a zip archive).
url = "https://github.com/onesinus/datasets/raw/main/brain_tumor_detection.zip"
filename = "brain_tumor_detection.zip"
dataset_dir = "/content/dataset"  # Directory to store extracted data

# Create the directory if it doesn't exist
os.makedirs(dataset_dir, exist_ok=True)

# The archive is deleted once it has been extracted, so its absence alone does
# not mean "download again". The data counts as ready when dataset_dir already
# has content AND no archive is left over from an interrupted run.
dataset_ready = bool(os.listdir(dataset_dir)) and not os.path.exists(filename)

if dataset_ready:
  print("Dataset already extracted, skipping download.")
else:
  if not os.path.exists(filename):
    print("Downloading dataset...")
    # Download under a temporary name and rename afterwards, so an interrupted
    # transfer can never be mistaken for a complete archive on the next run.
    partial_filename = filename + ".part"
    urllib.request.urlretrieve(url, partial_filename)
    os.replace(partial_filename, filename)
    print("Download complete.")

  # Extract the downloaded zip file
  print("Extracting dataset...")
  with zipfile.ZipFile(filename, 'r') as zip_ref:
    zip_ref.extractall(dataset_dir)
  os.remove(filename)  # Remove downloaded zip
  print("Extraction complete.")

# Define label mapping: class token as written in the label files -> index in
# the label vector. Modify if class labels are encoded differently.
label_map = {'0': 0, '1': 1, '2': 2}

# Image dimensions (adjust if images have different sizes)
img_width, img_height = 224, 224  # Example size, adjust based on your data

# Data augmentation (optional, experiment to see if it improves performance).
# No `rescale` here: load_data() already divides pixel values by 255, and
# rescaling a second time would squash the inputs into the range [0, 1/255].
train_datagen = ImageDataGenerator(shear_range=0.2, zoom_range=0.2, horizontal_flip=True)
valid_datagen = ImageDataGenerator()

Downloading dataset...
Download complete.
Extracting dataset...
Extraction complete.


In [None]:
def load_data(data_dir, images_dir_name, labels_dir_name, label_map):
  """Load images and their multi-hot label vectors from parallel directories.

  Every ``.jpg`` file in ``data_dir/images_dir_name`` is resized to the
  notebook-level ``(img_width, img_height)`` target size, converted to an
  array and divided by 255. Its labels come from the ``.txt`` file with the
  same base name in ``data_dir/labels_dir_name``: the first
  whitespace-separated token of each line is looked up in ``label_map`` and
  the matching position of the label vector is set to 1.0. Tokens that are
  not in ``label_map`` are ignored and blank lines are skipped.

  Args:
      data_dir: Root directory of the extracted dataset.
      images_dir_name: Image folder, relative to ``data_dir``.
      labels_dir_name: Label folder, relative to ``data_dir``.
      label_map: Dict mapping a class token (str) to its index in the label
          vector; the vector has ``len(label_map)`` entries.

  Returns:
      A tuple ``(images, labels)`` of NumPy arrays with one entry per image,
      ordered by sorted image filename.

  Raises:
      FileNotFoundError: If a directory or an image's label file is missing.
  """
  images_dir = os.path.join(data_dir, images_dir_name)
  labels_dir = os.path.join(data_dir, labels_dir_name)

  images = []
  labels = []
  # os.listdir() returns entries in arbitrary order; sorting keeps the sample
  # order reproducible from run to run.
  for filename in sorted(os.listdir(images_dir)):
    if not filename.endswith(".jpg"):
      continue

    img_path = os.path.join(images_dir, filename)
    # NOTE(review): Keras documents target_size as (height, width); passing
    # (img_width, img_height) is only harmless while both are equal - confirm
    # before using non-square sizes.
    img = tf.keras.preprocessing.image.load_img(img_path, target_size=(img_width, img_height))
    img_array = tf.keras.preprocessing.image.img_to_array(img) / 255.0  # Normalize pixel values

    # Label file shares the image's base name, with a .txt extension.
    label_path = os.path.join(labels_dir, os.path.splitext(filename)[0] + ".txt")

    # One slot per known class; several slots may be set for a single image.
    multi_hot_labels = np.zeros(len(label_map))
    with open(label_path, 'r') as f:
      for line in f:
        tokens = line.split()
        if not tokens:
          continue  # Blank line: nothing to parse (previously an IndexError).
        label_index = label_map.get(tokens[0])
        if label_index is not None:
          multi_hot_labels[label_index] = 1.0

    images.append(img_array)
    labels.append(multi_hot_labels)

  return np.array(images), np.array(labels)

import time  # NOTE(review): no longer used in this cell; kept only in case other cells rely on `time`

# Load the training, validation and test splits into memory.
# The time.sleep(3) pauses that used to sit between these calls were removed:
# sleeping does not release any memory, it only made the cell 12 seconds slower.
train_images, train_labels = load_data(dataset_dir, 'Brain Tumor Detection/train/images', 'Brain Tumor Detection/train/labels', label_map)
validation_images, validation_labels = load_data(dataset_dir, 'Brain Tumor Detection/valid/images', 'Brain Tumor Detection/valid/labels', label_map)
test_images, test_labels = load_data(dataset_dir, 'Brain Tumor Detection/test/images', 'Brain Tumor Detection/test/labels', label_map)

# Wrap each split in a batch iterator.
BATCH_SIZE = 100
train_generator = train_datagen.flow(train_images, train_labels, batch_size=BATCH_SIZE)
# Evaluation splits are iterated in their stored order, so predictions made
# from these iterators line up with validation_labels / test_labels.
validation_generator = valid_datagen.flow(validation_images, validation_labels, batch_size=BATCH_SIZE, shuffle=False)
test_generator = valid_datagen.flow(test_images, test_labels, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Training split: array shapes first, then the first ten label vectors.
print(f"Shape of X (training images): {train_images.shape}")
print(f"Shape of Y (training labels): {train_labels.shape}")

for i in range(10):
  print(f"Training Label [{i}]: {train_labels[i]}")

# Validation split: array shapes.
print(f"Shape of X (validation images): {validation_images.shape}")
print(f"Shape of Y (validation labels): {validation_labels.shape}")

# Test split: array shapes (loaded the same way as the other two splits).
print(f"Shape of X (test images): {test_images.shape}")
print(f"Shape of Y (test labels): {test_labels.shape}")

Shape of X (training images): (6930, 224, 224, 3)
Shape of Y (training labels): (6930, 3)
Training Label [0]: [1. 1. 1.]
Training Label [1]: [0. 1. 0.]
Training Label [2]: [0. 1. 0.]
Training Label [3]: [1. 1. 1.]
Training Label [4]: [1. 1. 1.]
Training Label [5]: [1. 1. 0.]
Training Label [6]: [1. 1. 1.]
Training Label [7]: [0. 1. 1.]
Training Label [8]: [1. 1. 1.]
Training Label [9]: [0. 1. 0.]
Shape of X (validation images): (1980, 224, 224, 3)
Shape of Y (validation labels): (1980, 3)
Shape of X (test images): (990, 224, 224, 3)
Shape of Y (test labels): (990, 3)


In [None]:
def create_model(num_conv_layers, num_classes=3):
  """
  Creates a CNN model with the specified number of convolutional layers.

  The network is a stack of Conv2D + MaxPooling2D pairs (32 filters in the
  first pair, 64 in every later one), followed by Flatten, a 64-unit ReLU
  Dense layer and a sigmoid output layer. The labels in this notebook are
  multi-hot vectors (several classes can be set for one image), so the head
  has one independent sigmoid unit per class and is trained with binary
  cross-entropy.

  Args:
      num_conv_layers: The number of convolutional layers to use. The first
          convolutional layer is always added, so values below 1 still
          produce a one-layer network.
      num_classes: Number of output units; must equal the width of the label
          vectors fed to the model (3 for this dataset).

  Returns:
      A compiled TensorFlow Keras model.
  """
  model = Sequential()

  # First convolutional layer
  model.add(Conv2D(32, (3, 3), activation='relu', input_shape=(img_width, img_height, 3)))
  model.add(MaxPooling2D((2, 2)))

  # Additional convolutional layers (if specified)
  for _ in range(1, num_conv_layers):
    model.add(Conv2D(64, (3, 3), activation='relu'))
    model.add(MaxPooling2D((2, 2)))

  # Flatten the output
  model.add(Flatten())

  # Fully connected layers
  model.add(Dense(64, activation='relu'))
  # One sigmoid unit per class. The previous single-unit head could not match
  # the 3-wide label vectors. Softmax would be wrong here because the classes
  # are not mutually exclusive.
  model.add(Dense(num_classes, activation='sigmoid'))

  # Compile the model. The metric is spelled out as per-label binary accuracy
  # (kept under the name 'accuracy') so it is not resolved to an argmax-based
  # categorical accuracy for the multi-unit output.
  model.compile(
      loss='binary_crossentropy',
      optimizer=SGD(learning_rate=0.01),
      metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy')],
  )

  return model

# Fit and score one network per depth, from 1 up to 4 convolutional layers.
for num_conv_layers in range(1, 5):
  model = create_model(num_conv_layers)

  # A single epoch keeps the sweep quick; raise `epochs` (e.g. to 50) for a
  # full training run.
  fit_options = dict(
      steps_per_epoch=len(train_generator),
      epochs=1,
      validation_data=validation_generator,
      validation_steps=len(validation_generator),
  )
  model.fit(train_generator, **fit_options)

  # Score the freshly trained model on the held-out test batches.
  test_loss, test_acc = model.evaluate(test_generator)
  print(f"Model with {num_conv_layers} convolutional layers: Test Accuracy = {test_acc:.4f}")



KeyboardInterrupt: 

Referensi yang harus dicoba:
https://www.kaggle.com/code/banddaniel/brain-tumor-detection-w-keras-yolo-v8

In [None]:
# # Import necessary libraries

# import os
# import zipfile
# import urllib.request
# import matplotlib.pyplot as plt

# import numpy as np
# import tensorflow as tf
# from tensorflow.keras.utils import to_categorical
# from tensorflow.keras.preprocessing.image import ImageDataGenerator
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense
# from tensorflow.keras.preprocessing.image import load_img, img_to_array

In [None]:
# # Download and extract the dataset
# url = "https://github.com/onesinus/datasets/raw/main/brain_tumor_detection.zip"
# filename = "brain_tumor_detection.zip"
# urllib.request.urlretrieve(url, filename)
# with zipfile.ZipFile(filename, 'r') as zip_ref:
#     zip_ref.extractall("dataset")

In [None]:
# def count_images_and_classes(data_dir):
#     num_images = sum(len(files) for _, _, files in os.walk(os.path.join(data_dir, 'images')))
#     unique_classes = set()
#     label_dir = os.path.join(data_dir, 'labels')
#     for label_file in os.listdir(label_dir):
#         label_path = os.path.join(label_dir, label_file)
#         with open(label_path, 'r') as f:
#             labels = f.readlines()
#         if labels:
#             for label in labels:
#                 class_label = int(label.split()[0])
#                 unique_classes.add(class_label)
#     num_classes = len(unique_classes)
#     return num_images, num_classes

# train_data_dir = 'dataset/Brain Tumor Detection/train'
# test_data_dir = 'dataset/Brain Tumor Detection/test'
# valid_data_dir = 'dataset/Brain Tumor Detection/valid'

# train_image_count, train_num_classes = count_images_and_classes(train_data_dir)
# test_image_count, test_num_classes = count_images_and_classes(test_data_dir)
# valid_image_count, valid_num_classes = count_images_and_classes(valid_data_dir)

# print(f'Found {train_image_count} images belonging to {train_num_classes} classes in the training directory.')
# print(f'Found {test_image_count} images belonging to {test_num_classes} classes in the test directory.')
# print(f'Found {valid_image_count} images belonging to {valid_num_classes} classes in the validation directory.')

# Hard-coded summary lines; presumably the recorded output of the
# commented-out counting code above - TODO confirm the numbers are current.
print("\n".join([
    "Found 6930 images belonging to 3 classes in the training directory.",
    "Found 990 images belonging to 3 classes in the test directory.",
    "Found 1980 images belonging to 3 classes in the validation directory.",
]))

In [None]:
# from keras.utils import to_categorical

# def custom_data_generator(data_dir, batch_size=32, target_size=(150, 150), max_objects=3, num_classes=3):
#     while True:
#         # Get list of image files
#         image_files = os.listdir(os.path.join(data_dir, 'images'))
#         # np.random.shuffle(image_files)

#         # Iterate over batches
#         for i in range(0, len(image_files), batch_size):
#             batch_image_files = image_files[i:i+batch_size]
#             batch_images = []
#             batch_labels = []

#             # Load images and corresponding labels
#             for image_file in batch_image_files:
#                 # Load image
#                 image_path = os.path.join(data_dir, 'images', image_file)
#                 image = load_img(image_path, target_size=target_size)
#                 image_array = img_to_array(image) / 255.0  # Normalize pixel values
#                 batch_images.append(image_array)

#                 # Load label
#                 label_file = image_file.replace('.jpg', '.txt')
#                 label_path = os.path.join(data_dir, 'labels', label_file)
#                 with open(label_path, 'r') as f:
#                     label_lines = f.readlines()

#                 if label_lines:  # Check if label file is not empty
#                     labels = []
#                     for line in label_lines:
#                         label_data = line.strip().split(' ')
#                         # Update labels to reflect multiple classes
#                         class_id = int(label_data[0])  # Extract class ID
#                         labels.append(class_id)

#                     # Ensure labels are within the range of num_classes
#                     labels = [min(class_id, num_classes - 1) for class_id in labels]

#                     # Perform one-hot encoding
#                     one_hot_labels = to_categorical(labels, num_classes=num_classes)

#                     # Convert one-hot encoded labels to one-dimensional array
#                     one_dim_labels = np.argmax(one_hot_labels, axis=1)

#                     # Pad labels with zeros to ensure fixed length
#                     pad_length = max_objects - len(one_dim_labels)
#                     one_dim_labels = np.pad(one_dim_labels, (0, pad_length), mode='constant')
#                 else:
#                     # If label file is empty, add placeholders
#                     one_dim_labels = np.zeros((max_objects,), dtype=int)

#                 batch_labels.append(one_dim_labels)

#             yield np.array(batch_images), np.array(batch_labels)

# train_data_dir = 'dataset/Brain Tumor Detection/train'
# train_generator = custom_data_generator(train_data_dir, batch_size=100, target_size=(150, 150), num_classes=3)

# test_data_dir = 'dataset/Brain Tumor Detection/test'
# test_generator = custom_data_generator(test_data_dir, batch_size=100, target_size=(150, 150), num_classes=3)

# valid_data_dir = 'dataset/Brain Tumor Detection/valid'
# valid_generator = custom_data_generator(valid_data_dir, batch_size=100, target_size=(150, 150), num_classes=3)

# # Iterate over batches and print labels
# for i, (x_batch, y_batch) in enumerate(train_generator):
#     # Display the first image in the batch
#     plt.imshow(x_batch[0])
#     plt.axis('off')  # Turn off axis labels
#     plt.show()

#     print(f"Batch {i + 1} - Labels:")
#     print(y_batch[0])

#     if i == 5:  # Print labels for the first 5 batches for demonstration
#         break

In [None]:
# # Define CNN models
# models_configurations = [
#     # Configuration a
#     [
#         Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
#         MaxPooling2D((2, 2)),
#         Flatten(),
#         Dense(128, activation='relu'),
#         Dense(3, activation='softmax')
#     ],
#     # Configuration b
#     [
#         Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
#         Conv2D(64, (3, 3), activation='relu'),
#         MaxPooling2D((2, 2)),
#         Flatten(),
#         Dense(128, activation='relu'),
#         Dense(3, activation='softmax')
#     ],
#     # Configuration c
#     [
#         Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
#         Conv2D(64, (3, 3), activation='relu'),
#         MaxPooling2D((2, 2)),
#         Conv2D(128, (3, 3), activation='relu'),
#         MaxPooling2D((2, 2)),
#         Flatten(),
#         Dense(128, activation='relu'),
#         Dense(3, activation='softmax')
#     ],
#     # Configuration d
#     [
#         Conv2D(32, (3, 3), activation='relu', input_shape=(150, 150, 3)),
#         Conv2D(64, (3, 3), activation='relu'),
#         MaxPooling2D((2, 2)),
#         Conv2D(128, (3, 3), activation='relu'),
#         Conv2D(256, (3, 3), activation='relu'),
#         MaxPooling2D((2, 2)),
#         Flatten(),
#         Dense(128, activation='relu'),
#         Dense(3, activation='softmax')
#     ]
# ]

In [None]:
# # Iterate over each model configuration
# for i, layers_config in enumerate(models_configurations):
#     print(f"Training Model {chr(97 + i)}:")

#     # Define model
#     model = Sequential(layers_config)

#     # Compile model
#     model.compile(optimizer='SGD', loss='categorical_crossentropy', metrics=['accuracy'])

#     # Train model using generator
#     history = model.fit(train_generator, epochs=1, steps_per_epoch=100,
#                         validation_data=valid_generator, validation_steps=50)

#     # Evaluate model
#     test_loss, test_acc = model.evaluate(test_generator, steps=50)
#     print(f"Test Accuracy for Model {chr(97 + i)}:", test_acc)

In [None]:
# # Iterate over each model configuration
# for i, layers_config in enumerate(models_configurations):
#     print(f"Training Model {chr(97 + i)}:")

#     # Define model
#     model = Sequential(layers_config)

#     # Compile model
#     model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

#     # Train model using generator
#     history = model.fit(train_generator, epochs=5, steps_per_epoch=100,
#                         validation_data=valid_generator, validation_steps=50)

#     # Evaluate model
#     test_loss, test_acc = model.evaluate(test_generator, steps=50)
#     print(f"Test Accuracy for Model {chr(97 + i)}:", test_acc)