# INSTALL LIBRARY

In [2]:
!pip install python-dotenv xgboost scikit-learn opencv-python-headless -q

# 1. MOUNT GOOGLE DRIVE

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
PROJECT_DIR = '/content/drive/MyDrive/Python/Brain-Tumor-Detection'
os.makedirs(PROJECT_DIR, exist_ok=True)
os.chdir(PROJECT_DIR)

# 2. IMPORT LIBRARY

In [None]:
import numpy as np
import pandas as pd
import cv2
from dotenv import load_dotenv
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from xgboost import XGBClassifier
from tensorflow.keras.applications import VGG16, InceptionV3, ResNet101, DenseNet201
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import zipfile
import warnings
warnings.filterwarnings('ignore')

# 3. LOAD FILE .env

In [None]:
env_path = os.path.join(PROJECT_DIR, '.env')
load_dotenv(env_path)
kaggle_username = os.getenv('KAGGLE_USERNAME')
kaggle_key = os.getenv('KAGGLE_KEY')

# 4. KONFIGURASI KAGGLE API

In [None]:
os.makedirs('/root/.kaggle', exist_ok=True)
import json
kaggle_json = {
    "username": kaggle_username,
    "key": kaggle_key
}

kaggle_json_path = '/root/.kaggle/kaggle.json'
with open(kaggle_json_path, 'w') as f:
    json.dump(kaggle_json, f)

os.chmod(kaggle_json_path, 0o600)

# 5. BUAT STRUKTUR FOLDER PROJECT

In [None]:
DATASETS_DIR = os.path.join(PROJECT_DIR, 'datasets')
RESULTS_DIR = os.path.join(PROJECT_DIR, 'results')
MODELS_DIR = os.path.join(PROJECT_DIR, 'models')

os.makedirs(DATASETS_DIR, exist_ok=True)
os.makedirs(RESULTS_DIR, exist_ok=True)
os.makedirs(MODELS_DIR, exist_ok=True)

print(f"Struktur folder:")
print(f"- Datasets: {DATASETS_DIR}")
print(f"- Results:  {RESULTS_DIR}")
print(f"- Models:   {MODELS_DIR}")

Struktur folder:
- Datasets: /content/drive/MyDrive/Python/Brain-Tumor-Detection/datasets
- Results:  /content/drive/MyDrive/Python/Brain-Tumor-Detection/results
- Models:   /content/drive/MyDrive/Python/Brain-Tumor-Detection/models


# 6. DOWNLOAD DATASETS

In [None]:
def download_datasets():
    dataset1_zip = os.path.join(DATASETS_DIR, 'brain-mri-images-for-brain-tumor-detection.zip')
    dataset2_zip = os.path.join(DATASETS_DIR, 'brain-tumor-detection.zip')

    dataset1_dir = os.path.join(DATASETS_DIR, 'dataset1')
    dataset2_dir = os.path.join(DATASETS_DIR, 'dataset2')

    if not os.path.exists(dataset1_dir):
        print("Download Dataset 1: Navoneel Brain Tumor...")
        os.system(f'kaggle datasets download -d navoneel/brain-mri-images-for-brain-tumor-detection -p {DATASETS_DIR}')

        with zipfile.ZipFile(dataset1_zip, 'r') as zip_ref:
            zip_ref.extractall(dataset1_dir)
        print("Dataset 1 berhasil didownload dan diekstrak")
    else:
        print("Dataset 1 sudah ada")

    if not os.path.exists(dataset2_dir):
        print("Download Dataset 2: Br35H Brain Tumor...")
        os.system(f'kaggle datasets download -d ahmedhamada0/brain-tumor-detection -p {DATASETS_DIR}')

        with zipfile.ZipFile(dataset2_zip, 'r') as zip_ref:
            zip_ref.extractall(dataset2_dir)
        print("Dataset 2 berhasil didownload dan diekstrak")
    else:
        print("Dataset 2 sudah ada")

download_datasets()

Dataset 1 sudah ada
Dataset 2 sudah ada


# 7. FUNCTION PREPROCESSING

In [None]:
def crop_brain_region(image):
    gray = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
    _, thresh = cv2.threshold(gray, 10, 255, cv2.THRESH_BINARY)

    contours, _ = cv2.findContours(thresh, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        largest_contour = max(contours, key=cv2.contourArea)
        x, y, w, h = cv2.boundingRect(largest_contour)
        cropped = image[y:y+h, x:x+w]
        return cropped
    return image

def apply_median_filter(image):
    return cv2.medianBlur(image, 5)

def preprocess_image(img_path, target_size=(224, 224), apply_filter=True):
    try:
        img = cv2.imread(img_path)
        if img is None:
            return None
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

        img = crop_brain_region(img)

        img = cv2.resize(img, target_size)

        if apply_filter:
            img = apply_median_filter(img)

        return img
    except Exception as e:
        print(f"Error processing {img_path}: {e}")
        return None

# 8. DATA AUGMENTATION

In [None]:
def create_augmented_data(images, labels, augmentation_factor=3):
  datagen = ImageDataGenerator(
        horizontal_flip=True,
        vertical_flip=True,
        rotation_range=20,
        width_shift_range=0.1,
        height_shift_range=0.1
    )

  augmented_images = []
  augmented_labels = []

  for img, label in zip(images, labels):
        augmented_images.append(img)
        augmented_labels.append(label)

        img_reshaped = img.reshape((1,) + img.shape)

        count = 0
        for batch in datagen.flow(img_reshaped, batch_size=1):
            augmented_images.append(batch[0])
            augmented_labels.append(label)
            count += 1
            if count >= augmentation_factor:
                break

  return np.array(augmented_images), np.array(augmented_labels)

def load_dataset(dataset_path, dataset_type='dataset1'):
    images = []
    labels = []

    if dataset_type == 'dataset1':
        yes_path = os.path.join(dataset_path, 'yes')
        no_path = os.path.join(dataset_path, 'no')

        if os.path.exists(yes_path):
            for img_file in os.listdir(yes_path):
                if img_file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    img_path = os.path.join(yes_path, img_file)
                    img = preprocess_image(img_path, target_size=(224, 224))
                    if img is not None:
                        images.append(img)
                        labels.append(1)

        if os.path.exists(no_path):
            for img_file in os.listdir(no_path):
                if img_file.lower().endswith(('.jpg', '.jpeg', '.png')):
                    img_path = os.path.join(no_path, img_file)
                    img = preprocess_image(img_path, target_size=(224, 224))
                    if img is not None:
                        images.append(img)
                        labels.append(0)

    elif dataset_type == 'dataset2':
        yes_path = os.path.join(dataset_path, 'yes')
        no_path = os.path.join(dataset_path, 'no')

        if os.path.exists(yes_path):
            yes_files = [f for f in os.listdir(yes_path) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
            for img_file in yes_files[:1500]:
                img_path = os.path.join(yes_path, img_file)
                img = preprocess_image(img_path, target_size=(224, 224))
                if img is not None:
                    images.append(img)
                    labels.append(1)

        if os.path.exists(no_path):
            no_files = [f for f in os.listdir(no_path) if f.lower().endswith(('.jpg', '.jpeg', '.png'))]
            for img_file in no_files[:1500]:
                img_path = os.path.join(no_path, img_file)
                img = preprocess_image(img_path, target_size=(224, 224))
                if img is not None:
                    images.append(img)
                    labels.append(0)

    return np.array(images), np.array(labels)

def extract_features_vgg16(images):
    base_model = VGG16(weights='imagenet', include_top=False, pooling='avg', input_shape=(224, 224, 3))
    features = base_model.predict(images, batch_size=32, verbose=0)
    return features

def extract_features_inceptionv3(images):
    images_resized = np.array([cv2.resize(img, (299, 299)) for img in images])
    base_model = InceptionV3(weights='imagenet', include_top=False, pooling='avg', input_shape=(299, 299, 3))
    features = base_model.predict(images_resized, batch_size=32, verbose=0)
    return features

def extract_features_resnet101(images):
    base_model = ResNet101(weights='imagenet', include_top=False, pooling='avg', input_shape=(224, 224, 3))
    features = base_model.predict(images, batch_size=32, verbose=0)
    return features

def extract_features_densenet201(images):
    base_model = DenseNet201(weights='imagenet', include_top=False, pooling='avg', input_shape=(224, 224, 3))
    features = base_model.predict(images, batch_size=32, verbose=0)
    return features

def extract_all_features(images):
    feat_vgg = extract_features_vgg16(images)
    feat_inc = extract_features_inceptionv3(images)
    feat_res = extract_features_resnet101(images)
    feat_den = extract_features_densenet201(images)

    return feat_vgg, feat_inc, feat_res, feat_den


# 9. PENGGABUNGAN FEATURE

In [None]:
def concatenate_features(feat_vgg, feat_inc, feat_res, feat_den=None):
    if feat_den is None:
        concatenated = np.concatenate([feat_vgg, feat_inc, feat_res], axis=1)
    else:
        concatenated = np.concatenate([feat_vgg, feat_inc, feat_res, feat_den], axis=1)

    return concatenated

# 10. GENETIC ALGORITHM FEATURE SELECTION

In [None]:
def genetic_feature_selection(X_train, y_train, n_features=500, population_size=50, generations=20):
    n_total_features = X_train.shape[1]

    def fitness_function(chromosome):
        selected_features = np.where(chromosome == 1)[0]
        if len(selected_features) == 0:
            return 0

        X_subset = X_train[:, selected_features]
        fitness = 0

        for j in range(X_subset.shape[0]):
            for k in range(X_subset.shape[1]):
                val = X_subset[j, k]
                fitness += np.log(val + 1e-8) * val

        fitness = fitness / X_subset.shape[0]
        return fitness

    population = []
    for _ in range(population_size):
        chromosome = np.zeros(n_total_features, dtype=int)
        selected_idx = np.random.choice(n_total_features, n_features, replace=False)
        chromosome[selected_idx] = 1
        population.append(chromosome)

    for generation in range(generations):
        fitness_scores = [fitness_function(chromo) for chromo in population]

        sorted_indices = np.argsort(fitness_scores)[::-1]
        population = [population[i] for i in sorted_indices]

        elite_size = int(0.1 * population_size)
        new_population = population[:elite_size]

        while len(new_population) < population_size:
            parent1 = population[np.random.randint(0, population_size // 2)]
            parent2 = population[np.random.randint(0, population_size // 2)]

            crossover_point = np.random.randint(1, n_total_features)
            child = np.concatenate([parent1[:crossover_point], parent2[crossover_point:]])

            mutation_rate = 0.01
            for i in range(len(child)):
                if np.random.random() < mutation_rate:
                    child[i] = 1 - child[i]

            new_population.append(child)

        population = new_population

    best_chromosome = population[0]
    selected_indices = np.where(best_chromosome == 1)[0]

    if len(selected_indices) > n_features:
        selected_indices = selected_indices[:n_features]
    elif len(selected_indices) < n_features:
        remaining = n_features - len(selected_indices)
        unselected = np.where(best_chromosome == 0)[0]
        additional = np.random.choice(unselected, remaining, replace=False)
        selected_indices = np.concatenate([selected_indices, additional])

    print(f"{len(selected_indices)} features terpilih dari {n_total_features} features")
    return selected_indices

# 11. TRAINING DAN EVALUASI

In [None]:
def train_and_evaluate(X_train, X_test, y_train, y_test, classifier_name='SVM'):
    if classifier_name == 'SVM':
        classifier = SVC(kernel='rbf', C=10.0, gamma='scale', random_state=42)
    elif classifier_name == 'RandomForest':
        classifier = RandomForestClassifier(
            n_estimators=200,
            max_depth=20,
            min_samples_split=2,
            min_samples_leaf=1,
            random_state=42
        )
    elif classifier_name == 'DecisionTree':
        classifier = DecisionTreeClassifier(
            max_depth=20,
            min_samples_split=2,
            min_samples_leaf=1,
            random_state=42
        )
    elif classifier_name == 'XGBoost':
        classifier = XGBClassifier(
            n_estimators=200,
            learning_rate=0.1,
            max_depth=8,
            subsample=0.8,
            colsample_bytree=0.8,
            random_state=42,
            use_label_encoder=False,
            eval_metric='logloss'
        )

    print(f"Training {classifier_name}...")
    classifier.fit(X_train, y_train)

    y_pred = classifier.predict(X_test)

    accuracy = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average='binary')
    recall = recall_score(y_test, y_pred, average='binary')
    f1 = f1_score(y_test, y_pred, average='binary')

    results = {
        'Classifier': classifier_name,
        'Accuracy': accuracy,
        'Precision': precision,
        'Recall': recall,
        'F1-Score': f1
    }

    import pickle
    model_path = os.path.join(MODELS_DIR, f'{classifier_name}_model.pkl')
    with open(model_path, 'wb') as f:
        pickle.dump(classifier, f)
    print(f"Model disimpan di: {model_path}")

    return results, classifier


# 12. MAIN EXPERIMENT

In [None]:
def run_experiment(dataset_choice='dataset1'):
    print(f"{'='*60}")
    print(f"EKSPERIMEN DATASET: {dataset_choice.upper()}")
    print(f"{'='*60}")
    if dataset_choice == 'dataset1':
        dataset_dir = os.path.join(DATASETS_DIR, 'dataset1')
        images, labels = load_dataset(dataset_dir, 'dataset1')
    else:
        dataset_dir = os.path.join(DATASETS_DIR, 'dataset2')
        images, labels = load_dataset(dataset_dir, 'dataset2')

    print(f"\nTotal Dataset: {len(images)} images")
    print(f"- Tumor: {np.sum(labels == 1)}")
    print(f"- Non-tumor: {np.sum(labels == 0)}")

    print("\nMelakukan augmentasi data...")
    images_aug, labels_aug = create_augmented_data(images, labels, augmentation_factor=3)
    print(f"Augmented dataset: {len(images_aug)} images")

    images_aug = images_aug.astype('float32') / 255.0

    X_train, X_test, y_train, y_test = train_test_split(
        images_aug, labels_aug, test_size=0.2, random_state=42, stratify=labels_aug
    )

    print(f"\nData split:")
    print(f"- Training: {len(X_train)} images")
    print(f"- Testing: {len(X_test)} images")

    print("\nEkstraksi Deep Features...")
    feat_vgg_train, feat_inc_train, feat_res_train, feat_den_train = extract_all_features(X_train)
    feat_vgg_test, feat_inc_test, feat_res_test, feat_den_test = extract_all_features(X_test)

    print("\nPenggabungan features dari 3 model (VGG-16, Inception V3, ResNet-101)...")
    X_train_concat = concatenate_features(feat_vgg_train, feat_inc_train, feat_res_train)
    X_test_concat = concatenate_features(feat_vgg_test, feat_inc_test, feat_res_test)

    print(f"Concatenated features shape: {X_train_concat.shape}")

    selected_features = genetic_feature_selection(X_train_concat, y_train, n_features=500)
    X_train_selected = X_train_concat[:, selected_features]
    X_test_selected = X_test_concat[:, selected_features]

    print(f"\nSelected features shape: {X_train_selected.shape}")
    features_path = os.path.join(RESULTS_DIR, f'selected_features_{dataset_choice}.npy')
    np.save(features_path, selected_features)

    print("\nTraining dan Evaluasi Model...")
    classifiers = ['SVM', 'RandomForest', 'DecisionTree', 'XGBoost']
    all_results = []
    for clf_name in classifiers:
        results, model = train_and_evaluate(X_train_selected, X_test_selected, y_train, y_test, clf_name)
        all_results.append(results)
        print(f"{clf_name} Results:")
        print(f"Accuracy:  {results['Accuracy']:.4f} ({results['Accuracy']*100:.2f}%)")
        print(f"Precision: {results['Precision']:.4f}")
        print(f"Recall:    {results['Recall']:.4f}")
        print(f"F1-Score:  {results['F1-Score']:.4f}\n")

    results_df = pd.DataFrame(all_results)
    print(f"\n{'='*60}")
    print("HASIL PERFORMA SETIAP MODEL")
    print(f"{'='*60}")
    print(results_df.round(4).to_string(index=False))

    best_result = results_df.loc[results_df['Accuracy'].idxmax()]
    print(f"\nModel Terbaik: {best_result['Classifier']} - dengan Accuracy {best_result['Accuracy']:.4f} ({best_result['Accuracy']*100:.2f}%)")


    results_csv = os.path.join(RESULTS_DIR, f'results_{dataset_choice}.csv')
    results_df.to_csv(results_csv, index=False)
    print(f"\nHasil Training dan Evaluasi Model disimpan di: {results_csv}")

    return results_df

# 13. RUN EKSPERIMEN

In [None]:
print("\n" + "="*60)
print("REPLIKASI JURNAL - BRAIN TUMOR DETECTION")
print("="*60)

results_dataset1 = run_experiment('dataset1')
results_dataset2 = run_experiment('dataset2')

print("\nEKSPERIMEN SELESAI!")
print(f"\nhasil tersimpan di:")
print(f"- {RESULTS_DIR}")
print(f"- {MODELS_DIR}")


REPLIKASI JURNAL - BRAIN TUMOR DETECTION

Memulai Eksperimen Dataset 1...
EKSPERIMEN DATASET: DATASET1


Loading dataset...

Dataset loaded: 253 images
- Tumor: 155
- Non-tumor: 98

Melakukan augmentasi data...
Augmented dataset: 1012 images

Data split:
- Training: 809 images
- Testing: 203 images

Ekstraksi Deep Features...
Mengekstrak features dari VGG-16...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/vgg16/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m58889256/58889256[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Mengekstrak features dari Inception V3...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
[1m87910968/87910968[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 0us/step
Mengekstrak features dari ResNet-101...
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/r



Mengekstrak features dari ResNet-101...




Mengekstrak features dari DenseNet-201...

Konkatenasi features dari 3 model (VGG-16, Inception V3, ResNet-101)...
Concatenated features shape: (809, 4608)
500 features terpilih dari 4608 features

Selected features shape: (809, 500)
Selected features indices disimpan di: /content/drive/MyDrive/Python/Brain-Tumor-Detection/results/selected_features_dataset1.npy

Training dan Evaluasi Model...
Training SVM...
Model disimpan di: /content/drive/MyDrive/Python/Brain-Tumor-Detection/models/SVM_model.pkl

SVM Results:
Accuracy:  0.8966 (89.66%)
Precision: 0.9055
Recall:    0.9274
F1-Score:  0.9163
Training RandomForest...
Model disimpan di: /content/drive/MyDrive/Python/Brain-Tumor-Detection/models/RandomForest_model.pkl

RandomForest Results:
Accuracy:  0.8916 (89.16%)
Precision: 0.8864
Recall:    0.9435
F1-Score:  0.9141
Training DecisionTree...
Model disimpan di: /content/drive/MyDrive/Python/Brain-Tumor-Detection/models/DecisionTree_model.pkl

DecisionTree Results:
Accuracy:  0.8177 (81.