In [1]:
!unzip Brain_tumor.zip -d /content/


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: /content/Brain_tumor/Training/notumor/Tr-no_1040.jpg  
  inflating: /content/Brain_tumor/Training/notumor/Tr-no_1041.jpg  
  inflating: /content/Brain_tumor/Training/notumor/Tr-no_1042.jpg  
  inflating: /content/Brain_tumor/Training/notumor/Tr-no_1043.jpg  
  inflating: /content/Brain_tumor/Training/notumor/Tr-no_1044.jpg  
  inflating: /content/Brain_tumor/Training/notumor/Tr-no_1045.jpg  
  inflating: /content/Brain_tumor/Training/notumor/Tr-no_1046.jpg  
  inflating: /content/Brain_tumor/Training/notumor/Tr-no_1047.jpg  
  inflating: /content/Brain_tumor/Training/notumor/Tr-no_1048.jpg  
  inflating: /content/Brain_tumor/Training/notumor/Tr-no_1049.jpg  
  inflating: /content/Brain_tumor/Training/notumor/Tr-no_1050.jpg  
  inflating: /content/Brain_tumor/Training/notumor/Tr-no_1051.jpg  
  inflating: /content/Brain_tumor/Training/notumor/Tr-no_1052.jpg  
  inflating: /content/Brain_tumor/Training/notumor/

In [1]:
import tensorflow as tf
import numpy as np
import os
from tensorflow.keras import layers, models
from sklearn.metrics import precision_score, recall_score, accuracy_score
import cv2
import matplotlib.pyplot as plt

In [2]:
# def load_labeled_data(data_dir, image_size=(224, 224), batch_size=32):
#     """
#     Load labeled dataset from directories.
#     :param data_dir: Root directory containing subdirectories for each class.
#     :param image_size: Target size for images.
#     :param batch_size: Batch size for training.
#     :return: train_dataset, test_dataset
#     """
#     train_dataset = tf.keras.preprocessing.image_dataset_from_directory(
#         data_dir,
#         validation_split=0.2,
#         subset="training",
#         seed=123,
#         image_size=image_size,
#         batch_size=batch_size
#     )

#     test_dataset = tf.keras.preprocessing.image_dataset_from_directory(
#         data_dir,
#         validation_split=0.2,
#         subset="validation",
#         seed=123,
#         image_size=image_size,
#         batch_size=batch_size
#     )

#     return train_dataset, test_dataset

# def load_unlabeled_data(unlabeled_dir, image_size=(224, 224)):
#     """
#     Load unlabeled dataset from a directory.
#     :param unlabeled_dir: Directory containing unlabeled images.
#     :param image_size: Target size for images.
#     :return: NumPy array of images.
#     """
#     images = []
#     for filename in os.listdir(unlabeled_dir):
#         img_path = os.path.join(unlabeled_dir, filename)
#         img = cv2.imread(img_path)
#         if img is not None:
#             img = cv2.resize(img, image_size)
#             img = img / 255.0  # Normalize
#             images.append(img)

#     return np.array(images)


In [2]:
image_size = (224, 224)
batch_size = 32

In [3]:
# Load Training Data
train_data = tf.keras.preprocessing.image_dataset_from_directory(
    "Brain_tumor/Training",
    image_size=image_size,
    batch_size=batch_size,
    label_mode="int",
    shuffle=True
)

# Load Testing Data (Completely Unseen)
test_data = tf.keras.preprocessing.image_dataset_from_directory(
    "Brain_tumor/Testing",
    image_size=image_size,
    batch_size=batch_size,
    label_mode="int",
    shuffle=False
)

Found 2640 files belonging to 4 classes.
Found 600 files belonging to 4 classes.


In [3]:
# unlabeled_images = load_unlabeled_data("/content/Brain_tumor/unlabelled")

In [4]:
# train_data, test_data = load_labeled_data("/content/Brain_tumor")

In [4]:
def create_model():
    model = models.Sequential([
        layers.Input(shape=(224, 224, 3)),  # Explicit input layer
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Conv2D(128, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dense(4, activation='softmax')
    ])
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model


In [5]:
def train_model(model, train_data, test_data):
    model.fit(train_data, epochs=15, validation_data=test_data)
    return model


In [19]:

def pseudo_label(model, unlabeled_images, threshold=0.9):
    predictions = model.predict(unlabeled_images)
    confidences = np.max(predictions, axis=1)
    pseudo_labels = np.argmax(predictions, axis=1)
    high_confidence_idx = confidences >= threshold
    return unlabeled_images[high_confidence_idx], pseudo_labels[high_confidence_idx]

In [20]:
# Fine-tune model iteratively
def semi_supervised_learning(model, unlabeled_images, chunk_size=0.1):
    total_unlabeled = len(unlabeled_images)
    chunk_size = int(total_unlabeled * chunk_size)

    while len(unlabeled_images) > 0:
        chunk = unlabeled_images[:chunk_size]
        unlabeled_images = unlabeled_images[chunk_size:]

        new_images, new_labels = pseudo_label(model, chunk)
        if len(new_images) > 0:
            model.fit(new_images, new_labels, epochs=10)

    return model

In [8]:
def evaluate_model(model, test_data):
    predictions = np.argmax(model.predict(test_data.map(lambda x, y: x)), axis=1)
    true_labels = np.concatenate([y.numpy() for x, y in test_data], axis=0)
    precision = precision_score(true_labels, predictions, average='weighted')
    recall = recall_score(true_labels, predictions, average='weighted')
    accuracy = accuracy_score(true_labels, predictions)
    return precision, recall, accuracy



In [10]:
# data_dir = "/content/Brain_tumor"
# unlabeled_dir = "/content/Brain_tumor/unlabelled"

In [11]:
# train_data, test_data = load_labeled_data(data_dir)
# unlabeled_images = load_unlabeled_data(unlabeled_dir)

Found 7023 files belonging to 3 classes.
Using 5619 files for training.
Found 7023 files belonging to 3 classes.
Using 1404 files for validation.


In [9]:
model = create_model()
model = train_model(model, train_data, test_data)

Epoch 1/15
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m17s[0m 120ms/step - accuracy: 0.5256 - loss: 109.4705 - val_accuracy: 0.7583 - val_loss: 0.8464
Epoch 2/15
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 58ms/step - accuracy: 0.8922 - loss: 0.2757 - val_accuracy: 0.8017 - val_loss: 1.0210
Epoch 3/15
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 61ms/step - accuracy: 0.9481 - loss: 0.1500 - val_accuracy: 0.8100 - val_loss: 1.2913
Epoch 4/15
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 50ms/step - accuracy: 0.9685 - loss: 0.0879 - val_accuracy: 0.8017 - val_loss: 1.6673
Epoch 5/15
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 50ms/step - accuracy: 0.9772 - loss: 0.0673 - val_accuracy: 0.8017 - val_loss: 1.6429
Epoch 6/15
[1m83/83[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 69ms/step - accuracy: 0.9885 - loss: 0.0371 - val_accuracy: 0.8233 - val_loss: 1.6619
Epoch 7/15
[1m83/83[0m [32m

In [10]:
from sklearn.metrics import classification_report

y_pred = model.predict(test_data)
y_pred_classes = np.argmax(y_pred, axis=1)
true_labels = np.concatenate([y.numpy() for x, y in test_data], axis=0)
print(classification_report(true_labels, y_pred_classes))

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step
              precision    recall  f1-score   support

           0       0.69      0.83      0.75       150
           1       0.78      0.81      0.80       150
           2       0.93      0.99      0.95       150
           3       0.94      0.66      0.78       150

    accuracy                           0.82       600
   macro avg       0.84      0.82      0.82       600
weighted avg       0.84      0.82      0.82       600



In [11]:
precision_before, recall_before, accuracy_before = evaluate_model(model, test_data)

[1m19/19[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 39ms/step


In [12]:
print(f"Before Semi-Supervised Learning - Precision: {precision_before}, Recall: {recall_before}, Accuracy: {accuracy_before}")


Before Semi-Supervised Learning - Precision: 0.8356614637690057, Recall: 0.8216666666666667, Accuracy: 0.8216666666666667


In [13]:
# Function to load and preprocess unlabeled images
def load_unlabeled_images(directory, image_size):
    image_paths = [os.path.join(directory, fname) for fname in os.listdir(directory)]

    img_list = []
    for img_path in image_paths:
        img = tf.keras.preprocessing.image.load_img(img_path, target_size=image_size)
        img = tf.keras.preprocessing.image.img_to_array(img)
        img = img / 255.0  # Normalize to [0,1]
        img_list.append(img)

    return tf.convert_to_tensor(img_list)

# Load Unlabeled Images (No Labels)
unlabeled_images = load_unlabeled_images("Brain_tumor/unlabelled", image_size)

In [21]:
model = semi_supervised_learning(model, unlabeled_images)
precision_after, recall_after, accuracy_after = evaluate_model(model, test_data)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 25ms/step
Epoch 1/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step - accuracy: 1.0000 - loss: 5.2956e-09
Epoch 2/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 48ms/step - accuracy: 1.0000 - loss: 6.4896e-09
Epoch 3/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 52ms/step - accuracy: 1.0000 - loss: 2.4774e-09
Epoch 4/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step - accuracy: 1.0000 - loss: 4.1012e-09
Epoch 5/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 47ms/step - accuracy: 1.0000 - loss: 3.2416e-09
Epoch 6/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step - accuracy: 1.0000 - loss: 1.1617e-08
Epoch 7/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 49ms/step - accuracy: 1.0000 - loss: 1.5056e-08
Epoch 8/10
[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m 

In [22]:
print(f"Before Semi-Supervised Learning - Precision: {precision_before}, Recall: {recall_before}, Accuracy: {accuracy_before}")
print(f"After Semi-Supervised Learning - Precision: {precision_after}, Recall: {recall_after}, Accuracy: {accuracy_after}")

Before Semi-Supervised Learning - Precision: 0.8356614637690057, Recall: 0.8216666666666667, Accuracy: 0.8216666666666667
After Semi-Supervised Learning - Precision: 0.8371883278604152, Recall: 0.8233333333333334, Accuracy: 0.8233333333333334
