<a href="https://colab.research.google.com/github/kalaxander/AcneDetectorApp/blob/main/Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Report which TensorFlow release this runtime provides.
import tensorflow as tf

print("TensorFlow version: " + tf.__version__)

TensorFlow version: 2.19.0


In [None]:
# Attach Google Drive to the Colab filesystem so later cells can read from
# and write to paths under /content/drive.
from google.colab import drive

drive.mount(mountpoint='/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Show the contents of the current working directory.
!ls

acne_transfer_model_best.h5  drive  sample_data


In [None]:
!unzip /content/drive/MyDrive/dataset.zip

Archive:  /content/drive/MyDrive/dataset.zip
   creating: dataset/
   creating: dataset/test/
   creating: dataset/test/level0/
   creating: dataset/test/level1/
   creating: dataset/test/level2/
   creating: dataset/test/level3/
   creating: dataset/train/
   creating: dataset/train/level0/
  inflating: dataset/train/level0/levle0_0.jpg  
  inflating: dataset/train/level0/levle0_1.jpg  
  inflating: dataset/train/level0/levle0_10.jpg  
  inflating: dataset/train/level0/levle0_100.jpg  
  inflating: dataset/train/level0/levle0_101.jpg  
  inflating: dataset/train/level0/levle0_102.jpg  
  inflating: dataset/train/level0/levle0_103.jpg  
  inflating: dataset/train/level0/levle0_104.jpg  
  inflating: dataset/train/level0/levle0_105.jpg  
  inflating: dataset/train/level0/levle0_106.jpg  
  inflating: dataset/train/level0/levle0_107.jpg  
  inflating: dataset/train/level0/levle0_108.jpg  
  inflating: dataset/train/level0/levle0_109.jpg  
  inflating: dataset/train/level0/levle0_11.jpg  

In [None]:
import os
import shutil
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.preprocessing import image
from google.colab import drive

# ======================
# STEP 0: MOUNT GOOGLE DRIVE (FOR SAVING THE FINAL MODEL)
# ======================
# Mounting may ask you to authorize access; follow the prompt to complete it.
drive.mount(mountpoint='/content/drive')

# ======================
# PATHS (UPDATED FOR COLAB)
# ======================
# Dataset locations are relative to the Colab working directory, where
# 'dataset.zip' is expected to have been unzipped.
train_dir, val_dir = "dataset/train", "dataset/val"
unlabeled_dir = "dataset/unlabeled_dataset"    # images to be pseudo-labeled
pseudo_labeled_dir = "pseudo_labeled_dataset"  # destination for pseudo-labeled copies

# Google Drive destinations for the training chart and the fine-tuned model
output_chart_path = "/content/drive/MyDrive/training_performance_finetuned.png"
output_model_path = "/content/drive/MyDrive/acne_transfer_model_finetuned.h5"

# Batch size and (height, width) used when loading images
BATCH_SIZE = 16
IMAGE_SIZE = (224, 224)

# ======================
# STEP 1: LOAD TRAINED MODEL
# ======================
# The checkpoint is expected in the Colab working directory
# (upload 'acne_transfer_model_best.h5' there before running this cell).
pretrained_model_file = "acne_transfer_model_best.h5"
model = load_model(pretrained_model_file)

# ======================
# STEP 2: CREATE PSEUDO-LABELED DATASET
# ======================
# The loaded model predicts a class for every image in `unlabeled_dir`; images
# whose top score exceeds the threshold are copied into a per-class sub-folder
# of `pseudo_labeled_dir`.
NUM_CLASSES = 4               # class folders "level 0" .. "level 3"
CONFIDENCE_THRESHOLD = 0.85   # keep a pseudo-label only above this top score

os.makedirs(pseudo_labeled_dir, exist_ok=True)

# Rebuild the class folders from scratch. Without this, re-running the cell
# keeps pseudo-labels written by an earlier run, and the same image can end up
# in two different class folders.
# NOTE: the folder names keep the original "level N" spelling (with a space),
# which differs from the "levelN" folders of the labeled dataset.
for i in range(NUM_CLASSES):
    class_dir = os.path.join(pseudo_labeled_dir, f"level {i}")
    shutil.rmtree(class_dir, ignore_errors=True)
    os.makedirs(class_dir, exist_ok=True)

print("Starting pseudo-labeling on unlabeled images...")
kept_count = 0
# sorted() makes the processing order independent of the filesystem listing order
for img_name in sorted(os.listdir(unlabeled_dir)):
    img_path = os.path.join(unlabeled_dir, img_name)

    # Skip non-image files (by extension) and anything that is not a regular file
    if not img_name.lower().endswith((".jpg", ".jpeg", ".png")):
        continue
    if not os.path.isfile(img_path):
        continue

    # Load + preprocess image into a batch of one
    img = image.load_img(img_path, target_size=IMAGE_SIZE)
    x = image.img_to_array(img)
    x = np.expand_dims(x, axis=0)
    x = tf.keras.applications.mobilenet_v2.preprocess_input(x)

    preds = model.predict(x, verbose=0)
    pred_class = int(np.argmax(preds, axis=1)[0])
    confidence = float(np.max(preds))

    # Only keep images with high confidence
    if confidence > CONFIDENCE_THRESHOLD:
        shutil.copy(img_path, os.path.join(pseudo_labeled_dir, f"level {pred_class}", img_name))
        kept_count += 1

print("✅ Pseudo-labeling complete. Images saved in:", pseudo_labeled_dir)
print(f"Kept {kept_count} image(s) with confidence > {CONFIDENCE_THRESHOLD}")

# ======================
# STEP 3: CREATE NEW DATA GENERATORS (MERGING LABELED + PSEUDO-LABELED)
# ======================
# Training images are augmented; validation images are only preprocessed.
train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rotation_range=20,
    zoom_range=0.2,
    horizontal_flip=True,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input
)

val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    preprocessing_function=tf.keras.applications.mobilenet_v2.preprocess_input
)

# Create a generator for the original training data
combined_train_gen = train_datagen.flow_from_directory(
    directory=train_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# Create a generator for the new pseudo-labeled data
pseudo_train_gen = train_datagen.flow_from_directory(
    directory=pseudo_labeled_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# Both sources must agree on the number of classes, otherwise their one-hot
# labels cannot be fed to the same model.
num_classes = combined_train_gen.num_classes
assert pseudo_train_gen.num_classes == num_classes, (
    f"class count mismatch: labeled={num_classes}, "
    f"pseudo-labeled={pseudo_train_gen.num_classes}"
)


def _iterate_once(batch_iterator):
    """Yield every batch of a Keras directory iterator exactly once, then stop.

    Iterating a Keras directory iterator directly never terminates (it wraps
    around and keeps producing batches). Indexing it over ``range(len(...))``
    gives one finite pass instead.
    """
    for batch_index in range(len(batch_iterator)):
        yield batch_iterator[batch_index]
    # Re-draw the sample order so the next pass sees a different ordering.
    batch_iterator.on_epoch_end()


def _finite_dataset(batch_iterator, label_width):
    """Wrap a directory iterator as a tf.data.Dataset covering one pass per iteration."""
    return tf.data.Dataset.from_generator(
        lambda: _iterate_once(batch_iterator),
        output_signature=(
            tf.TensorSpec(shape=(None, *IMAGE_SIZE, 3), dtype=tf.float32),
            tf.TensorSpec(shape=(None, label_width), dtype=tf.float32)
        )
    )


# Combine datasets into a single tf.data.Dataset for training.
# Each part is finite, so the concatenation actually reaches the pseudo-labeled
# batches and every epoch ends after one pass over both sources. Wrapping the
# endless iterators directly made epoch 1 run forever on the labeled data only.
# NOTE: within an epoch all labeled batches come before all pseudo-labeled ones.
labeled_dataset = _finite_dataset(combined_train_gen, num_classes)
pseudo_dataset = _finite_dataset(pseudo_train_gen, num_classes)

# Concatenate the two datasets
train_dataset = labeled_dataset.concatenate(pseudo_dataset)

# Create the validation generator
val_gen = val_datagen.flow_from_directory(
    val_dir,
    target_size=IMAGE_SIZE,
    batch_size=BATCH_SIZE,
    class_mode='categorical'
)

# ======================
# STEP 4: FINE-TUNE THE MODEL
# ======================
# Take the first layer of the loaded model and make it trainable.
# NOTE(review): this assumes layers[0] is the MobileNetV2 backbone — confirm
# against the architecture of the saved model.
base_model = model.layers[0]
base_model.trainable = True

# Changing `trainable` requires a re-compile; use a small learning rate for fine-tuning
fine_tune_optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
model.compile(
    optimizer=fine_tune_optimizer,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Callbacks
# NOTE(review): early stopping tracks val_loss while the checkpoint tracks
# val_accuracy, so the weights restored in memory and the file saved to Drive
# may come from different epochs.
early_stop = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=5,
    restore_best_weights=True,
)
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath=output_model_path,  # written to Google Drive
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
)
fine_tune_callbacks = [early_stop, model_checkpoint]

print("Starting model fine-tuning...")
history = model.fit(
    train_dataset,
    epochs=20,
    validation_data=val_gen,
    callbacks=fine_tune_callbacks,
)
print("✅ Model fine-tuning complete.")

# ======================
# STEP 5: PLOT TRAINING RESULTS
# ======================
# Pull the per-epoch metrics recorded by model.fit
recorded = history.history
acc, val_acc = recorded['accuracy'], recorded['val_accuracy']
loss, val_loss = recorded['loss'], recorded['val_loss']
epochs_range = range(len(acc))

# One row, two panels: accuracy on the left, loss on the right
fig, (acc_ax, loss_ax) = plt.subplots(1, 2, figsize=(12, 6))

panels = (
    (acc_ax, acc, val_acc, 'Train Accuracy', 'Validation Accuracy',
     'lower right', 'Training and Validation Accuracy'),
    (loss_ax, loss, val_loss, 'Train Loss', 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
)
for ax, train_values, val_values, train_label, val_label, legend_loc, panel_title in panels:
    ax.plot(epochs_range, train_values, 'b-o', label=train_label)
    ax.plot(epochs_range, val_values, 'r-o', label=val_label)
    ax.legend(loc=legend_loc)
    ax.set_title(panel_title)
    ax.grid(True)

fig.tight_layout()
fig.savefig(output_chart_path)  # chart is written to Google Drive
plt.show()

print(f"✅ Training chart saved to: {output_chart_path}")

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).




Starting pseudo-labeling on unlabeled images...
✅ Pseudo-labeling complete. Images saved in: pseudo_labeled_dataset
Found 1473 images belonging to 4 classes.
Found 46 images belonging to 4 classes.
Found 143 images belonging to 4 classes.
Starting model fine-tuning...
Epoch 1/20
   3160/Unknown [1m3042s[0m 945ms/step - accuracy: 0.5696 - loss: 1.0078