In [1]:
# train_transfer.py
import os, math, json
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.applications import MobileNetV2
from tensorflow.keras.applications.mobilenet_v2 import preprocess_input

# ===== CONFIG =====
# Dataset root; must contain train/<class>/... and test/<class>/... subfolders.
DATA_DIR = "data"

# Resolution the images are loaded at (the dataset's native size).
IMG_SIZE_SRC = (48, 48)
# Resolution the images are resized to before entering MobileNetV2.
IMG_SIZE_NET = (224, 224)

BATCH = 64
SEED = 1337

# Epoch budgets: stage 1 (frozen base) and stage 2 (fine-tuning).
EPOCHS_FROZEN = 6
EPOCHS_FINETUNE = 10

# Output artifacts: trained model and the label order used during training.
MODEL_OUT = "emotion_mnetv2.h5"
LABELS_OUT = "labels.json"

# Meet/FER label order; the train/ class folders should carry exactly these names.
expected_classes = [
    "angry",
    "disgust",
    "fear",
    "happy",
    "neutral",
    "sad",
    "surprise",
]

# ===== LOAD DATASETS (grayscale) =====
def load_split(split, shuffle):
    """Load one split of the dataset as a batched grayscale `tf.data.Dataset`.

    Args:
        split: Sub-directory of DATA_DIR to read ("train" or "test"); each
            class lives in its own sub-folder and labels are inferred from
            the folder names.
        shuffle: Whether the loader shuffles the files.

    Returns:
        A dataset yielding (images, integer_labels) batches of size BATCH,
        with images loaded at IMG_SIZE_SRC in a single channel.
    """
    # keras.utils is the canonical home of this loader; the
    # tf.keras.preprocessing namespace is deprecated.
    return keras.utils.image_dataset_from_directory(
        os.path.join(DATA_DIR, split),
        labels="inferred",
        label_mode="int",
        color_mode="grayscale",
        batch_size=BATCH,
        image_size=IMG_SIZE_SRC,
        seed=SEED,
        shuffle=shuffle,
    )


train_ds = load_split("train", shuffle=True)
# Keep the test split in a fixed order for evaluation.
test_ds = load_split("test", shuffle=False)

# Check class names & ensure order is expected
class_names = train_ds.class_names
print("Detected classes:", class_names)
# Compare as lists: the label index is the position in this list, so order matters.
if class_names != expected_classes:
    print("WARNING: Classes differ from expected list.")
    print("  expected:", expected_classes)

# Integer labels are assigned per split from that split's own folder list.
# If test/ does not yield the same list as train/, label indices would refer
# to different classes and the evaluation would be silently wrong.
if test_ds.class_names != class_names:
    raise ValueError(
        f"train/ and test/ class folders differ: {class_names} vs {test_ds.class_names}"
    )

# For consistency we rely on directory order returned by Keras; save it for inference:
with open(LABELS_OUT, "w", encoding="utf-8") as f:
    json.dump(class_names, f)
print("Saved label order to", LABELS_OUT)

# ===== PERFORMANCE =====
AUTOTUNE = tf.data.AUTOTUNE

# Training batches: cache the loaded data, shuffle with a 1000-element buffer
# (elements here are whole batches), then prefetch.
train_ds = train_ds.cache()
train_ds = train_ds.shuffle(1000)
train_ds = train_ds.prefetch(AUTOTUNE)

# Test batches: cache and prefetch only; order is left untouched.
test_ds = test_ds.cache()
test_ds = test_ds.prefetch(AUTOTUNE)

# ===== PREPROCESS PIPELINE =====
# Step 1: replicate the single grayscale channel into three channels.
to_rgb = layers.Lambda(lambda img: tf.image.grayscale_to_rgb(img))

# Step 2: scale images up to the MobileNetV2 input resolution.
resizer = layers.Resizing(*IMG_SIZE_NET)

# Step 3: light augmentation (flip, small rotation/zoom, contrast jitter).
augment = keras.Sequential(
    layers=[
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.08),
        layers.RandomZoom(0.08),
        layers.RandomContrast(0.15),
    ],
    name="augment",
)

# Wrap datasets with preprocessing
def prep(ds, training=False):
    """Attach the MobileNetV2 input pipeline to a dataset of (image, label) batches.

    Each batch is converted to 3 channels, resized to the network resolution,
    optionally augmented, and passed through MobileNetV2's `preprocess_input`.

    Args:
        ds: Dataset yielding (grayscale image batch, label batch) pairs.
        training: When True, the random augmentations are applied as well.

    Returns:
        The dataset with preprocessing mapped over it; labels pass through
        unchanged.
    """
    def to_net_input(x, y):
        # One fused stage instead of four chained maps: same operations, same order.
        x = resizer(to_rgb(x))
        if training:
            x = augment(x, training=True)
        # MobileNet preprocessing
        return preprocess_input(x), y

    ds = ds.map(to_net_input, num_parallel_calls=AUTOTUNE)
    # Prefetch last so preprocessing of upcoming batches overlaps with training.
    return ds.prefetch(AUTOTUNE)

# Training data gets augmentation; test data only the deterministic steps.
train_ds_pp = prep(train_ds, True)
test_ds_pp = prep(test_ds)

# Size of the classifier's output layer.
num_classes = len(class_names)

# ===== MODEL (Transfer Learning: MobileNetV2) =====
# ImageNet-pretrained backbone without its original classification head.
base = MobileNetV2(
    input_shape=IMG_SIZE_NET + (3,),
    include_top=False,
    weights="imagenet",
)
# Stage 1: the backbone is frozen, only the new head is trained.
base.trainable = False

inputs = layers.Input(shape=IMG_SIZE_NET + (3,))
# The backbone is always called with training=False.
x = base(inputs, training=False)
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(num_classes, activation="softmax")(x)
model = keras.Model(inputs=inputs, outputs=outputs)

# Integer labels + softmax outputs -> sparse categorical cross-entropy.
model.compile(
    loss="sparse_categorical_crossentropy",
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    metrics=["accuracy"],
)

# ===== TRAIN (frozen) =====
# Stop when validation accuracy has not improved for 3 epochs and roll back
# to the best weights seen.
# NOTE(review): the test split is used as validation data here, so the weights
# are selected on the same data that is later reported as "Test accuracy".
# Consider holding out a separate validation split.
early_stop = keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    patience=3,
    restore_best_weights=True,
)
cb = [early_stop]

history = model.fit(
    x=train_ds_pp,
    epochs=EPOCHS_FROZEN,
    validation_data=test_ds_pp,
    callbacks=cb,
)

# ===== FINE-TUNE: unfreeze top of base =====
# Unfreeze last ~30 layers (heuristic)
FINE_TUNE_LAST_N = 30

# Unfreeze the parent first, then freeze what should stay fixed. Flipping only
# child layers while `base.trainable` is still False is version-dependent:
# in tf.keras 2.x a frozen parent reports no trainable weights at all, which
# would turn this stage into a silent no-op.
base.trainable = True
for layer in base.layers[:-FINE_TUNE_LAST_N]:
    layer.trainable = False
for layer in base.layers[-FINE_TUNE_LAST_N:]:
    # BatchNormalization layers stay frozen, as before.
    layer.trainable = not isinstance(layer, layers.BatchNormalization)

# Recompile so the changed trainable flags take effect; the lower learning
# rate limits how far the pretrained weights move.
model.compile(
    optimizer=keras.optimizers.Adam(1e-4),
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]
)

history_ft = model.fit(
    train_ds_pp,
    validation_data=test_ds_pp,
    epochs=EPOCHS_FINETUNE,
    callbacks=cb
)

# ===== EVALUATE & SAVE =====
# evaluate() returns the loss followed by the compiled metrics (accuracy).
eval_results = model.evaluate(test_ds_pp)
test_loss, test_acc = eval_results
print(f"Test accuracy: {test_acc:.4f}")

# Persist the trained model to MODEL_OUT.
model.save(MODEL_OUT)
print("Saved model to", MODEL_OUT)


KeyboardInterrupt: 

In [2]:
import tensorflow as tf

# Query the visible GPUs once, then report the count and the device list.
gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available:", len(gpus))
print(gpus)


Num GPUs Available: 0
[]


In [1]:
import sys
import tensorflow as tf

# Report interpreter and TensorFlow versions for environment debugging.
print(f"Python version: {sys.version}")
print(f"TensorFlow version: {tf.__version__}")


Python version: 3.10.18 | packaged by Anaconda, Inc. | (main, Jun  5 2025, 13:08:55) [MSC v.1929 64 bit (AMD64)]
TensorFlow version: 2.17.0


In [3]:
import tensorflow as tf

# TensorFlow build in use, and how many GPUs it can see.
print(f"TensorFlow version: {tf.__version__}")
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available:", len(gpu_devices))


TensorFlow version: 2.17.0
Num GPUs Available: 0


In [1]:
import tensorflow as tf

# Count the GPUs TensorFlow can see.
gpu_devices = tf.config.list_physical_devices('GPU')
print("Num GPUs Available:", len(gpu_devices))


Num GPUs Available: 0


In [2]:

# Log the device every op executes on.
tf.debugging.set_log_device_placement(True)

# Create both operands explicitly on the CPU.
with tf.device('/CPU:0'):
    a = tf.constant([[1.0, 2.0, 3.0],
                     [4.0, 5.0, 6.0]])
    b = tf.constant([[1.0, 2.0],
                     [3.0, 4.0],
                     [5.0, 6.0]])

# The product is computed outside the device scope, so TensorFlow picks the
# device; in the recorded run it was logged on CPU:0.
c = tf.linalg.matmul(a, b)
print(c)
     

Executing op _MklMatMul in device /job:localhost/replica:0/task:0/device:CPU:0
tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32)


In [3]:

# Turn on per-op device placement logging.
tf.debugging.set_log_device_placement(True)

# Both input matrices are pinned to the CPU.
with tf.device('/CPU:0'):
    a = tf.constant([[1.0, 2.0, 3.0],
                     [4.0, 5.0, 6.0]])
    b = tf.constant([[1.0, 2.0],
                     [3.0, 4.0],
                     [5.0, 6.0]])

# No device is requested for the multiplication itself; the recorded output
# shows it executing on CPU:0.
c = tf.linalg.matmul(a, b)
print(c)
     

Executing op _MklMatMul in device /job:localhost/replica:0/task:0/device:CPU:0
tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32)


In [4]:
import tensorflow as tf

# Number of GPUs visible to TensorFlow.
visible_gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(visible_gpus))

Num GPUs Available:  0


In [5]:
# Print the executing device for each op.
tf.debugging.set_log_device_placement(True)

# Operands are created under an explicit CPU device scope.
with tf.device('/CPU:0'):
    a = tf.constant([[1.0, 2.0, 3.0],
                     [4.0, 5.0, 6.0]])
    b = tf.constant([[1.0, 2.0],
                     [3.0, 4.0],
                     [5.0, 6.0]])

# Device selection for the matmul is left to TensorFlow (CPU:0 in the
# recorded run).
c = tf.linalg.matmul(a, b)
print(c)

Executing op _MklMatMul in device /job:localhost/replica:0/task:0/device:CPU:0
tf.Tensor(
[[22. 28.]
 [49. 64.]], shape=(2, 2), dtype=float32)


In [6]:
# Log where each op runs.
tf.debugging.set_log_device_placement(True)

# Ask for a GPU index that does not exist and print any RuntimeError raised.
# In the recorded run all three ops were logged on CPU:0 and the handler
# printed nothing.
try:
    with tf.device('/device:GPU:2'):
        a = tf.constant([[1.0, 2.0, 3.0],
                         [4.0, 5.0, 6.0]])
        b = tf.constant([[1.0, 2.0],
                         [3.0, 4.0],
                         [5.0, 6.0]])
        c = tf.matmul(a, b)
except RuntimeError as err:
    print(err)
  

Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op _EagerConst in device /job:localhost/replica:0/task:0/device:CPU:0
Executing op _MklMatMul in device /job:localhost/replica:0/task:0/device:CPU:0
