# Mobile Computer Vision

In Mobile Computer Vision, ressource-constrained devices are used to run models. In general, there are three ways to make a model ready for mobile deployment:
- make it smaller (less weights, less layers)
- prune it 
- quantize the weights

Consider [https://xilinx.github.io/Vitis-AI/3.5/html/docs/workflow.html](https://xilinx.github.io/Vitis-AI/3.5/html/docs/workflow.html) for an introduction.

For now, we just want to find a small model for the Lab Course Dataset (room classification).

In [2]:
import tensorflow as tf
import tensorflow.keras as keras
import numpy as np
from matplotlib import pyplot as plt
import os
from pathlib import Path
import shutil
from sklearn.model_selection import train_test_split

# Configure TensorFlow to use GPU in WSL
print("TensorFlow version:", tf.__version__)
print("Num GPUs Available:", len(tf.config.list_physical_devices('GPU')))

# Enable memory growth to prevent TensorFlow from allocating all GPU memory at once
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        print(f"Using GPU: {gpus}")
    except RuntimeError as e:
        print(e)
else:
    print("No GPU detected. Running on CPU.")

  if not hasattr(np, "object"):


TensorFlow version: 2.20.0
Num GPUs Available: 0
No GPU detected. Running on CPU.


# Data
The data is available from the Lab_Course_Dataset folder. The dataset structure is:

```
Lab_Course_Dataset/
├── Lise-Meitner-Str-9_9377/
│   └── Lise-Meitner-Str-9_9377/
│       ├── Indoor/
│       │   ├── 9377_EG/
│       │   │   ├── HW_706/, HW_708/, HW_709/
│       │   │   └── RM_001/, RM_006/, RM_008/, RM_012/, RM_020/, RM_030/
│       │   └── 9377_1OG/
│       │       └── HW_716/, HW_718/, HW_719/, RM_126/
│       └── Outdoor/
│           └── North/, North_East/, South/, South_East/
└── Willy-Messerschmitt-5_9387/
    └── Willy-Messerschmitt-5_9387/
        ├── Indoor/
        │   └── HW_701/, RM_001/, RM_006/, RM_009/, RM_010/, RM_011/, RM_012/, RM_019/, RM_020/
        └── Outdoor/
            └── Main_entrance/
```

Total: 27 different locations (both indoor rooms and outdoor areas).
The script will automatically organize the data into train/val/test splits for training.

# Base Model: MobileNet
We use the MobileNet model which implements a smart way to reduce multiplications using Depth-Wise convolution. It is a model that provides simplifications such that it can be used on a mobile phone easily.

In [3]:
def conv_layer(x, filters, stride=1):
    x = tf.keras.layers.Conv2D(filters, kernel_size=(3,3), strides=(stride,stride), padding='same', use_bias=False)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.ReLU()(x)
    
    return x


def depthwise_conv_layer(x, filters, stride=1, depth_multiplier=1):
    x = tf.keras.layers.DepthwiseConv2D(kernel_size=(3,3), strides=(stride,stride), padding='same', depth_multiplier=depth_multiplier, use_bias=False)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.ReLU()(x)


    x = tf.keras.layers.Conv2D(filters, kernel_size=(1,1), strides=(1,1), padding='same', use_bias=False)(x)
    x = tf.keras.layers.BatchNormalization()(x)
    x = tf.keras.layers.ReLU()(x)

    return x


def mobilenet(input_tensor, alpha=1.0, depth_multiplier=1, include_top = False, classes=1000, classifier_activation='softmax'):
    """_summary_
    Args:
        alpha (float, optional): controls the width of the network.
            - If `alpha` < 1.0, proportionally decreases the number
                of filters in each layer.
            - If `alpha` > 1.0, proportionally increases the number
                of filters in each layer.
            - If `alpha` = 1, default number of filters from the paper
                 are used at each layer.
        depth_multiplier (int, optional): The number of depthwise convolution output channels
            for each input channel. Defaults to 1.
    """
    x = tf.keras.layers.ZeroPadding2D()(input_tensor)
    x = conv_layer(x, int(32 * alpha), 2)
    x = depthwise_conv_layer(x, int(64 * alpha), depth_multiplier=depth_multiplier)
    x = tf.keras.layers.ZeroPadding2D()(x)
    x = depthwise_conv_layer(x, int(128 * alpha), 2, depth_multiplier=depth_multiplier)
    x = depthwise_conv_layer(x, int(128 * alpha), depth_multiplier=depth_multiplier)
    x = tf.keras.layers.ZeroPadding2D()(x)
    x = depthwise_conv_layer(x, int(256 * alpha), 2, depth_multiplier=depth_multiplier)
    x = depthwise_conv_layer(x, int(256 * alpha), depth_multiplier=depth_multiplier)
    x = tf.keras.layers.ZeroPadding2D()(x)
    x = depthwise_conv_layer(x, int(512 * alpha), 2, depth_multiplier=depth_multiplier)

    x = depthwise_conv_layer(x, int(512 * alpha), depth_multiplier=depth_multiplier)
    x = depthwise_conv_layer(x, int(512 * alpha), depth_multiplier=depth_multiplier)
    x = depthwise_conv_layer(x, int(512 * alpha), depth_multiplier=depth_multiplier)
    x = depthwise_conv_layer(x, int(512 * alpha), depth_multiplier=depth_multiplier)
    x = depthwise_conv_layer(x, int(512 * alpha), depth_multiplier=depth_multiplier)

    x = tf.keras.layers.ZeroPadding2D()(x)
    x = depthwise_conv_layer(x, int(1024 * alpha), 2, depth_multiplier=depth_multiplier)
    x = depthwise_conv_layer(x, int(1024 * alpha), 2, depth_multiplier=depth_multiplier)

#   This are the layer which has been used to train imagenet
#   x = tf.keras.layers.GlobalAveragePooling2D()(x)
#   x = tf.keras.layers.Flatten()(x)
#   x = tf.keras.layers.Dense(units= classes, activation=classifier_activation)(x)
    
    return x

# Building a model out of it
This just prvovides a sequence of layers (a quite deep neural network, maybe you need to train on a GPU)

In [4]:
# Get script directory for WSL-compatible paths
script_dir = Path(__file__).parent.resolve() if '__file__' in dir() else Path.cwd()
base_path = script_dir.parent  # Go up to Labcourse folder

# Dataset paths (WSL compatible)
dataset_path = base_path / "Lab_Course_Dataset"
processed_path = base_path / "Lab_Course_Dataset_Processed"

cfg = {
    "dataset": "Lab_Course_Dataset",
    "img_input_shape": [224, 224, 3],
    "number_of_classes": 0,  # Will be auto-detected
    "lr1": 1e-4,
    "epochs": 15,
    "batch_size": 32,
    "data_path": str(dataset_path),
    "processed_path": str(processed_path)
}

print(f"Dataset path: {cfg['data_path']}")
print(f"Processed path: {cfg['processed_path']}")

Dataset path: c:\Users\Klein\Desktop\Labcourse\Lab_Course_Dataset
Processed path: c:\Users\Klein\Desktop\Labcourse\Lab_Course_Dataset_Processed


In [None]:
# Function to collect all images from the nested folder structure
def collect_images_from_dataset(dataset_path):
    """
    Traverse the Lab_Course_Dataset structure and collect all images organized by location.
    Returns a dict: {location_name: [list of image paths]}
    """
    images_by_location = {}
    dataset_path = Path(dataset_path)
    
    # Traverse the nested structure
    for building in dataset_path.iterdir():
        if not building.is_dir():
            continue
        # Handle double-nested folder (Lise-Meitner-Str-9_9377/Lise-Meitner-Str-9_9377/)
        inner_building = building / building.name
        if inner_building.exists():
            building = inner_building
        
        # Check both Indoor and Outdoor folders
        for category in ["Indoor", "Outdoor"]:
            category_path = building / category
            if not category_path.exists():
                continue
            
            if category == "Indoor":
                # Indoor has floor structure
                for floor in category_path.iterdir():
                    if not floor.is_dir():
                        continue
                    for location in floor.iterdir():
                        if not location.is_dir():
                            continue
                        location_name = location.name
                        if location_name not in images_by_location:
                            images_by_location[location_name] = []
                        # Collect all image files
                        for img_file in location.iterdir():
                            if img_file.suffix.lower() in ['.jpg', '.jpeg', '.png', '.bmp']:
                                images_by_location[location_name].append(img_file)
            else:
                # Outdoor has direct location folders
                for location in category_path.iterdir():
                    if not location.is_dir():
                        continue
                    location_name = location.name
                    if location_name not in images_by_location:
                        images_by_location[location_name] = []
                    # Collect all image files
                    for img_file in location.iterdir():
                        if img_file.suffix.lower() in ['.jpg', '.jpeg', '.png', '.bmp']:
                            images_by_location[location_name].append(img_file)
    
    return images_by_location

# Function to create train/val/test split
def create_dataset_splits(images_by_location, output_path, train_ratio=0.7, val_ratio=0.15, test_ratio=0.15):
    """
    Create train/val/test directory structure with proper splits.
    """
    output_path = Path(output_path)
    
    # Create directories
    for split in ['train', 'val', 'test']:
        for location_name in images_by_location.keys():
            (output_path / split / location_name).mkdir(parents=True, exist_ok=True)
    
    # Split and copy images
    total_images = 0
    for location_name, images in images_by_location.items():
        if len(images) < 3:
            print(f"Skipping {location_name}: not enough images ({len(images)})")
            continue
            
        # Split images
        train_imgs, temp_imgs = train_test_split(images, train_size=train_ratio, random_state=42)
        relative_val = val_ratio / (val_ratio + test_ratio)
        val_imgs, test_imgs = train_test_split(temp_imgs, train_size=relative_val, random_state=42)
        
        # Copy images
        for img in train_imgs:
            shutil.copy2(img, output_path / 'train' / location_name / img.name)
        for img in val_imgs:
            shutil.copy2(img, output_path / 'val' / location_name / img.name)
        for img in test_imgs:
            shutil.copy2(img, output_path / 'test' / location_name / img.name)
        
        total_images += len(images)
        print(f"  {location_name}: {len(images)} images -> train:{len(train_imgs)}, val:{len(val_imgs)}, test:{len(test_imgs)}")
    
    print(f"\nDataset created at {output_path}")
    print(f"Total images processed: {total_images}")

# Check if processed dataset exists, if not create it
if not os.path.exists(cfg["processed_path"]) or not os.path.exists(os.path.join(cfg["processed_path"], "train")):
    print("Processing dataset...")
    images_by_location = collect_images_from_dataset(cfg["data_path"])
    print(f"Found {len(images_by_location)} locations")
    create_dataset_splits(images_by_location, cfg["processed_path"])
else:
    print("Using existing processed dataset")

# Auto-detect number of classes
train_path = os.path.join(cfg["processed_path"], "train")
if os.path.exists(train_path):
    classes = [d for d in os.listdir(train_path) if os.path.isdir(os.path.join(train_path, d))]
    cfg["number_of_classes"] = len(classes)
    print(f"\nFound {cfg['number_of_classes']} classes: {sorted(classes)}")

Processing dataset...
Found 13 rooms
  HW_716: 208 images
  HW_718: 177 images
  HW_719: 182 images
  RM_126: 192 images
  HW_706: 225 images
  HW_708: 235 images
  HW_709: 350 images
  RM_001: 211 images
  RM_006: 167 images
  RM_008: 142 images
  RM_012: 189 images
  RM_020: 237 images
  RM_030: 263 images
Dataset created at c:\Users\Klein\Desktop\Labcourse\Lab_Course_Dataset_Processed
Found 13 classes: ['HW_706', 'HW_708', 'HW_709', 'HW_716', 'HW_718', 'HW_719', 'RM_001', 'RM_006', 'RM_008', 'RM_012', 'RM_020', 'RM_030', 'RM_126']


In [None]:
# Build model
new_inputs = tf.keras.layers.Input(shape=cfg["img_input_shape"])
x = mobilenet(new_inputs)

x = tf.keras.layers.Flatten()(x)
new_outputs = tf.keras.layers.Dense(cfg["number_of_classes"], activation='softmax')(x)

model = tf.keras.Model(new_inputs, new_outputs)

loss_fn = keras.losses.CategoricalCrossentropy()
optimizer = keras.optimizers.Adam(learning_rate=cfg["lr1"])
    
use_metrics = ['accuracy']
model.compile(optimizer=optimizer, loss=loss_fn, metrics=use_metrics)
print(model.summary())

# Data generators with augmentation
train_datagen = keras.preprocessing.image.ImageDataGenerator(
    rescale=1/255.0,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    horizontal_flip=True
)

valid_datagen = keras.preprocessing.image.ImageDataGenerator(rescale=1/255.0)

train_generator = train_datagen.flow_from_directory(
    directory=os.path.join(cfg["processed_path"], "train"),
    target_size=(cfg["img_input_shape"][0], cfg["img_input_shape"][1]),
    color_mode="rgb",
    batch_size=cfg["batch_size"],
    class_mode="categorical",
    shuffle=True,
    seed=42
)

valid_generator = valid_datagen.flow_from_directory(
    directory=os.path.join(cfg["processed_path"], "val"),
    target_size=(cfg["img_input_shape"][0], cfg["img_input_shape"][1]),
    color_mode="rgb",
    batch_size=cfg["batch_size"],
    class_mode="categorical",
    shuffle=True,
    seed=42
)

test_generator = valid_datagen.flow_from_directory(
    directory=os.path.join(cfg["processed_path"], "test"),
    target_size=(cfg["img_input_shape"][0], cfg["img_input_shape"][1]),
    color_mode="rgb",
    batch_size=cfg["batch_size"],
    class_mode="categorical",
    shuffle=False,
    seed=42
)

print(f"\nClass indices: {train_generator.class_indices}")

# Training with callbacks
callbacks = [
    keras.callbacks.TerminateOnNaN(),
    keras.callbacks.EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True),
    keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2, min_lr=1e-7)
]

history = model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=cfg["epochs"],
    callbacks=callbacks,
    verbose=1
)

# Save model with absolute path
model_path = base_path / "mobilenet-lab_course_dataset.h5"
model.save(str(model_path))
print(f"Model saved as '{model_path}'")

Found 5722 images belonging to 2 classes.
Found 2729 images belonging to 2 classes.
Found 2492 images belonging to 2 classes.


  model.fit_generator(generator=train_generator,


Epoch 1/15
Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


  saving_api.save_model(


In [None]:
# Evaluate on test set
print("\nEvaluating on test set...")
test_loss, test_accuracy = model.evaluate(test_generator)
print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_accuracy:.4f}")



[0.44316813349723816, 0.9361958503723145]

In [None]:
# Plot training history
print("\nPlotting training history...")
fig, axes = plt.subplots(1, 2, figsize=(12, 4))

axes[0].plot(history.history['loss'], label='Train Loss')
axes[0].plot(history.history['val_loss'], label='Val Loss')
axes[0].set_xlabel('Epoch')
axes[0].set_ylabel('Loss')
axes[0].legend()
axes[0].set_title('Training and Validation Loss')

axes[1].plot(history.history['accuracy'], label='Train Accuracy')
axes[1].plot(history.history['val_accuracy'], label='Val Accuracy')
axes[1].set_xlabel('Epoch')
axes[1].set_ylabel('Accuracy')
axes[1].legend()
axes[1].set_title('Training and Validation Accuracy')

plt.tight_layout()
history_path = base_path / "training_history.png"
plt.savefig(str(history_path))
print(f"Training history saved to '{history_path}'")
plt.show()

# Conclusion
The MobileNet architecture provides an architecture deployable to small devices. With GPU acceleration via WSL, training is significantly faster. The model can classify rooms from the Lab_Course_Dataset effectively, handling the complex nested folder structure and organizing data into proper train/val/test splits automatically.