# City Locator

In [13]:
import kagglehub

# Download latest version
path = kagglehub.dataset_download("amaralibey/gsv-cities")

print("Path to dataset files:", path)

Path to dataset files: /home/go56pic/.cache/kagglehub/datasets/amaralibey/gsv-cities/versions/1


## City Locator (BIG Model)

### Import TensorFlow

In [14]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import mixed_precision
from matplotlib import pyplot as plt

# Enable mixed precision for speed and lower memory when supported
mixed_precision.set_global_policy("mixed_float16")

### Load Image Dataset

In [15]:
import os
from pathlib import Path
from collections import Counter

DATA_ROOT = Path(path)

# Recursively search for a directory that contains many class subfolders with files
# This is more flexible than the Room Locator structure and handles nested layouts.
def find_class_dir(root: Path, max_depth: int = 3, min_classes: int = 3):
    queue = [(root, 0)]
    best = None
    while queue:
        current, depth = queue.pop(0)
        if depth > max_depth:
            continue
        subdirs = [d for d in current.iterdir() if d.is_dir()]
        if len(subdirs) >= min_classes:
            # Heuristic: make sure subfolders contain files
            has_files = any(any(f.is_file() for f in d.iterdir()) for d in subdirs)
            if has_files:
                return current, subdirs
            best = best or (current, subdirs)
        for sd in subdirs:
            queue.append((sd, depth + 1))
    return best if best else (root, [d for d in root.iterdir() if d.is_dir()])

DATA_DIR, class_dirs = find_class_dir(DATA_ROOT)
print(f"Using data directory: {DATA_DIR}")
if class_dirs:
    preview = [d.name for d in class_dirs][:10]
    suffix = "..." if len(class_dirs) > 10 else ""
    print(f"Found {len(class_dirs)} class folders: {preview}{suffix}")
else:
    print("Warning: No class subfolders found; please verify dataset structure.")

IMG_SIZE = 380  # smaller than B5 setup to cut training time
BATCH_SIZE = 32  # adjust down if GPU memory is tight
AUTOTUNE = tf.data.AUTOTUNE

train_ds = keras.utils.image_dataset_from_directory(
    DATA_DIR,
    labels="inferred",
    label_mode="categorical",
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=42,
    validation_split=0.2,
    subset="training",
)

val_ds = keras.utils.image_dataset_from_directory(
    DATA_DIR,
    labels="inferred",
    label_mode="categorical",
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    shuffle=True,
    seed=42,
    validation_split=0.2,
    subset="validation",
)

NUM_CLASSES = len(train_ds.class_names)
print(f"Detected NUM_CLASSES = {NUM_CLASSES}")

# Pipeline optimizations
train_ds = train_ds.prefetch(AUTOTUNE)
val_ds = val_ds.prefetch(AUTOTUNE)

Using data directory: /home/go56pic/.cache/kagglehub/datasets/amaralibey/gsv-cities/versions/1/Images
Found 23 class folders: ['Miami', 'Medellin', 'London', 'Brussels', 'Barcelona', 'Lisbon', 'Melbourne', 'Bangkok', 'Rome', 'Chicago']...
Found 529506 files belonging to 23 classes.
Using 423605 files for training.
Found 529506 files belonging to 23 classes.
Using 105901 files for validation.
Detected NUM_CLASSES = 23


### Inspect Dataset Structure

In [16]:
from collections import defaultdict

# Summarize class counts (first 30 shown)
if class_dirs:
    counts = {}
    for d in class_dirs:
        counts[d.name] = sum(1 for f in d.rglob("*") if f.is_file())
    print(f"Total classes detected: {len(counts)}")
    top_names = list(counts.keys())[:30]
    for name in top_names:
        print(f"{name}: {counts[name]} images")
    if len(counts) > 30:
        print(f"... {len(counts) - 30} more classes not shown")
else:
    print("No classes detected; please inspect the dataset root manually.")

Total classes detected: 23
Miami: 43637 images
Medellin: 6024 images
London: 58672 images
Brussels: 14171 images
Barcelona: 15894 images
Lisbon: 27045 images
Melbourne: 28542 images
Bangkok: 22271 images
Rome: 24068 images
Chicago: 34091 images
Osaka: 22605 images
Minneapolis: 22326 images
Madrid: 14554 images
WashingtonDC: 11545 images
MexicoCity: 12801 images
Boston: 32616 images
BuenosAires: 8481 images
PRG: 17590 images
LosAngeles: 8891 images
PRS: 39963 images
Phoenix: 36251 images
OSL: 9756 images
TRT: 17712 images


### Add Noise / Randomness

In [17]:
# Data augmentation tuned for location invariance
data_augmentation = keras.Sequential(
    [
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.12),
        layers.RandomZoom(0.15),
        layers.RandomTranslation(0.1, 0.1),
        layers.RandomContrast(0.2),
        layers.RandomBrightness(0.2),
    ],
    name="data_augmentation",
)

### Setup and Build Training Model

In [18]:
# Base model: EfficientNetB4 for efficiency
base_model = keras.applications.EfficientNetB4(
    include_top=False,
    weights="imagenet",
    input_shape=(IMG_SIZE, IMG_SIZE, 3),
)

# Stage 1: freeze backbone
base_model.trainable = False

inputs = keras.Input(shape=(IMG_SIZE, IMG_SIZE, 3))
x = data_augmentation(inputs)
x = keras.applications.efficientnet.preprocess_input(x)

x = base_model(x, training=False)
x = layers.GlobalAveragePooling2D(name="global_avg_pool")(x)

# Lighter classification head with regularization
x = layers.BatchNormalization()(x)
x = layers.Dense(512, activation="relu", kernel_regularizer=keras.regularizers.l2(1e-4))(x)
x = layers.Dropout(0.4)(x)
x = layers.Dense(256, activation="relu", kernel_regularizer=keras.regularizers.l2(1e-4))(x)
x = layers.Dropout(0.3)(x)
outputs = layers.Dense(NUM_CLASSES, activation="softmax", dtype="float32", name="predictions")(x)

model = keras.Model(inputs, outputs, name="efficientnetB4_city_locator")

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-3),
    loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.05),
    metrics=[
        "accuracy",
        keras.metrics.TopKCategoricalAccuracy(k=3, name="top_3_acc"),
    ],
)

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb4_notop.h5
[1m71686520/71686520[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step


### Execute Training Model

In [19]:
# Show the model structure and run a short training loop so the cell produces output
model.summary()
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=20,  # trimmed for faster first pass
    verbose=1,
)

Epoch 1/20
[1m 1174/13238[0m [32m━[0m[37m━━━━━━━━━━━━━━━━━━━[0m [1m1:03:55[0m 318ms/step - accuracy: 0.2264 - loss: 2.9005 - top_3_acc: 0.4309

KeyboardInterrupt: 

### Plot Epochs

In [None]:
plt.plot(history.epoch, history.history["loss"])
plt.title("Train Loss Curve")
plt.show()

plt.plot(history.epoch, history.history["accuracy"])
plt.title("Train Accuracy Curve")
plt.show()

### Fine-Tuning Model

In [None]:
# Unfreeze top blocks of EfficientNetB4 for fine-tuning
base_model.trainable = True

fine_tune_at = len(base_model.layers) * 3 // 4  # top ~25% of layers

for i, layer in enumerate(base_model.layers):
    layer.trainable = i >= fine_tune_at
    # Keep BatchNorm in eval mode for stability
    if isinstance(layer, layers.BatchNormalization):
        layer.trainable = False

model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=1e-5),
    loss=keras.losses.CategoricalCrossentropy(label_smoothing=0.05),
    metrics=[
        "accuracy",
        keras.metrics.TopKCategoricalAccuracy(k=3, name="top_3_acc"),
    ],
)

# Train again with early stopping
early_stopping = keras.callbacks.EarlyStopping(
    monitor="val_accuracy",
    patience=3,
    restore_best_weights=True,
)

history_finetune = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=10,  # shorter fine-tune phase
    callbacks=[early_stopping],
    verbose=1,
)