In [None]:
# Binary Golf Course Classifier
# MobileNetV2 transfer learning with data augmentation

import os
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models, callbacks
from datasets import load_dataset
import kagglehub

In [None]:
# GPU Configuration
# Enable on-demand GPU memory allocation and mixed precision when a GPU exists.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        # Memory growth can only be set before the GPUs are initialised, so
        # re-running this cell in a live kernel raises RuntimeError. Report it
        # instead of aborting the notebook; the earlier setting stays in force.
        print(f"Could not change GPU memory growth: {err}")
    keras.mixed_precision.set_global_policy('mixed_float16')
    print(f"GPU configured: {len(gpus)} device(s)")
else:
    print("No GPU detected, using CPU")

In [None]:
# Hyperparameters
# Tunable settings shared by the data pipeline, the model and the training loop.
IMAGE_SIZE = (224, 224)  # target size handed to the image loaders; also the model's spatial input shape
BATCH_SIZE = 32  # batch size used for the train/val/test tf.data pipelines
LEARNING_RATE = 1e-4  # learning rate given to the Adam optimizer at compile time
MAX_EPOCHS = 15  # upper bound on the number of epochs passed to model.fit

In [None]:
# Load datasets
print("Loading UC Merced dataset...")
ucmerced = load_dataset("blanchon/UC_Merced", split="train")
print(f"UC Merced: {len(ucmerced)} images, 21 classes")

print("Loading Danish Golf Course dataset...")
golf_dataset_path = kagglehub.dataset_download('jacotaco/danish-golf-courses-orthophotos')
IMAGES_DIR = os.path.join(golf_dataset_path, '1. orthophotos')

# os.listdir returns entries in arbitrary order and includes every entry in the
# folder. Sort so the seeded train/val/test split is reproducible across
# machines, and keep only image files so stray entries cannot break Image.open.
IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.tif', '.tiff', '.bmp')
danish_golf_files = sorted(
    os.path.join(IMAGES_DIR, f)
    for f in os.listdir(IMAGES_DIR)
    if f.lower().endswith(IMAGE_EXTENSIONS)
)
if not danish_golf_files:
    # Fail here with a clear message rather than later with a shape error.
    raise FileNotFoundError(f"No image files found in {IMAGES_DIR}")
print(f"Danish golf: {len(danish_golf_files)} images")

In [None]:
# UC Merced class definitions
# Both dicts map the dataset's integer label to its class name. Insertion order
# matters: the negative loader walks the dicts in this order when stacking images.

# Challenging: visually similar to golf courses
CHALLENGING_CLASSES = {
    2: 'baseballdiamond',
    20: 'tenniscourt',
    0: 'agricultural',
    12: 'mediumresidential',
    18: 'sparseresidential',
    6: 'denseresidential',
    16: 'river',
    5: 'chaparral',
}

# Easy: clearly different from golf courses
EASY_CLASSES = {
    1: 'airplane', 3: 'beach', 4: 'buildings', 7: 'forest',
    8: 'freeway', 10: 'harbor', 11: 'intersection', 13: 'mobilehomepark',
    14: 'overpass', 15: 'parkinglot', 17: 'runway', 19: 'storagetanks',
}

# Golf course is class 9 (the positive class); named so the label is not a magic number.
GOLF_CLASS_ID = 9

# Guard against label-leaking edits to the tables above: a class must not be
# counted in both negative groups, and the positive class must not be a negative.
_negative_class_ids = set(CHALLENGING_CLASSES) | set(EASY_CLASSES)
assert len(_negative_class_ids) == len(CHALLENGING_CLASSES) + len(EASY_CLASSES), \
    "CHALLENGING_CLASSES and EASY_CLASSES overlap"
assert GOLF_CLASS_ID not in _negative_class_ids, \
    "golf class must not be listed as a negative class"

ucmerced_golf_samples = [item for item in ucmerced if item['label'] == GOLF_CLASS_ID]
print(f"UC Merced golf: {len(ucmerced_golf_samples)} images")

In [None]:
# Data augmentation
def get_augmentation_layer():
    """Build the random image-augmentation pipeline used for training.

    The pipeline applies, in order: horizontal flip, small rotation, zoom,
    brightness jitter and contrast jitter.

    Returns:
        A ``keras.Sequential`` named ``'augmentation'``.
    """
    return keras.Sequential([
        layers.RandomFlip("horizontal"),
        layers.RandomRotation(0.05),
        layers.RandomZoom(0.1),
        # The image loaders in this notebook scale pixels to [0, 1], while
        # RandomBrightness defaults to value_range=(0, 255). With the default,
        # the brightness shift is scaled by 255 and swamps the image, so the
        # real input range must be given explicitly.
        layers.RandomBrightness(0.1, value_range=(0.0, 1.0)),
        # NOTE(review): RandomContrast may push values slightly outside
        # [0, 1]; confirm whether clipping is wanted here.
        layers.RandomContrast(0.1),
    ], name='augmentation')

In [None]:
# Image loading functions
def prepare_danish_golf_images(image_paths, target_size=(224, 224)):
    """Load image files from disk as a normalised float32 batch.

    Each file is opened, converted to RGB, resized to ``target_size`` and
    scaled from 0-255 to 0-1.

    Args:
        image_paths: Iterable of paths to image files.
        target_size: ``(width, height)`` passed to ``Image.resize``.

    Returns:
        float32 array of shape ``(n_images, height, width, 3)``. An empty
        input yields shape ``(0, height, width, 3)`` so the result can still
        be concatenated with other batches.
    """
    width, height = target_size
    images = []
    for img_path in image_paths:
        # The context manager closes the file handle once the pixels are read;
        # a bare Image.open keeps one handle open per image.
        with Image.open(img_path) as img:
            resized = img.convert('RGB').resize(target_size)
            # Convert straight to float32 to avoid a float64 copy of every image.
            images.append(np.asarray(resized, dtype=np.float32) / 255.0)
    # The reshape does nothing for a non-empty batch; it only gives the empty
    # case a well-formed 4-D shape.
    return np.array(images, dtype=np.float32).reshape(-1, height, width, 3)


def prepare_ucmerced_golf_images(golf_samples, target_size=(224, 224)):
    """Turn UC Merced golf samples into a normalised float32 batch.

    Args:
        golf_samples: Iterable of mappings whose ``'image'`` entry is an image
            object supporting ``convert`` and ``resize``.
        target_size: ``(width, height)`` passed to ``resize``.

    Returns:
        float32 array of shape ``(n_samples, height, width, 3)`` with values
        scaled from 0-255 to 0-1. An empty input yields shape
        ``(0, height, width, 3)`` so the result can still be concatenated.
    """
    width, height = target_size
    images = [
        # convert('RGB') matches the Danish loader and guarantees three
        # channels; float32 conversion avoids a float64 copy of every image.
        np.asarray(sample['image'].convert('RGB').resize(target_size), dtype=np.float32) / 255.0
        for sample in golf_samples
    ]
    # The reshape does nothing for a non-empty batch; it only gives the empty
    # case a well-formed 4-D shape.
    return np.array(images, dtype=np.float32).reshape(-1, height, width, 3)


def prepare_ucmerced_negatives_by_difficulty(dataset, target_size=(224, 224)):
    """Load the non-golf UC Merced images, grouped by difficulty.

    Images whose label is a key of ``CHALLENGING_CLASSES`` or ``EASY_CLASSES``
    are converted to RGB, resized to ``target_size`` and scaled to 0-1.

    Args:
        dataset: Iterable of mappings with an integer ``'label'`` and an
            ``'image'`` object supporting ``convert`` and ``resize``.
        target_size: ``(width, height)`` passed to ``resize``.

    Returns:
        Tuple ``(challenging, easy)`` of float32 arrays, each of shape
        ``(n, height, width, 3)``. Within each array images are grouped by
        class in the insertion order of the corresponding dict, and in dataset
        order within a class.
    """
    width, height = target_size

    # Bucket images by label in a single pass. Scanning the dataset once per
    # class would re-read every sample for each of the 20 negative classes.
    per_class = {class_id: [] for class_id in (*CHALLENGING_CLASSES, *EASY_CLASSES)}
    for sample in dataset:
        bucket = per_class.get(sample['label'])
        if bucket is None:
            continue  # label is not one of the negative classes
        img = sample['image'].convert('RGB').resize(target_size)
        bucket.append(np.asarray(img, dtype=np.float32) / 255.0)

    def _stack(class_ids):
        """Concatenate the buckets of ``class_ids`` in order into one batch."""
        images = [img for class_id in class_ids for img in per_class[class_id]]
        # The reshape only matters when no image matched: it keeps the result 4-D.
        return np.array(images, dtype=np.float32).reshape(-1, height, width, 3)

    return _stack(CHALLENGING_CLASSES), _stack(EASY_CLASSES)

In [None]:
# Prepare images
# Seed both RNGs up front so every later random step is repeatable.
tf.random.set_seed(42)
np.random.seed(42)

print("Loading images...")
danish_golf_images = prepare_danish_golf_images(danish_golf_files, IMAGE_SIZE)
ucmerced_golf_images = prepare_ucmerced_golf_images(ucmerced_golf_samples, IMAGE_SIZE)
(challenging_negatives,
 easy_negatives) = prepare_ucmerced_negatives_by_difficulty(ucmerced, IMAGE_SIZE)

# Report the class balance: positives are golf images from both sources,
# negatives are the two UC Merced difficulty groups.
positive_count = len(danish_golf_images) + len(ucmerced_golf_images)
negative_count = len(challenging_negatives) + len(easy_negatives)
print(f"Positives: {positive_count}")
print(f"Negatives: {negative_count}")

In [None]:
# Stratified split
# Each source is shuffled and cut into train/val/test on its own:
#   Danish golf:           80/10/10
#   UC Merced golf:        60/20/20
#   Challenging negatives: 50/30/20
#   Easy negatives:        70/20/10


def _cut_three_ways(order, train_end, val_end):
    """Slice a permutation into (train, val, test) at two cumulative fractions."""
    total = len(order)
    first_cut = int(train_end * total)
    second_cut = int(val_end * total)
    return order[:first_cut], order[first_cut:second_cut], order[second_cut:]


# The permutations are drawn in a fixed order (Danish, UC Merced golf,
# challenging, easy) so the seeded RNG yields the same split on every run.
n_danish = len(danish_golf_images)
danish_indices = np.random.permutation(n_danish)
danish_train_idx, danish_val_idx, danish_test_idx = _cut_three_ways(danish_indices, 0.80, 0.90)

n_ucm_golf = len(ucmerced_golf_images)
ucm_golf_indices = np.random.permutation(n_ucm_golf)
ucm_golf_train_idx, ucm_golf_val_idx, ucm_golf_test_idx = _cut_three_ways(ucm_golf_indices, 0.60, 0.80)

n_challenging = len(challenging_negatives)
challenging_indices = np.random.permutation(n_challenging)
challenging_train_idx, challenging_val_idx, challenging_test_idx = _cut_three_ways(
    challenging_indices, 0.50, 0.80
)

n_easy = len(easy_negatives)
easy_indices = np.random.permutation(n_easy)
easy_train_idx, easy_val_idx, easy_test_idx = _cut_three_ways(easy_indices, 0.70, 0.90)

In [None]:
# Combine splits
def _assemble_split(danish_idx, ucm_golf_idx, challenging_idx, easy_idx):
    """Stack the four sources for one split and build matching 1/0 labels."""
    stacked = np.concatenate([
        danish_golf_images[danish_idx],
        ucmerced_golf_images[ucm_golf_idx],
        challenging_negatives[challenging_idx],
        easy_negatives[easy_idx],
    ])
    # Golf images (both sources) come first and are labelled 1; negatives follow as 0.
    n_positive = len(danish_idx) + len(ucm_golf_idx)
    n_negative = len(challenging_idx) + len(easy_idx)
    targets = np.concatenate([np.ones(n_positive), np.zeros(n_negative)])
    return stacked, targets


train_images, train_labels = _assemble_split(
    danish_train_idx, ucm_golf_train_idx, challenging_train_idx, easy_train_idx
)
val_images, val_labels = _assemble_split(
    danish_val_idx, ucm_golf_val_idx, challenging_val_idx, easy_val_idx
)
test_images, test_labels = _assemble_split(
    danish_test_idx, ucm_golf_test_idx, challenging_test_idx, easy_test_idx
)

# Shuffle
# Each split is permuted in place, images and labels with the same order, so
# the pairs stay aligned. Permutations are drawn train, then val, then test.
for split_images, split_labels in (
    (train_images, train_labels),
    (val_images, val_labels),
    (test_images, test_labels),
):
    order = np.random.permutation(len(split_images))
    split_images[...] = split_images[order]
    split_labels[...] = split_labels[order]

print(f"Train: {len(train_images)}, Val: {len(val_images)}, Test: {len(test_images)}")

In [None]:
# Create TensorFlow datasets
augmentation_layer = get_augmentation_layer()

def augment_with_passthrough(images, labels):
    """Augment each image of a batch independently with 50% probability.

    This function is mapped over the dataset after batching, so a single
    scalar coin flip would augment or skip whole batches at once. Drawing one
    flip per image keeps augmented and original images mixed within a batch.

    Args:
        images: Batch of images; the leading axis is the batch axis.
        labels: Labels for the batch, returned unchanged.

    Returns:
        Tuple ``(images, labels)`` where ``images`` is float32 and each image
        is either its augmented version or the original.
    """
    originals = tf.cast(images, tf.float32)
    # Cast back to float32: under a mixed-precision policy the layer may
    # return a lower-precision tensor.
    augmented = tf.cast(augmentation_layer(originals, training=True), tf.float32)
    # Shape (batch, 1, 1, 1) so the per-image decision broadcasts over H, W, C.
    use_augmented = tf.random.uniform([tf.shape(originals)[0], 1, 1, 1]) >= 0.5
    return tf.where(use_augmented, augmented, originals), labels

# Training pipeline: reshuffle every epoch, batch, augment, then prefetch.
train_ds = tf.data.Dataset.from_tensor_slices((train_images, train_labels))
train_ds = train_ds.shuffle(1000, reshuffle_each_iteration=True)
train_ds = train_ds.batch(BATCH_SIZE)
train_ds = train_ds.map(augment_with_passthrough, num_parallel_calls=tf.data.AUTOTUNE)
train_ds = train_ds.prefetch(tf.data.AUTOTUNE)

# Validation and test pipelines are neither shuffled nor augmented.
val_ds = tf.data.Dataset.from_tensor_slices((val_images, val_labels))
val_ds = val_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

test_ds = tf.data.Dataset.from_tensor_slices((test_images, test_labels))
test_ds = test_ds.batch(BATCH_SIZE).prefetch(tf.data.AUTOTUNE)

In [None]:
# Build classifier
def build_golf_classifier(input_shape=(224, 224, 3)):
    """Assemble a binary classifier on top of a frozen MobileNetV2 backbone.

    The backbone is loaded with ImageNet weights, without its classification
    top, and is kept non-trainable and called in inference mode. Inputs are
    multiplied by 255 before MobileNetV2's own ``preprocess_input`` (the
    notebook's image loaders scale pixels to 0-1). The head is global average
    pooling, dropout, a 128-unit ReLU layer, dropout, and a single sigmoid unit.

    Args:
        input_shape: ``(height, width, channels)`` of the input images.

    Returns:
        An uncompiled ``keras.Model`` named ``'GolfClassifier'`` whose output
        is one float32 sigmoid value per image.
    """
    backbone = keras.applications.MobileNetV2(
        weights='imagenet',
        include_top=False,
        input_shape=input_shape,
    )
    backbone.trainable = False

    image_input = keras.Input(shape=input_shape)
    rescaled = image_input * 255.0
    features = keras.applications.mobilenet_v2.preprocess_input(rescaled)
    # training=False keeps the frozen backbone in inference mode.
    features = backbone(features, training=False)
    features = layers.GlobalAveragePooling2D()(features)
    features = layers.Dropout(0.3)(features)
    features = layers.Dense(128, activation='relu')(features)
    features = layers.Dropout(0.2)(features)
    # The output layer is pinned to float32 regardless of the global dtype policy.
    probability = layers.Dense(1, activation='sigmoid', dtype='float32')(features)

    return keras.Model(inputs=image_input, outputs=probability, name='GolfClassifier')


# Build the network for IMAGE_SIZE RGB inputs, then attach optimizer, loss and metrics.
model = build_golf_classifier(input_shape=tuple(IMAGE_SIZE) + (3,))

adam_optimizer = keras.optimizers.Adam(learning_rate=LEARNING_RATE)
# Order matters: model.evaluate returns loss followed by these metrics in this order.
tracked_metrics = ['accuracy', keras.metrics.Precision(), keras.metrics.Recall()]
model.compile(optimizer=adam_optimizer, loss='binary_crossentropy', metrics=tracked_metrics)

model.summary()

In [None]:
# Callbacks
# Save the model to disk whenever validation accuracy improves.
checkpoint_cb = callbacks.ModelCheckpoint(
    filepath='best_golf_classifier.keras',
    monitor='val_accuracy',
    save_best_only=True,
    verbose=1,
)

# Stop after 7 epochs without a better validation loss and roll back to the best weights.
early_stopping_cb = callbacks.EarlyStopping(
    monitor='val_loss',
    patience=7,
    restore_best_weights=True,
    verbose=1,
)

# Halve the learning rate after 4 stagnant epochs of validation loss, never below 1e-7.
reduce_lr_cb = callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=4,
    verbose=1,
    min_lr=1e-7,
)

callback_list = [checkpoint_cb, early_stopping_cb, reduce_lr_cb]

In [None]:
# Train
# Fit on the training pipeline and validate on val_ds; training runs for at
# most MAX_EPOCHS epochs and callback_list is handed to Keras unchanged.
# `history` holds the per-epoch metrics that the plotting cell reads later.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=MAX_EPOCHS,
    callbacks=callback_list,
    verbose=1
)

In [None]:
# Evaluate
def _evaluate_and_report(heading, dataset):
    """Print a heading, evaluate the model on ``dataset`` and print its metrics.

    Returns the values in the order ``model.evaluate`` yields them:
    loss, accuracy, precision, recall.
    """
    print(heading)
    loss, acc, prec, rec = model.evaluate(dataset, verbose=0)
    print(f"  Accuracy: {acc:.4f}, Precision: {prec:.4f}, Recall: {rec:.4f}")
    return loss, acc, prec, rec


val_loss, val_acc, val_prec, val_rec = _evaluate_and_report("Validation:", val_ds)
test_loss, test_acc, test_prec, test_rec = _evaluate_and_report("Test:", test_ds)

In [None]:
# Save model
# Write the model in its current (post-training) state to a `.keras` file in
# the working directory, then confirm on stdout.
model.save('final_golf_classifier.keras')
print("Model saved")

In [None]:
# Training history
# Two side-by-side panels (accuracy, loss), each with a train and a validation curve.
history_panels = [
    ('accuracy', 'val_accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Loss'),
]

plt.figure(figsize=(12, 4))

for panel_number, (train_key, val_key, panel_title) in enumerate(history_panels, start=1):
    plt.subplot(1, 2, panel_number)
    plt.plot(history.history[train_key], label='Train')
    plt.plot(history.history[val_key], label='Val')
    plt.title(panel_title)
    plt.xlabel('Epoch')
    plt.legend()
    plt.grid(True)

plt.tight_layout()
plt.show()

In [None]:
# Sample predictions
# Show up to six random validation images with their true and predicted labels.
# Clamp the sample count so replace=False cannot fail on a tiny validation set.
n_samples = min(6, len(val_images))
sample_indices = np.random.choice(len(val_images), size=n_samples, replace=False)
sample_images = val_images[sample_indices]
sample_labels = val_labels[sample_indices]

# Predict once on the whole sample batch instead of calling predict per image.
sample_probs = model.predict(sample_images, verbose=0).reshape(-1)

fig, axes = plt.subplots(2, 3, figsize=(12, 8))
axes = axes.flatten()

# Hide every axis up front so unused panels stay blank when n_samples < 6.
for ax in axes:
    ax.axis('off')

for ax, img, true_label, pred_prob in zip(axes, sample_images, sample_labels, sample_probs):
    pred_label = 1 if pred_prob > 0.5 else 0

    ax.imshow(img)
    # Green title for a correct prediction, red for a wrong one.
    ax.set_title(
        f"True: {'Golf' if true_label == 1 else 'Not Golf'}\n"
        f"Pred: {'Golf' if pred_label == 1 else 'Not Golf'} ({pred_prob:.1%})",
        color='green' if pred_label == true_label else 'red'
    )

plt.tight_layout()
plt.show()