In [1]:

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

# Number of bytes requested per read while streaming each archive download.
CHUNK_SIZE = 40960
# Comma-separated list of '<directory>:<percent-encoded download URL>' entries.
# Each entry is split on ':' and the URL part is decoded with unquote() below.
# NOTE(review): the URL carries X-Goog-Date / X-Goog-Expires query parameters, so it
# presumably stops working once that signing window has passed -- regenerate this
# cell from Kaggle if the download reports a failure.
DATA_SOURCE_MAPPING = 'madrid-es:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F5146647%2F8601919%2Fbundle%2Farchive.zip%3FX-Goog-Algorithm%3DGOOG4-RSA-SHA256%26X-Goog-Credential%3Dgcp-kaggle-com%2540kaggle-161607.iam.gserviceaccount.com%252F20240604%252Fauto%252Fstorage%252Fgoog4_request%26X-Goog-Date%3D20240604T093627Z%26X-Goog-Expires%3D259200%26X-Goog-SignedHeaders%3Dhost%26X-Goog-Signature%3D48072369d4e9f6187fd7bbf0d00c326e2e2c985f50ecda408ea6fdadd5e5af2b38e0e915aa43e515f014dd9d55ea6be24c48df23d1b34c8088af738806272312ff3f227129694e02c0fd1e4cced50471b51482001cbdb557b3205aa1182ee4f34cb442a89c4bcac78dec67592523cb85741ab506f99b72222647414919c63904787948a8943ccb3583a5a747ae8ce20d25836e3062858c6342523f4f00bace210c2aebaa0f32813be555032f0825b24603731c09684f97c0ff81f037dca9f9c4ea009b1283348d9e68a58b88d25c6fb96525f5937fc97352e548289bd6426e8c5aa2c0b57c8144036c91f3dd28bbb78d6a868467e32f02337e959d1367512458'

# Root directory under which each data source is extracted (one sub-directory per entry).
KAGGLE_INPUT_PATH='/kaggle/input'
# Working directory created alongside the input directory.
KAGGLE_WORKING_PATH='/kaggle/working'
# Not referenced anywhere else in the visible notebook.
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null
shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
  os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
  pass
try:
  os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
  pass

# Download every configured data source and unpack it under KAGGLE_INPUT_PATH.
for data_source_mapping in DATA_SOURCE_MAPPING.split(','):
    # Split on the first ':' only, so a stray ':' in the URL part cannot break unpacking.
    directory, download_url_encoded = data_source_mapping.split(':', 1)
    download_url = unquote(download_url_encoded)
    filename = urlparse(download_url).path
    destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
    try:
        with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
            total_length = fileres.headers['content-length']
            print(f'Downloading {directory}, {total_length} bytes compressed')
            # The header is optional; without it the bar stays empty instead of
            # crashing on int(None).
            total_bytes = int(total_length) if total_length else 0
            dl = 0
            data = fileres.read(CHUNK_SIZE)
            while len(data) > 0:
                dl += len(data)
                tfile.write(data)
                done = min(50, int(50 * dl / total_bytes)) if total_bytes else 0
                sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
                sys.stdout.flush()
                data = fileres.read(CHUNK_SIZE)
            # Push buffered bytes to disk: the tar branch reopens the file by name.
            tfile.flush()
            # NOTE(review): extractall() trusts the member paths inside the archive;
            # only use this with archives from a trusted source.
            if filename.endswith('.zip'):
                with ZipFile(tfile) as zfile:
                    zfile.extractall(destination_path)
            else:
                # Must not be named `tarfile`: that would rebind the module name
                # and break the next loop iteration.
                with tarfile.open(tfile.name) as tar_archive:
                    tar_archive.extractall(destination_path)
            print(f'\nDownloaded and uncompressed: {directory}')
    except HTTPError:
        print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
        continue
    except OSError:
        print(f'Failed to load {download_url} to path {destination_path}')
        continue

print('Data source import complete.')


Downloading madrid-es, 1185389329 bytes compressed
Downloaded and uncompressed: madrid-es
Data source import complete.


In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Print the full path of every file found beneath the Kaggle input directory.
for folder, _subfolders, file_names in os.walk('/kaggle/input'):
    for file_name in file_names:
        file_path = os.path.join(folder, file_name)
        print(file_path)

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All"
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [4]:
import os
import shutil
from random import shuffle

def split_data(source_dir, train_dir, val_dir, test_dir, train_size=0.7, val_size=0.15, seed=None):
    """
    Split the data into training, validation, and test sets.
    Each set will have its own directory containing subdirectories for each class.

    Files are copied (not moved); whatever is left after the training and
    validation shares goes to the test set.

    Parameters:
    - source_dir: Directory containing the dataset's class folders.
    - train_dir: Directory where the training data subdirectories will be stored.
    - val_dir: Directory where the validation data subdirectories will be stored.
    - test_dir: Directory where the test data subdirectories will be stored.
    - train_size: Proportion of the training set (default 0.7).
    - val_size: Proportion of the validation set (default 0.15).
    - seed: Optional seed for the shuffle. With a seed the split is reproducible
      across runs; with the default None a different split is drawn each call.

    Raises:
    - ValueError: if a proportion is negative or the two proportions exceed 1.
    """
    # Function-scope import: the enclosing cell only imports `shuffle`.
    from random import Random

    # Small tolerance so sums such as 0.7 + 0.3 are not rejected by float rounding.
    if train_size < 0 or val_size < 0 or train_size + val_size > 1 + 1e-9:
        raise ValueError(
            f"train_size ({train_size}) and val_size ({val_size}) must be "
            "non-negative and sum to at most 1"
        )

    classes = [d for d in os.listdir(source_dir) if os.path.isdir(os.path.join(source_dir, d))]
    print("Classes found: ", classes)

    for split_dir in (train_dir, val_dir, test_dir):
        os.makedirs(split_dir, exist_ok=True)

    rng = Random(seed)

    for cls in classes:
        cls_dir = os.path.join(source_dir, cls)
        # os.listdir order is arbitrary; sort first so a given seed always
        # yields the same permutation.
        images = sorted(f for f in os.listdir(cls_dir) if os.path.isfile(os.path.join(cls_dir, f)))

        # Shuffle the images to ensure random distribution
        rng.shuffle(images)

        # Calculate split indices
        train_end = int(len(images) * train_size)
        val_end = train_end + int(len(images) * val_size)

        splits = (
            (train_dir, images[:train_end]),
            (val_dir, images[train_end:val_end]),
            (test_dir, images[val_end:]),
        )

        # Create the class directory inside each split and copy its images over
        for split_dir, split_images in splits:
            dest_cls_dir = os.path.join(split_dir, cls)
            os.makedirs(dest_cls_dir, exist_ok=True)
            for img in split_images:
                shutil.copy(os.path.join(cls_dir, img), os.path.join(dest_cls_dir, img))

# Usage
source_directory = r'/kaggle/input/madrid-es/50km/50km'  # Path to the dataset directory
train_directory = r'/kaggle/working/data/madrid-es/50km/training'
val_directory = r'/kaggle/working/data/madrid-es/50km/evaluation'
test_directory = r'/kaggle/working/data/madrid-es/50km/testing'

# split_data only copies files into the split directories and reshuffles on every
# call. Without clearing old output, re-running this cell would pile a second,
# different split on top of the first and the same image could end up in both
# the training and the evaluation/testing folders.
for stale_split_dir in (train_directory, val_directory, test_directory):
    shutil.rmtree(stale_split_dir, ignore_errors=True)

split_data(source_directory, train_directory, val_directory, test_directory)
print("Spliting data completed!")




Classes found:  ['Discontinuous Medium Density Urban Fabric (S.L.  30% - 50%)', 'Airports', 'Railways and associated land', 'Fast transit roads and associated land', 'Water bodies', 'Sports and leisure facilities', 'Discontinuous Dense Urban Fabric (S.L.  50% -  80%)', 'default_value', 'Green urban areas', 'Other roads and associated land', 'Industrial, commercial, public, military and private units', 'Discontinuous Low Density Urban Fabric (S.L.  10% - 30%)', 'Construction sites', 'Agricultural + Semi-natural areas + Wetlands', 'Mineral extraction and dump sites', 'Forests', 'Continuous Urban Fabric (S.L.  80%)', 'Discontinuous Very Low Density Urban Fabric (S.L.  10%)']
Spliting data completed!


In [5]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

def setup_data_generators(train_dir, val_dir, batch_size=32, target_size=(224, 224)):
    """
    Create the directory-backed image iterators used for training and validation.

    Training images are rescaled to [0, 1] and randomly sheared, zoomed and
    horizontally flipped; validation images are only rescaled.

    Parameters:
    - train_dir: str, directory containing training data.
    - val_dir: str, directory containing validation data.
    - batch_size: int, size of the batches of data (default: 32).
    - target_size: tuple, dimensions to which all images found will be resized (default: (224, 224)).

    Returns:
    - (train_generator, validation_generator), both yielding categorical labels.
    """
    # Options shared by both directory iterators
    flow_options = dict(
        target_size=target_size,
        batch_size=batch_size,
        class_mode='categorical',
    )

    # Augmenting pipeline for training, rescale-only pipeline for validation
    augmenting_datagen = ImageDataGenerator(
        rescale=1./255, shear_range=0.2, zoom_range=0.2, horizontal_flip=True
    )
    rescaling_datagen = ImageDataGenerator(rescale=1./255)

    train_generator = augmenting_datagen.flow_from_directory(train_dir, **flow_options)
    validation_generator = rescaling_datagen.flow_from_directory(val_dir, **flow_options)

    print("\nPreprocessing of images complete!")

    return train_generator, validation_generator


In [2]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import (
    Input,
    Activation,
    Dense,
    Flatten,
    Conv2D,
    MaxPooling2D,
    AveragePooling2D,
    BatchNormalization,
    Add
)
from tensorflow.keras.regularizers import l2

def _bn_relu(input):
    """Run `input` through a new BatchNormalization layer, then a ReLU activation."""
    normalized = BatchNormalization()(input)
    relu = Activation("relu")
    return relu(normalized)

def _conv_bn_relu(filters, kernel_size, strides=(1, 1)):
    """Return a callable applying Conv2D -> BatchNormalization -> ReLU to a tensor.

    The convolution uses "same" padding, he_normal initialization and an
    L2 kernel regularizer of 1e-4.
    """
    def apply(tensor):
        convolution = Conv2D(
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding="same",
            kernel_initializer="he_normal",
            kernel_regularizer=l2(1.e-4),
        )
        return _bn_relu(convolution(tensor))
    return apply

def _bn_relu_conv(filters, kernel_size, strides=(1, 1)):
    """Return a callable applying BatchNormalization -> ReLU -> Conv2D to a tensor.

    The convolution uses "same" padding, he_normal initialization and an
    L2 kernel regularizer of 1e-4.
    """
    def apply(tensor):
        pre_activated = _bn_relu(tensor)
        convolution = Conv2D(
            filters=filters,
            kernel_size=kernel_size,
            strides=strides,
            padding="same",
            kernel_initializer="he_normal",
            kernel_regularizer=l2(1.e-4),
        )
        return convolution(pre_activated)
    return apply

def _shortcut(input, residual):
    """Add `input` to `residual`, projecting `input` first when their shapes differ.

    Shapes are read at indices 1, 2 and 3 (presumably channels-last: two spatial
    axes followed by channels -- TODO confirm). When the residual is spatially
    smaller or has a different channel count, `input` goes through a strided
    1x1 convolution so both operands of the Add layer match.
    """
    in_shape = tf.keras.backend.int_shape(input)
    res_shape = tf.keras.backend.int_shape(residual)

    # Down-sampling factor along each of the two spatial axes
    strides = tuple(int(round(in_shape[axis] / res_shape[axis])) for axis in (1, 2))
    channels_differ = in_shape[3] != res_shape[3]

    if max(strides) <= 1 and not channels_differ:
        # Shapes already agree: plain identity shortcut
        return Add()([input, residual])

    projection = Conv2D(
        filters=res_shape[3],
        kernel_size=(1, 1),
        strides=strides,
        padding="valid",
        kernel_initializer="he_normal",
        kernel_regularizer=l2(0.0001),
    )(input)
    return Add()([projection, residual])

def _residual_block(block_function, filters, repetitions, is_first_layer=False):
    """Return a callable that stacks `repetitions` blocks built by `block_function`.

    The first block of every stage except the first stage uses stride (2, 2);
    all other blocks use stride (1, 1).
    """
    def apply(tensor):
        for index in range(repetitions):
            opens_stage = index == 0
            strides = (2, 2) if opens_stage and not is_first_layer else (1, 1)
            build_block = block_function(
                filters,
                strides,
                is_first_block_of_first_layer=(is_first_layer and opens_stage),
            )
            tensor = build_block(tensor)
        return tensor
    return apply

def basic_block(filters, strides=(1, 1), is_first_block_of_first_layer=False):
    """Two-convolution (3x3, 3x3) residual block used by ResNet-18 and ResNet-34.

    The very first block of the network applies its first convolution without
    the BatchNormalization/ReLU pre-activation; every other block pre-activates.
    """
    def apply(tensor):
        if not is_first_block_of_first_layer:
            first_conv = _bn_relu_conv(filters, (3, 3), strides)(tensor)
        else:
            first_conv = Conv2D(
                filters=filters,
                kernel_size=(3, 3),
                strides=strides,
                padding="same",
                kernel_initializer="he_normal",
                kernel_regularizer=l2(0.0001),
            )(tensor)

        second_conv = _bn_relu_conv(filters, (3, 3))(first_conv)
        return _shortcut(tensor, second_conv)
    return apply

def bottleneck(filters, strides=(1, 1), is_first_block_of_first_layer=False):
    """Three-convolution (1x1, 3x3, 1x1) residual block used by ResNet-50/101/152.

    The last 1x1 convolution outputs `filters * 4` channels. The very first
    block of the network applies its first convolution without the
    BatchNormalization/ReLU pre-activation; every other block pre-activates.
    """
    def apply(tensor):
        if not is_first_block_of_first_layer:
            reduced = _bn_relu_conv(filters, (1, 1), strides)(tensor)
        else:
            reduced = Conv2D(
                filters=filters,
                kernel_size=(1, 1),
                strides=strides,
                padding="same",
                kernel_initializer="he_normal",
                kernel_regularizer=l2(0.0001),
            )(tensor)

        spatial = _bn_relu_conv(filters, (3, 3))(reduced)
        expanded = _bn_relu_conv(filters * 4, (1, 1))(spatial)
        return _shortcut(tensor, expanded)
    return apply

def ResNetBuilder(input_shape, num_outputs, block_fn, repetitions):
    """Build ResNet architecture based on the block function and repetitions specified.

    Parameters:
    - input_shape: shape of one input image, passed to `Input(shape=...)`.
    - num_outputs: number of units in the final softmax layer.
    - block_fn: block factory (`basic_block` or `bottleneck`).
    - repetitions: number of blocks in each stage; the filter count starts at 64
      and doubles after every stage.

    Returns:
    - An uncompiled Keras `Model` mapping the image input to class probabilities.
    """
    img_input = Input(shape=input_shape)
    conv1 = _conv_bn_relu(64, (7, 7), strides=(2, 2))(img_input)
    pool1 = MaxPooling2D(pool_size=(3, 3), strides=(2, 2), padding="same")(conv1)

    block = pool1
    filters = 64
    for i, r in enumerate(repetitions):
        block = _residual_block(block_fn, filters, r, is_first_layer=(i == 0))(block)
        filters *= 2

    block = _bn_relu(block)

    # Pool over the whole remaining feature map. A fixed 7x7 window is only a
    # global pool for 224x224 inputs with four stages (224 / 32 = 7); smaller
    # inputs would fail and larger ones would leave a spatial grid that inflates
    # the dense layer. Fall back to 7x7 when the spatial size is not known
    # statically.
    block_shape = tf.keras.backend.int_shape(block)
    rows, cols = block_shape[1], block_shape[2]
    pool_size = (rows, cols) if rows and cols else (7, 7)
    pool2 = AveragePooling2D(pool_size=pool_size, strides=(1, 1))(block)
    flatten1 = Flatten()(pool2)
    dense = Dense(units=num_outputs, activation="softmax", kernel_initializer="he_normal")(flatten1)

    model = Model(inputs=img_input, outputs=dense)
    return model

def build_resnet_18(input_shape, num_outputs):
    """Return a ResNet-18 model: basic blocks repeated 2, 2, 2, 2 times per stage."""
    return ResNetBuilder(
        input_shape=input_shape,
        num_outputs=num_outputs,
        block_fn=basic_block,
        repetitions=[2, 2, 2, 2],
    )

def build_resnet_34(input_shape, num_outputs):
    """Return a ResNet-34 model: basic blocks repeated 3, 4, 6, 3 times per stage."""
    return ResNetBuilder(
        input_shape=input_shape,
        num_outputs=num_outputs,
        block_fn=basic_block,
        repetitions=[3, 4, 6, 3],
    )

def build_resnet_50(input_shape, num_outputs):
    """Return a ResNet-50 model: bottleneck blocks repeated 3, 4, 6, 3 times per stage."""
    return ResNetBuilder(
        input_shape=input_shape,
        num_outputs=num_outputs,
        block_fn=bottleneck,
        repetitions=[3, 4, 6, 3],
    )

def build_resnet_101(input_shape, num_outputs):
    """Return a ResNet-101 model: bottleneck blocks repeated 3, 4, 23, 3 times per stage."""
    return ResNetBuilder(
        input_shape=input_shape,
        num_outputs=num_outputs,
        block_fn=bottleneck,
        repetitions=[3, 4, 23, 3],
    )

def build_resnet_152(input_shape, num_outputs):
    """Return a ResNet-152 model: bottleneck blocks repeated 3, 8, 36, 3 times per stage."""
    return ResNetBuilder(
        input_shape=input_shape,
        num_outputs=num_outputs,
        block_fn=bottleneck,
        repetitions=[3, 8, 36, 3],
    )

# Progress marker printed once this cell's ResNet definitions have been executed;
# no model is built here.
print("ResNet completed")


ResNet completed


In [6]:
import tensorflow as tf
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping, Callback
from tqdm.keras import TqdmCallback

# Assuming the 'setup_data_generators' function and 'build_resnet_50' are correctly defined/imported.

# Paths to training and validation data
train_dir = '/kaggle/working/data/madrid-es/50km/training'
val_dir = '/kaggle/working/data/madrid-es/50km/evaluation'

# Single source of truth for the image size, so the generators' resize target
# and the model's input shape cannot drift apart.
image_size = (224, 224)

# Initialize data generators
train_generator, validation_generator = setup_data_generators(
    train_dir, val_dir, target_size=image_size
)

# Build and compile the model. The softmax width comes from the class folders
# the generator actually found, so it always matches the one-hot label width.
model = build_resnet_50(input_shape=image_size + (3,),
                        num_outputs=train_generator.num_classes)
model.compile(optimizer=Adam(learning_rate=0.001),
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Define callbacks
# Keeps only the epoch with the best validation accuracy on disk.
checkpoint = ModelCheckpoint('best_model.h5', monitor='val_accuracy', save_best_only=True, verbose=0)
# Stops once validation loss has not improved for 10 consecutive epochs.
early_stop = EarlyStopping(monitor='val_loss', patience=10, verbose=0)
tqdm_callback = TqdmCallback(verbose=1)  # TQDM callback for more interactive progress bar

# Train the model using the tqdm progress bar through the callback
history = model.fit(train_generator,
                    epochs=50,
                    validation_data=validation_generator,
                    callbacks=[checkpoint, early_stop, tqdm_callback],
                    verbose=0)  # Set verbose=0 to prevent default progress bar from showing

# Save the entire model (its state after the last epoch run) for later use
model.save('madrid_land_model.h5')

print("Training Complete")


Found 24082 images belonging to 18 classes.
Found 5152 images belonging to 18 classes.

Preprocessing of images complete!


0epoch [00:00, ?epoch/s]

0batch [00:00, ?batch/s]

  saving_api.save_model(


KeyboardInterrupt: 

In [9]:
# Persist the in-memory model after training was interrupted.
# It is written to its own file: 'best_model.h5' is the path the ModelCheckpoint
# callback uses for the best-validation-accuracy epoch, and saving the interrupted
# (not necessarily best) model under that name would overwrite that checkpoint.
model.save('interrupted_model.h5')

print("Training stopped and model saved.")

Training stopped and model saved.


In [10]:
from google.colab import drive
# Mount Google Drive, then store a copy of the current in-memory model there.
drive.mount('/content/drive')

drive_model_path = '/content/drive/My Drive/best_model.h5'
model.save(drive_model_path)
print("Model saved to Google Drive.")


Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).
Model saved to Google Drive.
