In [2]:
import gzip
import os
import sys
import urllib
import matplotlib.image as mpimg
from PIL import Image

import code
import matplotlib.pyplot as plt

import tensorflow.python.platform
import sys
import numpy
import tensorflow as tf
import argparse
import numpy as np
from tensorflow.keras import layers, models, losses, optimizers
from tensorflow.keras.layers import Conv2D

%load_ext autoreload
%autoreload 2



2024-12-19 09:24:29.844292: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [3]:
NUM_CHANNELS = 3  # RGB images
PIXEL_DEPTH = 255
NUM_LABELS = 2
SEED = 66478  # Set to None for random seed.
BATCH_SIZE = 32  # 64
IMG_PATCH_SIZE = 16
TRAINING_SIZE = 100

# Functions

In [4]:
def img_crop(im, w, h):
    '''
    Crop an image into patches of size w x h
    '''
    list_patches = []
    imgwidth = im.shape[0]
    imgheight = im.shape[1]
    is_2d = len(im.shape) < 3
    for i in range(0, imgheight, h):
        for j in range(0, imgwidth, w):
            if is_2d:
                im_patch = im[j : j + w, i : i + h]
            else:
                im_patch = im[j : j + w, i : i + h, :]
            list_patches.append(im_patch)
    return list_patches

def value_to_class(v):
    foreground_threshold = 0.25  # percentage of pixels > 1 required to assign a foreground label to a patch
    df = numpy.sum(v)
    if df > foreground_threshold:  # road
        return [0, 1]
    else:  # bgrd
        return [1, 0]

def extract_data(filename, num_images):
    """Extract the images into a 4D tensor [image index, y, x, channels].
    Values are rescaled from [0, 255] down to [-0.5, 0.5].
    """
    imgs = []
    for i in range(1, num_images + 1):
        imageid = "satImage_%.3d" % i
        image_filename = filename + imageid + ".png"
        if os.path.isfile(image_filename):
            # print("Loading " + image_filename)
            img = mpimg.imread(image_filename)
            # Normalize
            img = (img - np.min(img)) / (np.max(img) - np.min(img))
            imgs.append(img)
        else:
            print("File " + image_filename + " does not exist")

    num_images = len(imgs)
    IMG_WIDTH = imgs[0].shape[0]
    IMG_HEIGHT = imgs[0].shape[1]
    N_PATCHES_PER_IMAGE = (IMG_WIDTH / IMG_PATCH_SIZE) * (IMG_HEIGHT / IMG_PATCH_SIZE)

    img_patches = [
        img_crop(imgs[i], IMG_PATCH_SIZE, IMG_PATCH_SIZE) for i in range(num_images)
    ] # list of list of patches, each list of patches corresponds to one image
    data = [
        img_patches[i][j]
        for i in range(len(img_patches))
        for j in range(len(img_patches[i]))
    ] # flatten the list of patches

    return numpy.asarray(data)

def cropped_imgs(images, patch_size):
    '''
    Crop the labels into patches of size patch_size x patch_size
    '''
    imgs_patches = [
        img_crop(images[i], patch_size, patch_size) for i in range(len(images))
    ]
    return numpy.asarray(
        [
            imgs_patches[i][j]
            for i in range(len(imgs_patches))
            for j in range(len(imgs_patches[i]))
        ]
    )
    
def cropped_labels(labels, patch_size):

    labels_patches = [
        img_crop(labels[i], patch_size, patch_size) for i in range(len(labels))
    ]

    data = numpy.asarray(
        [ 
            labels_patches[i][j]
            for i in range(len(labels_patches))
            for j in range(len(labels_patches[i]))
        ]
    )
    labels = np.asarray(
        [value_to_class(np.mean(data[i])) for i in range(len(data))]
    )
    
    return labels.astype(numpy.float32)
    

def extract_labels(filename, num_images):
    """Extract the labels into a 1-hot matrix [image index, label index]."""
    gt_imgs = []
    for i in range(1, num_images + 1):
        imageid = "satImage_%.3d" % i
        image_filename = filename + imageid + ".png"
        if os.path.isfile(image_filename):
            # print("Loading " + image_filename)
            img = mpimg.imread(image_filename)
            gt_imgs.append(img)
        else:
            print("File " + image_filename + " does not exist")

    num_images = len(gt_imgs)
    gt_patches = [
        img_crop(gt_imgs[i], IMG_PATCH_SIZE, IMG_PATCH_SIZE) for i in range(num_images)
    ]
    data = numpy.asarray(
        [
            gt_patches[i][j]
            for i in range(len(gt_patches))
            for j in range(len(gt_patches[i]))
        ]
    )
    labels = numpy.asarray(
        [value_to_class(numpy.mean(data[i])) for i in range(len(data))]
    )

    # Convert to dense 1-hot representation.
    return labels.astype(numpy.float32)

# Data Loading

In [5]:
data_dir = os.getcwd() + '/dataset/training/'
data_filename = data_dir + "images/"
labels_filename = data_dir + "groundtruth/"

def load_image(infilename):
    data = mpimg.imread(infilename)
    return data

def load_data(image_dir, gt_dir, training_size):
      files = image_dir + 'satImage_'
      #n = len(files)
      n = 100
      print("Loading " + str(n) + " images")
      imgs = [load_image(files + '%.3d' % i + '.png') for i in range(1,n)]
      print(imgs[0][2])

      gt_dir =gt_dir + 'satImage_'
      print("Loading " + str(n) + " images")
      gt_imgs = [load_image(gt_dir + '%.3d' % i + '.png') for i in range(1,n)]

      X_train = imgs
      Y_train = gt_imgs
      return X_train, Y_train

X, Y = load_data(data_filename, labels_filename, TRAINING_SIZE)


Loading 100 images
[[0.3254902  0.3019608  0.27058825]
 [0.31764707 0.28627452 0.25490198]
 [0.3137255  0.2901961  0.25490198]
 ...
 [0.31764707 0.31764707 0.29411766]
 [0.3137255  0.3137255  0.2901961 ]
 [0.31764707 0.31764707 0.3019608 ]]
Loading 100 images


In [6]:
train_ratio = 0.7
val_ratio = 0.15

train_size = int(train_ratio * TRAINING_SIZE)

X_train = X[:train_size]
Y_train = Y[:train_size]

X_val = X[train_size:train_size + int(val_ratio * TRAINING_SIZE)]
Y_val = Y[train_size:train_size + int(val_ratio * TRAINING_SIZE)]

X_test = X[train_size + int(val_ratio * TRAINING_SIZE):]
Y_test = Y[train_size + int(val_ratio * TRAINING_SIZE):]

In [7]:
print(len(Y_train))

70


In [8]:
train_data = cropped_imgs(X_train, 16)
train_labels = cropped_labels(Y_train, 16)
val_data = cropped_imgs(X_val, 16)
val_labels = cropped_labels(Y_val, 16)
test_data = cropped_imgs(X_test, 16)
test_labels = cropped_labels(Y_test, 16)


# Balancing data

In [9]:
# Balance the classes.
print("Balancing training data...")
c0 = np.sum(train_labels[:, 0] == 1) # Count the number of data points in class 0
c1 = np.sum(train_labels[:, 1] == 1) # Count the number of data points in class 1
print(f"Number of data points per class: c0 = {c0}, c1 = {c1}")
min_c = min(c0, c1)
idx0 = np.where(train_labels[:, 0] == 1)[0][:min_c] # Get the indices of the first class
idx1 = np.where(train_labels[:, 1] == 1)[0][:min_c] # Get the indices of the second class
balanced_indices = np.concatenate([idx0, idx1]) 
train_data = train_data[balanced_indices] 
train_labels = train_labels[balanced_indices]
    
print(f"Balanced dataset size: {train_data.shape[0]}")

Balancing training data...
Number of data points per class: c0 = 32663, c1 = 11087
Balanced dataset size: 22174


# Support Vector Machine (SVM)

## Training

In [10]:
from sklearn import svm
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.metrics import make_scorer, accuracy_score, f1_score, precision_score, recall_score

# Define a custom scorer using F1-score (or other metrics)
scorer = make_scorer(f1_score, average='weighted')

# Hyperparameter grid to tune
param_grid = {
    'C': [0.1, 1, 10, 100],        # Regularization parameter
}

# Reshape train data
train_data = train_data.reshape(train_data.shape[0], -1)
train_labels_flat = np.argmax(train_labels, axis=1)

# Create the SVM model
svc = svm.SVC()

# Use GridSearchCV to tune hyperparameters
grid_search = GridSearchCV(
    svc, 
    param_grid, 
    scoring=scorer, 
    cv=5,   # 5-fold cross-validation
    verbose=2
)

# Fit the grid search to the training data
grid_search.fit(train_data, train_labels_flat)

# Best parameters and best score
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)

# Evaluate the best model
best_model = grid_search.best_estimator_
predictions = best_model.predict(train_data)

# Evaluate metrics
accuracy = accuracy_score(train_labels_flat, predictions)
f1 = f1_score(train_labels_flat, predictions, average='weighted')
precision = precision_score(train_labels_flat, predictions, average='weighted')
recall = recall_score(train_labels_flat, predictions, average='weighted')

print(f"Accuracy: {accuracy}")
print(f"F1 Score: {f1}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")


Fitting 5 folds for each of 4 candidates, totalling 20 fits
[CV] END ..............................................C=0.1; total time= 3.6min


## Predictions

In [None]:
# predict the labels of the test set
test_data = test_data.reshape(test_data.shape[0], -1)
predicted_labels = svm.predict(test_data)


## Results

In [None]:
# evaluate the classifier
accuracy = accuracy_score(np.argmax(test_labels, axis=1), predicted_labels)
precision = precision_score(np.argmax(test_labels, axis=1), predicted_labels)
recall = recall_score(np.argmax(test_labels, axis=1), predicted_labels)
f1 = f1_score(np.argmax(test_labels, axis=1), predicted_labels)

# use 

print(f"f1, {f1}, accuracy, {accuracy}, precision, {precision}, recall, {recall}")

f1, 0.43212917350848384, accuracy, 0.5257142857142857, precision, 0.304473582722715, recall, 0.7441093308199811


# Logistic regression

## Training

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import make_scorer, accuracy_score, f1_score, precision_score, recall_score
import numpy as np

# Reshape train data
train_data = train_data.reshape(train_data.shape[0], -1)
train_labels_flat = np.argmax(train_labels, axis=1)

# Define a custom scorer (e.g., F1-score)
scorer = make_scorer(f1_score, average='weighted')

# Hyperparameter grid
param_grid = {
    'C': [0.001, 0.01, 0.1, 1, 10, 100, 1000],  # Regularization strength
}

# Create the logistic regression model
logreg = LogisticRegression(random_state=0)

# GridSearchCV
grid_search = GridSearchCV(
    logreg, 
    param_grid, 
    scoring=scorer, 
    cv=5,  # 5-fold cross-validation
    verbose=2
)

# Fit the grid search
grid_search.fit(train_data, train_labels_flat)

# Best parameters and best score
print("Best Parameters:", grid_search.best_params_)
print("Best Score:", grid_search.best_score_)

# Evaluate the best model
best_model = grid_search.best_estimator_
predictions = best_model.predict(train_data)

# Evaluation metrics
accuracy = accuracy_score(train_labels_flat, predictions)
f1 = f1_score(train_labels_flat, predictions, average='weighted')
precision = precision_score(train_labels_flat, predictions, average='weighted')
recall = recall_score(train_labels_flat, predictions, average='weighted')

print(f"Accuracy: {accuracy}")
print(f"F1 Score: {f1}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


## Prediction

In [None]:
# predict the labels of the test set
predicted_labels = logreg.predict(test_data)
f1_log = f1_score(np.argmax(test_labels, axis=1), predicted_labels)
accuracy_log = accuracy_score(np.argmax(test_labels, axis=1), predicted_labels)

print(f"f1, {f1_log}, accuracy, {accuracy_log}")

f1, 0.4184859654096966, accuracy, 0.5312
