<a href="https://colab.research.google.com/github/paudelsushil/labelcombinations/blob/main/Project_adleo_geog315_spring24.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Objective 3:
## DeepLabv3+ Model

# 1. Data Preparation

In [6]:
# Mount Google Drive at /content/gdrive so files stored on Drive can be read
# through the local file system.
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [22]:
# install required packages

%%capture
!pip install rasterio

In [23]:
# Import required packages
import os
from pathlib import Path

from datetime import datetime, timedelta
import tqdm # Adds a smart progress meter to any iterable or file operation

import math
import random
import pandas as pd
import numpy as np


import cv2
import rasterio
#  defines a rectangular area within the raster using four properties
# xoff, yoff, width, height
from rasterio.windows import Window


import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import _LRScheduler
from torch.utils.tensorboard import SummaryWriter


from IPython.core.debugger import set_trace # Insert a breakpoint into the code
from IPython.display import Image

import matplotlib.pyplot as plt

## Defining the Dataset for training, validating, and testing the model

In [24]:
# Project data folder on the mounted Google Drive.
WorkingFolder = "/content/gdrive/MyDrive/adleo/project_data"

# Source directory for the input data; currently the same folder as
# WorkingFolder.
src_dir = WorkingFolder


## Load Data

In [44]:
# List the file names found in the "images" and "labels" folders.
# os.listdir returns entries in arbitrary order, so the listings are sorted to
# make them reproducible between runs.
img_paths = sorted(os.listdir(os.path.join(src_dir, "images")))
lbl_paths = sorted(os.listdir(os.path.join(src_dir, "labels")))

print("No. of images:",len(img_paths), "\n", "No. of labels:", len(lbl_paths))

# Flag a mismatch between the two folders early instead of discovering it
# while pairing images with labels.
if len(img_paths) != len(lbl_paths):
    print("Warning: image and label counts differ by",
          abs(len(img_paths) - len(lbl_paths)))



No. of images: 33874 
 No. of labels: 33757


33874 33757


## Pre-processing the data for the model

In [None]:
#===============================================================================
# Data Loader Function
#-------------------------------------------------------------------------------
def load_data(data_path, usage="train", window=None, norm_stats_type=None,
              is_label=False):
    '''
    Read geographic data into numpy array

    Labels are returned unchanged (band 1 only). Images are normalized with
    the selected min/max scheme and, for training/validation, cropped to
    ``window``. The window is not applied to labels.

    Params:
        data_path : str
            Path of data to load
        usage : str
            Usage of the data: "train", "validate", or "predict". Images are
            only cropped when usage is "train" or "validate".
        window : tuple or None
            The view onto a rectangular subset of the data, in the format of
            (column offsets, row offsets, width in pixel, height in pixel).
            If None, the image is returned without cropping.
        norm_stats_type : str
            How the normalization statistics is calculated. One of
            "local_per_tile", "local_per_band" or "global_per_band".
            Required when ``is_label`` is False.
        is_label : bool
            Whether the file is a single-band label raster. Labels are read
            without normalization.
    Returns:
        narray
            The first band for labels, otherwise the normalized image stack
            (cropped to ``window`` for "train"/"validate").
    Raises:
        InputError
            If a label raster does not have exactly one band.
        AssertionError
            If ``norm_stats_type`` is not one of the supported options.
    '''

    with rasterio.open(data_path, "r") as src:

        if is_label:
            if src.count != 1:
                # Adjacent literals instead of a backslash continuation, which
                # embedded the source indentation in the message.
                raise InputError("Label shape not applicable: "
                                 "expected 1 channel")
            return src.read(1)

        nodata = src.nodata
        assert norm_stats_type in ["local_per_tile", "local_per_band",
                                   "global_per_band"]
        bands = src.read()

    # Statistics are computed on the full tile before any cropping.
    if norm_stats_type == "local_per_tile":
        img = mmnorm1(bands, nodata=nodata)
    elif norm_stats_type == "local_per_band":
        img = mmnorm2(bands, nodata=nodata, clip_val=1.5)
    else:  # "global_per_band"
        img = mmnorm3(bands, nodata=nodata, clip_val=1.5)

    # window=None (the default) means "no crop"; previously it raised a
    # TypeError on the first subscript.
    if usage in ['train', 'validate'] and window is not None:
        img = img[:, max(0, window[1]): window[1] + window[3],
                  max(0, window[0]): window[0] + window[2]]

    return img
#===============================================================================
# Normalization Function
#-------------------------------------------------------------------------------
def mmnorm1(img, nodata):
    '''
    Data normalization with min/max method

    The minimum and maximum are taken over the whole array, ignoring pixels
    equal to ``nodata``. All pixels (including NoData ones) are then scaled
    with those statistics and clipped to [0, 1].

    Params:
        img (narray): The targeted image for normalization
        nodata: Value marking NoData pixels; excluded from the statistics
    Returns:
        narray: values in [0, 1]. An array of zeros is returned when the
        valid pixels span no range (constant tile or no valid pixel).
    '''

    img_tmp = np.where(img == nodata, np.nan, img)
    img_max = np.nanmax(img_tmp)
    img_min = np.nanmin(img_tmp)
    value_range = img_max - img_min

    # A constant tile would otherwise divide by zero and return NaN; the
    # comparison is also False when the range itself is NaN.
    if not value_range > 0:
        return np.zeros_like(img_tmp)

    normalized = (img - img_min) / value_range
    normalized = np.clip(normalized, 0, 1)

    return normalized
#-------------------------------------------------------------------------------
def mmnorm2(img, nodata, clip_val=None):
    r"""
    Normalize the input image pixels to [0, 1] ranged based on the
    minimum and maximum statistics of each band per tile.
    Arguments:
            img : numpy array
                Stacked image bands with a dimension of (C,H,W).
            nodata : number or None
                Value reserved to represent NoData in the image chip. Pixels
                equal to it, and pixels equal to 0, are excluded from the
                statistics.
            clip_val : int, float or None
                Defines how much of the distribution tails to be cut off, as
                a percentile. None or 0 disables clipping.
    Returns:
            img : numpy array
                Normalized image stack of size (C,H,W) with values in [0, 1].
    Raises:
            ValueError : if clip_val is negative.
    Note 1: If clip then min, max are calculated from the clipped image.
    Note 2: The clipping percentiles are pooled over all bands of the tile;
            only the min/max statistics are per band.
    """

    # Validate first: comparing None with a number raises TypeError, so the
    # default clip_val=None must never reach a numeric comparison.
    if clip_val is not None and clip_val < 0:
        raise ValueError("clip must be a non-negative decimal.")

    # filter out NoData and zero pixels in generating statistics. Both
    # conditions go into one mask so neither overwrites the other.
    invalid = img == 0
    if nodata is not None:
        invalid |= img == nodata
    nan_corr_img = np.where(invalid, np.nan, img)

    if clip_val:
        left_tail_clip = np.nanpercentile(nan_corr_img, clip_val)
        right_tail_clip = np.nanpercentile(nan_corr_img, 100 - clip_val)

        left_clipped_img = np.where(img < left_tail_clip, left_tail_clip, img)
        values = np.where(left_clipped_img > right_tail_clip,
                          right_tail_clip, left_clipped_img)
        stats_source = values
    else:
        # Statistics come from valid pixels only, but the raw values are what
        # gets scaled, so masked pixels end up clipped to 0 instead of NaN.
        values = img
        stats_source = nan_corr_img

    band_min = np.nanmin(stats_source, axis=(1, 2), keepdims=True)
    band_max = np.nanmax(stats_source, axis=(1, 2), keepdims=True)
    band_range = band_max - band_min
    # A constant band has zero range; divide by 1 so it maps to 0, not NaN.
    safe_range = np.where(band_range > 0, band_range, 1)

    normal_img = (values - band_min) / safe_range
    normal_img = np.clip(normal_img, 0, 1)
    return normal_img
#------------------------------------------------------------------------------
def mmnorm3(img, nodata, clip_val=None):
    r"""
    Normalize the input image pixels to [0, 1] using fixed (hard-coded)
    per-band minimum and maximum values instead of tile statistics.
    Arguments:
            img : numpy array
                Stacked image bands with a dimension of (C,H,W). At most four
                bands, matching the hard-coded statistics. The input array is
                not modified.
            nodata : number or None
                Value reserved to represent NoData in the image chip. Pixels
                equal to it, and pixels equal to 0, are excluded when the
                clipping percentiles are computed.
            clip_val : int, float or None
                Percentile of each band's distribution tails to clip before
                scaling. None or 0 disables clipping.
    Returns:
            img : numpy array
                Normalized image stack of size (C,H,W) with values in [0, 1].
    Raises:
            IndexError : if the image has more bands than hard-coded
                statistics.
    """
    hardcoded_stats = {
        "mins": np.array([331.0, 581.0, 560.0, 1696.0]),
        "maxs": np.array([1403.0, 1638.0, 2076.0, 3652.0])
    }

    num_bands = img.shape[0]
    if num_bands > hardcoded_stats["mins"].size:
        raise IndexError(
            "image has {} bands but statistics exist for only {}".format(
                num_bands, hardcoded_stats["mins"].size))

    # Reshape to (C,1,1) so the per-band statistics broadcast over (C,H,W).
    mins = hardcoded_stats["mins"][:num_bands].reshape(-1, 1, 1)
    maxs = hardcoded_stats["maxs"][:num_bands].reshape(-1, 1, 1)

    if clip_val:
        # One combined mask, so the NoData mask is not overwritten by the
        # zero mask.
        invalid = img == 0
        if nodata is not None:
            invalid |= img == nodata
        nan_corr_img = np.where(invalid, np.nan, img)

        # Percentiles are computed separately for every band.
        left_tail_clip = np.nanpercentile(nan_corr_img, clip_val,
                                          axis=(1, 2), keepdims=True)
        right_tail_clip = np.nanpercentile(nan_corr_img, 100 - clip_val,
                                           axis=(1, 2), keepdims=True)

        left_clipped = np.where(img < left_tail_clip, left_tail_clip, img)
        clipped = np.where(left_clipped > right_tail_clip,
                           right_tail_clip, left_clipped)
        normalized = (clipped - mins) / (maxs - mins)

    else:
        # Build a new float array: writing the result back into `img` would
        # mutate the caller's data and truncate to 0/1 for integer rasters.
        out_dtype = (img.dtype if np.issubdtype(img.dtype, np.floating)
                     else np.float64)
        normalized = ((img - mins) / (maxs - mins)).astype(out_dtype)

    return np.clip(normalized, 0, 1)
#------------------------------------------------------------------------------
# Input Error method for Error handling message
#------------------------------------------------------------------------------

class InputError(Exception):
    '''
    Error type signalling a problem with the supplied input
    '''

    def __init__(self, message):
        '''
        Keep the explanation so it can be shown when the error is printed.

        Params:
            message (str): explanation of the error

        '''

        self.message = message

    def __str__(self):
        '''
        Text shown for the error: the class name, followed by the
        explanation when one was given.
        '''

        return f'InputError, {self.message} ' if self.message else 'InputError'

### Utility Functions

### Image Normalization

In [None]:
def min_max_normalize_image(image, dtype=np.float32):
    """
    Scale every band of an image to the range [0, 1] using that band's own
    minimum and maximum (NaN values are ignored in the statistics).

    image (numpy array) : image stack; the statistics are taken over axes 1
    and 2, i.e. one min/max per entry of axis 0.
    dtype (numpy datatype) : data type of the normalized image default is
    "np.float32".

    Returns the normalized image cast to ``dtype``. A band whose values are
    all identical is returned as zeros.
    """

    # Calculate the minimum and maximum values for each band
    min_values = np.nanmin(image, axis=(1, 2), keepdims=True)
    max_values = np.nanmax(image, axis=(1, 2), keepdims=True)

    # A constant band has zero range; divide by 1 so it maps to 0, not NaN.
    value_range = max_values - min_values
    safe_range = np.where(value_range > 0, value_range, 1)

    # Normalize the image data to the range [0, 1]
    normalized_img = (image - min_values) / safe_range

    # Return the normalized image data in the requested data type
    return normalized_img.astype(dtype)

### Image Augmentation

# Model Building
The DeepLabv3+ model is based on [Chen et al., 2024](https://link.springer.com/content/pdf/10.1007/s40747-023-01304-z.pdf).
