In [1]:
# Load the Drive helper and mount
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
!pip install rasterio gdal geopandas
!pip install gdal==3.4.1

Collecting gdal==3.4.1
  Downloading GDAL-3.4.1.tar.gz (755 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m755.9/755.9 kB[0m [31m7.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: gdal


In [6]:
# Import libraries
import sys
sys.path.append('/content/drive/MyDrive/Lecture/Codes/Custom Data Creation.ipynb') # Please put the path accordingly where you have stored the data

from utils import *

import fiona
import rasterio
import gdal
from rasterio.mask import mask
import os
from rasterio import plot
from glob import glob
import geopandas as gpd
import numpy as np
from shapely.geometry import box

from matplotlib import pyplot as plt

ModuleNotFoundError: No module named 'gdal'

In [4]:
!pip install gdal



In [None]:
# You need to use the following parameters in the generate_mask function:

# Path where the study_area.tif raster is located
raster_path ="data/Images/image_1.tif"

# Path where the train_landslide.shp is located
shape_path = "data/Inventory/landslides.shp"

# Path to save the output file
output_path ="data/Masks"

# Output file name
file_name ="image_1.tif" # Put a name here as to how you want to save the file.

# Run the function for the training masks
generate_mask(raster_path=raster_path, shape_path=shape_path,
             output_path=output_path, file_name=file_name)

In [None]:
def readdata(directory, folder_name):

    data_path = os.path.join(directory, folder_name)
    data_list = {}

    for f in sorted(os.listdir(data_path)):
        fdir = os.path.join(data_path, f)
        _, ext = os.path.splitext(f)
        if ext.lower() == ".tif":
            imgtype = f[-20:-4] # Number of characters of the image file. Example, "Image_1.tif" = 11 characters
            image_data=gdal.Open(fdir)
            bands = [image_data.GetRasterBand(i+1).ReadAsArray() for i in range(image_data.RasterCount)]
            data_list[imgtype] = np.stack(bands, axis=2)

    return data_list

In [None]:
Data_folder = "data/"
image_list = readdata(directory=Data_folder, folder_name="Images")
label_list = readdata(directory=Data_folder, folder_name="Masks")

In [None]:
# Print all the stacks that are collected
print("The collection of images available in the list are:")
print("-"*30)
print(*image_list, sep='\n')

# Print all the stacks that are collected
print("The collection of labels available in the list are:")
print("-"*30)
print(*label_list, sep='\n')

In [None]:
def gridwise_sample(imgarray, patchsize):

    """Extract sample patches of size patchsize x patchsize from an image (imgarray) in a gridwise manner.
    """
    nrows, ncols, nbands = imgarray.shape
    patchsamples = np.zeros(shape=(0, patchsize, patchsize, nbands),
                            dtype=imgarray.dtype)
    for i in range(int(nrows/patchsize)):
        for j in range(int(ncols/patchsize)):
            tocat = imgarray[i*patchsize:(i+1)*patchsize,
                             j*patchsize:(j+1)*patchsize, :]
            tocat = np.expand_dims(tocat, axis=0)
            patchsamples = np.concatenate((patchsamples, tocat),
                                          axis=0)
    return patchsamples

In [None]:
# FOR GENERATING PATCHES

PATCHSIZE = 128
NBANDS = image_list["image_1"].shape[-1]

Xtrain = np.zeros(shape=(0, PATCHSIZE, PATCHSIZE, NBANDS), dtype=np.float32)
Ytrain = np.zeros(shape=(0, PATCHSIZE, PATCHSIZE, 1), dtype=np.float32)

# Sample each training tile systematically in a gridwise manner
train_areas = ["image_1"]

for area in train_areas:
    X_toadd = gridwise_sample(image_list[area], PATCHSIZE)
    Y_toadd = gridwise_sample(label_list[area], PATCHSIZE)
    Xtrain = np.concatenate((Xtrain, X_toadd), axis=0)
    Ytrain = np.concatenate((Ytrain, Y_toadd), axis=0)

print(np.unique(Ytrain))
# Encode all landslide and background labels into their respective classes "0" and "1" (Categorical)
Ytrain[Ytrain==0] = 0
# Ytrain[Ytrain==-1] = 0
Ytrain[Ytrain==255] = 1
print(np.unique(Ytrain))

print("There are %i number of training patches" % (Xtrain.shape[0]))

[  0. 255.]
[0. 1.]
There are 330 number of training patches


In [None]:
from sklearn.model_selection import train_test_split

X_train, X_test, Y_train, Y_test = train_test_split(Xtrain, Ytrain, test_size=0.33, random_state=42) # Here we split the created data into train set and test set
X_train, X_val, Y_train, Y_val =train_test_split(X_train, Y_train, test_size=0.33, random_state=42) # Here we split the train set into another train set and validation set. So, train set is split into two parts here.

In [None]:
np.save("/content/drive/MyDrive/Colombia/Data/Arrays/Xtrain.npy", Xtrain)
np.save("/content/drive/MyDrive/Colombia/Data/Arrays/Ytrain.npy", Ytrain)

np.save("/content/drive/MyDrive/Colombia/Data/Arrays/X_val.npy", X_val)
np.save("/content/drive/MyDrive/Colombia/Data/Arrays/Y_val.npy", Y_val)

np.save("/content/drive/MyDrive/Colombia/Data/Arrays/X_test.npy", X_test)
np.save("/content/drive/MyDrive/Colombia/Data/Arrays/Y_test.npy", Y_test)

NameError: ignored