In [45]:
import os
import csv
import shutil

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tqdm import tqdm

import cv2

In [46]:
# Root folder of the dataset; the metadata file is read from inside it.
DATA_PATH = "./data"
metadata_csv_path = os.path.join(DATA_PATH, "metadata.csv")
metadata_df = pd.read_csv(metadata_csv_path)

metadata_df

Unnamed: 0,image_id,split,sat_image_path,mask_path
0,GF2_PMS1__L1A0001015649-MSS1,train,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...
1,GF2_PMS1__L1A0001094941-MSS1,train,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...
2,GF2_PMS1__L1A0001037899-MSS1,train,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...
3,GF2_PMS1__L1A0001104323-MSS1,train,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...
4,GF2_PMS1__L1A0001290139-MSS1,train,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...
5,GF2_PMS1__L1A0000962382-MSS1,train,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...
6,GF2_PMS1__L1A0001015648-MSS1,valid,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...
7,GF2_PMS1__L1A0001348919-MSS1,valid,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...
8,GF2_PMS1__L1A0001118839-MSS1,test,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...
9,GF2_PMS1__L1A0001064454-MSS1,test,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...


In [47]:
# select only images with masks (rows of the "train" split) and drop the
# "split" column in the same step. Taking an explicit copy makes the column
# assignments below operate on an independent frame instead of a slice of the
# loaded one, which is what triggers pandas' SettingWithCopyWarning.
path_columns = ["sat_image_path", "mask_path"]
metadata_df = metadata_df.loc[
    metadata_df["split"] == "train", ["image_id"] + path_columns
].copy()

# modify the path of images and masks
# (os.path.join returns an absolute second argument unchanged, so only
# relative paths are actually prefixed with DATA_PATH)
for column in path_columns:
    metadata_df[column] = metadata_df[column].apply(
        lambda pth: os.path.join(DATA_PATH, pth)
    )

metadata_df.head()

Unnamed: 0,image_id,sat_image_path,mask_path
0,GF2_PMS1__L1A0001015649-MSS1,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...
1,GF2_PMS1__L1A0001094941-MSS1,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...
2,GF2_PMS1__L1A0001037899-MSS1,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...
3,GF2_PMS1__L1A0001104323-MSS1,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...
4,GF2_PMS1__L1A0001290139-MSS1,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...


In [48]:
# shuffle deterministically (fixed random_state) before splitting
metadata_df_shuffled = metadata_df.sample(frac = 1, random_state = 0).reset_index(drop = True)

# 80% / 10% / 10% split by position. Plain iloc slicing is used instead of
# np.split, which on a DataFrame goes through the deprecated
# DataFrame.swapaxes and emits a FutureWarning.
n_samples = len(metadata_df_shuffled)
train_end = int(0.8*n_samples)
valid_end = int(0.9*n_samples)

train_df = metadata_df_shuffled.iloc[:train_end]
valid_df = metadata_df_shuffled.iloc[train_end:valid_end]
test_df = metadata_df_shuffled.iloc[valid_end:]

# every sample must land in exactly one split
assert len(train_df) + len(valid_df) + len(test_df) == n_samples

print("train:", len(train_df))
print("valid:", len(valid_df))
print("test:", len(test_df))

train: 4
valid: 1
test: 1


  return bound(*args, **kwds)


In [49]:
# preview the first rows of the training split
train_df.head()

Unnamed: 0,image_id,sat_image_path,mask_path
0,GF2_PMS1__L1A0000962382-MSS1,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...
1,GF2_PMS1__L1A0001037899-MSS1,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...
2,GF2_PMS1__L1A0001094941-MSS1,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...
3,GF2_PMS1__L1A0001104323-MSS1,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...,C:\Users\oswal\Documents\Kuliah\Semester 7\Pra...


In [50]:
# Recreate one empty output directory per split under DATA_PATH.
for dir_name in ("train_patches", "valid_patches", "test_patches"):

    DIR_PATH = os.path.join(DATA_PATH, dir_name)

    # a directory left over from a previous run is deleted with its contents
    if os.path.isdir(DIR_PATH):
        shutil.rmtree(DIR_PATH)
        removed_message = "Directory '{}' removed".format(DIR_PATH)
        print(removed_message)

    # then the directory is created again, empty
    if not os.path.isdir(DIR_PATH):
        os.mkdir(DIR_PATH)
        created_message = "Directory '{}' created".format(DIR_PATH)
        print(created_message)

Directory './data\train_patches' removed
Directory './data\train_patches' created
Directory './data\valid_patches' removed
Directory './data\valid_patches' created
Directory './data\test_patches' removed
Directory './data\test_patches' created


In [51]:
# Start each split's patch-metadata CSV from scratch with only a header row.
header_row = ["image_id", "sat_image_path", "mask_path"]

for file_name in ("train", "valid", "test"):

    METADATA_PATCHES_PATH = os.path.join(DATA_PATH, file_name + "_metadata_patches.csv")

    # drop the file written by a previous run, if there is one
    if os.path.exists(METADATA_PATCHES_PATH):
        os.remove(METADATA_PATCHES_PATH)
        print("File {} removed".format(METADATA_PATCHES_PATH))

    # newline="" lets the csv module control line endings itself
    with open(METADATA_PATCHES_PATH, "w", newline = "") as csv_file:
        csv.writer(csv_file).writerow(header_row)
    print("File {} created".format(METADATA_PATCHES_PATH))

File ./data\train_metadata_patches.csv removed
File ./data\train_metadata_patches.csv created
File ./data\valid_metadata_patches.csv removed
File ./data\valid_metadata_patches.csv created
File ./data\test_metadata_patches.csv removed
File ./data\test_metadata_patches.csv created


In [52]:
# patches settings
original_size = 7300
scale = [1.25, 1, 0.75, 0.5]
patch_size = 128
stride = 128


def compute_patch_starts(size, patch_size, stride):
    """Return the start offsets of all patches that fit inside `size` pixels.

    Offsets are multiples of `stride`, and every returned offset `o` satisfies
    `o + patch_size <= size`, so no patch runs past the end of the dimension.
    When `stride == patch_size` this gives the same offsets as
    `range(size // stride)`; it also stays in bounds when the stride is
    smaller than the patch (overlapping patches).

    Args:
        size: length in pixels of the dimension being tiled.
        patch_size: side length of a patch, in pixels (must be positive).
        stride: distance between consecutive patch starts (must be positive).

    Returns:
        List of integer start offsets; empty when `size < patch_size`.

    Raises:
        ValueError: if `patch_size` or `stride` is not positive.
    """
    if patch_size <= 0 or stride <= 0:
        raise ValueError("patch_size and stride must be positive")
    n_patches = max(0, (size - patch_size) // stride + 1)
    return [stride*i for i in range(n_patches)]


# start offsets along one dimension, keyed by scale factor
patch_idx = {}
for s in scale:
    patch_idx[s] = compute_patch_starts(int(original_size*s), patch_size, stride)

In [53]:
# Summary table of the tiling along a single image dimension, one row per scale.
for banner_line in (
    "-------------------------------------",
    "Patches information along 1 dimension",
    "-------------------------------------\n",
):
    print(banner_line)

format_spec = "{:<8} {:<84} {:<18} {:<10}"
column_titles = ("scale:", "patch indexes:", "discarded pixels:", "number of patches:")
print(format_spec.format(*column_titles), "\n")

for scale_factor, starts in patch_idx.items():
    # pixels remaining after the end of the last patch along this dimension
    covered_pixels = starts[-1] + patch_size
    discarded_pixels = int(original_size*scale_factor) - covered_pixels
    row = [str(scale_factor), str(starts), str(discarded_pixels), str(len(starts))]
    print(format_spec.format(*row))

-------------------------------------
Patches information along 1 dimension
-------------------------------------

scale:   patch indexes:                                                                       discarded pixels:  number of patches: 

1.25     [0, 128, 256, 384, 512, 640, 768, 896, 1024, 1152, 1280, 1408, 1536, 1664, 1792, 1920, 2048, 2176, 2304, 2432, 2560, 2688, 2816, 2944, 3072, 3200, 3328, 3456, 3584, 3712, 3840, 3968, 4096, 4224, 4352, 4480, 4608, 4736, 4864, 4992, 5120, 5248, 5376, 5504, 5632, 5760, 5888, 6016, 6144, 6272, 6400, 6528, 6656, 6784, 6912, 7040, 7168, 7296, 7424, 7552, 7680, 7808, 7936, 8064, 8192, 8320, 8448, 8576, 8704, 8832, 8960] 37                 71        
1        [0, 128, 256, 384, 512, 640, 768, 896, 1024, 1152, 1280, 1408, 1536, 1664, 1792, 1920, 2048, 2176, 2304, 2432, 2560, 2688, 2816, 2944, 3072, 3200, 3328, 3456, 3584, 3712, 3840, 3968, 4096, 4224, 4352, 4480, 4608, 4736, 4864, 4992, 5120, 5248, 5376, 5504, 5632, 5760, 5888, 6016, 6144, 6

In [54]:
def create_patches(metadata, scale, dir_name, METADATA_PATCHES_PATH):
    """Cut one image and its mask into square patches at one or more scales.

    For every scale factor the image and mask are resized to a square of side
    `int(height * s)`, then tiled at the offsets stored in the module-level
    `patch_idx[s]` (same offsets for rows and columns). Each image/mask patch
    pair is written as a .tif file under `DATA_PATH/dir_name`, and one row per
    pair is appended to the CSV at `METADATA_PATCHES_PATH`.

    Patches are written in the channel order returned by `cv2.imread` (BGR),
    which is the order `cv2.imwrite` expects, so the saved files keep the
    colours of the source files. Masks are resized with nearest-neighbour
    interpolation so that no blended (non-label) colours are introduced.

    Args:
        metadata: sequence `(image_id, sat_image_path, mask_path)`.
        scale: iterable of scale factors; each must be a key of `patch_idx`.
        dir_name: name of the output directory, relative to `DATA_PATH`.
        METADATA_PATCHES_PATH: path of the CSV file rows are appended to.

    Raises:
        FileNotFoundError: if the image or the mask cannot be read.
        ValueError: if the image height differs from `original_size`, or a
            scaled image is too small to contain all patches for its scale.
        OSError: if a patch cannot be written to disk.
    """
    image_id, image_path, mask_path = metadata[0], metadata[1], metadata[2]

    # cv2.imread signals failure by returning None rather than raising
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError("Could not read image: " + str(image_path))
    mask = cv2.imread(mask_path)
    if mask is None:
        raise FileNotFoundError("Could not read mask: " + str(mask_path))

    # NOTE(review): only the height is validated; the resize below forces a
    # square output whatever the width is - confirm this is intended.
    if image.shape[0] != original_size:
        raise ValueError("The image size should be " + str(original_size))

    # open the metadata file once per call instead of once per patch
    with open(METADATA_PATCHES_PATH, "a", newline = "") as f:
        writer = csv.writer(f)

        for s in scale:

            # resize image and mask
            scaled_size = int(image.shape[0]*s)
            scaled_image = cv2.resize(image, (scaled_size, scaled_size))
            # nearest-neighbour keeps mask pixels restricted to existing values
            scaled_mask = cv2.resize(mask, (scaled_size, scaled_size),
                                     interpolation = cv2.INTER_NEAREST)

            if scaled_image.shape[0] < patch_idx[s][-1] + patch_size:
                raise ValueError("The scaled image should contain all the patches")

            scale_tag = str(int(s*100))

            for i in patch_idx[s]:
                for j in patch_idx[s]:

                    # get patches
                    scaled_image_patch = scaled_image[i:i+patch_size, j:j+patch_size, :]
                    scaled_mask_patch = scaled_mask[i:i+patch_size, j:j+patch_size, :]

                    # create paths (relative to DATA_PATH, as stored in the CSV)
                    scaled_image_patch_path = dir_name + "/{}_sat_{}_{}_{}.tif".format(str(image_id), scale_tag, str(i), str(j))
                    scaled_mask_patch_path = dir_name + "/{}_mask_{}_{}_{}.tif".format(str(image_id), scale_tag, str(i), str(j))

                    # save patches; cv2.imwrite returns False on failure
                    for rel_path, patch in ((scaled_image_patch_path, scaled_image_patch),
                                            (scaled_mask_patch_path, scaled_mask_patch)):
                        full_path = os.path.join(DATA_PATH, rel_path)
                        if not cv2.imwrite(full_path, patch):
                            raise OSError("Could not write patch: " + full_path)

                    # update the metadata of patches
                    writer.writerow([image_id, scaled_image_patch_path, scaled_mask_patch_path])

In [55]:
# create train patches (all scales)
train_samples = train_df[["image_id", "sat_image_path","mask_path"]].values.tolist()
for sample in tqdm(train_samples):
    create_patches(sample, scale, 'train_patches', './data/train_metadata_patches.csv')

100%|██████████| 4/4 [01:35<00:00, 23.84s/it]


In [56]:
# create valid patches (original scale only)
valid_samples = valid_df[["image_id", "sat_image_path","mask_path"]].values.tolist()
for sample in tqdm(valid_samples):
    create_patches(sample, [1], 'valid_patches', './data/valid_metadata_patches.csv')

100%|██████████| 1/1 [00:08<00:00,  8.46s/it]


In [57]:
# create test patches (original scale only)
test_samples = test_df[["image_id", "sat_image_path","mask_path"]].values.tolist()
for sample in tqdm(test_samples):
    create_patches(sample, [1], 'test_patches', './data/test_metadata_patches.csv')

100%|██████████| 1/1 [00:08<00:00,  8.25s/it]
