In [1]:
# Mount Google Drive at /content/drive; every data path used below lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


# New Section

# Prepare data


Create masks from labeled images


In [None]:
import json
from shapely.geometry import Polygon
import matplotlib.pyplot as plt
from PIL import Image, ImageDraw
import os
import shutil
import numpy as np
import tifffile as tiff

def create_mask_from_json(json_path, image_path, output_dir):
    """Rasterize the labelled polygons of one scene into a binary mask TIFF.

    The annotation file is expected to hold a top-level ``"shapes"`` list whose
    entries each carry a ``"points"`` list of (x, y) pairs. Every polygon is
    filled on a blank single-channel canvas that has the same size as the scene
    image, and the result is written to ``output_dir`` under the scene's own
    file name. Labelled pixels are 255, everything else is 0.

    Args:
        json_path: Path of the annotation ``.json`` file for the scene.
        image_path: Path of the scene image; only its size and file name are used.
        output_dir: Folder that receives the mask; created if it does not exist.

    Returns:
        None. The mask is written to disk as a side effect.
    """
    image_base_name = os.path.basename(image_path)  # file name only, e.g. 'kho_xang_.tif'

    # Read the label file that describes the annotated polygons.
    with open(json_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    shapes = data.get("shapes", [])  # list of labelled polygons (may be absent)

    # Only the scene's dimensions are needed, so release the file handle right away
    # instead of leaving it open until garbage collection.
    with Image.open(image_path) as img:
        image_size = img.size

    # Blank 8-bit mask with the same width/height as the scene.
    mask = Image.new("L", image_size, 0)
    draw = ImageDraw.Draw(mask)

    # Paint every polygon (interior and outline) white on the mask.
    for shape in shapes:
        points = shape.get("points", [])
        xy = [tuple(map(int, p)) for p in points]  # pixel coordinates, truncated to int
        draw.polygon(xy, outline=255, fill=255)

    mask_array = np.array(mask)

    # Save the mask as a .tif that shares the scene's file name.
    os.makedirs(output_dir, exist_ok=True)
    mask_path = os.path.join(output_dir, image_base_name)

    # If a mask from a previous run already exists, remove it before writing.
    if os.path.isfile(mask_path):
        os.remove(mask_path)
    tiff.imwrite(mask_path, mask_array)



In [None]:
# Google Drive folders used by the mask-generation step.
folder_image_path = "/content/drive/MyDrive/GR2/Scene"  # input scene images
folder_json_path = "/content/drive/MyDrive/GR2/Json"  # label files, looked up as <scene name>.json
folder_mask_path = "/content/drive/MyDrive/GR2/Mask"  # output masks, saved under the scene's file name

In [None]:
# Iterate through all scenes (in a stable, sorted order) and create a mask for each one.
for filename in sorted(os.listdir(folder_image_path)):
  image_path = os.path.join(folder_image_path, filename)
  # os.listdir also returns sub-directories; only regular files can be scenes.
  if not os.path.isfile(image_path):
    continue

  file_name_without_extension = os.path.splitext(filename)[0]
  json_path = os.path.join(folder_json_path, file_name_without_extension + '.json')
  # A scene without a label file cannot produce a mask: report it and keep going
  # rather than aborting the whole batch halfway through.
  if not os.path.isfile(json_path):
    print("Skipping " + filename + ": no label file found at " + json_path)
    continue

  create_mask_from_json(json_path, image_path, folder_mask_path)



Crop images into tiles and create the train and test datasets

In [None]:
import os
import cv2
import numpy as np
#function to crop an image
def crop_image_and_mask(image_path='', mask_path='', height=128, width=128):
    """Cut an image and its mask into aligned ``height`` x ``width`` tiles.

    The image is read with OpenCV and converted from BGR to RGB; the mask is
    read as a single-channel grayscale image. Both are walked in row-major
    order with a stride equal to the tile size. Tiles that run past the bottom
    or right edge are zero-padded up to the full tile size, so every returned
    tile has the same shape.

    Args:
        image_path: Path of the colour image to tile.
        mask_path: Path of the matching mask image.
        height: Tile height in pixels.
        width: Tile width in pixels.

    Returns:
        tuple: ``(images, masks)`` as NumPy arrays. ``images`` stacks the RGB
        tiles, ``masks`` stacks the mask tiles with every 255 replaced by 1.

    Raises:
        FileNotFoundError: If OpenCV cannot read the image or the mask.
    """
    image = cv2.imread(image_path)  # OpenCV loads colour images as BGR
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("Could not read image: " + str(image_path))
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
    if mask is None:
        raise FileNotFoundError("Could not read mask: " + str(mask_path))

    img_height, img_width = image.shape[0], image.shape[1]

    cropped_images = []
    cropped_masks = []
    for top in range(0, img_height, height):
        for left in range(0, img_width, width):
            # NumPy clamps slice bounds, so edge tiles simply come out smaller.
            image_tile = image[top:top + height, left:left + width]
            mask_tile = mask[top:top + height, left:left + width]

            if image_tile.shape[:2] != (height, width) or mask_tile.shape != (height, width):
                # Partial tile: copy it into the top-left corner of a zero canvas.
                # The dtype comes from the source arrays (not from a previously
                # collected tile), so this also works when the very first tile
                # is partial, e.g. for an image smaller than the tile size.
                padded_image = np.zeros((height, width, 3), dtype=image.dtype)
                padded_mask = np.zeros((height, width), dtype=mask.dtype)
                padded_image[:image_tile.shape[0], :image_tile.shape[1], :] = image_tile
                padded_mask[:mask_tile.shape[0], :mask_tile.shape[1]] = mask_tile
                image_tile, mask_tile = padded_image, padded_mask

            cropped_images.append(image_tile)
            cropped_masks.append(mask_tile)

    # Stack the tiles and turn the 0/255 mask into 0/1 class labels.
    cropped_images_array = np.array(cropped_images)
    cropped_masks_array = np.array(cropped_masks)
    cropped_masks_array[cropped_masks_array == 255] = 1
    return (cropped_images_array, cropped_masks_array)

In [None]:
import numpy as np
def save_to_file(data, file_path):
  """Save ``data`` as a ``.npy`` file at ``file_path``, replacing any existing file.

  Args:
      data: Array (or array-like) to store with ``np.save``.
      file_path: Destination path. ``np.save`` appends ``.npy`` when the suffix
          is missing, so the same rule is applied here before checking for an
          existing file.

  Returns:
      None. The file is written as a side effect.
  """
  target_path = os.fspath(file_path)
  if not target_path.endswith('.npy'):
    # Mirror np.save's naming so the existence check looks at the real target.
    target_path += '.npy'

  # np.save does not create missing folders; make sure the parent exists.
  parent_dir = os.path.dirname(target_path)
  if parent_dir:
    os.makedirs(parent_dir, exist_ok=True)

  if os.path.exists(target_path):
    # Remove the previous file before writing the new one.
    os.remove(target_path)

  np.save(target_path, data)

In [None]:
# Build two parallel lists of full paths: one for the scenes and one for their masks.
# A mask is stored under the same file name as its scene, so a single directory
# listing drives both lists and keeps them index-aligned.
image_folder = "/content/drive/MyDrive/GR2/Scene"
mask_folder = "/content/drive/MyDrive/GR2/Mask"
scene_names = os.listdir(image_folder)
image_paths = [os.path.join(image_folder, scene_name) for scene_name in scene_names]
mask_paths = [os.path.join(mask_folder, scene_name) for scene_name in scene_names]

print(image_paths)
print(mask_paths)

['/content/drive/MyDrive/GR2/Scene/kho_xang_h6.tif', '/content/drive/MyDrive/GR2/Scene/vinaconex.tif', '/content/drive/MyDrive/GR2/Scene/vd34.tif', '/content/drive/MyDrive/GR2/Scene/vd5.tif', '/content/drive/MyDrive/GR2/Scene/chua_kham_son.tif', '/content/drive/MyDrive/GR2/Scene/sun_hoabinh_sangolf.tif', '/content/drive/MyDrive/GR2/Scene/sun_hoabinh_doithung.tif', '/content/drive/MyDrive/GR2/Scene/vd2.tif', '/content/drive/MyDrive/GR2/Scene/vd1.tif', '/content/drive/MyDrive/GR2/Scene/sun_hoabinh_cuoiha.tif']
['/content/drive/MyDrive/GR2/Mask/kho_xang_h6.tif', '/content/drive/MyDrive/GR2/Mask/vinaconex.tif', '/content/drive/MyDrive/GR2/Mask/vd34.tif', '/content/drive/MyDrive/GR2/Mask/vd5.tif', '/content/drive/MyDrive/GR2/Mask/chua_kham_son.tif', '/content/drive/MyDrive/GR2/Mask/sun_hoabinh_sangolf.tif', '/content/drive/MyDrive/GR2/Mask/sun_hoabinh_doithung.tif', '/content/drive/MyDrive/GR2/Mask/vd2.tif', '/content/drive/MyDrive/GR2/Mask/vd1.tif', '/content/drive/MyDrive/GR2/Mask/sun_hoa

In [None]:
#crop and save to train, test datasets
from sklearn.model_selection import train_test_split
import os
import numpy as np

# Output folders for the per-scene train/test arrays.
train_image = "/content/drive/MyDrive/GR2/Dataset/Train/Image"
train_mask = "/content/drive/MyDrive/GR2/Dataset/Train/Mask"

test_image = "/content/drive/MyDrive/GR2/Dataset/Test/Image"
test_mask = "/content/drive/MyDrive/GR2/Dataset/Test/Mask"

TILE_SIZE = 256       # edge length (pixels) of the square tiles cut from each scene
TEST_FRACTION = 0.3   # share of each scene's tiles held out for testing
SPLIT_SEED = 42       # fixed seed so the train/test split is reproducible

# The arrays below are written into these folders; create them up front so the
# cell also works on a Drive where the dataset folders do not exist yet.
for dataset_dir in (train_image, train_mask, test_image, test_mask):
  os.makedirs(dataset_dir, exist_ok=True)

for img_path, msk_path in zip(image_paths, mask_paths):
  print("Cropping image " + os.path.basename(img_path))
  file_name_without_extension = os.path.splitext(os.path.basename(img_path))[0]
  images, masks = crop_image_and_mask(img_path, msk_path, TILE_SIZE, TILE_SIZE)

  # Scale pixel values to [0, 1] floats; masks are only converted to float32.
  images = images.astype(np.float32)
  images = images / 255.0
  masks = masks.astype(np.float32)

  # The split is done per scene, so every scene contributes tiles to both sets.
  X_train, X_test, y_train, y_test = train_test_split(
      images, masks, test_size=TEST_FRACTION, random_state=SPLIT_SEED)

  # One .npy file per scene and subset, named after the scene.
  save_to_file(X_train, os.path.join(train_image, file_name_without_extension + '.npy'))
  save_to_file(y_train, os.path.join(train_mask, file_name_without_extension + '.npy'))

  save_to_file(X_test, os.path.join(test_image, file_name_without_extension + '.npy'))
  save_to_file(y_test, os.path.join(test_mask, file_name_without_extension + '.npy'))



Cropping image kho_xang_h6.tif
Cropping image vinaconex.tif
Cropping image vd34.tif
Cropping image vd5.tif
Cropping image chua_kham_son.tif
Cropping image sun_hoabinh_sangolf.tif
Cropping image sun_hoabinh_doithung.tif
Cropping image vd2.tif
Cropping image vd1.tif
Cropping image sun_hoabinh_cuoiha.tif
