In [None]:
import os
import json

from kaggle_secrets import UserSecretsClient

In [None]:
# Copy the Kaggle credentials out of the notebook's secret store into the
# environment variables of the same name (presumably consumed by the `kaggle`
# CLI that is invoked at the end of the notebook).
secrets = UserSecretsClient()

for _secret_name in ("KAGGLE_USERNAME", "KAGGLE_KEY"):
    os.environ[_secret_name] = secrets.get_secret(_secret_name)

In [None]:
# Staging folder for the upload; the metadata file below is written at its root.
os.makedirs('/kaggle/dataset/', exist_ok=True)

# Change below: dataset id (owner/slug), title, visibility and license.
meta = {
    "id": "uom200399g/flair-dataset-small",
    "title": "FLAIR Dataset",
    "isPrivate": True,
    "licenses": [{"name": "other"}],
}

# Serialise first, then write the resulting JSON text in one go.
with open('/kaggle/dataset/dataset-metadata.json', 'w') as metadata_file:
    metadata_file.write(json.dumps(meta))

In [None]:
# Create images/ and labels/ for both splits. makedirs also creates the
# intermediate split folder, and exist_ok keeps the cell safe to re-run.
for _split in ('train', 'test'):
    for _subdir in ('images', 'labels'):
        os.makedirs(f'/kaggle/dataset/{_split}/{_subdir}', exist_ok=True)

In [None]:
!pip install ultralytics rasterio

In [None]:
import cv2
import numpy as np
from matplotlib.patches import Polygon
import matplotlib.pyplot as plt
from PIL import Image
import rasterio
import glob
import os
import random

In [None]:
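# NOTE: the commented-out code in this cell is an earlier draft kept for reference. It wrote one
# label line per original class (19) and saved images without resizing; the next cell defines
# the versions that are actually used.
# def read_img(raster_file: str) -> np.ndarray: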
#     with rasterio.open(raster_file) as src_img:
#         array = src_img.read()
#         return cv2.cvtColor(array.transpose(1,2,0)[:,:,:3], cv2.COLOR_BGR2RGB)

# def read_msk(raster_file: str) -> np.ndarray:
#     with rasterio.open(raster_file) as src_msk:
#         array = src_msk.read()[0]-1
#         array = np.stack([array == i for i in range(19)], axis=0)
#         return array.astype(np.uint8) * 255
    
    
# def generate_mask(path: str, fileUrl: str):
    
#     mask = read_msk(fileUrl)
#     assert  mask.shape == (19, 512, 512)
    
#     text = ""
    
#     for i in range(19):
#         contours, _ = cv2.findContours(mask[i,:,:], cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
#         if len(contours) <= 0:
#             continue
        
#         text += f"{i} "
#         for contour in contours:
#             points = contour.reshape(-1, 2)
#             text += ' '.join(f'{x/512:.9f} {y/512:.9f} ' for x, y in points)
#         text += "\n"
        
#     with open(f'{path}', 'w') as f:
#         f.write(text)
        
# def generate_img(path: str, fileUrl: str):
#     image = read_img(fileUrl)
#     assert  image.shape == (512, 512, 3)
#     cv2.imwrite(f'{path}', image)

In [None]:
def read_img(raster_file: str) -> np.ndarray:
    """Load a raster and return its first three bands as a channels-last image.

    All bands are read with rasterio, the band axis is moved to the end, only
    the first three channels are kept, and channels 0 and 2 are swapped by
    ``cv2.cvtColor(..., cv2.COLOR_BGR2RGB)``.

    NOTE(review): presumably the raster stores R, G, B first, which would make
    the returned array BGR (what ``cv2.imwrite`` expects) - confirm.

    Args:
        raster_file: Path of the raster to open.

    Returns:
        Array of shape (height, width, 3).
    """
    with rasterio.open(raster_file) as dataset:
        bands = dataset.read()

    # (bands, rows, cols) -> (rows, cols, bands), then drop everything past band 3.
    channels_last = bands.transpose(1, 2, 0)
    first_three = channels_last[:, :, :3]
    return cv2.cvtColor(first_three, cv2.COLOR_BGR2RGB)

def read_msk(raster_file: str) -> np.ndarray:
    """Load a label raster and expand it into 19 binary class planes.

    The first band is read and shifted down by one so that labels become
    zero-based (assumes stored labels start at 1 - TODO confirm). Plane ``i``
    of the result is 255 wherever the shifted label equals ``i`` and 0
    elsewhere.

    Args:
        raster_file: Path of the mask raster to open.

    Returns:
        ``uint8`` array of shape (19, height, width) holding 0 or 255.
    """
    with rasterio.open(raster_file) as dataset:
        labels = dataset.read()[0] - 1

    # Broadcast the (rows, cols) label grid against a column of class ids to
    # get every per-class boolean plane in a single comparison.
    class_ids = np.arange(19).reshape(-1, 1, 1)
    planes = labels[np.newaxis, ...] == class_ids
    return planes.astype(np.uint8) * 255
    
    
# Merges the 19 zero-based source class indices (the planes produced by
# read_msk) into 7 target classes numbered 0-6.
# Position in the tuple = source class index, value = target class.
class_map = dict(enumerate((
    3, 3, 3,       # sources 0-2
    4,             # source 3
    5,             # source 4
    0, 0,          # sources 5-6
    1,             # source 7
    2, 2, 2, 2,    # sources 8-11
    3,             # source 12
    5,             # source 13
    2,             # source 14
    0, 0,          # sources 15-16
    3,             # source 17
    6,             # source 18
)))

def generate_mask(path: str, fileUrl: str):
    """Write the polygon label file for one mask raster.

    The mask is split into per-class binary planes by ``read_msk``. The outer
    contours of every plane are traced with OpenCV and each contour becomes
    its own row::

        <target class> x1 y1 x2 y2 ...

    Coordinates are pixel positions divided by the mask width/height, and
    source classes are merged into target classes through ``class_map``.

    Each contour gets a separate row because a YOLO segmentation label row
    describes exactly one polygon; putting several contours on one row would
    connect unrelated regions with spurious edges. Contours with fewer than
    three points cannot form a polygon and are skipped.

    Args:
        path: Destination label file; it is overwritten (and left empty when
            no usable contour is found).
        fileUrl: Path of the mask raster to convert.

    Raises:
        AssertionError: If the mask stack is not ``(19, 512, 512)``.
    """
    mask = read_msk(fileUrl)
    assert mask.shape == (19, 512, 512)
    class_count, height, width = mask.shape

    # Rows are bucketed by target class so the file stays ordered by class id.
    rows_by_class = {}
    for source_class in range(class_count):
        contours, _ = cv2.findContours(
            mask[source_class, :, :], cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE
        )
        if len(contours) == 0:
            continue

        class_index = class_map[source_class]
        rows = rows_by_class.setdefault(class_index, [])
        for contour in contours:
            # OpenCV returns contours as (n, 1, 2); flatten to n (x, y) pairs.
            points = contour.reshape(-1, 2)
            if len(points) < 3:
                continue
            coords = ' '.join(f'{x / width:.9f} {y / height:.9f}' for x, y in points)
            rows.append(f'{class_index} {coords}')

    with open(str(path), 'w') as f:
        for class_index in sorted(rows_by_class):
            for row in rows_by_class[class_index]:
                f.write(row + '\n')
        
def generate_img(path: str, fileUrl: str):
    """Convert one raster to a 256x256 image file.

    The raster is loaded with ``read_img``, checked to be 512x512 with three
    channels, resized to 256x256 with bilinear interpolation and written with
    ``cv2.imwrite`` (the output format follows the extension of ``path``).

    Args:
        path: Destination image file.
        fileUrl: Path of the source raster.

    Raises:
        AssertionError: If the loaded image is not ``(512, 512, 3)``.
        OSError: If OpenCV reports that the file could not be written.
    """
    image = read_img(fileUrl)
    assert image.shape == (512, 512, 3)

    resized = cv2.resize(image, (256, 256), interpolation=cv2.INTER_LINEAR)

    # cv2.imwrite signals failure (e.g. a missing target folder) through its
    # return value rather than an exception; surface it so the dataset does not
    # silently end up with labels that have no image.
    if not cv2.imwrite(f'{path}', resized):
        raise OSError(f'cv2.imwrite could not write image to {path!r}')

In [None]:
def plot_msks(img_path, msk_path):
    """Show every polygon of a label file on top of its image.

    One figure is produced per non-empty label row. Each row is expected to be
    ``<class> x1 y1 x2 y2 ...`` with coordinates normalised to 0-1; they are
    scaled by the actual size of the loaded image, so the overlay lines up
    whatever resolution the image was saved at.

    Args:
        img_path: Image file to display.
        msk_path: Label file holding the normalised polygons.

    Raises:
        FileNotFoundError: If the image cannot be read by OpenCV.
    """
    # Load once instead of once per polygon.
    img = cv2.imread(img_path)
    if img is None:
        raise FileNotFoundError(f'could not read image: {img_path}')
    height, width = img.shape[:2]
    # OpenCV decodes to BGR while matplotlib expects RGB.
    img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    with open(msk_path) as f:
        label_rows = f.read().splitlines()

    for row in label_rows:
        # First token is the class index; the rest are x/y values.
        values = [float(token) for token in row.split()[1:]]
        # Pair consecutive values; zip ignores a dangling last coordinate.
        point_list = [(x * width, y * height) for x, y in zip(values[0::2], values[1::2])]
        if not point_list:
            continue

        fig, ax = plt.subplots(1, 1)
        ax.imshow(img_rgb)
        ax.add_patch(Polygon(point_list, alpha=0.4, color=(random.random(), random.random(), random.random())))

        ax.set_xlim(0, width)
        # Image rows grow downwards, so the y axis runs from height to 0.
        ax.set_ylim(height, 0)

        plt.show()

In [None]:
def read_tif_files(split: str, limit: 'int | None' = None, log_every: int = 1000):
    """Convert one split of the source rasters into image/label file pairs.

    Every ``IMG_*.tif`` found recursively under the split's image folder is
    paired with its mask: same relative path with ``IMG_`` -> ``MSK_`` and
    ``img`` -> ``msk``, rooted at the split's mask folder. For each pair
    ``generate_mask`` writes ``labels/<stem>.txt`` and ``generate_img`` writes
    ``images/<stem>.jpg`` below the split's output folder.

    Files are processed in sorted path order so that the capped subset is the
    same on every run (glob itself yields files in arbitrary order).

    Args:
        split: ``'train'`` selects the training folders; any other value
            selects the test folders.
        limit: Maximum number of images to convert. ``None`` keeps the
            per-split default (30000 for train, 10000 otherwise).
        log_every: Print a progress line every this many files; ``0`` turns
            progress output off. A summary line is always printed at the end.
    """
    if split == 'train':
        img_dir = '/kaggle/input/flair-challenge-dataset-unofficial/flair_aerial_train/flair_aerial_train/'
        msk_dir = '/kaggle/input/flair-challenge-dataset-unofficial/flair_labels_train/flair_labels_train/'
        save_loc = '/kaggle/dataset/train/'
        default_limit = 30000
    else:
        img_dir = '/kaggle/input/flair-challenge-dataset-unofficial/flair_1_aerial_test/flair_1_aerial_test/'
        msk_dir = '/kaggle/input/flair-challenge-dataset-unofficial/flair_1_labels_test/flair_1_labels_test/'
        save_loc = '/kaggle/dataset/test/'
        default_limit = 10000

    if limit is None:
        limit = default_limit

    # Make the function independent of the folder-creation cell: cv2.imwrite
    # does not create missing folders.
    images_out = os.path.join(save_loc, 'images')
    labels_out = os.path.join(save_loc, 'labels')
    os.makedirs(images_out, exist_ok=True)
    os.makedirs(labels_out, exist_ok=True)

    pattern = os.path.join(img_dir, '**', 'IMG_*.tif')
    img_file_paths = sorted(glob.iglob(pattern, recursive=True))[:max(limit, 0)]

    for done, img_file_path in enumerate(img_file_paths, start=1):
        # Rename only the part below img_dir, so the substitutions can never
        # touch the dataset root itself.
        relative_path = os.path.relpath(img_file_path, img_dir)
        msk_file_path = os.path.join(
            msk_dir, relative_path.replace('IMG_', 'MSK_').replace('img', 'msk')
        )

        stem = os.path.splitext(os.path.basename(img_file_path))[0]
        image_save_path = os.path.join(images_out, stem + '.jpg')
        mask_save_path = os.path.join(labels_out, stem + '.txt')

        generate_mask(mask_save_path, msk_file_path)
        generate_img(image_save_path, img_file_path)

        if log_every and done % log_every == 0:
            print(f'{split}: {done}/{len(img_file_paths)} files converted')

    print(f'{split}: converted {len(img_file_paths)} image/mask pairs into {save_loc}')

In [None]:
# Convert the training split (read_tif_files caps it at 30000 images).
read_tif_files('train')

In [None]:
# Convert the test split (read_tif_files caps it at 10000 images).
read_tif_files('test')

In [None]:
# Publish the staging folder as a new Kaggle dataset; it relies on the
# dataset-metadata.json written earlier, and sub-folders are uploaded zipped.
!kaggle datasets create -p "/kaggle/dataset" --dir-mode zip

In [None]:
# To upload a new version of an already-created dataset instead, uncomment:
# !kaggle datasets version -p "/kaggle/dataset" -m "Updated via notebook" --dir-mode zip