In [247]:
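# Creates the output folders and generates the augmented dataset for landcover.ai.
# Augmentation used:
# Images whose mask contains all the classes are augmented 10 times and the others 3 times
# (the actual counts are set by the two multiplication factors in the Params cell below).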

In [25]:
#Params :
# netid and version_no are path components of the output folder
# (/scratch/<netid>/cv/semantic_datasets2/landcover/<version_no>).
netid = 'nn2163'
version_no = '12'
# Number of augmented samples generated per source image: the first factor is
# used when the mask contains every class, the second one otherwise
# (they are passed to create_dataset as count_all and count_rest).
multiplication_factor_all_features = 1
multiplication_factor_for_rest = 1

In [26]:
# base_data_path / original_base_path point at the source landcover.ai data
# (images are read from original/images and masks from original/masks).
# Using this script, we are generating augmented data in out_path.
# NOTE(review): this comment used to say that the "actual 41 images" are kept here, but a
# recorded run listed 10674 files in original/images - presumably pre-cut tiles; confirm.

base_data_path = "/scratch/au2056/semantic_datasets/landcover/2/"
original_base_path = base_data_path + "original/"

In [250]:
!pip install --upgrade pip

Defaulting to user installation because normal site-packages is not writeable
Collecting pip
  Using cached pip-21.3.1-py3-none-any.whl (1.7 MB)
Installing collected packages: pip
Successfully installed pip-21.3.1
You should consider upgrading via the '/share/apps/python/3.8.6/intel/bin/python -m pip install --upgrade pip' command.[0m


In [5]:
import os, re, sys, random, shutil, cv2
import numpy as np
from PIL import ImageColor
from IPython.display import SVG
import matplotlib.pyplot as plt
%matplotlib inline
import albumentations as A
import segmentation_models_pytorch as smp

In [3]:
#TODO
def augment(width, height):
    """Build the albumentations pipeline applied to every image/mask pair.

    Args:
        width: width of the random crop, in pixels.
        height: height of the random crop, in pixels.

    Returns:
        An ``A.Compose`` object chaining, in order: random crop, horizontal
        flip, vertical flip, rotation, brightness/contrast jitter and one
        transform picked from CLAHE, grid distortion or optical distortion.
    """
    # Last stage: a OneOf group over three alternative transforms.
    one_of_distortions = A.OneOf([
        A.CLAHE (clip_limit=1.5, tile_grid_size=(8, 8), p=0.5),
        A.GridDistortion(p=0.5),
        A.OpticalDistortion(distort_limit=1, shift_limit=0.5, interpolation=cv2.INTER_NEAREST, p=0.5),
    ], p=1.0)

    stages = [
        A.RandomCrop(width=width, height=height, p=1.0),
        A.HorizontalFlip(p=0.7),
        A.VerticalFlip(p=0.7),
        # Rotation angle is drawn from the 60..300 degree range.
        A.Rotate(limit=[60, 300], p=1.0, interpolation=cv2.INTER_NEAREST),
        A.RandomBrightnessContrast(brightness_limit=[-0.2, 0.3], contrast_limit=0.2, p=1.0),
        one_of_distortions,
    ]
    return A.Compose(stages, p=1.0)

In [6]:
# RGB colour used for each class in the mask images.
class_to_rgb_map = {
    'unlabeled': np.array([0,0,0]),
    'building': np.array([80,0,165]),
    'woodland': np.array([255,204,0]),
    'water': np.array([0,244,244]),
    'roads': np.array([105,105,105]),
}

# The position of a name in this list is the integer id of the class.
class_names= ['unlabeled', 'building', 'woodland', 'water', 'roads']

# Colour of every class as a hashable 3-tuple, in class-id order.
class_codes = []
for cname in class_names:
    rgb_arr = class_to_rgb_map[cname]
    class_codes.append(tuple(rgb_arr[:3]))

imap = dict(enumerate(class_codes))                  # class id   -> RGB tuple
inmap = dict(enumerate(class_names))                 # class id   -> class name
nimap = {name: cid for cid, name in inmap.items()}   # class name -> class id

for lookup in (imap, nimap, inmap):
    print(lookup)
# cmap = {v:k for k,v in enumerate(class_codes)}
# 
# print(cmap, "\n-----------------------------------------------------------------\n")
# print(nimap, "\n-----------------------------------------------------------------\n")
# print(inmap)

{0: (0, 0, 0), 1: (80, 0, 165), 2: (255, 204, 0), 3: (0, 244, 244), 4: (105, 105, 105)}
{'unlabeled': 0, 'building': 1, 'woodland': 2, 'water': 3, 'roads': 4}
{0: 'unlabeled', 1: 'building', 2: 'woodland', 3: 'water', 4: 'roads'}


In [7]:
def mask_contains_all(msk, nimap):
    """Tell whether every class occurs at least once in an RGB mask.

    Args:
        msk: RGB mask image; it is one-hot encoded with the notebook-level
            ``imap`` (class id -> RGB colour).
        nimap: dict mapping class name -> class id; it must contain the keys
            'building', 'water', 'roads', 'woodland' and 'unlabeled'.

    Returns:
        True if each of the five classes has at least one pixel in the mask,
        False otherwise (always a real bool, never None).

    Raises:
        KeyError: if one of the five class names is missing from ``nimap``.
    """
    one_hot = convert_to_onehot(msk, imap)
    required = ('building', 'water', 'roads', 'woodland', 'unlabeled')
    # Resolve all ids up front so that a missing name always raises KeyError,
    # regardless of which classes happen to be present in the mask.
    class_ids = [nimap[name] for name in required]
    return all(bool(np.any(one_hot[:, :, cid])) for cid in class_ids)

In [16]:
def convert_to_onehot(img, mapping=None):
    """One-hot encode an RGB mask.

    Args:
        img: array of shape (H, W, 3) holding one RGB colour per pixel.
        mapping: dict whose keys are the class ids 0..n-1 and whose values
            are the RGB colour of each class. When omitted, the notebook-level
            ``imap`` is looked up at call time, so re-running the cell that
            defines ``imap`` is picked up without redefining this function.

    Returns:
        int8 array of shape (H, W, n); channel ``i`` is 1 where the pixel
        colour equals ``mapping[i]`` and 0 elsewhere. Pixels whose colour
        matches no class are 0 in every channel.
    """
    if mapping is None:
        mapping = imap
    nclasses = len(mapping)
    ret = np.zeros(img.shape[:2] + (nclasses,), dtype=np.int8)
    for idx in range(nclasses):
        # A pixel belongs to class idx when all three channels match its colour.
        ret[:, :, idx] = np.all(img == mapping[idx], axis=-1)
    return ret

def convert_to_rgb(onehot, mapping=None):
    """Turn a one-hot (or per-class score) array back into an RGB mask.

    Args:
        onehot: array of shape (H, W, n); the class of a pixel is the index
            of its largest value along the last axis.
        mapping: dict mapping class id -> RGB colour. When omitted, the
            notebook-level ``imap`` is looked up at call time, so re-running
            the cell that defines ``imap`` is picked up without redefining
            this function.

    Returns:
        uint8 array of shape (H, W, 3). Pixels whose class id is not a key of
        ``mapping`` stay black.
    """
    if mapping is None:
        mapping = imap
    val = np.argmax(onehot, axis=-1)
    ret = np.zeros(onehot.shape[:2] + (3,))
    for class_id, colour in mapping.items():
        ret[val == class_id] = colour
    return np.uint8(ret)

In [27]:
# List the source images and shuffle them for the random split.
# os.scandir yields entries in arbitrary order, so the names are sorted first and
# then shuffled with a dedicated, seeded generator: the same files always end up
# in the same train/validate/test subset, and the global NumPy RNG is not touched.
shuffle_seed = 42
fnames = sorted(f.name for f in os.scandir(original_base_path + "/images/") if not f.is_dir())
np.random.default_rng(shuffle_seed).shuffle(fnames)
#fnames = fnames[:10]
print(len(fnames))

10674


In [28]:
# Registry of augmentation pipelines; entry 1 is the 512x512 pipeline.
transform_fn = {1: augment(512, 512)}

In [29]:
def create_directory(path):
    """Create ``path`` (including missing parents) and report what happened.

    The directory is created directly instead of checking for existence
    first, so there is no window in which another process can create it
    between the check and the creation.

    Args:
        path: directory to create.

    Prints "The new directory is created!" when the directory was created and
    "The directory already exists!" when something already exists at ``path``.
    """
    try:
        os.makedirs(path)
    except FileExistsError:
        print("The directory already exists!")
    else:
        print("The new directory is created!")

In [30]:
#folder creation :
# The output root is <root_path><netid><directory><version_no>, with one
# sub-folder for the generated images and one for the generated masks.

root_path = '/scratch/'
directory = '/cv/semantic_datasets2/landcover/'
subdirectory_image = '/images/'
subdirectory_masks = '/masks/'
path = ''.join([root_path, netid, directory, version_no])
path_to_images, path_to_masks = (path + leaf for leaf in (subdirectory_image, subdirectory_masks))

for new_dir in (path, path_to_images, path_to_masks):
    create_directory(new_dir)

The new directory is created!
The new directory is created!
The new directory is created!


In [31]:
# Template for the generated files: the first placeholder is the sub-folder
# ("images" or "masks"), the second one the file name; create_dataset fills it
# in with out_path.format(...).
out_path = path + "/{}/{}"

In [32]:
print(out_path)

/scratch/nn2163/cv/semantic_datasets2/landcover/12/{}/{}


In [33]:
# Names (without extension) of every augmented sample written by create_dataset.
files = []


def _read_rgb(path):
    """Load the image at ``path`` with OpenCV and return it in RGB channel order."""
    bgr = cv2.imread(path)
    if bgr is None:
        # cv2.imread reports a missing/unreadable file by returning None.
        raise FileNotFoundError("Could not read image: " + path)
    return cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB)


def _write_rgb(path, rgb):
    """Write an RGB array to ``path``; raise if OpenCV reports a failed write."""
    # OpenCV expects BGR channel order on disk, hence the conversion back.
    if not cv2.imwrite(path, cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)):
        raise OSError("Could not write image: " + path)


def create_dataset(count_all, count_rest, nimap, fnames):
    """Augment every source image/mask pair and write the results to out_path.

    For each name in ``fnames`` the image is read from
    ``original_base_path + "/images/"`` and its mask (same name with ".jpg"
    replaced by "_m.png") from ``original_base_path + "/masks/"``. Each pair is
    passed ``times`` times through ``transform_fn[1]``; sample ``i`` (1-based)
    is written as ``images/<i>_<name>.jpg`` and ``masks/<i>_<name>.png`` and
    ``<i>_<name>`` is appended to the notebook-level ``files`` list.

    Args:
        count_all: number of augmented samples for pairs whose mask contains
            every class (see mask_contains_all).
        count_rest: number of augmented samples for all other pairs.
        nimap: dict mapping class name -> class id, forwarded to
            mask_contains_all.
        fnames: iterable of source image file names (".jpg").

    Raises:
        FileNotFoundError: if a source image or mask cannot be read.
        OSError: if an augmented image or mask cannot be written.
    """
    img_folder_path = original_base_path + "/images/"
    msk_folder_path = original_base_path + "/masks/"
    for img_name in fnames:
        img = _read_rgb(img_folder_path + img_name)
        msk = _read_rgb(msk_folder_path + img_name.replace(".jpg", "_m.png"))

        #targeted augmentation
        times = count_all if mask_contains_all(msk, nimap) else count_rest

        for idx in range(times):
            transformed = transform_fn[1](image=img, mask=msk)
            prefix = str(idx + 1) + "_"
            img_out_path = out_path.format("images", prefix + img_name)
            msk_out_path = out_path.format("masks", prefix + img_name.replace(".jpg", ".png"))
            _write_rgb(img_out_path, transformed['image'])
            _write_rgb(msk_out_path, transformed['mask'])
            # Record the sample only once both files are actually on disk.
            files.append(prefix + img_name.replace(".jpg", ""))
            
            

In [None]:
# Generate the augmented dataset and report how many samples were recorded.
# NOTE: create_dataset appends to the notebook-level list `files`; re-running this
# cell without first re-running the cell that resets `files` records every sample twice.
create_dataset(multiplication_factor_all_features, multiplication_factor_for_rest, nimap, fnames)
print(len(files))

In [36]:
# 80% / 10% / 10% split of the generated samples, in the order they were written.
train_end = int(len(files)*0.8)
validate_end = int(len(files)*0.9)
train, validate, test = files[:train_end], files[train_end:validate_end], files[validate_end:]

for subset in (train, validate, test):
    print(len(subset))

8539
1067
1068


In [37]:
# Root of the generated dataset with a trailing slash, used to build the
# per-split folders below.
base_path = f"{path}/"
print(path)
print(base_path)

/scratch/nn2163/cv/semantic_datasets2/landcover/12
/scratch/nn2163/cv/semantic_datasets2/landcover/12/


In [38]:
def create_sub_dir(data_category):
    """Create the images/ and masks/ folders of one split under base_path.

    Args:
        data_category: name of the split folder (e.g. "train").
    """
    for leaf in ("/images/", "/masks/"):
        create_directory(base_path + data_category + leaf)
        

In [39]:
# One images/ + masks/ folder pair per split.
for split_name in ("train", "validate", "test"):
    create_sub_dir(split_name)

The new directory is created!
The new directory is created!
The new directory is created!
The new directory is created!
The new directory is created!
The new directory is created!


In [40]:
import shutil, os

def move_dataset(data_cat, data_cat_name):
    """Move the image/mask files of one split into its own folder.

    For every name in ``data_cat`` the files ``images/<name>.jpg`` and
    ``masks/<name>.png`` under ``base_path`` are moved to
    ``<data_cat_name>/images/`` and ``<data_cat_name>/masks/``.

    Each file is handled on its own: a file that is already at its destination
    is skipped (so the cell can be re-run), and one missing file no longer
    stops the remaining files from being moved. Unexpected errors such as
    permission problems are no longer swallowed and propagate to the caller.

    Args:
        data_cat: iterable of sample names without extension.
        data_cat_name: name of the split folder (e.g. "train").

    Prints "Data already moved" if at least one file was already at its
    destination, and a warning if a file exists at neither location.
    """
    img_folder_path = base_path + "images/"
    msk_folder_path = base_path + "masks/"
    img_out_folder_path = base_path + data_cat_name + "/images/"
    msk_out_folder_path = base_path + data_cat_name + "/masks/"

    already_moved = False
    missing = []
    for img_name in data_cat:
        transfers = (
            (img_folder_path + img_name + ".jpg", img_out_folder_path + img_name + ".jpg"),
            (msk_folder_path + img_name + ".png", msk_out_folder_path + img_name + ".png"),
        )
        for src, dst in transfers:
            if os.path.exists(src):
                shutil.move(src, dst)
            elif os.path.exists(dst):
                already_moved = True
            else:
                missing.append(src)

    if already_moved:
        print("Data already moved")
    if missing:
        print("Warning: {} file(s) found neither at the source nor at the destination, e.g. {}".format(len(missing), missing[0]))

In [41]:
# Distribute the generated samples over their split folders.
for subset, subset_name in ((train, "train"), (validate, "validate"), (test, "test")):
    move_dataset(subset, subset_name)