# Create Dataset

In [1]:
from PIL import Image
import numpy as np
import rasterio
import matplotlib.pyplot as plt
import pickle
import sat_utils
import tifffile as tiff
import os
import natsort as ns

# Root directory that the file-discovery cell walks with os.walk to find the
# per-image '*_input' / '*_target' files.
folder_path='./data/images'

In [None]:
# Load one sample image pair and eyeball it.
# NOTE: `input` shadows the builtin, but the name is kept because other cells
# in this notebook read `input`, `input2` and `target` as globals.
sample_target_path = 'MUL-PanSharpen_AOI_5_Khartoum_img16_target.tif'
sample_input_path = 'MUL-PanSharpen_AOI_5_Khartoum_img16_input.tif'

target = sat_utils.load_map_tiff(sample_target_path)
input = sat_utils.load_map_tiff(sample_input_path)

# Same input file read directly with tifffile as float32. The crop cell treats
# this copy as channels-last (channelsFirst=False) and the sat_utils copy as
# channels-first.
input2 = tiff.imread(sample_input_path).astype(np.float32)

for shape_label, shape_source in (('input:', input),
                                  ('input2:', input2),
                                  ('target:', target)):
    print(shape_label, shape_source.shape)

# Show band 0 of the label and band 4 of the input, one figure each.
for fig_title, fig_source, band_index in (('Label', target, 0),
                                          ('Input', input, 4)):
    plt.imshow(fig_source[band_index, :, :])
    plt.title(fig_title)
    plt.show()

# Shapes are printed a second time after plotting, as in the original cell.
print('input:', input.shape)
print('target:', target.shape)

#### Crop and Display

In [None]:
# Crop the label and both copies of the input with the same crop arguments,
# then display each result.
crop_box = (0, 0, 76, 76)  # positional arguments forwarded to sat_utils.crop_img

# --- Label -----------------------------------------------------------------
crop_target = sat_utils.crop_img(target, *crop_box)
print('Original:', target.shape)
print('Cropped:', crop_target.shape)
print('Max target:', np.max(target))
print('Min target:', np.min(target))
plt.imshow(crop_target[0, :, :])
plt.show()

# --- Input read with tifffile (handled as channels-last) --------------------
crop_input2 = sat_utils.crop_img(input2, *crop_box, channelsFirst=False)
rgb2 = sat_utils.get_rgb(crop_input2, channelsFirst=False)
rgb_norm2 = sat_utils.img_minmax_norm(rgb2, channelsFirst=False)
plt.imshow(rgb_norm2)
plt.show()

# --- Input read with sat_utils (handled as channels-first) ------------------
crop_input1 = sat_utils.crop_img(input, *crop_box, channelsFirst=True)
rgb1 = sat_utils.get_rgb(crop_input1, channelsFirst=True)
# Move axis 0 to the last position before handing the array to imshow.
rgb_norm1 = np.moveaxis(
    sat_utils.img_minmax_norm(rgb1, channelsFirst=True), 0, 2
)
plt.imshow(rgb_norm1)
plt.show()
print('Original:', input.shape)
print('Cropped:', crop_input1.shape)

In [None]:
# Split the sample input/label pair into blocks via the project helper; the two
# returned dicts are pickled by the next cell.
# NOTE(review): presumably `earlyStop` caps the number of blocks produced and
# `display=True` plots them -- confirm against sat_utils.crop_blocks.
dict_input, dict_output = sat_utils.crop_blocks(input, target, display=True, earlyStop=39)

### Save images to Pickle

In [None]:
# Persist the block dictionaries of the sample image, one pickle file each.
for pickle_path, payload in (('./data/input.pickle', dict_input),
                             ('./data/label.pickle', dict_output)):
    with open(pickle_path, 'wb') as handle:
        pickle.dump(payload, handle)
    

#### Get List of files

In [2]:
# Build two lookups, image id -> file path, for the input and target images
# found under `folder_path`.
#
# File stems are split on '_': the last token is the role ("input" or
# "target") and the second-to-last token is used as the image id.
# NOTE: only that one token is the key, so two files in different
# sub-directories that share it overwrite each other (the later one wins).
input_paths_dict = {}
target_paths_dict = {}
for root, dirs, files in os.walk(folder_path):
    # Natural sort so that e.g. 'img2' is visited before 'img10'.
    files = ns.natsorted(files)
    for file in files:
        parts = os.path.splitext(file)[0].split('_')
        if len(parts) < 2:
            # Stem has no '_' at all (e.g. 'README.md'); indexing parts[-2]
            # would raise IndexError, so skip the file instead.
            continue

        img_id, role = parts[-2], parts[-1]
        img_path = os.path.join(root, file)

        # The two roles are mutually exclusive; anything else is ignored.
        if role == "input":
            input_paths_dict[img_id] = img_path
        elif role == "target":
            target_paths_dict[img_id] = img_path

#### Populate Dictionaries

In [3]:
# Load every discovered input/target pair, cut it into blocks and merge the
# per-image block dictionaries into two dataset-wide dictionaries.
#
# NOTE: `all_target_dct` (sic) and the builtin-shadowing `input` keep their
# original names because other cells in this notebook refer to them.
all_input_dict = {}
all_target_dct = {}

# Single source for the value passed as `earlyStop` and used as the key-offset
# stride, so the two cannot drift apart.
# NOTE(review): the recorded output of this cell grows by 40 for the first
# image and by 39 for every later one, which suggests each call returns 40
# blocks and consecutive images overwrite one shared key. Confirm against
# sat_utils.crop_blocks and adjust the stride if that is unintended.
early_stop = 39

count = 0
for key, value in input_paths_dict.items():
    target_path = target_paths_dict.get(key)
    if target_path is None:
        # An input without a label cannot be used for training; report it
        # instead of aborting the whole run with a KeyError.
        print('Skipping input without matching target:', value)
        continue

    input = sat_utils.load_map_tiff(value)
    target = sat_utils.load_map_tiff(target_path)

    dict_input, dict_output = sat_utils.crop_blocks(
        input, target, display=False, earlyStop=early_stop, offset=count * early_stop
    )
    all_input_dict.update(dict_input)
    all_target_dct.update(dict_output)
    print('Elements added:', len(all_input_dict))
    count += 1

Elements added: 40
Elements added: 79
Elements added: 118
Elements added: 157
Elements added: 196
Elements added: 235
Elements added: 274
Elements added: 313
Elements added: 352
Elements added: 391
Elements added: 430
Elements added: 469
Elements added: 508
Elements added: 547
Elements added: 586
Elements added: 625
Elements added: 664
Elements added: 703
Elements added: 742
Elements added: 781
Elements added: 820
Elements added: 859
Elements added: 898
Elements added: 937
Elements added: 976
Elements added: 1015
Elements added: 1054
Elements added: 1093
Elements added: 1132
Elements added: 1171
Elements added: 1210
Elements added: 1249
Elements added: 1288
Elements added: 1327
Elements added: 1366
Elements added: 1405
Elements added: 1444
Elements added: 1483
Elements added: 1522
Elements added: 1561
Elements added: 1600
Elements added: 1639
Elements added: 1678
Elements added: 1717
Elements added: 1756
Elements added: 1795
Elements added: 1834
Elements added: 1873
Elements added: 1912

#### Save Pickle for list of files

In [14]:
# Write the merged dataset dictionaries to disk, one pickle file each.
dataset_pickles = (
    ('./data/all_input.pickle', all_input_dict),
    ('./data/all_label.pickle', all_target_dct),
)
for pickle_path, payload in dataset_pickles:
    with open(pickle_path, 'wb') as handle:
        pickle.dump(payload, handle)

#### Clean dataset

In [10]:
def clean_data(in_dict, lbl_dict, min_perc=3.0):
    """Remove samples whose label scores below ``min_perc``.

    For every entry of ``lbl_dict`` the first slice of the label
    (``val[0, :, :]``) is passed to ``sat_utils.get_info_percent``; entries
    whose result is below ``min_perc`` are deleted from both dictionaries.

    Args:
        in_dict: Input samples, keyed like ``lbl_dict``. Modified in place.
        lbl_dict: Label samples; each value must support ``val[0, :, :]``.
            Modified in place.
        min_perc: Threshold compared against the helper's return value
            (presumably a percentage -- confirm against
            ``sat_utils.get_info_percent``).

    Returns:
        The same ``(in_dict, lbl_dict)`` objects, after deletion.

    Raises:
        KeyError: If a key selected for deletion is missing from ``in_dict``.
    """
    # Iterate the *argument*, not a notebook global, so the function works on
    # whatever dictionaries the caller passes in.
    # Keys are collected first because a dict must not change size while it
    # is being iterated.
    list_delete = [
        key
        for key, val in lbl_dict.items()
        if sat_utils.get_info_percent(val[0, :, :]) < min_perc
    ]

    for key in list_delete:
        del in_dict[key]
        del lbl_dict[key]

    return in_dict, lbl_dict

In [12]:
# Filter the merged dataset.
# NOTE(review): this calls `sat_utils.clean_data`, not the `clean_data`
# function defined in this notebook -- confirm the two are meant to be the same
# and whether the helper also modifies the passed dictionaries in place.
in_dict, lbl_dict = sat_utils.clean_data(all_input_dict, all_target_dct)