## Example for batch processing of images
How to select RGB bands from a set of multiband images in a folder

In [1]:
import numpy as np
import os
import skimage.io as skio
import matplotlib.pyplot as plt
import pandas as pd
from glob import glob
import imageio

In [3]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


#### Functions

In [2]:
def select_rgb_bands_from_multiband(f_in, f_out, rgb_bands = [3, 2, 1], 
                                    crop_min_perc = 0.5, crop_max_perc = 99.5):
    ## Read input multiband image
    mat_in = skio.imread(f_in, plugin="tifffile")
    
    ## Select bands
    mat_out = mat_in[:,:, [3, 2, 1]]
    
    ## Scale img in each band (using percentiles, as it's more robust to outliers)
    for i in [0, 1, 2]:
        scmin = np.percentile(mat_out[:, :, i], crop_min_perc)
        scmax = np.percentile(mat_out[:, :, i], crop_max_perc)
        delta = scmax - scmin
        mat_out[:, :, i] = ((mat_out[:, :, i] - scmin) / delta * 255).astype(int)
        
    ## Crop values that are out of range (defined by percentiles)
    mat_out[mat_out < 0] = 0
    mat_out[mat_out > 255] = 255

    ## Create output folder
    try:
        out_dir = os.path.dirname(f_out)
        if os.path.exists(out_dir) == False :
            os.makedirs(out_dir)
    except:
        print('Error creating output folder, skip image')
        return;
    
    ## Save image
    try:
        imageio.imsave(f_out, mat_out.astype(np.uint8))    
    except:
        print('Error writing output image, skip image')
        return;
    

#### Read list of images

In [4]:
## Find all images
#indir = '../inputs/'
indir = '/content/drive/MyDrive/CommonFiles/MUSA650-Data/Prj_EuroSat'

f_all_mb = os.path.join(indir, 'data_small', 'multiband', '*/*.tif')
in_img_list = glob(f_all_mb)

## Save to dataframe
df_in = pd.DataFrame(data = in_img_list, columns = ['FileName'])

In [5]:
df_in.head()

Unnamed: 0,FileName
0,/content/drive/MyDrive/CommonFiles/MUSA650-Dat...
1,/content/drive/MyDrive/CommonFiles/MUSA650-Dat...
2,/content/drive/MyDrive/CommonFiles/MUSA650-Dat...
3,/content/drive/MyDrive/CommonFiles/MUSA650-Dat...
4,/content/drive/MyDrive/CommonFiles/MUSA650-Dat...


In [7]:
df_in.loc[0].FileName

'/content/drive/MyDrive/CommonFiles/MUSA650-Data/Prj_EuroSat/data_small/multiband/AnnualCrop/AnnualCrop_241.tif'

In [8]:
## Detect categories
df_in['Category'] = df_in.FileName.str.split(os.sep).str[9]
df_in.groupby('Category').count()

Unnamed: 0_level_0,FileName
Category,Unnamed: 1_level_1
AnnualCrop,50
Industrial,50
Residential,50


In [9]:
## Detect image names
df_in['ImageName'] = df_in.FileName.str.split(os.sep).str[10].str.replace('.tif', '', regex = False)
df_in = df_in[['ImageName', 'Category', 'FileName']]
df_in.head()

Unnamed: 0,ImageName,Category,FileName
0,AnnualCrop_241,AnnualCrop,/content/drive/MyDrive/CommonFiles/MUSA650-Dat...
1,AnnualCrop_1528,AnnualCrop,/content/drive/MyDrive/CommonFiles/MUSA650-Dat...
2,AnnualCrop_1891,AnnualCrop,/content/drive/MyDrive/CommonFiles/MUSA650-Dat...
3,AnnualCrop_465,AnnualCrop,/content/drive/MyDrive/CommonFiles/MUSA650-Dat...
4,AnnualCrop_233,AnnualCrop,/content/drive/MyDrive/CommonFiles/MUSA650-Dat...


In [11]:
df_in['ImageIndex'] = df_in.ImageName.str.split('_').str[1].astype(int)

In [12]:
df_in.head()

Unnamed: 0,ImageName,Category,FileName,ImageIndex
0,AnnualCrop_241,AnnualCrop,/content/drive/MyDrive/CommonFiles/MUSA650-Dat...,241
1,AnnualCrop_1528,AnnualCrop,/content/drive/MyDrive/CommonFiles/MUSA650-Dat...,1528
2,AnnualCrop_1891,AnnualCrop,/content/drive/MyDrive/CommonFiles/MUSA650-Dat...,1891
3,AnnualCrop_465,AnnualCrop,/content/drive/MyDrive/CommonFiles/MUSA650-Dat...,465
4,AnnualCrop_233,AnnualCrop,/content/drive/MyDrive/CommonFiles/MUSA650-Dat...,233


#### Extract RGB bands from all images

In [13]:
#outdir = '../outputs/bands_rgb'
outdir = '/content/drive/MyDrive/CommonFiles/MUSA650-Data/Prj_EuroSat/outclassrun'

for i, tmp_ind in enumerate(df_in.index):
    tmp_name = df_in.loc[tmp_ind, 'ImageName']
    tmp_cat = df_in.loc[tmp_ind, 'Category']
    tmp_file = df_in.loc[tmp_ind, 'FileName']
    print('Extracting RGB bands from image ' + str(i) + ' : ' + tmp_name)
    f_out = os.path.join(outdir, tmp_cat, tmp_name + '_my_rgb.png')
    select_rgb_bands_from_multiband(tmp_file, f_out)
    
    ## input('Enter a key to continue')   ## For testing the initial run

Extracting RGB bands from image 0 : AnnualCrop_241
Extracting RGB bands from image 1 : AnnualCrop_1528
Extracting RGB bands from image 2 : AnnualCrop_1891
Extracting RGB bands from image 3 : AnnualCrop_465
Extracting RGB bands from image 4 : AnnualCrop_233
Extracting RGB bands from image 5 : AnnualCrop_621
Extracting RGB bands from image 6 : AnnualCrop_68
Extracting RGB bands from image 7 : AnnualCrop_255
Extracting RGB bands from image 8 : AnnualCrop_2544
Extracting RGB bands from image 9 : AnnualCrop_1943
Extracting RGB bands from image 10 : AnnualCrop_1744
Extracting RGB bands from image 11 : AnnualCrop_119
Extracting RGB bands from image 12 : AnnualCrop_1432
Extracting RGB bands from image 13 : AnnualCrop_2844
Extracting RGB bands from image 14 : AnnualCrop_1775
Extracting RGB bands from image 15 : AnnualCrop_659
Extracting RGB bands from image 16 : AnnualCrop_1375
Extracting RGB bands from image 17 : AnnualCrop_2417
Extracting RGB bands from image 18 : AnnualCrop_230
Extracting RG