In [1]:
import os
from glob import glob
import rasterio
import numpy as np
import pandas as pd
import cv2
import matplotlib.pyplot as plt

In [2]:
# Root folder of the tile dataset; images and masks sit in sibling
# sub-folders directly beneath it.
base_dest_dir = '/datasets/rpartsey/satellite/planet/2017-su-au-1_2017-sp-1_2017-sp-2_256x256'
dest_image_dir, dest_mask_dir = (
    os.path.join(base_dest_dir, subdir) for subdir in ('images', 'masks')
)

In [3]:
# List every .tif file in the image and mask folders.
images, masks = (
    glob(os.path.join(folder, '*.tif'))
    for folder in (dest_image_dir, dest_mask_dir)
)

# Row i pairs the i-th sorted image path with the i-th sorted mask path.
# NOTE(review): this assumes image and mask file names sort into the same
# order and that both folders hold the same number of files - confirm.
data = dict(image=sorted(images), mask=sorted(masks))
df = pd.DataFrame(data)

In [4]:
# Sanity check: (rows, columns) of the path table, then its first rows.
print((len(df.index), len(df.columns)))
df.head(n=5)

(42553, 2)


Unnamed: 0,image,mask
0,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...
1,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...
2,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...
3,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...
4,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...


In [5]:
# Flag "black" tiles: tiles where fewer than half of the pixels hold any data
# AND whose mask is completely empty. The result is a boolean array aligned
# row-by-row with `df`.
black_images = []

# Iterate over the two path columns by name so the loop keeps working even if
# `df` carries additional columns (row-tuple unpacking would fail then).
for image_path, mask_path in zip(df['image'], df['mask']):
    with rasterio.open(image_path) as image_src, rasterio.open(mask_path) as mask_src:
        bands = image_src.read()
        mask = mask_src.read()

    # A pixel counts as valid when at least one band is non-zero. `any` is
    # used instead of summing the bands so values cannot cancel each other.
    data_mask = bands.any(axis=0)

    # Half of the tile's actual pixel count (equals (256*256) // 2 for
    # 256x256 tiles) instead of a hard-coded tile size.
    half_tile = data_mask.size // 2
    black_images.append(bool(data_mask.sum() < half_tile and not mask.any()))

# Explicit dtype keeps the array boolean (usable with `~`) even when `df` is empty.
black_images = np.array(black_images, dtype=bool)
print('Number of black images:', np.sum(black_images))

Number of black images: 5502


In [6]:
# Keep only the tiles that were not flagged as black. `.copy()` makes the
# result an independent frame, so adding columns to it later does not raise
# pandas' SettingWithCopyWarning.
df = df[~black_images].copy()
df.shape

(37051, 2)

In [7]:
# For every remaining tile, record whether its mask contains at least one
# non-zero pixel. The result is a boolean array aligned row-by-row with `df`.
mask_exists = []

# Only the mask path is needed; reading the column directly also keeps this
# cell runnable after extra columns have been added to `df`.
for mask_path in df['mask']:
    with rasterio.open(mask_path) as mask_src:
        mask_exists.append(bool(mask_src.read().any()))

# Explicit dtype keeps the array boolean even when `df` is empty.
mask_exists = np.array(mask_exists, dtype=bool)
print('Number of images with burned areas:', mask_exists.sum())

Number of images with burned areas: 921


In [8]:
# Add per-tile flags: whether the mask is non-empty, and which season token
# appears in the image file name.
# The tokens are searched in the file name only, so a directory that happens
# to contain 'spring' or 'summer-autumn' cannot flag every row.
image_names = df['image'].map(os.path.basename)

# `assign` builds a new frame instead of writing into a filtered slice, which
# avoids SettingWithCopyWarning and keeps the cell safe to re-run.
df = df.assign(**{
    'mask_exists': mask_exists,
    'summer-autumn': image_names.str.contains('summer-autumn', regex=False),
    'spring': image_names.str.contains('spring', regex=False),
})

print('Number of spring images:', df['spring'].sum())
print('Number of summer-autumn images:', df['summer-autumn'].sum())

df.head()

Numner of spring images: 26171
Numner of summer-autumn images: 10880


Unnamed: 0,image,mask,mask_exists,summer-autumn,spring
1,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...,False,False,True
2,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...,False,False,True
6,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...,False,False,True
7,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...,False,False,True
8,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...,False,False,True


In [9]:
# Persist the filtered tile table; row labels are not written to the file.
no_black_csv = '/datasets/rpartsey/satellite/planet/2017-su-au-1_2017-sp-1_2017-sp-2_256x256_no_black.csv'
df.to_csv(no_black_csv, index=False)

In [10]:
# Reload the filtered tile table from disk; rows get a fresh 0..n-1 index.
no_black_csv = '/datasets/rpartsey/satellite/planet/2017-su-au-1_2017-sp-1_2017-sp-2_256x256_no_black.csv'
df = pd.read_csv(no_black_csv)

In [11]:
# Show the first five rows of the reloaded table.
df.head(n=5)

Unnamed: 0,image,mask,mask_exists,summer-autumn,spring
0,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...,False,False,True
1,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...,False,False,True
2,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...,False,False,True
3,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...,False,False,True
4,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...,False,False,True


In [12]:
# Split the spring tiles by whether their mask is non-empty.
is_spring = df['spring']
has_mask = df['mask_exists']

spring_with_mask = df.loc[is_spring & has_mask]
spring_without_mask = df.loc[is_spring & ~has_mask]

# Report the size of each subset.
for subset in (spring_with_mask, spring_without_mask):
    print(subset.shape)

(843, 5)
(25328, 5)


In [13]:
# Split the summer-autumn tiles by whether their mask is non-empty.
is_summer_autumn = df['summer-autumn']
has_mask = df['mask_exists']

summer_autumn_with_mask = df.loc[is_summer_autumn & has_mask]
summer_autumn_without_mask = df.loc[is_summer_autumn & ~has_mask]

# Report the size of each subset.
for subset in (summer_autumn_with_mask, summer_autumn_without_mask):
    print(subset.shape)

(78, 5)
(10802, 5)


In [14]:
num_samples = 2000  # upper bound on rows drawn from each subset
sample_seed = 42    # fixed so the sampled subset can be regenerated exactly

def sample_rows(df, n, random_state=sample_seed):
    """Return a random sample of at most ``n`` rows from ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame to draw rows from.
    n : int
        Maximum number of rows to return. When ``df`` has fewer than ``n``
        rows, all of its rows are returned (in shuffled order).
    random_state : int or None, optional
        Seed forwarded to ``DataFrame.sample``. Defaults to ``sample_seed``
        so repeated runs pick the same rows; pass ``None`` for an unseeded
        draw.

    Returns
    -------
    pandas.DataFrame
        The sampled rows, keeping their original index labels.
    """
    return df.sample(n=min(len(df), n), random_state=random_state)

# Draw up to `num_samples` rows from each season / mask combination.
sp_with_mask, sp_without_mask = (
    sample_rows(subset, num_samples)
    for subset in (spring_with_mask, spring_without_mask)
)

su_au_with_mask, su_au_without_mask = (
    sample_rows(subset, num_samples)
    for subset in (summer_autumn_with_mask, summer_autumn_without_mask)
)


In [21]:
# Stack the four sampled subsets into one frame; original row labels are kept.
sampled_subsets = [sp_with_mask, sp_without_mask, su_au_with_mask, su_au_without_mask]
small_df = pd.concat(sampled_subsets)

In [22]:
# (rows, columns) of the combined sample.
len(small_df.index), len(small_df.columns)

(4921, 5)

In [26]:
# Show the first five rows of the combined sample.
small_df.iloc[:5]

Unnamed: 0,image,mask,mask_exists,summer-autumn,spring
0,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...,True,False,True
1,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...,True,False,True
2,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...,True,False,True
3,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...,True,False,True
4,/datasets/rpartsey/satellite/planet/2017-su-au...,/datasets/rpartsey/satellite/planet/2017-su-au...,True,False,True


In [24]:
# Persist the sampled subset; row labels are not written to the file.
small_csv = '/datasets/rpartsey/satellite/planet/2017-su-au-1_2017-sp-1_2017-sp-2_256x256_no_black_small.csv'
small_df.to_csv(small_csv, index=False)

In [25]:
# Reload the sampled subset from disk; rows get a fresh 0..n-1 index.
small_csv = '/datasets/rpartsey/satellite/planet/2017-su-au-1_2017-sp-1_2017-sp-2_256x256_no_black_small.csv'
small_df = pd.read_csv(small_csv)