In [0]:
## Extracts train and test images from 2528C.tif and 2930D.tif

In [0]:
## OPTIONAL - download data
# !aws s3 cp s3://eohackathon-covid19/Hackthon_Data/Gauteng/2528C.tif data/
# !aws s3 cp 's3://eohackathon-covid19/Hackthon_Data/Kwazulu Natal/2930D.tif' data/

##### Imports

In [0]:
import sys, os, random                                                                                                                                                      
import numpy as np, pandas as pd   
import rasterio
from PIL import Image

In [0]:
RANDOM_STATE = 41


def fix_seed(seed=0):
    """Seed the global RNGs used by this notebook.

    Seeds Python's ``random`` module and NumPy's legacy global generator with
    ``seed`` and stores ``str(seed)`` in the ``PYTHONHASHSEED`` environment
    variable.

    NOTE: Python reads ``PYTHONHASHSEED`` at interpreter start-up, so setting
    it here is expected to influence only processes launched afterwards, not
    string hashing in the already-running kernel.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


fix_seed(RANDOM_STATE)

In [0]:
# DATA_DIR is expected to contain:
#   2528C.tif
#   2930D.tif
#   Train.csv
#   Test.csv
DATA_DIR = 'data/'

# os.path.join copes with DATA_DIR with or without a trailing slash and avoids
# the doubled separator ('data//Train.csv') that plain string formatting gives.
df_train = pd.read_csv(os.path.join(DATA_DIR, 'Train.csv'))
df_test = pd.read_csv(os.path.join(DATA_DIR, 'Test.csv'))

In [0]:
# Raster that the training patches are cut from; the handle is kept open
# because later cells read its bands and use it for coordinate lookups.
train_raster_path = f'{DATA_DIR}/2528C.tif'
ds_train = rasterio.open(train_raster_path)

In [0]:
# Quick look at the training raster's layout: number of bands and their
# (possibly unset) descriptions. The trailing tuple is the cell's display value.
n_bands = ds_train.count
band_names = ds_train.descriptions
(band_names, n_bands)

((None, None, None), 3)

#### Extract images

In [0]:
sz=200 ## side length, in pixels, of each square image patch that is extracted
# 200 px * 1.5 m/px = 300 m per side, i.e. each patch covers roughly 300 m x 300 m
# (the 1.5 m/px resolution comes from the original note - TODO confirm against the raster)

In [0]:
# process train
# Cuts one sz x sz RGB patch per row of df_train out of ds_train and writes it
# to <DATA_DIR>/train/<ID>.jpg.
train_dir = os.path.join(DATA_DIR, 'train')
# exist_ok=True keeps this cell re-runnable (Restart & Run All); images from a
# previous run are overwritten.
os.makedirs(train_dir, exist_ok=True)

# Load bands into RAM once so every sample is a cheap array slice
red, green, blue = ds_train.read(1), ds_train.read(2), ds_train.read(3)
n_rows, n_cols = red.shape
half = sz // 2
n_padded = 0  # samples whose window was not fully inside the raster

for _, sample in df_train.iterrows():
    # Positional unpack: relies on the column order ID, lat, lon, label
    ID, lat, lon, label = sample

    # Blank image; stays zero wherever the window falls outside the raster
    im = np.zeros((sz, sz, 3), np.uint8)

    # Get pixel coords
    # NOTE(review): passing (lon, lat) presumes the raster CRS is geographic - confirm
    row, col = ds_train.index(lon, lat)

    # Window of sz x sz pixels around (row, col), clipped to the raster extent.
    # Without clipping, a negative start index wraps around and an overshoot
    # truncates the slice, both of which break the assignment below.
    top, left = row - half, col - half
    r0, r1 = max(top, 0), min(top + sz, n_rows)
    c0, c1 = max(left, 0), min(left + sz, n_cols)

    # Add image data (only the part of the window that overlaps the raster)
    if r0 < r1 and c0 < c1:
        for i, band in enumerate([red, green, blue]):
            im[r0 - top:r1 - top, c0 - left:c1 - left, i] = band[r0:r1, c0:c1]
    if (r1 - r0, c1 - c0) != (sz, sz):
        n_padded += 1

    # Save image
    Image.fromarray(im).save(os.path.join(train_dir, f'{ID}.jpg'))

if n_padded:
    print(f'{n_padded} train image(s) were zero-padded at the raster border')

HBox(children=(FloatProgress(value=0.0, max=4281.0), HTML(value='')))




In [0]:
# !ls -1 data/train | wc -l

4281


In [0]:
# Raster that the test patches are cut from; the handle is kept open because
# the following cell reads its bands and uses it for coordinate lookups.
test_raster_path = f'{DATA_DIR}/2930D.tif'
ds_test = rasterio.open(test_raster_path)

In [0]:
# process test
# Cuts one sz x sz RGB patch per row of df_test out of ds_test and writes it
# to <DATA_DIR>/test/<ID>.jpg.
test_dir = os.path.join(DATA_DIR, 'test')
# exist_ok=True keeps this cell re-runnable (Restart & Run All); images from a
# previous run are overwritten.
os.makedirs(test_dir, exist_ok=True)

# Load bands into RAM once so every sample is a cheap array slice
red, green, blue = ds_test.read(1), ds_test.read(2), ds_test.read(3)
n_rows, n_cols = red.shape
half = sz // 2
n_padded = 0  # samples whose window was not fully inside the raster

for _, sample in df_test.iterrows():
    # Positional unpack: relies on the column order ID, lat, lon
    ID, lat, lon = sample

    # Blank image; stays zero wherever the window falls outside the raster
    im = np.zeros((sz, sz, 3), np.uint8)

    # Get pixel coords
    # NOTE(review): passing (lon, lat) presumes the raster CRS is geographic - confirm
    row, col = ds_test.index(lon, lat)

    # Window of sz x sz pixels around (row, col), clipped to the raster extent.
    # Without clipping, a negative start index wraps around and an overshoot
    # truncates the slice, both of which break the assignment below.
    top, left = row - half, col - half
    r0, r1 = max(top, 0), min(top + sz, n_rows)
    c0, c1 = max(left, 0), min(left + sz, n_cols)

    # Add image data (only the part of the window that overlaps the raster)
    if r0 < r1 and c0 < c1:
        for i, band in enumerate([red, green, blue]):
            im[r0 - top:r1 - top, c0 - left:c1 - left, i] = band[r0:r1, c0:c1]
    if (r1 - r0, c1 - c0) != (sz, sz):
        n_padded += 1

    # Save image
    Image.fromarray(im).save(os.path.join(test_dir, f'{ID}.jpg'))

if n_padded:
    print(f'{n_padded} test image(s) were zero-padded at the raster border')

HBox(children=(FloatProgress(value=0.0, max=2613.0), HTML(value='')))


