In [1]:
# Google Drive file ID of the image archive; passed to get_file() as the 'id' query parameter
file_id = '14kkcuU6wd9UMvjaDrg3PNI-e_voCi8HL'
# Local filename the archive is downloaded to and later unzipped from
nwpu_images_path = 'NWPU_images.zip'

#### Download NWPU-RESISC45 satellite imagery (Google Drive)
Warning: this downloads a 405 MB archive. Only the images of lakes are kept, and of those only the ones that already have labels.

In [2]:
import requests

# From Part_1_GettingStarted.ipynb
# Which is in turn from https://stackoverflow.com/questions/38511444/python-download-files-from-google-drive-using-url
def get_file(id, destination):
    """Download a file from Google Drive and save it to disk.

    Parameters
    ----------
    id : str
        Google Drive file ID, sent as the ``id`` query parameter. (The name
        shadows the built-in ``id``; it is kept so existing callers that pass
        it by keyword continue to work.)
    destination : str
        Path of the local file to write. It is opened in binary write mode,
        so an existing file is overwritten.

    Raises
    ------
    requests.HTTPError
        If the final response has an error status. The check happens before
        ``destination`` is opened, so an error page is never saved in place
        of the requested file.
    """
    def get_confirm_token(response):
        """Return the value of the first 'download_warning*' cookie, or None."""
        # NOTE(review): this cookie-based confirmation comes from the
        # StackOverflow answer referenced above; verify it still matches
        # what Google Drive currently sends for large files.
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value

        return None

    def save_response_content(response, destination):
        """Stream the response body to `destination` in fixed-size chunks."""
        CHUNK_SIZE = 32768

        with open(destination, 'wb') as f:
            for chunk in response.iter_content(CHUNK_SIZE):
                if chunk: # Filter out keep-alive new chunks
                    f.write(chunk)

    URL = 'https://docs.google.com/uc?export=download'

    # The context manager releases the session's pooled connections on exit,
    # including when the download fails part-way through.
    with requests.Session() as session:
        response = session.get(URL, params = { 'id' : id }, stream = True)
        token = get_confirm_token(response)

        if token:
            # The first response only carried the confirmation cookie;
            # release it before repeating the request with the token.
            response.close()
            params = { 'id' : id, 'confirm' : token }
            response = session.get(URL, params = params, stream = True)

        try:
            # Fail loudly on 4xx/5xx instead of writing the error body to disk.
            response.raise_for_status()
            save_response_content(response, destination)
        finally:
            response.close()

In [6]:
import zipfile

def unzip(path, destination=None):
    """Extract every member of a zip archive.

    Parameters
    ----------
    path : str
        Path of the zip archive to read.
    destination : str, optional
        Directory to extract into. The default, ``None``, extracts into the
        current working directory, which is what this function always did
        before the parameter existed.
    """
    with zipfile.ZipFile(path, 'r') as zip_ref:
        zip_ref.extractall(path=destination)

In [4]:
# Download the archive to nwpu_images_path. get_file() has no "already downloaded"
# check, so re-running this cell downloads the whole file again.
get_file(file_id, nwpu_images_path)

In [7]:
# Extract the downloaded archive into the current working directory
unzip(nwpu_images_path)

In [8]:
import shutil, os

# Rename folder for clarity. Only attempted when the extracted 'images' folder
# is present and the new name is free, so re-running the cell is a clean no-op
# instead of an error that gets silently swallowed.
if os.path.isdir('images') and not os.path.exists('nwpu_images'):
    os.rename('images', 'nwpu_images')

lake_dir = 'nwpu_images' + os.sep + 'lake'
data_dir = 'nwpu_images' + os.sep + 'data'

if os.path.isdir(lake_dir):
    # Remove everything that isn't a picture of a lake.
    # This must only run while the 'lake' folder still exists: once it has been
    # renamed to 'data', the path 'nwpu_images/data' no longer contains 'lake'
    # and an unguarded re-run would delete the lake images themselves.
    non_lake_subdirs = [s for s in [x[0] for x in os.walk('nwpu_images')][1:] if 'lake' not in s]
    for subdir in non_lake_subdirs:
        shutil.rmtree(subdir, ignore_errors=True)

    # Rename lake subdir to 'data'
    os.rename(lake_dir, data_dir)
elif not os.path.isdir(data_dir):
    # Neither the fresh layout nor the already-processed one is present.
    raise FileNotFoundError(
        "Neither '{}' nor '{}' exists; download and unzip the archive first".format(lake_dir, data_dir))

#### Create labels
This is a manual process, and must be done through [makesense.io](https://makesense.io). We'll use labels that were already created and are included in the repository.

#### Rename, segment into folders (training, validation, test)

In [9]:
import json
import os
import shutil

def move_images_from_polys(polys, folder_name, data_dir='nwpu_images' + os.sep + 'data'):
    """Move labelled images into a subfolder of the data directory.

    Parameters
    ----------
    polys : iterable of str
        Image filenames to move. Iterating a dict yields its keys, so a label
        dict keyed by filename can be passed directly.
    folder_name : str
        Name of the subfolder of ``data_dir`` to move the images into. It is
        created if it does not exist yet.
    data_dir : str, optional
        Folder that currently holds the images. Defaults to the
        ``nwpu_images/data`` folder this function always used.

    Raises
    ------
    FileNotFoundError
        If an image is present neither in ``data_dir`` nor in the target
        subfolder.
    """
    target_dir = data_dir + os.sep + folder_name
    os.makedirs(target_dir, exist_ok=True)

    for filename in polys:
        source = data_dir + os.sep + filename
        target = target_dir + os.sep + filename
        if not os.path.exists(source) and os.path.exists(target):
            # Already moved by an earlier run; nothing left to do for this file.
            continue
        shutil.move(source, target)

In [10]:
def _load_polys(json_name):
    """Load one label file from the nwpu_labels folder, closing the file afterwards."""
    with open('nwpu_labels' + os.sep + json_name) as label_file:
        return json.load(label_file)

training_polys = _load_polys('nwpu_lakes_30samples.json')
validation_polys = _load_polys('nwpu_lakes_20samplesA.json')
test_polys = _load_polys('nwpu_lakes_20samplesB.json')

for split_name, split_polys in (
        ('training', training_polys),
        ('validation', validation_polys),
        ('test', test_polys)):
    # exist_ok keeps a re-run from aborting the remaining splits just because
    # the first folder is already there.
    os.makedirs('nwpu_images' + os.sep + 'data' + os.sep + split_name, exist_ok=True)
    try:
        move_images_from_polys(split_polys, split_name)
    except OSError as err:
        # Still best-effort (e.g. the images were already moved by an earlier
        # run), but report the failure instead of hiding it.
        print('Could not move all {} images: {}'.format(split_name, err))

In [11]:
# Any .jpg still sitting directly in the data folder was not moved into a
# split subfolder, so delete it.
unlabeled_dir = 'nwpu_images' + os.sep + 'data'
for filename in os.listdir(unlabeled_dir):
    if not filename.endswith('.jpg'):
        continue
    os.remove(unlabeled_dir + os.sep + filename)

#### Make mask images

In [52]:
def get_data(poly):
    """Collect the polygon vertex coordinates of one labelled image.

    Parameters
    ----------
    poly : dict
        Label entry whose ``'regions'`` mapping holds one item per polygon,
        each with ``'shape_attributes'`` containing ``'all_points_x'`` and
        ``'all_points_y'``.

    Returns
    -------
    tuple
        ``(Y, X)``: the per-polygon ``all_points_y`` values first, then the
        per-polygon ``all_points_x`` values, both in region order.
    """
    shapes = [region['shape_attributes'] for region in poly['regions'].values()]
    xs = [shape['all_points_x'] for shape in shapes]
    ys = [shape['all_points_y'] for shape in shapes]
    # JSON coordinates are flipped relative to image, hence y before x
    return ys, xs

In [78]:
from PIL import Image, ImageDraw
import matplotlib
import matplotlib.image
import numpy as np

def write_mask(image_name, poly, image, folder_name):
    """Rasterise the labelled polygons of one image into a mask and save it.

    Parameters
    ----------
    image_name : str
        Used to build the output filename, ``<image_name>_mask.jpg``.
    poly : dict
        Label entry for the image, in the form accepted by ``get_data``.
    image : array-like
        Three-dimensional image array; only its shape is read here.
        # assumes the first two axes are the image's spatial axes - TODO confirm
    folder_name : str
        Subfolder of ``nwpu_label_imagery/data`` to write the mask into.
    """
    # get_data() returns (Y, X); unpacking that as X, Y means each polygon is
    # drawn with its coordinates swapped and then flipped back further down.
    X, Y = get_data(poly)

    # Get image dimensions
    nx, ny, nz = np.shape(image)
    mask = np.zeros((ny, nx))

    for x, y in zip(X, Y):
        # Interweave xs and ys
        polygon = np.vstack((x, y)).reshape((-1,), order='F').tolist()

        # Create mask image based on polygon. (ny, nx) is the same tuple as
        # (nx, ny) whenever nx == ny, so one size covers both the square and
        # the non-square case.
        img = Image.new('L', (ny, nx), 0)

        ImageDraw.Draw(img).polygon(polygon, outline=1, fill=1)

        # Turn into numpy array; rot90 followed by flipud amounts to a transpose
        m = np.flipud(np.rot90(np.array(img)))
        try:
            # Masks are summed, so pixels covered by several polygons exceed 1
            mask = mask + m
        except ValueError:
            # Shapes could not be broadcast together: use the other orientation
            mask = mask + m.T

    # NOTE(review): imsave on a 2-D array goes through matplotlib's colormap and
    # JPEG is lossy - confirm that whatever reads these masks expects that.
    matplotlib.image.imsave(
        'nwpu_label_imagery' + os.sep + 'data' + os.sep + folder_name + os.sep + image_name + '_mask.jpg',
        mask.astype('uint8'))

In [80]:
def write_masks(polys, images, folder_name):
    """Write one mask file per labelled image.

    Parameters
    ----------
    polys : dict
        Maps image name to that image's label entry.
    images : dict
        Maps image name to the loaded image array; must contain every key of
        ``polys``.
    folder_name : str
        Passed through to ``write_mask`` for every image.
    """
    for name, label in polys.items():
        write_mask(name, label, images[name], folder_name)

In [70]:
import rasterio

def get_images(image_names, folder_name):
    """Read the named images from one subfolder of ``nwpu_images/data``.

    Parameters
    ----------
    image_names : iterable of str
        Filenames to open; iterating a dict yields its keys.
    folder_name : str
        Subfolder of ``nwpu_images/data`` that holds the files.

    Returns
    -------
    dict
        Maps each filename to the transpose of the array returned by
        ``dataset.read()``.
        # presumably (bands, rows, cols) becomes (cols, rows, bands) - confirm against rasterio
    """
    loaded = {}
    for name in image_names:
        image_path = os.sep.join(('nwpu_images', 'data', folder_name, name))
        with rasterio.open(image_path) as source:
            pixels = source.read()
            loaded[name] = pixels.T
    return loaded

In [81]:
# Create the output folder tree. exist_ok makes this safe to re-run and also
# fills in any subfolder missing from a partially created tree, which the
# previous chained mkdir calls inside a bare except silently left broken.
for split_name in ('training', 'validation', 'test'):
    os.makedirs('nwpu_label_imagery' + os.sep + 'data' + os.sep + split_name, exist_ok=True)

# Load the images of each split, keyed by filename
training_images = get_images(training_polys, 'training')
validation_images = get_images(validation_polys, 'validation')
test_images = get_images(test_polys, 'test')

# Write one mask file per labelled image
write_masks(training_polys, training_images, 'training')
write_masks(validation_polys, validation_images, 'validation')
write_masks(test_polys, test_images, 'test')