In [None]:
# Local filename under which the downloaded NWPU-RESISC45 archive is saved.
nwpu_images_path = 'NWPU_images.zip'
# Google Drive ID of that archive.
file_id = '14kkcuU6wd9UMvjaDrg3PNI-e_voCi8HL'

#### Download NWPU-RESISC45 satellite imagery (Google Drive)
**Warning:** this downloads about 405 MB. Only the lake images are used, and of those, only the ones that already have labels.

In [3]:
import requests

# From Part_1_GettingStarted.ipynb
# Which is in turn from https://stackoverflow.com/questions/38511444/python-download-files-from-google-drive-using-url
def get_file(id, destination):
    """Download a Google Drive file to a local path.

    The file is requested from Google Drive's download endpoint. If the first
    response carries a cookie whose name starts with ``download_warning``, the
    request is repeated with that cookie's value as the ``confirm`` parameter.
    The response body is then streamed to ``destination`` in chunks.

    Args:
        id: Google Drive file ID. (The name shadows the ``id`` builtin; it is
            kept so existing callers keep working.)
        destination: Path of the local file to write. An existing file is
            overwritten.

    Raises:
        requests.HTTPError: If either request comes back with an error status.
            The check happens before ``destination`` is opened, so an error
            page is never saved in place of the file.
    """
    URL = 'https://docs.google.com/uc?export=download'
    CHUNK_SIZE = 32768

    def get_confirm_token(response):
        """Return the value of the ``download_warning*`` cookie, or None."""
        for key, value in response.cookies.items():
            if key.startswith('download_warning'):
                return value

        return None

    def save_response_content(response, destination):
        """Stream the response body to ``destination`` chunk by chunk."""
        with open(destination, 'wb') as f:
            for chunk in response.iter_content(CHUNK_SIZE):
                if chunk:  # Filter out keep-alive new chunks
                    f.write(chunk)

    # The context manager closes the session (and its pooled connections)
    # even when a request or the file write fails.
    with requests.Session() as session:
        response = session.get(URL, params={'id': id}, stream=True)
        response.raise_for_status()
        token = get_confirm_token(response)

        if token:
            # Release the first streamed response before issuing the confirmed request.
            response.close()
            params = {'id': id, 'confirm': token}
            response = session.get(URL, params=params, stream=True)
            response.raise_for_status()

        try:
            save_response_content(response, destination)
        finally:
            response.close()

In [None]:
import zipfile

def unzip(path):
    """Extract every member of the zip archive at ``path`` into the current working directory.

    Args:
        path: Path to the zip archive to extract.
    """
    # Open the archive named by the ``path`` argument (the previous code
    # referenced an undefined name ``f`` here and raised NameError).
    with zipfile.ZipFile(path, 'r') as zip_ref:
        zip_ref.extractall()

In [None]:
import shutil, os

images_root = 'nwpu_images'
lake_dir = os.path.join(images_root, 'lake')
data_dir = os.path.join(images_root, 'data')

# Only do the work if a previous run has not already produced nwpu_images/data.
# Re-running the pruning step on an existing tree would treat 'data' as a
# non-lake folder and delete it.
if not os.path.isdir(data_dir):
    # Skip the large download when the extracted folder is already present.
    if not os.path.isdir(images_root):
        get_file(file_id, nwpu_images_path)
        unzip(nwpu_images_path)

        # Rename folder for clarity. A failure here is no longer swallowed:
        # without this folder none of the following steps can succeed.
        os.rename('images', images_root)

    # Remove everything that isn't a picture of a lake
    for entry in os.listdir(images_root):
        subdir = os.path.join(images_root, entry)
        if os.path.isdir(subdir) and entry != 'lake':
            shutil.rmtree(subdir, ignore_errors=True)

    # Rename lake subdir to 'data'
    os.rename(lake_dir, data_dir)

#### Create labels
Labeling is a manual process and must be done through [makesense.io](https://makesense.io). Here we use labels that were already created and are included in the repository (under `nwpu_labels/`).

#### Rename, segment into folders (training_images, validation_images, test_images)

In [36]:
import json
import os
import shutil

def move_images_from_polys(polys, folder_name, data_dir=None):
    """Move the images named in ``polys`` into a subfolder of the data directory.

    Args:
        polys: Iterable whose items are image filenames located directly in
            ``data_dir``. Iterating a dict yields its keys, so a label dict
            keyed by filename works — presumably that is what the label JSON
            files contain; verify against the files.
        folder_name: Name of the destination subfolder inside ``data_dir``.
            It is created if it does not exist yet.
        data_dir: Directory holding the images. Defaults to
            ``nwpu_images/data``.

    Raises:
        FileNotFoundError: If a listed image does not exist in ``data_dir``.
    """
    if data_dir is None:
        data_dir = os.path.join('nwpu_images', 'data')

    target_dir = os.path.join(data_dir, folder_name)
    # Create the destination up front; without it the first move would fail.
    os.makedirs(target_dir, exist_ok=True)

    for filename in polys:
        shutil.move(
            os.path.join(data_dir, filename),
            os.path.join(target_dir, filename))

In [37]:
def _load_polys(label_filename):
    """Load one label file from the nwpu_labels folder, closing the file afterwards."""
    with open(os.path.join('nwpu_labels', label_filename)) as label_file:
        return json.load(label_file)


training_polys = _load_polys('nwpu_lakes_30samples.json')
validation_polys = _load_polys('nwpu_lakes_20samplesA.json')
test_polys = _load_polys('nwpu_lakes_20samplesB.json')

data_dir = os.path.join('nwpu_images', 'data')

for polys, folder_name in (
        (training_polys, 'training'),
        (validation_polys, 'validation'),
        (test_polys, 'test')):
    split_dir = os.path.join(data_dir, folder_name)
    os.makedirs(split_dir, exist_ok=True)

    # Only move images that are not in their split folder yet, so the cell can
    # be re-run (or resumed after a partial run) without errors. Any other
    # failure, such as a labeled image missing from the data folder, is left
    # to raise: silently skipping it would let a later cleanup step delete
    # images that were never moved.
    pending = [filename for filename in polys
               if not os.path.exists(os.path.join(split_dir, filename))]
    move_images_from_polys(pending, folder_name)

In [42]:
# Delete every .jpg still sitting directly in the data folder.
data_dir = os.path.join('nwpu_images', 'data')
for filename in [entry for entry in os.listdir(data_dir) if entry.endswith('.jpg')]:
    os.remove(os.path.join(data_dir, filename))