# Data Generation

All images are taken from the [HiRISE Satellite](https://hirise.lpl.arizona.edu/). They can be programmatically downloaded using the script below. When run in full, the script can take over a day depending on your connection speed, and it will download approximately 50,000 images (64 GB).

The images are cut into 512x512 patches, and each patch is saved individually. Using the full dataset will generate 644,000 images (64 GB). The image patches are then resized and saved at sizes 8x8, 16x16, 32x32, 64x64, 128x128 and 256x256. This resizing step is optional, but it speeds up training of a progressive GAN. The downscaled datasets range in size from 1.9 GB for the 8x8 dataset to 18 GB for the 256x256 dataset.

In [None]:
# Notebook setup: load the autoreload extension in mode 2 and render
# matplotlib figures inline in the notebook.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
from fastai import *
from fastai.vision import *
import requests
from bs4 import BeautifulSoup

In [None]:
# Destination folder for the downloaded full-size images (must already exist).
image_path = 'F:/Mars/full_size/' # path to save destination

# Root of the HiRISE PDS directory listing that the download cell walks.
url = 'https://hirise-pds.lpl.arizona.edu/PDS/EXTRAS/RDR/ESP/'

# A timeout keeps the notebook from hanging forever on a stalled connection,
# and raise_for_status() stops here on an HTTP error instead of silently
# parsing an error page as an empty listing.
r = requests.get(url, timeout=60)
r.raise_for_status()
html = BeautifulSoup(r.content, "html.parser")
a = html.find_all('a') # anchor elements are all folders

In [None]:
# Script to download all images.
#
# The server is walked as a three-level HTML directory listing:
#   <url>/<ORB folder>/<ESP sub-folder>/<files>
# and from every ESP sub-folder the first file whose name contains
# '_RGB.NOMAP.browse.jpg' is saved into `image_path`.
#
# To download only part of the archive, iterate over a slice of `a`
# (e.g. `a[:10]`) in the outer loop instead of the whole list.
request_timeout = 60  # seconds; avoids hanging forever on a stalled connection

for o_folder in a:
    folder = o_folder.text
    if 'ORB' not in folder:
        continue
    print(folder)
    folder_url = url + folder

    r2 = requests.get(folder_url, timeout=request_timeout)
    html2 = BeautifulSoup(r2.content, "html.parser")
    a2 = html2.find_all('a')

    for e_folder in a2:
        sub_folder = e_folder.text
        if 'ESP' not in sub_folder:
            continue
        print(sub_folder)
        sub_folder_url = folder_url + sub_folder

        r3 = requests.get(sub_folder_url, timeout=request_timeout)
        html3 = BeautifulSoup(r3.content, "html.parser")
        a3 = html3.find_all('a')

        # First matching link, or None when the sub-folder has no RGB browse image.
        file_lr = next((i.text for i in a3 if '_RGB.NOMAP.browse.jpg' in i.text), None)
        if file_lr is None:
            print('\tfailed to find filename')
            continue

        # Stream the file straight to disk without going through a shell, so
        # paths containing spaces work and an HTTP error is reported instead of
        # being saved as a bogus .jpg. Failures are best-effort: report and move
        # on to the next sub-folder. A partially written file may remain if the
        # transfer fails midway.
        try:
            with requests.get(sub_folder_url + file_lr, stream=True, timeout=request_timeout) as resp:
                resp.raise_for_status()
                with open(image_path + file_lr, 'wb') as out_file:
                    for chunk in resp.iter_content(chunk_size=1 << 20):
                        out_file.write(chunk)
        except (requests.RequestException, OSError) as err:
            print(f'\tfailed to download {file_lr}: {err}')

In [None]:
# Root folder of the dataset; the cells below read and write sub-folders of it.
# NOTE(review): the download cell saves to 'F:/Mars/full_size/' while this reads
# from 'G:/Mars/full_size' — confirm the drive letters are intentionally different.
path = Path('G:/Mars/')
large_fnames = os.listdir(path/'full_size')  # names of every entry in the full-size image folder

In [None]:
# Cut every full-size image into non-overlapping 512x512 patches and save each
# patch as '<image id>_<row>_<col>.jpg' in the 'image_patches' folder.
patch_size = 512
patch_dir = path/'image_patches'
patch_dir.mkdir(parents=True, exist_ok=True)  # make sure the output folder exists

for f, fname in enumerate(large_fnames):

    # progress indicator
    if f % 1000 == 0:
        print(f)

    # the observation id is everything before '_RGB' in the file name
    im_id = fname.split('_RGB')[0]

    im = open_image(path/'full_size'/fname)
    im = im.rotate(180) # rotation helps avoid single color channel artifacts at the top of many images
    data = im.data  # fastai image tensor, laid out as (channels, height, width)

    # Both spatial dimensions must hold at least one full patch; checking only
    # the width would let unfold() fail on an image that is short but wide.
    if min(data.shape[1], data.shape[2]) < patch_size:
        print('thin image') # some images are less than 512 on the thinnest side - these are discarded
        continue

    # Result shape: (channels, n_rows, n_cols, patch_size, patch_size); any
    # partial patch at the bottom/right edge is dropped by unfold().
    patches = data.unfold(1, patch_size, patch_size).unfold(2, patch_size, patch_size)

    for i in range(patches.shape[1]):
        for j in range(patches.shape[2]):
            patch_fname = im_id + '_' + str(i) + '_' + str(j) + '.jpg'
            # scale the patch by 255 and convert to an 8-bit array for PIL
            pil_im = PIL.Image.fromarray(image2np(patches[:, i, j, :, :]*255).astype('uint8'))
            pil_im.save(patch_dir/patch_fname, quality=95)

In [None]:
# Names of every entry in the 'image_patches' folder; the resize cell iterates over these.
patch_fnames = os.listdir(path/'image_patches')

In [None]:
# Write a downscaled copy of every patch at each target size, largest first.
# Each size goes to its own folder ('patches_256' ... 'patches_8') and keeps
# the patch's original file name.
target_sizes = (256, 128, 64, 32, 16, 8)

for idx, filename in enumerate(patch_fnames):
    # progress indicator
    if idx % 1000 == 0:
        print(idx)

    for side in target_sizes:
        # The patch is opened afresh for every size rather than reused
        # (presumably because fastai's resize modifies the image object — confirm).
        resized = open_image(path/'image_patches'/filename).resize((3, side, side))
        pixels = image2np(resized.data*255).astype(np.uint8)
        PIL.Image.fromarray(pixels).save(path/f'patches_{side}'/filename, quality=95)
    