# Initialization

In [1]:
from PIL import Image
from wand.image import Image as WandImage
import glob, os, json, io, shutil, numpy, re, sys
import getexifdata as Exif

# Root directory that contains one sub-directory per place.
DIR_PLACES = "../s3/"
# Place directories whose path begins with this prefix are hidden from the user.
DIR_PREFIX_IGNORE = f"{DIR_PLACES}_"
# Per-place sub-directories (all relative to the place directory).
DIR_GEODAT = "geojson/"
DIR_AUD = "aud/"
DIR_IMG_ORIG = "imgOrig/"
# Full-sized processed images.
DIR_IMG_LG = "imgLg/"
# Smaller processed images (load faster; the client links them to the full-sized ones).
DIR_IMG_THUMBNAIL = "imgSm/"
# Images that were expected to carry coordinates but for which none could be found.
DIR_IMG_ERR = "imgErr/"

# Filename prefix for images that are processed without revealing their coordinates.
IMG_PREFIX_NOMARKER = "nomarker-"

# File extensions (leading dot included).
IMG_FORMAT = ".jpg"
AUD_FORMAT = ".mp3"

# JSON file names: per-place template, per-place output, and the summary of all places.
INPUT_JSON = "info_template.json"
OUTPUT_JSON = "info.json"
SMRY_JSON = "all_rivers.json"

# Thumbnail height in pixels. This should match $size-thumbnail in Map.scss.
SIZE_IMG_THUMBNAIL = 400

# Image filenames that do NOT start with one of these prefixes are used as a marker label.
# '201' is included because some system-generated filenames begin with the year.
BLACKLIST_FLABEL_PREFIX = [
    "IMG",
    "MVIMG",
    "PANO",
    "201",
]

def valid_place(path):
    """Return True unless *path* begins with the ignored-directory prefix."""
    is_hidden = path.startswith(DIR_PREFIX_IGNORE)
    return not is_hidden
    
def show_progress():
    """Create a printer that keeps rewriting the same console output line.

    Returns:
        A function ``out(prefix, n0=None, n1=None)``. When both numbers are
        omitted it prints *prefix* alone; otherwise it prints the prefix
        followed by the two values formatted as ``[N%]``.
    """
    last_width = 0  # length of the previously printed text, needed to blank it out

    def out(prefix, n0=None, n1=None):
        nonlocal last_width
        if n0 is None and n1 is None:
            text = prefix
        else:
            first = '[%d%%]' % n0
            second = '[%d%%]' % n1
            text = f"{prefix} {first} {second}"
        stream = sys.stdout
        # Overwrite the previous text with spaces, then return to column 0.
        stream.write("\r{}\r".format(" " * last_width))
        stream.write(text)
        stream.flush()
        last_width = len(text)

    return out
    
def init_dir(path):
    """Leave *path* as a freshly created directory, removing an existing one first."""
    stale = os.path.isdir(path)
    if stale:
        shutil.rmtree(path)
    os.makedirs(path)
    
def ensure_audio_img_match(sounds, images):
    """Sanity check: every audio file must have an image with the same label.

    Args:
        sounds: paths of the audio files.
        images: paths of the image files.

    Raises:
        FileNotFoundError: if any audio label has no matching image label;
            the message lists the offending labels.
    """
    # Build the image labels once; the original recomputed them for every sound.
    image_labels = set(flabels(images))
    missing = [sound for sound in flabels(sounds) if sound not in image_labels]
    if missing:
        raise FileNotFoundError(f"The following sounds need corresponding images: {missing}")

def flabels(ls):
    """Return the label of every path in *ls*, preserving order."""
    return list(map(get_flabel, ls))
        
def get_flabel(fpath):
    """Return the filename of *fpath* without its extension.

    This may be shown as the label of an image if the user renamed the file.
    Only the last extension is removed, so a hand-written name that contains
    dots ("Upper.Falls.jpg" -> "Upper.Falls") is no longer cut at the first dot.
    """
    fname = get_fname(fpath)
    stem, dot, _ext = fname.rpartition(".")
    # No dot at all means there is no extension to strip.
    return stem if dot else fname

def get_fname(fpath):
    """Return everything after the last "/" in *fpath* (the whole string if there is none)."""
    _head, _sep, tail = fpath.rpartition("/")
    return tail
    
def find_geodat(base_path):
    """List the *.geojson files of a place, as paths relative to its geojson directory."""
    pattern = base_path + DIR_GEODAT + "*.geojson"
    marker = "/" + DIR_GEODAT
    return [strip_dir(match, marker) for match in glob.glob(pattern)]

def strip_dir(fpath, dir_):
    """Return the part of *fpath* that follows the first occurrence of *dir_*.

    Raises:
        ValueError: if *dir_* does not occur in *fpath*.
    """
    found_at = fpath.index(dir_)
    return fpath[found_at + len(dir_):]
    
def format_extension(fname, img_format=None):
    """Return *fname* with its final extension swapped for the output image extension.

    Only the last extension is replaced. The previous implementation used
    ``str.replace``, which also rewrote the same text anywhere else in the
    name (e.g. "a.png.png" became "a.jpg.jpg").

    Args:
        fname: filename, with or without an extension.
        img_format: extension to apply, leading dot included. Defaults to
            the module-level IMG_FORMAT.

    Returns:
        The renamed file; a name containing no dot is returned unchanged.
    """
    if img_format is None:
        img_format = IMG_FORMAT
    stem, dot, _old_ext = fname.rpartition(".")
    if not dot:
        return fname
    return stem + img_format
    
def dont_create_marker_for_fpath(fpath):
    """Tell whether the file's name carries the "no marker" prefix."""
    fname = get_fname(fpath)
    return fname.startswith(IMG_PREFIX_NOMARKER)

def write_images(base_path, fpath, success):
    """Write the processed images when *success* is truthy, else file the image as failed."""
    writer = write_oriented_images if success else write_failed_image
    writer(base_path, fpath)

def write_oriented_images(base_path, fpath):
    """Save the large and thumbnail versions of an image, oriented correctly.

    Args:
        base_path: place directory (with trailing slash) that holds the
            large-image and thumbnail output directories.
        fpath: path of the original image.
    """
    im = orient_image(fpath)
    fname = format_extension(get_fname(fpath))

    # Pillow's JPEG writer rejects modes such as RGBA, LA and P with an OSError
    # (typical for PNG sources), so flatten those to RGB before saving as JPEG.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    if fname.lower().endswith((".jpg", ".jpeg")) and im.mode not in jpeg_modes:
        im = im.convert("RGB")

    im.save(base_path + DIR_IMG_LG + fname)
    write_thumbnail_image(base_path, fname, im)

def orient_image(fpath):
    """Open the image at *fpath* and rotate it according to its EXIF 'Orientation'.

    Orientation 3 rotates by 180, 6 by 270 and 8 by 90 degrees; any other
    value (or a missing tag) leaves the image as opened.
    """
    im = get_im(fpath)
    orientation = Exif.get_exif_data(im).get('Orientation')

    for code, degrees in ((3, 180), (6, 270), (8, 90)):
        if orientation == code:
            im = im.rotate(degrees, expand=True)
            break

    return im
    
def get_im(fpath):
    """Open *fpath* as a PIL image, falling back to the Wand-based converter.

    The fallback is still best-effort for any ordinary error raised by PIL,
    but KeyboardInterrupt and SystemExit are no longer swallowed as they
    were by the previous bare ``except:``.
    """
    try:
        return Image.open(fpath)
    except Exception:
        return as_pil_image(fpath)
    
def as_pil_image(fpath):
    """Convert an image PIL could not open (e.g. HEIF) to a PIL image.

    The file is read with Wand, re-encoded as PNG in memory and that PNG is
    opened with PIL.
    """
    # The context manager releases the Wand image once the PNG bytes exist.
    with WandImage(filename=fpath) as wand_im:
        png_blob = wand_im.make_blob(format='png')
    # BytesIO accepts the blob directly; no numpy round-trip is needed.
    return Image.open(io.BytesIO(png_blob))
    
def write_thumbnail_image(base_path, fname, im):
    """Shrink *im* to the thumbnail height and save it in the thumbnail directory.

    The requested width is scaled by the same factor as the height. Pillow's
    ``Image.thumbnail`` resizes in place, so *im* itself is modified.
    """
    height = im.size[1]
    scale = float(SIZE_IMG_THUMBNAIL) / height
    target_width = float(im.size[0]) * scale
    bounds = (target_width, SIZE_IMG_THUMBNAIL)
    im.thumbnail(bounds)
    im.save(base_path + DIR_IMG_THUMBNAIL + fname)
    
def write_failed_image(base_path, fpath):
    """Put a copy of the original image in the error directory under the same filename.

    The file is copied byte-for-byte rather than decoded and re-saved through
    PIL. Re-saving re-encodes the picture, discards its metadata, and fails
    outright for formats PIL cannot write (such as HEIF files that had to be
    opened through Wand).
    """
    shutil.copy(fpath, base_path + DIR_IMG_ERR + get_fname(fpath))

def verify_flabel(flabel, blacklist_prefixes=None):
    """Tell whether *flabel* looks like a hand-named filename usable as a label.

    The previous implementation returned True when the label DID start with
    a blacklisted prefix, so only system-generated names were accepted. The
    check is now negated to match the stated intent.

    Args:
        flabel: filename without extension.
        blacklist_prefixes: prefixes that mark system-generated filenames.
            Defaults to the module-level BLACKLIST_FLABEL_PREFIX.

    Returns:
        True if the label starts with none of the blacklisted prefixes.
    """
    if blacklist_prefixes is None:
        blacklist_prefixes = BLACKLIST_FLABEL_PREFIX
    # str.startswith accepts a tuple of candidates; an empty tuple never matches.
    return not flabel.startswith(tuple(blacklist_prefixes))
    
def write_json(path, data):
    """Serialize *data* as indented, key-sorted, non-ASCII-preserving JSON and write it to *path* as UTF-8."""
    dump_options = dict(indent=4, sort_keys=True, separators=(',', ':'), ensure_ascii=False)
    with io.open(path, 'w', encoding='utf8') as outfile:
        text = json.dumps(data, **dump_options)
        outfile.write(to_unicode(text))
        
def get_dir_name(base_path):
    """Return the second-to-last "/"-separated component of *base_path*.

    For a path with a trailing slash this is the name of the last directory.
    """
    tail_parts = base_path.rsplit("/", 2)
    return tail_parts[-2]
        
def pretty_dir_name(dirName):
    """Build the display name of a place: "_" becomes " " and every run of four digits is wrapped in parentheses."""
    spaced = dirName.replace("_", " ")
    return re.sub(r"\d{4}", lambda found: "(" + found.group(0) + ")", spaced)
    
# Text-conversion shim used by write_json: bind to_unicode to `unicode` when that
# name exists, otherwise (NameError) fall back to `str`.
# NOTE(review): presumably a Python 2/3 compatibility leftover - the file already
# relies on Python 3-only syntax (f-strings, nonlocal), so confirm whether it is still needed.
try:
    to_unicode = unicode
except NameError:
    to_unicode = str

# Run

In [2]:
# Every visible place directory under DIR_PLACES, e.g. ['../s3/Sacramento_River/']
placels = [x for x in glob.glob(DIR_PLACES + "*/") if valid_place(x)]
places = []

progress = show_progress()

# process each directory
for i0, base_path in enumerate(placels):
    # clear workspace (a plain loop, since init_dir is called only for its side effect)
    for d in (DIR_IMG_LG, DIR_IMG_THUMBNAIL, DIR_IMG_ERR):
        init_dir(base_path + d)

    sounds = glob.glob(base_path + DIR_AUD + "*" + AUD_FORMAT)
    images = glob.glob(base_path + DIR_IMG_ORIG + "*")

    # sanity check: throw error if an audio file doesn't have a corresponding image
    ensure_audio_img_match(sounds, images)

    # Audio labels are computed once per place instead of once per image.
    sound_labels = set(flabels(sounds))

    # read json template (as UTF-8, matching how write_json writes the result)
    with open(base_path + INPUT_JSON, encoding='utf8') as template:
        data = json.load(template)
    data["layers"] = find_geodat(base_path)

    for i1, fpath in enumerate(images):
        progress(f"{base_path} {get_fname(fpath)}", (100*(i0+1)/len(placels)), (100*(i1+1)/len(images)))

        if dont_create_marker_for_fpath(fpath):
            # processed, but no location entry is recorded for it
            write_images(base_path, fpath, True)
            continue

        exif = Exif.get_exif_data(get_im(fpath))
        lat, lng = Exif.get_lat_lng(exif)
        if lat is None or lng is None:
            write_images(base_path, fpath, False) # save to error images since we expected location data
            continue

        flabel = get_flabel(fpath)
        data["locations"].append({
            "loc": {"lat":lat, "lng":lng},
            "img": format_extension(get_fname(fpath)),
            "label": flabel if verify_flabel(flabel) else None,
            "date": Exif.get_date(exif),
            # AUD_FORMAT already starts with ".", so no extra dot is inserted
            # (the old f"{flabel}.{AUD_FORMAT}" produced "name..mp3").
            "aud": f"{flabel}{AUD_FORMAT}" if flabel in sound_labels else None
        })
        write_images(base_path, fpath, True)

    write_json(base_path + OUTPUT_JSON, data)

    # add to summary json
    dirName = get_dir_name(base_path)
    places.append({
        "id": dirName,
        "disp": pretty_dir_name(dirName),
        "local": data.get("local") or False # This flag tells the client to list this place only at localhost
    })

write_json(DIR_PLACES + SMRY_JSON, {"places":places})

progress("FINISHED")

FINISHED                                                                            