In [8]:
import rasterio as rio
from rasterio.plot import show
from rasterio.mask import mask

import geopandas as gpd

import shapely as shp

import rioxarray

import gzip

import pandas as pd
import json
import tarfile
import wget
import os
import matplotlib.pyplot as plt

### Load Index CSV

In [9]:
index = pd.read_csv('../data/arcticdem_index_intersection.csv')
index = index.iloc[:, 1:]

index = index[['name', 'raster', 'fileurl', 'minx', 'maxx', 'miny', 'maxy']]

In [10]:
index.head()

Unnamed: 0,name,raster,fileurl,minx,maxx,miny,maxy
0,SETSM_GE01_20120820_1050410003FF2E00_105041000...,2018-04-12,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2029284.0,-2008648.0,634174.0,653348.0
1,SETSM_GE01_20120812_10504100007CE100_105041000...,2017-09-29,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2087418.0,-2049076.0,658334.0,683876.0
2,SETSM_GE01_20120820_1050410003FF2E00_105041000...,2018-04-12,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2016832.0,-2005532.0,631818.0,649366.0
3,SETSM_GE01_20120812_10504100007A7300_105041000...,2017-09-29,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2086408.0,-2047890.0,661454.0,686714.0
4,SETSM_GE01_20120813_1050410000870B00_105041000...,2017-09-29,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2087612.0,-2047606.0,659384.0,686590.0


### Filter for 2017

#### Change 'raster' column to datetime

In [4]:
index['raster'] = pd.to_datetime(index['raster'], format='%Y-%m-%d')

In [5]:
index_2017 = index.loc[(index['raster'] > '2016-12-31') & (index['raster'] < '2018-01-01')]
index_2017 = index_2017.reset_index(drop=True)

In [11]:
index_2017.head()

Unnamed: 0,name,raster,fileurl,minx,maxx,miny,maxy
0,SETSM_GE01_20120812_10504100007CE100_105041000...,2017-09-29,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2087418.0,-2049076.0,658334.0,683876.0
1,SETSM_GE01_20120812_10504100007A7300_105041000...,2017-09-29,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2086408.0,-2047890.0,661454.0,686714.0
2,SETSM_GE01_20120813_1050410000870B00_105041000...,2017-09-29,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2087612.0,-2047606.0,659384.0,686590.0
3,SETSM_W1W1_20150924_10200100430C5200_102001004...,2017-12-26,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2056290.0,-2041814.0,662696.0,670774.0
4,SETSM_W1W1_20080409_1020010001144900_102001000...,2017-12-26,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2138483.0,-2124226.0,695532.0,705274.0


### Download Data

**Note that the data can be opened remotely, without downloading. See the cells below.**

The files are stored remotely in .tar.gz format, they must be downloaded first before they can be opened and GeoTIFF data extracted.

In [7]:
def download_file(row):
    fileurl = row['fileurl']
    name = row['name']
    
    file_dir = os.path.join('../data/rasters/', name)
    if not os.path.exists(file_dir):
        wget.download(fileurl, file_dir)
    

In [8]:
download_file(index_2017.iloc[1])

In [9]:
url = index_2017.iloc[0]['fileurl']
data = index_2017.iloc[0]
print(data)

src = rio.open('../data/rasters/SETSM_GE01_20120812_10504100007A7300_1050410000751500_seg1_2m_v3/SETSM_GE01_20120812_10504100007A7300_1050410000751500_seg1_2m_v3.0_dem.tif')
bbox = rio.coords.BoundingBox(data['minx'], data['miny'], data['maxx'], data['maxy'])
# show(src)
print('Raster:', src.bounds)
print('CSV:', bbox)

name       SETSM_GE01_20120812_10504100007CE100_105041000...
raster                                   2017-09-29 00:00:00
fileurl    http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...
minx                                            -2.08742e+06
maxx                                            -2.04908e+06
miny                                                  658334
maxy                                                  683876
Name: 0, dtype: object
Raster: BoundingBox(left=-2086408.0, bottom=661452.0, right=-2047888.0, top=686714.0)
CSV: BoundingBox(left=-2087418.0, bottom=658334.0, right=-2049076.0, top=683876.0)


### Open file from ArcticDEM server, mask, and save it

See Rasterio Documentation: https://rasterio.readthedocs.io/en/latest/topics/datasets.html


**NOTE: Formats can be combined (ex. a tar.gz file from a remote server) by adding a '+' character.** 

URL Format: **'tar' + url_to_arctic_dem + '!' + filename**


In [None]:
data = index_2017.iloc[0]
url = data['fileurl']
rio_url = 'tar+' + url + '!' + data['name'] + '_dem.tif'
src = rio.open(rio_url)


xmin, xmax = data['minx'], data['maxx']
ymin, ymax = data['miny'], data['maxy']
bbox = shapely.geometry.box(xmin, ymin, xmax, ymax)
geo = gpd.GeoDataFrame({'geometry': bbox}, index=[0], crs=src.crs)
geo = [json.loads(geo.to_json())['features'][0]['geometry']]


out_img, out_transform = mask(src, shapes=geo, crop=True)

out_meta = src.meta.copy()
out_meta.update({'driver':'GTiff',
                 'height': out_img.shape[1],
                 'width': out_img.shape[2],
                 'transform': out_transform,
                 'crs': src.crs
                })

with rio.open('./test.tif', 'w', **out_meta) as dst:
    dst.write(out_img)

### Notes:

1. **Aspect could be a useful parameter, they show interesting behavior at pingo sites.**
2. Slope could be a useful parameter
3. 

### Find overlap of all 2017 rasters

If the rasters overlap, then we will consider them grouped, and combine them with a mosaic operation later. 

Otherwise they will be in their own group / raster.

We will use the bounding box of the rasters (minx, maxx, miny, maxy) to find the overlap.

In [13]:
index_2017

Unnamed: 0,name,raster,fileurl,minx,maxx,miny,maxy
0,SETSM_GE01_20120812_10504100007CE100_105041000...,2017-09-29,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2.087418e+06,-2049076.0,658334.000000,683876.000000
1,SETSM_GE01_20120812_10504100007A7300_105041000...,2017-09-29,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2.086408e+06,-2047890.0,661454.000000,686714.000000
2,SETSM_GE01_20120813_1050410000870B00_105041000...,2017-09-29,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2.087612e+06,-2047606.0,659384.000000,686590.000000
3,SETSM_W1W1_20150924_10200100430C5200_102001004...,2017-12-26,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2.056290e+06,-2041814.0,662696.000000,670774.000000
4,SETSM_W1W1_20080409_1020010001144900_102001000...,2017-12-26,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2.138483e+06,-2124226.0,695532.000000,705274.000000
...,...,...,...,...,...,...,...
460,SETSM_WV03_20170617_104001002F43DB00_104001002...,2017-12-26,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2.124364e+06,-2108836.0,654464.000000,673376.000000
461,SETSM_WV03_20160830_1040010021B1EE00_104001002...,2017-09-29,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2.138483e+06,-2136288.0,682209.615846,689474.635247
462,SETSM_WV03_20150513_104001000BC50400_104001000...,2017-09-29,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2.138483e+06,-2092322.0,662130.000000,689049.191298
463,SETSM_WV03_20161018_1040010025403900_104001002...,2017-09-29,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2.138483e+06,-2136704.0,682577.660839,689486.199277


### Function to create RasterIO BoundingBoxes for each Raster

In [84]:
def create_boundingbox(raster):
    raster['bbox'] = rio.coords.BoundingBox(raster['minx'], raster['miny'], raster['maxx'], raster['maxy'])
    return raster

In [90]:
index_2017 = index_2017.apply(create_boundingbox, axis='columns')

In [91]:
def find_overlaps(raster):
    raster['overlap'] = []
    
    # Check all other rasters
    for index, row in index_2017.iterrows():
        if not rio.coords.disjoint_bounds(raster['bbox'], row['bbox']):  # then they are overlapping
            raster['overlap'].append(row['name'])
            
    raster['overlap'] = pd.Series(raster['overlap'])
    return raster

In [93]:
index_2017 = index_2017.apply(find_overlaps, axis='columns')


0      0     SETSM_GE01_20120812_10504100007CE100_105...
1      0     SETSM_GE01_20120812_10504100007CE100_105...
2      0     SETSM_GE01_20120812_10504100007CE100_105...
3      0     SETSM_GE01_20120812_10504100007CE100_105...
4      0     SETSM_W1W1_20080409_1020010001144900_102...
                             ...                        
460    0     SETSM_WV01_20150412_102001003C896700_102...
461    0     SETSM_WV01_20131014_102001002596D400_102...
462    0     SETSM_WV01_20131014_102001002596D400_102...
463    0     SETSM_WV01_20131014_102001002596D400_102...
464    0     SETSM_WV01_20131014_102001002596D400_102...
Name: overlap, Length: 465, dtype: object

### Get Amount of Overlap for Each Raster

In [98]:
index_2017['num_overlap'] = index_2017['overlap'].apply(lambda x: len(x))

In [102]:
print('Max Overlap:', index_2017['num_overlap'].max())
print('Min Overlap:', index_2017['num_overlap'].min())
print('Mean Overlap:', index_2017['num_overlap'].mean())

Max Overlap: 115
Min Overlap: 5
Mean Overlap: 49.589247311827954


This means the average mosaic will be composed of 50 tiles ideally.

## Check for overlap chains