In [8]:
import rasterio as rio
from rasterio.plot import show
from rasterio.mask import mask

import geopandas as gpd

import shapely as shp

import rioxarray

import gzip

import pandas as pd
import json
import tarfile
import wget
import os
import matplotlib.pyplot as plt

### Load Index CSV

In [9]:
index = pd.read_csv('../data/arcticdem_index_intersection.csv')
index = index.iloc[:, 1:]

index = index[['name', 'raster', 'fileurl', 'minx', 'maxx', 'miny', 'maxy']]

In [10]:
index.head()

Unnamed: 0,name,raster,fileurl,minx,maxx,miny,maxy
0,SETSM_GE01_20120820_1050410003FF2E00_105041000...,2018-04-12,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2029284.0,-2008648.0,634174.0,653348.0
1,SETSM_GE01_20120812_10504100007CE100_105041000...,2017-09-29,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2087418.0,-2049076.0,658334.0,683876.0
2,SETSM_GE01_20120820_1050410003FF2E00_105041000...,2018-04-12,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2016832.0,-2005532.0,631818.0,649366.0
3,SETSM_GE01_20120812_10504100007A7300_105041000...,2017-09-29,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2086408.0,-2047890.0,661454.0,686714.0
4,SETSM_GE01_20120813_1050410000870B00_105041000...,2017-09-29,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2087612.0,-2047606.0,659384.0,686590.0


### Filter for 2017

#### Change 'raster' column to datetime

In [4]:
index['raster'] = pd.to_datetime(index['raster'], format='%Y-%m-%d')

In [5]:
index_2017 = index.loc[(index['raster'] > '2016-12-31') & (index['raster'] < '2018-01-01')]
index_2017 = index_2017.reset_index(drop=True)

In [11]:
index_2017.head()

Unnamed: 0,name,raster,fileurl,minx,maxx,miny,maxy
0,SETSM_GE01_20120812_10504100007CE100_105041000...,2017-09-29,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2087418.0,-2049076.0,658334.0,683876.0
1,SETSM_GE01_20120812_10504100007A7300_105041000...,2017-09-29,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2086408.0,-2047890.0,661454.0,686714.0
2,SETSM_GE01_20120813_1050410000870B00_105041000...,2017-09-29,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2087612.0,-2047606.0,659384.0,686590.0
3,SETSM_W1W1_20150924_10200100430C5200_102001004...,2017-12-26,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2056290.0,-2041814.0,662696.0,670774.0
4,SETSM_W1W1_20080409_1020010001144900_102001000...,2017-12-26,http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...,-2138483.0,-2124226.0,695532.0,705274.0


### Download Data

**Note that the data can be opened remotely, without downloading. See the cells below.**

The files are stored remotely in .tar.gz format, they must be downloaded first before they can be opened and GeoTIFF data extracted.

In [7]:
def download_file(row):
    fileurl = row['fileurl']
    name = row['name']
    
    file_dir = os.path.join('../data/rasters/', name)
    if not os.path.exists(file_dir):
        wget.download(fileurl, file_dir)
    

In [8]:
download_file(index_2017.iloc[1])

In [9]:
url = index_2017.iloc[0]['fileurl']
data = index_2017.iloc[0]
print(data)

src = rio.open('../data/rasters/SETSM_GE01_20120812_10504100007A7300_1050410000751500_seg1_2m_v3/SETSM_GE01_20120812_10504100007A7300_1050410000751500_seg1_2m_v3.0_dem.tif')
bbox = rio.coords.BoundingBox(data['minx'], data['miny'], data['maxx'], data['maxy'])
# show(src)
print('Raster:', src.bounds)
print('CSV:', bbox)

name       SETSM_GE01_20120812_10504100007CE100_105041000...
raster                                   2017-09-29 00:00:00
fileurl    http://data.pgc.umn.edu/elev/dem/setsm/ArcticD...
minx                                            -2.08742e+06
maxx                                            -2.04908e+06
miny                                                  658334
maxy                                                  683876
Name: 0, dtype: object
Raster: BoundingBox(left=-2086408.0, bottom=661452.0, right=-2047888.0, top=686714.0)
CSV: BoundingBox(left=-2087418.0, bottom=658334.0, right=-2049076.0, top=683876.0)


### Open file from ArcticDEM server, mask, and save it

See Rasterio Documentation: https://rasterio.readthedocs.io/en/latest/topics/datasets.html


**NOTE: Formats can be combined (ex. a tar.gz file from a remote server) by adding a '+' character.** 

URL Format: **'tar' + url_to_arctic_dem + '!' + filename**


In [None]:
data = index_2017.iloc[0]
url = data['fileurl']
rio_url = 'tar+' + url + '!' + data['name'] + '_dem.tif'
src = rio.open(rio_url)


xmin, xmax = data['minx'], data['maxx']
ymin, ymax = data['miny'], data['maxy']
bbox = shapely.geometry.box(xmin, ymin, xmax, ymax)
geo = gpd.GeoDataFrame({'geometry': bbox}, index=[0], crs=src.crs)
geo = [json.loads(geo.to_json())['features'][0]['geometry']]


out_img, out_transform = mask(src, shapes=geo, crop=True)

out_meta = src.meta.copy()
out_meta.update({'driver':'GTiff',
                 'height': out_img.shape[1],
                 'width': out_img.shape[2],
                 'transform': out_transform,
                 'crs': src.crs
                })

with rio.open('./test.tif', 'w', **out_meta) as dst:
    dst.write(out_img)

### Notes:

1. **Aspect could be a useful parameter, they show interesting behavior at pingo sites.**
2. Slope could be a useful parameter
3. 