In [None]:
import pandas as pd
import numpy as np
from glob import glob
import h5py
from pyproj import Transformer, CRS
from shapely.geometry import asPolygon, MultiPolygon, Point
import geopandas as gpd
from astropy.time import Time
import matplotlib.pyplot as plt
import time


In [None]:
def gps2dyr(time):
    """Convert GPS time to decimal years.

    Parameters
    ----------
    time : scalar or array-like
        Value(s) handed to ``astropy.time.Time`` with ``format='gps'``.

    Returns
    -------
    The ``decimalyear`` attribute of the resulting ``Time`` object.
    """
    gps_time = Time(time, format='gps')
    return gps_time.decimalyear


def read_reduced_atl06(file, xlims, ylims):
    """Read the good-quality ATL06 land-ice segments of one file inside a box.

    Every beam group (gt1l ... gt3r) present in the HDF5 file is read, its
    lon/lat positions are projected from EPSG:4326 to EPSG:3031, and only the
    segments that lie inside the bounding box and have
    ``atl06_quality_summary == 0`` are kept.

    Parameters
    ----------
    file : str or path-like
        ATL06 HDF5 file, opened read-only with ``h5py.File``.
    xlims, ylims : sequence of two numbers
        Inclusive (min, max) limits in EPSG:3031 x and y.

    Returns
    -------
    pandas.DataFrame or list
        A frame with columns ``x, y, h, t_fracyr, lon, lat, quality``
        (per-beam frames concatenated, per-beam indices preserved), or an
        empty list when no segment qualifies. Callers can test either
        result with ``len(...) > 0``.
    """
    ll2xy = Transformer.from_crs(4326, 3031, always_xy=True)
    beams = ['/gt1l', '/gt1r', '/gt2l', '/gt2r', '/gt3l', '/gt3r']
    dfs = []

    # Open the file once and let the context manager close it, instead of
    # leaking one extra handle per beam for the existence check.
    with h5py.File(file, 'r') as fi:
        for g in beams:
            if g not in fi:
                continue

            segments = g + '/land_ice_segments'
            lat = fi[segments + '/latitude'][:]
            lon = fi[segments + '/longitude'][:]
            h_li = fi[segments + '/h_li'][:]
            quality = fi[segments + '/atl06_quality_summary'][:]
            t_ref = fi['/ancillary_data/atlas_sdp_gps_epoch'][:]
            t_dt = fi[segments + '/delta_time'][:]

            x, y = ll2xy.transform(lon, lat)
            idx = ((x >= xlims[0]) & (x <= xlims[1]) &
                   (y >= ylims[0]) & (y <= ylims[1]) &
                   (quality == 0))
            if not idx.any():
                continue

            # Convert only the retained segments: the GPS -> decimal-year
            # conversion is element-wise, so masking first yields the same
            # values while skipping the work for discarded points.
            t_year = gps2dyr(t_ref + t_dt[idx])

            dfs.append(pd.DataFrame(data={'x': x[idx],
                                          'y': y[idx],
                                          'h': h_li[idx],
                                          't_fracyr': t_year,
                                          'lon': lon[idx],
                                          'lat': lat[idx],
                                          'quality': quality[idx]}))

    if len(dfs) > 0:
        return pd.concat(dfs)
    # Kept as an empty list (not an empty frame) for backward compatibility.
    return []

In [None]:
filelist_fold = 'data/is2/filelists' #made by antarctic trackmap 
lakes = pd.DataFrame(data = {'name': ['slm'], 
                             'fullname': ['MercerSubglacialLake']})
# you really should pull the outline file fresh from github (see figure 1 plotting code)
# NOTE(review): this is an absolute, machine-specific path -- adjust before running elsewhere.
outline_file = '/Users/siegfried/Documents/data/lakeoutlines/SiegfriedFricker2018/SiegfriedFricker2018-outlines.h5'

# import subglacial lake outlines (Siegfried & Fricker, 2018)
outline_geometries = [] # store polygons
citations = [] # store citation information

# we're going to calculate geodesic lake area because that is often screwed up 
# and occasionally incorrect in the literature
areas = []

# we're going to need to do some coordinate transforms for the geodesic area
# define CRS for Antarctica and make a converter from xy to ll
crs_ll = "EPSG:4326" # wgs84 in lon,lat 
geod = CRS(crs_ll).get_geod() # geod object for calculating area on defined ellipsoid

# The context manager guarantees the HDF5 file is closed even if one of the
# outlines fails to parse part-way through the loop.
with h5py.File(outline_file, 'r') as h5f:
    crs_xy = h5f.attrs.get('proj_crs') # get projection from hdf5 file
    xy_to_ll = Transformer.from_crs(crs_xy, crs_ll, always_xy = True) # make coord transformer

    # look through each lake and load all of its info
    lake_names = list(h5f.keys())
    for lake in lake_names:
        outline_x = h5f[lake]['x'][:]
        outline_y = h5f[lake]['y'][:]
        # x and y are indexed as 2-D arrays here (shape[1] is the vertex
        # count); pair them up as an (n_vertices, 2) array
        outlines_xy = np.stack((outline_x, outline_y),axis=2).reshape(outline_x.shape[1], 2)

        # A single lake with multiple polygons is NaN broken. Split the vertex
        # list at every NaN row; a lake without NaNs yields exactly one ring,
        # so both cases share the same code path.
        nan_rows = np.flatnonzero(np.isnan(outlines_xy[:,0]))
        starts = np.concatenate(([0], nan_rows + 1))
        stops = np.concatenate((nan_rows, [len(outlines_xy)]))

        pgons = []
        this_area = 0
        for start, stop in zip(starts, stops):
            this_outline = outlines_xy[start:stop,:]
            if len(this_outline) == 0:
                # trailing or doubled NaN separator: nothing to build here
                continue
            # NOTE(review): asPolygon is deprecated in Shapely 1.8 and removed
            # in 2.0; switch to Polygon when the imports are updated.
            pgons.append(asPolygon(this_outline))
            lon, lat = xy_to_ll.transform(this_outline[:,0], this_outline[:,1])
            # geodesic area is returned in m^2 (sign depends on ring
            # orientation), hence abs() and the conversion to km^2
            this_area += abs(geod.polygon_area_perimeter(lon,lat)[0])/1e6

        # one ring -> plain polygon, otherwise a MultiPolygon
        geometry = pgons[0] if len(pgons) == 1 else MultiPolygon(pgons)

        # append all the results in the right place
        outline_geometries.append(geometry)
        citations.append(h5f[lake].attrs.get('citation')[0].decode('UTF-8'))
        areas.append(this_area)

# make a pandas dataframe with all the necessary info
df = pd.DataFrame(zip(lake_names, outline_geometries, areas, citations), 
                  columns=['name', 'geometry', 'area (km^2)', 'cite'])
gdf = gpd.GeoDataFrame(df, crs=crs_xy, geometry=outline_geometries)

In [None]:
# For every lake listed in `lakes`: read each ATL06 file named in the lake's
# file list, keep the segments inside the buffered lake outline, and pickle
# the combined result under data/is2/.
for i,l in enumerate(lakes['name']):
    start_time = time.time()
    fullname = lakes['fullname'].iloc[i]
    if fullname == 'ConwaySubglacialLake':
        continue

    print(fullname)
    filelist = filelist_fold + '/' + l + "_filelist.txt"
    files = pd.read_csv(filelist, header = None, names=['filename'])
    outline = gdf[gdf['name'] == fullname]
    # buffer distance is in the units of the outline CRS
    # (presumably metres, i.e. 10 km -- confirm against the outline file)
    region = outline['geometry'].buffer(10000)

    # Everything below depends only on the lake, not on the file being read,
    # so it is computed once here rather than once per file.
    bbox = region.bounds
    xlims = [bbox['minx'].iloc[0], bbox['maxx'].iloc[0]]
    ylims = [bbox['miny'].iloc[0], bbox['maxy'].iloc[0]]
    gdf_reg = gpd.GeoDataFrame(geometry = region).reset_index(drop = True)
    region_geom = gdf_reg.at[0,'geometry']

    dfs=[]
    for idx,row in files.iterrows():
        if idx % 100 == 0:
            print("..." + str(idx) + ' of ' + str(len(files)))
        # NOTE: `df` is kept as the loop variable for compatibility; it
        # rebinds any notebook-level `df` defined by an earlier cell.
        df = read_reduced_atl06(row['filename'], xlims, ylims)
        if len(df) > 0:
            # the bounding-box cut is coarse; refine to the buffered polygon
            pts = gpd.GeoDataFrame(df, geometry=gpd.points_from_xy(df.x, df.y))
            pts = pts.set_crs(CRS("EPSG:3031"))
            inpoly = pts.within(region_geom)
            dfs.append(df.loc[inpoly])

    if len(dfs) > 0:
        alldata = pd.concat(dfs)
        alldata.to_pickle('data/is2/' + fullname + '.is2.atl06.004.pkl')
    else:
        # pd.concat([]) raises ValueError; report and skip the write instead
        print("...no ATL06 segments found for " + fullname + "; nothing written")
    print("--- %s hours ---" % ((time.time() - start_time)/(60*60)))