- For each glacier we have a polygon and a list of TIFs. These TIFs are almost always in the same CRS and UTM zone. Before we smooth the TIFs, we have to create the masks.
- To create the masks, we need one TIF from each glacier and that glacier's polygon.

In [1]:
import os
import sys
sys.path.insert(0, os.path.join(os.path.expanduser("~"),"Desktop","projects", "GlacierView", "src","segmentation","helpers"))
import read

import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio
from rasterio.mask import mask
import matplotlib.pyplot as plt
import pickle

In [4]:
# Root of the GlacierView checkout; every other path in this notebook is built from it.
home_dir = os.path.expanduser("~")
glacier_view_dir = os.path.join(home_dir, "Desktop", "projects", "GlacierView")

In [31]:
#create mapping of all glims ids to a CRS
# Load the Earth Engine metadata and derive, for every row, the GLIMS id and the
# EPSG code of the row's UTM zone.
training_metadata_path = os.path.join(glacier_view_dir,"src","earth_engine","data","processed_metadata","localized_time_series_for_segmentation_training_large","ee_metadata.csv")
metadata_df = pd.read_csv(training_metadata_path)

# glacier_pk looks like "<glims_id>_<suffix>"; keep only the part before the first "_".
metadata_df['glims_id'] = metadata_df.glacier_pk.str.split("_").str[0]

# WGS 84 / UTM north codes are "EPSG:326" followed by the TWO-digit zone, so zones
# 1-9 must be zero-padded (zone 5 -> EPSG:32605, not EPSG:3265).
# Rows without a UTM zone get None instead of raising or producing "EPSG:326nan";
# value_counts() in the next cell ignores them.
# NOTE(review): 326xx is the northern-hemisphere family (southern is 327xx) --
# confirm the metadata contains no southern-hemisphere glaciers.
metadata_df['crs'] = [
    f"EPSG:326{int(round(zone)):02d}" if pd.notna(zone) else None
    for zone in metadata_df['utm_zone']
]

In [32]:
# Map each GLIMS id to the numeric EPSG code (e.g. 32610) that occurs most often
# among its metadata rows. Glaciers with no usable CRS are left out of the dict.
glims_id_crs_dict = {}
for glims_id, glacier_rows in metadata_df.groupby("glims_id"):
    # value_counts() sorts by frequency and drops missing values.
    crs_counts = glacier_rows["crs"].value_counts()
    if crs_counts.empty: #for no CRS (very few)
        continue
    most_common_crs = str(crs_counts.index[0])
    try:
        # The column holds strings such as "EPSG:32610"; int() on the whole string
        # always raised ValueError (silently swallowed), leaving this dict empty.
        # Keep only the numeric code after the colon.
        glims_id_crs_dict[glims_id] = int(most_common_crs.rsplit(":", 1)[-1])
    except ValueError:
        # Malformed code (e.g. built from a NaN zone) -- treat as "no CRS".
        continue

In [33]:
#reproject the polygons to UTM
# Read the GLIMS training polygons and reproject each one to the UTM zone of its
# glacier, so the polygon lines up with that glacier's tifs.
glims_training_sample_path = os.path.join(glacier_view_dir,"src","glims","data","training_sample","glims_3000_bb.shp")
polys_df = gpd.read_file(glims_training_sample_path)
# NOTE(review): this overrides whatever CRS the shapefile declared; newer geopandas
# versions may prefer polys_df.set_crs(..., allow_override=True) -- confirm.
polys_df.geometry.crs = "epsg:4326" #polys are in lat long

reprojected_polygons = []  # one single-row GeoSeries per polygon, in polys_df order
crs_list = []              # CRS used for each polygon, in polys_df order
for i in range(polys_df.shape[0]):
    row = polys_df.iloc[[i], :]
    crs = glims_id_crs_dict.get(row.glac_id.iloc[0])
    if crs is not None:
        crs_list.append(crs)
        reprojected_polygons.append(row.geometry.to_crs(f"epsg:{crs}"))
    else:
        # No CRS known for this glacier: keep the polygon in lat/long.
        # The fallback stays the string '4326' exactly as before (other entries are
        # ints) in case later cells compare against it.
        crs_list.append('4326')
        reprojected_polygons.append(row.geometry)

# Unwrap each single-row GeoSeries into its geometry.
# This used to initialise poly_list inside the else-branch above, so poly_list was
# undefined whenever every glacier had a CRS. It also indexed by label
# (series[idx]), which only works with a default RangeIndex; .iloc[0] is positional.
poly_list = [geometry_series.iloc[0] for geometry_series in reprojected_polygons]

In [34]:
#obtain masks from polygons
# For every glacier directory under landsat_dir, rasterise that glacier's polygon
# onto one of its tifs and store the resulting single-band 0/1 mask under the
# glacier's GLIMS id.
landsat_dir = os.path.join(glacier_view_dir,"src","earth_engine","data","ee_landing_zone","localized_time_series_for_segmentation_training_large","landsat")
poly_glims_ids = polys_df.glac_id

mask_dict = {}
skipped_glims_ids = {}  # glims_id -> repr of the error that prevented a mask
# sorted() makes both the processing order and the chosen tif deterministic;
# os.listdir returns entries in arbitrary order.
for glims_id in sorted(os.listdir(landsat_dir)):
    glacier_dir = os.path.join(landsat_dir, glims_id)
    try:
        tif_name = sorted(os.listdir(glacier_dir))[0]
        # Position of this glacier's polygon in poly_list (same order as polys_df).
        poly_list_loc = np.where(glims_id == poly_glims_ids)[0][0]
        with rasterio.open(os.path.join(glacier_dir, tif_name)) as src:
            # Pixels outside the polygon are set to 0.
            masked, _ = mask(src, [poly_list[poly_list_loc]], nodata = 0)
        # Non-zero -> 1, move the band axis last, keep only the first band.
        # NOTE(review): the mask is derived from pixel values, so a pixel inside the
        # polygon whose first-band value is already 0 is labelled as background.
        mask_clean = np.rollaxis(np.where(masked != 0, 1, masked),0,3)[:,:,[0]]
        mask_dict[glims_id] = mask_clean
    except Exception as err:
        # Still best-effort (stray files, empty directories, missing polygon,
        # polygon not overlapping the raster, unreadable tif, ...), but no longer
        # silent, and KeyboardInterrupt is no longer swallowed.
        skipped_glims_ids[glims_id] = repr(err)

print(f"built {len(mask_dict)} masks; skipped {len(skipped_glims_ids)} entries (see skipped_glims_ids)")



In [35]:
#save series of masks
# Persist the glims_id -> mask dictionary as a pickle for the training code.
mask_pickle_path = os.path.join(
    glacier_view_dir,
    "src","segmentation","training","data","training_data_pickles",
    "mask_dict_large.pickle",
)
with open(mask_pickle_path, 'wb') as pickle_file:
    pickle.dump(mask_dict, pickle_file)