- For a given glacier...
- ... We have a glacier outline as a polygon which is in lat-long
- ... We have several tifs (typically around 10) for a glacier. These are not guaranteed to share the same UTM zone.
- ... To create the masks we need one tif from each glacier and the polygon

In [None]:
import os
import sys
sys.path.insert(0, os.path.join(os.path.expanduser("~"),"Desktop","projects", "GlacierView", "src","segmentation","helpers"))
import read
from tqdm import tqdm
import pyproj
import numpy as np
import pandas as pd
import geopandas as gpd
import rasterio
from rasterio.mask import mask
import matplotlib.pyplot as plt
from shapely.ops import transform
import pickle
from PIL import Image

In [None]:
# Dataset label shared by the metadata and landing-zone directory layouts.
data_label = "localized_time_series_for_training_c02_t1_l2"

# Project root, resolved under the current user's home directory.
home_dir = os.path.expanduser("~")
glacier_view_dir = os.path.join(home_dir, "Desktop", "projects", "GlacierView")

# Both data trees live under <project>/src/earth_engine/data.
earth_engine_data_dir = os.path.join(glacier_view_dir, "src", "earth_engine", "data")

# Processed metadata for this dataset, including the per-image attribute table.
meta_data_dir = os.path.join(earth_engine_data_dir, "processed_metadata", data_label)
image_attributes_path = os.path.join(meta_data_dir, "image_attributes.csv")

# Landsat landing zone. Directory entries not starting with "." are kept and
# their names are used as GLIMS ids.
glaciers_dir = os.path.join(earth_engine_data_dir, "ee_landing_zone", data_label, "landsat")
glims_ids = [entry for entry in os.listdir(glaciers_dir) if not entry.startswith('.')]

In [None]:
# Load the per-image attribute table and tag every image with its glacier:
# the GLIMS id is taken to be the part of file_name before the first "_".
image_attributes_df = pd.read_csv(image_attributes_path)
file_name_parts = image_attributes_df["file_name"].str.split("_")
image_attributes_df["glims_id"] = file_name_parts.str[0]
image_attributes_df.head()  # notebook preview of the first rows

In [None]:
# Notebook display of the file-name -> EPSG-code mapping.
# NOTE(review): in file order this cell comes before the cell that builds
# glims_id_crs_dict, so a top-to-bottom run would hit a NameError here --
# confirm the intended execution order.
glims_id_crs_dict

In [None]:
# Map each image file name to its EPSG code as an int. The code is the first
# entry of value_counts(), i.e. the most prevalent epsg_code within the group.
# NOTE(review): the keys are file names although the variable is named after
# GLIMS ids -- confirm which key downstream readers expect.
glims_id_crs_dict = {}
images_by_file_name = image_attributes_df.groupby("file_name")
for image_name, image_rows in tqdm(images_by_file_name):
    epsg_counts = image_rows["epsg_code"].value_counts()
    glims_id_crs_dict[image_name] = int(epsg_counts.index[0])

In [None]:
# Reproject every glacier outline from lat/long into the CRS of one of that
# glacier's images (the first row for the glacier in image_attributes_df), and
# remember which image was chosen in a 'file_name' column. Glaciers without
# any row in image_attributes_df are skipped.
available_attributes = set(image_attributes_df['glims_id'])
# NOTE(review): this assigns the CRS through the geometry accessor rather than
# through GeoDataFrame.set_crs -- confirm it takes effect on polys_df with the
# installed geopandas version.
polys_df.geometry.crs = "epsg:4326" #polys are in lat long

# First image_attributes row per glacier, indexed by GLIMS id. Built once so
# the loop does not rescan the whole table for every glacier; keeping the
# first occurrence matches a per-glacier filter followed by .iloc[0].
first_image_by_glims_id = (
    image_attributes_df.drop_duplicates(subset="glims_id").set_index("glims_id")
)

reprojected_parts = []
for glims_id, df_small in tqdm(polys_df.groupby('glac_id')):
    if glims_id not in available_attributes:
        continue
    selected_image_attributes = first_image_by_glims_id.loc[glims_id]
    # Cast to int so a float-typed epsg_code column (e.g. 32610.0) still
    # produces a valid "epsg:32610" string, as done when building
    # glims_id_crs_dict.
    crs = int(selected_image_attributes['epsg_code'])
    reprojected_df_small = df_small.to_crs(f'epsg:{crs}')
    reprojected_df_small['file_name'] = selected_image_attributes['file_name']
    reprojected_parts.append(reprojected_df_small)

# Concatenate once instead of growing a frame inside the loop; fall back to an
# empty DataFrame when no glacier matched so later cells can still iterate.
# NOTE(review): the parts may carry different CRSs (one per UTM zone); confirm
# the installed geopandas version accepts concatenating them.
reprojected_df = pd.concat(reprojected_parts) if reprojected_parts else pd.DataFrame()

In [None]:
# Build one binary mask per glacier from its reprojected outline: the outline
# is applied to the selected image with -999 as the fill value, then the first
# band is turned into 1 (pixel kept a non-fill value) / 0 (pixel was filled).
# NOTE(review): this relies on -999 being representable in the raster dtype
# and never occurring as a real pixel value -- confirm for the input tifs.
mask_dict = {}
for row_label, outline in tqdm(reprojected_df.iterrows()):
    tif_path = os.path.join(glaciers_dir, outline.glac_id, outline['file_name'])
    with rasterio.open(tif_path) as src:
        clipped, _affine = mask(src, [outline.geometry], nodata=-999)
    first_band = clipped[0]  # only the first band is used for the mask
    # Later rows with the same glac_id overwrite earlier ones.
    mask_dict[outline.glac_id] = np.where(
        first_band == -999, 0, np.ones_like(first_band)
    )

In [None]:
# Write every glacier mask to the staging directory as "<glims_id>.tif".
masks_staging_dir = os.path.join(glacier_view_dir, "src","segmentation", "training","data","masks_staging_2")
# Create the staging directory up front: saving an image does not create
# missing parent directories, so a fresh checkout would otherwise fail here.
os.makedirs(masks_staging_dir, exist_ok=True)
for glims_id, img_mask in mask_dict.items():
    im = Image.fromarray(img_mask)
    im.save(os.path.join(masks_staging_dir, f"{glims_id}.tif"))