### Generate all paths to a config file

In [None]:
## Inputs used to generate the conf file for all slices.

# Folder that is scanned for images; one config entry is created per item in it.
input_folder = "/datadrive/glaciers/unique_tiles"

# Shapefiles referenced by every config entry: the first three are stored
# under "mask_paths", the last one under "border_path".
glaciers_file = "/datadrive/glaciers/vector_data/Glacier_2005.shp"
clean_g_file = "/datadrive/glaciers/vector_data/clean.shp"
debris_g_file = "/datadrive/glaciers/vector_data/debris.shp"
border_file = "/datadrive/glaciers/vector_data/hkh.shp"

In [None]:
import pathlib

# Build one config entry per item found in `input_folder`, keyed "mask_<i>".
# Path.iterdir() yields entries in arbitrary order, so the listing is sorted
# to keep the mask_<i> numbering stable across runs and machines.
paths = {}
for i, f in enumerate(sorted(pathlib.Path(input_folder).iterdir())):
    # Key order matters: the config is later dumped with sort_keys=False.
    paths[f"mask_{i}"] = {
        "img_path": str(f),
        "mask_paths": [glaciers_file, clean_g_file, debris_g_file],
        "border_path": border_file,
    }
    

In [None]:
import yaml

# Destination of the generated masking configuration.
conf_file = "/datadrive/glaciers/conf/masking_paths_all_data.yaml"

# Write `paths` in block style, keeping the insertion order of its keys.
with open(conf_file, 'w') as conf_stream:
    yaml.dump(
        paths,
        conf_stream,
        default_flow_style=False,
        sort_keys=False,
    )

In [None]:
# in case there is an old folder
# !rm -rf /datadrive/glaciers/processed_exper

### Masking

In [None]:
import pathlib

import yaml
from glacier_mapping.data.mask import generate_masks

# Read the masking configuration back from `conf_file`.
# yaml.load() without an explicit Loader is rejected by PyYAML >= 6;
# safe_load is sufficient because the config holds only plain dicts, lists
# and strings. The with-block guarantees the file handle is closed.
with open(conf_file) as conf_stream:
    masking_paths = yaml.safe_load(conf_stream)

# Split the per-entry dicts into parallel lists, one element per config entry.
img_paths = [p["img_path"] for p in masking_paths.values()]
mask_paths = [p["mask_paths"] for p in masking_paths.values()]
border_paths = [p["border_path"] for p in masking_paths.values()]

out_dir = pathlib.Path("/datadrive/glaciers/processed_exper/masks")
generate_masks(img_paths, mask_paths, border_paths=border_paths, out_dir=out_dir)

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pathlib
import pandas as pd

# Spot-check one generated mask/border pair and preview the mask metadata.
mask_dir = pathlib.Path("/datadrive/glaciers/", "processed_exper", "masks")
mask = np.load(mask_dir / "mask_05.npy")
border = np.load(mask_dir / "border_05.npy")
print(mask.shape)

# First three panels: mask channels 0, 1 and 2; last panel: the border array.
_, ax = plt.subplots(1, 4, figsize=(15, 15))
for channel in range(3):
    ax[channel].imshow(mask[:, :, channel])
ax[3].imshow(border)

mask_df = pd.read_csv(mask_dir / "mask_metadata.csv")
mask_df.head()

In [None]:
# in case there is an old folder
# !rm -r /datadrive/glaciers/processed_exper/slices

### Slicing

In [None]:
import pathlib
import pandas as pd
from glacier_mapping.data.slice import write_pair_slices

processed_dir = pathlib.Path("/datadrive/glaciers", "processed_exper")
paths = pd.read_csv(processed_dir / "masks" / "mask_metadata.csv")
output_dir = processed_dir / "slices"
output_dir.mkdir(parents=True, exist_ok=True)

# Slice every image/mask pair listed in the mask metadata and collect the
# per-pair metadata returned by write_pair_slices.
n_tiffs = len(paths)
metadata = []
for position, (row_label, record) in enumerate(paths.iterrows(), start=1):
    print(f"## Slicing tiff {position}/{n_tiffs} ...")
    metadata.append(
        write_pair_slices(
            record["img"],
            record["mask"],
            output_dir,
            border_path=record["border"],
            out_base=f"slice_{row_label}",
        )
    )

# Stack the per-pair metadata and store it next to the slices as GeoJSON.
metadata = pd.concat(metadata, axis=0)
out_path = output_dir / "slices.geojson"
metadata.to_file(out_path, index=False, driver="GeoJSON")

In [None]:
from glacier_mapping.data.slice import plot_slices

# Visual check of the slices written to the slices directory.
plot_slices(
    "/datadrive/glaciers/processed_exper/slices/",
    n_cols=4,
    div=300,
)

### Split and filter

In [None]:
import pathlib

import yaml
from addict import Dict
import geopandas as gpd
import numpy as np
import rasterio
import glacier_mapping.data.process_slices_funs as pf

output_dir = pathlib.Path("/datadrive/glaciers/", "processed_exper")

# Load the post-processing options; the with-block closes the file handle,
# which a bare open() passed straight to safe_load would leave open.
with open("/datadrive/glaciers/conf/postprocess.yaml", "r") as conf_stream:
    pconf = Dict(yaml.safe_load(conf_stream))
slice_meta = gpd.read_file(pathlib.Path(output_dir, "slices", "slices.geojson"))

# filter all the slices to the ones that matter
print("filtering")
keep_ids = pf.filter_directory(
    slice_meta,
    filter_perc=pconf.filter_percentage,
    filter_channel=pconf.filter_channel,
)

# validation: get ids for the ones that will be training vs. testing.
print("reshuffling")
# The first key under `split_method` names the function in `pf` to call;
# its `split_ratio` is read from the same config entry.
split_method = list(pconf.split_method)[0]
split_ratio = pconf.split_method[split_method].split_ratio
split_fun = getattr(pf, split_method)
split_ids = split_fun(keep_ids, split_ratio, slice_meta=slice_meta)
target_locs = pf.reshuffle(split_ids, output_dir)


In [None]:
import pathlib

import yaml
from addict import Dict
import geopandas as gpd
import numpy as np
import rasterio
import glacier_mapping.data.process_slices_funs as pf

# NOTE(review): this cell repeats the load-and-filter steps of the preceding
# cell without the split/reshuffle part; confirm whether it is still needed.
output_dir = pathlib.Path("/datadrive/glaciers/", "processed_exper")

# Load the post-processing options; the with-block closes the file handle,
# which a bare open() passed straight to safe_load would leave open.
with open("/datadrive/glaciers/conf/postprocess.yaml", "r") as conf_stream:
    pconf = Dict(yaml.safe_load(conf_stream))
slice_meta = gpd.read_file(pathlib.Path(output_dir, "slices", "slices.geojson"))

# filter all the slices to the ones that matter
print("filtering")
keep_ids = pf.filter_directory(
    slice_meta,
    filter_perc=pconf.filter_percentage,
    filter_channel=pconf.filter_channel,
)


In [None]:
# Persist target_locs to disk so it can be reloaded later.
import pickle

target_locs_file = '/datadrive/glaciers/processed_exper/target_locs.pickle'

# Binary mode is required by pickle; the newest available protocol is used.
with open(target_locs_file, 'wb') as pickle_stream:
    pickle.dump(
        target_locs,
        pickle_stream,
        protocol=pickle.HIGHEST_PROTOCOL,
    )
