In [1]:
# Notebook setup: make the project's helper modules importable, then pull in
# the third-party libraries used by the cells below.
import sys
import os
# Prepend the helpers directory to sys.path so that `read`, `preprocess` and
# `explore` resolve to the project's own modules. This must run before the
# helper import on the next statement.
sys.path.insert(0, os.path.join(os.path.expanduser("~"),"Desktop","projects", "GlacierView",
                                "src","segmentation","helpers"))
import read, preprocess, explore


import rasterio
import pandas as pd

import pickle

import numpy as np
import tifffile
import geopandas as gpd
import tensorflow as tf
from datetime import date
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from PIL import Image

# Re-execute the helper modules so that edits made to them on disk are picked
# up when this cell is re-run, without restarting the kernel.
import importlib
importlib.reload(read)
importlib.reload(preprocess)
importlib.reload(explore)

<module 'explore' from '/Users/mattw/Desktop/projects/GlacierView/src/segmentation/helpers/explore.py'>

# TRAINING DATA PREPARATION

In [None]:
# Configuration for building the segmentation training set.
# Every name defined here is read by the preprocessing loop in the next cell.

# Name of the Earth Engine export folder to read, and the label that appears
# in each DEM file name (`<glims_id>_<dem_label>.tif`).
data_label = "localized_time_series_for_segmentation_training_large"
dem_label = "NASADEM"

# Input locations.
glacier_view_dir = os.path.join(os.path.expanduser("~"), "Desktop", "projects", "GlacierView")
ee_data_dir = os.path.join(glacier_view_dir, "src", "earth_engine", "data", "ee_landing_zone", data_label)

landsat_dir = os.path.join(ee_data_dir, "landsat")
# os.listdir returns entries in arbitrary order, so sort them to make the
# processing order (and the printed progress index) reproducible between runs.
# Only entries whose name is exactly 14 characters long are kept
# (presumably the length of a GLIMS ID -- TODO confirm).
glims_ids = sorted(entry for entry in os.listdir(landsat_dir) if len(entry) == 14)

dem_dir = os.path.join(ee_data_dir, "dems")

masks_dir = os.path.join(glacier_view_dir, "src", "segmentation", "training", "data", "masks_staging")

# Bands kept from every raster, and the size passed to preprocess.resize_rasters.
common_bands = ['blue', 'green', 'red', 'nir', 'swir', 'thermal', 'swir_2']
dim = (128, 128)

# Output locations.
processed_training_data = os.path.join(glacier_view_dir, "src", "segmentation", "training", "data", "processed_training_data")
images_write_path = os.path.join(processed_training_data, "images")
masks_write_path = os.path.join(processed_training_data, "masks")

# Percentile taken across each glacier's stack of images (50 = median).
percentile = 50


In [None]:
# Build one training sample per glacier: a per-pixel percentile composite of
# all its Landsat images (each with the DEM appended as extra channels) plus
# the matching mask. Samples are written as <glims_id>.tif under
# images_write_path and masks_write_path.

# Writing fails if the target directory does not exist, so create them up front.
os.makedirs(images_write_path, exist_ok=True)
os.makedirs(masks_write_path, exist_ok=True)

# A composite is rejected when it holds this many (or more) exactly-zero values.
# TODO: express as a fraction of the composite size so it tracks `dim`.
max_zero_values = 50000

for i, glims_id in enumerate(glims_ids):
    print(i)

    # Load the mask first: glaciers without one are skipped, so there is no
    # point reading and preprocessing their rasters beforehand.
    mask_file_name = f"{glims_id}.tif"
    try:
        # The context manager closes the file handle once the pixels are copied.
        with Image.open(os.path.join(masks_dir, mask_file_name)) as mask_img:
            mask_array = np.expand_dims(np.array(mask_img), 2)
    except FileNotFoundError:
        continue
    mask = preprocess.resize_rasters({mask_file_name: mask_array}, dim)

    image = read.get_rasters(os.path.join(landsat_dir, glims_id))
    image = preprocess.get_common_bands(image, common_bands)
    image = preprocess.normalize_rasters(image)
    image = preprocess.resize_rasters(image, dim)

    # Use dem_label for both the path and the lookup key so that changing the
    # configured DEM product does not leave a stale hard-coded "NASADEM" key.
    dem_file_name = f"{glims_id}_{dem_label}.tif"
    dem = read.get_dem(os.path.join(dem_dir, dem_file_name))
    dem = preprocess.resize_rasters(dem, dim)
    dem = preprocess.normalize_rasters(dem)

    # Append the DEM to every image along the channel axis.
    combined_images_and_dems = [
        np.concatenate((image[file_name], dem[dem_file_name]), axis=2)
        for file_name in image
    ]
    if not combined_images_and_dems:
        # np.stack raises on an empty list; nothing to composite for this glacier.
        print(f"skipping {glims_id}: no rasters found")
        continue

    # Collapse the stack of images into a single composite, pixel by pixel.
    smoothed_image = np.percentile(np.stack(combined_images_and_dems), percentile, axis=0)

    # Quality gate: previously the result of this check was stored in a list
    # that was never read, so every composite was written regardless.
    zero_count = np.sum(smoothed_image == 0)
    if zero_count >= max_zero_values:
        print(f"skipping {glims_id}: {zero_count} zero values in composite")
        continue

    # imwrite is the current name of the deprecated imsave alias.
    tifffile.imwrite(os.path.join(images_write_path, mask_file_name), smoothed_image)
    tifffile.imwrite(os.path.join(masks_write_path, mask_file_name), mask[mask_file_name])

In [None]:
#explore.view_training_images(X_train, where = 0, n=100)

In [None]:
#X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.25, random_state=1)
#training_data = list(zip(X_train,y_train))
#test_data = list(zip(X_test, y_test))