In [None]:
import sys
import os
sys.path.insert(0, os.path.join(os.path.expanduser("~"),"Desktop","projects", "GlacierView", "src","segmentation","helpers"))
import read, preprocess, explore


import rasterio
import pandas as pd
import pickle

import numpy as np
import geopandas as gpd
import tensorflow as tf
from datetime import date
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import importlib
# Re-execute the local helper modules so that edits made to them on disk are
# picked up without restarting the kernel. The final reload is the cell's last
# expression, so its return value (the module object) is what the cell displays.
importlib.reload(read)
importlib.reload(preprocess)
importlib.reload(explore)

# TRAINING

In [None]:
# Root of the local GlacierView checkout, resolved relative to the current
# user's home directory.
home_dir = os.path.expanduser("~")
glacier_view_dir = os.path.join(home_dir, "Desktop", "projects", "GlacierView")

In [None]:
# Landing zone of the Earth Engine exports used for segmentation training.
# Built from glacier_view_dir, which resolves to the same
# ~/Desktop/projects/GlacierView prefix.
ee_data_dir = os.path.join(
    glacier_view_dir,
    "src",
    "earth_engine",
    "data",
    "ee_landing_zone",
    "localized_time_series_for_segmentation_training_large",
)

# Sub-directories holding the Landsat rasters and the DEM files.
landsat_dir = os.path.join(ee_data_dir, "landsat")
dem_dir = os.path.join(ee_data_dir, "dems")

# Pickled dictionary of masks, loaded further down with pickle.load.
masks_path = os.path.join(
    glacier_view_dir,
    "src",
    "segmentation",
    "training",
    "data",
    "training_data_pickles",
    "mask_dict_large.pickle",
)



In [None]:
# Landsat rasters: one sub-directory per glacier, the directory name is used as
# the GLIMS id. Only the first element returned by read.get_rasters is kept.
images_dict = {}
for glacier_dir_name in os.listdir(landsat_dir):
    if glacier_dir_name.startswith('.'):
        continue  # ignores hidden files
    glims_id = glacier_dir_name
    images_dict[glims_id] = read.get_rasters(os.path.join(landsat_dir, glacier_dir_name))[0]

# DEMs: one file per glacier, the GLIMS id is the part of the file name before
# the first underscore.
dem_dict = {}
for dem_file_name in os.listdir(dem_dir):
    if dem_file_name.startswith('.'):
        # Same guard as the Landsat loop: without it a hidden entry such as
        # ".DS_Store" would be handed to read.get_dem under the key ".DS".
        continue
    glims_id = dem_file_name.split("_")[0]
    dem_dict[glims_id] = read.get_dem(os.path.join(dem_dir, dem_file_name))


# NOTE(security): pickle.load can execute arbitrary code while deserializing;
# only load mask pickles that were produced by this project.
with open(masks_path, "rb") as infile:
    mask_dict = pickle.load(infile)

In [None]:
# Target size handed to preprocess.resize_rasters (also reused for DEMs and masks).
dim = (128, 128)

# For every glacier: keep the red / nir / swir bands, normalize, then resize.
images_dict_processed = {}
for glims_key, raw_rasters in images_dict.items():
    selected_bands = preprocess.get_common_bands_from_list_of_numpy_arrays(
        raw_rasters, ['red', 'nir', 'swir']
    )
    normalized_rasters = preprocess.normalize_rasters(selected_bands)
    images_dict_processed[glims_key] = preprocess.resize_rasters(normalized_rasters, dim)

In [None]:
# DEMs are wrapped in a one-element list, resized and then normalized.
# Note the order differs from the Landsat images, which are normalized first
# and resized afterwards.
dem_dict_processed = {
    glims_key: preprocess.normalize_rasters(preprocess.resize_rasters([dem_raster], dim))
    for glims_key, dem_raster in dem_dict.items()
}

# Masks are only resized (wrapped in a one-element list); they are not normalized.
mask_dict_processed = {
    glims_key: preprocess.resize_rasters([mask_raster], dim)
    for glims_key, mask_raster in mask_dict.items()
}

# Glaciers for which imagery, a DEM and a mask are all available.
common_set = set(images_dict_processed) & set(dem_dict_processed) & set(mask_dict_processed)

In [None]:
# NOTE(review): np.percentile expects q on a 0-100 scale, so 0.5 selects the
# 0.5th percentile (close to the per-pixel minimum across the stacked images),
# not the median. If the median was intended this should be 50 - confirm
# before changing, since it alters the training data.
percentile = 0.5
combined_to_stack = []
masks = []
# Iterate in sorted order: the iteration order of a set of strings can change
# between kernel sessions (string hash randomization), which would reorder X / y
# and make the seeded train/test split further down non-reproducible.
for glims_id in sorted(common_set):
    dem_layer = dem_dict_processed[glims_id][0]
    # Append the DEM to every image along axis 2 (presumably the band axis - TODO confirm).
    combined_images_and_dems = [
        np.concatenate((img, dem_layer), axis=2)
        for img in images_dict_processed[glims_id]
    ]
    # Collapse the stacked images of this glacier into one composite.
    smoothed_image = np.percentile(np.stack(combined_images_and_dems), percentile, axis=0)
    # Drop composites with 50000 or more exactly-zero values.
    if np.sum(smoothed_image == 0) < 50000:  # convert to percent
        combined_to_stack.append(smoothed_image)
        masks.append(mask_dict_processed[glims_id][0])



In [None]:
# Stack the per-glacier composites and their masks along a new leading axis so
# that index i of X and index i of y belong to the same glacier.
X = np.stack(combined_to_stack, axis=0)
y = np.stack(masks, axis=0)

In [None]:
# Hold out 10% of the samples for testing; the fixed random_state seeds the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.10,
    random_state=42,
)

# Pair every input with its mask as (x, y) tuples.
training_data = [(features, target) for features, target in zip(X_train, y_train)]
test_data = [(features, target) for features, target in zip(X_test, y_test)]

In [None]:
# Persist both splits as pickles in the training data directory.
training_data_pickles_dir = os.path.join(
    glacier_view_dir, "src", "segmentation", "training", "data", "training_data_pickles"
)
for pickle_name, dataset in (
    ('training_data.pickle', training_data),
    ('test_data.pickle', test_data),
):
    with open(os.path.join(training_data_pickles_dir, pickle_name), 'wb') as handle:
        pickle.dump(dataset, handle)

In [None]:
# Visual sanity check of the training inputs. `where` and `n` are forwarded
# as-is; presumably a start index and the number of images to show - TODO
# confirm against explore.view_training_images.
explore.view_training_images(
    X_train,
    where=0,
    n=100,
)