## Experiment Using the 10-Year Dataset
Run on Google Colab to make use of a more powerful GPU.

### Boring Stuff

##### Imports

In [None]:
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

!git clone https://riccih-dev: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@github.com/riccih-dev/bac_temp_downscaling.git

In [None]:
# reload drive
drive.mount('/content/drive', force_remount=True)

# get changes
!git pull

In [None]:
pip install cartopy

In [None]:

from IPython.display import display
import tensorflow as tf
import os
import numpy as np
import xarray as xr

from visualization.climate_data_visualizer import ClimateDataVisualizer
from visualization.evaluation_visualizer import EvaluationVisualization
from downscaling.pipeline import DownscalingPipeline
from utility.utility import save_to_json, store_to_disk, load_via_url, split_data
from model.modelconfig import UNetModelConfiguration
from utility.data_generator import DataGenerator

%reload_ext autoreload
%autoreload 2


# GPU memory configuration.
# Build a TF1-compat session config with gpu_options.allow_growth enabled and
# open a session with it. The session object itself is not kept.
# NOTE(review): presumably this is meant to make TensorFlow grow GPU memory on
# demand instead of reserving it all up front — confirm against the TF docs.
config = tf.compat.v1.ConfigProto()
config.gpu_options.allow_growth = True
tf.compat.v1.Session(config=config)

# Additionally request memory growth on every GPU that TensorFlow lists
# (the loop does nothing when no GPU is reported).
gpus = tf.config.experimental.list_physical_devices('GPU')
for gpu in gpus:
  tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
# Work from inside the cloned repository.
new_directory = "/content/bac_temp_downscaling/"
os.chdir(new_directory)

# Read the working directory back from the OS and echo it as a sanity check.
current_directory = os.getcwd()
print("Current Working Directory:", current_directory)

##### Settings

In [None]:
# --- Inputs: standardized-anomalies preprocessing (active) ---
file_preprocessed_era5 = './preprocessed_data/era5_preprocessed_standardized_anomalies_10y.nc'
file_preprocessed_cerra = './preprocessed_data/cerra_preprocessed_standardized_anomalies_10y.nc'
stats_file = './preprocessed_data/climatology_stats_sa_10y.json'

# --- Inputs: min-max preprocessing (uncomment to switch) ---
#stats_file = './preprocessed_data/climatology_stats_MinMax_10y.json'
#file_preprocessed_era5 = './preprocessed_data/era5_preprocessed_min_max_10y.nc'
#file_preprocessed_cerra = './preprocessed_data/cerra_preprocessed_min_max_10y.nc'

# --- Output location for results ---
result_path = './results/'


### Loading Preprocessed Data

In [None]:
# Open the preprocessed ERA5 file as the low-resolution dataset and the
# preprocessed CERRA file as the high-resolution dataset.
preprocessed_lr_data, preprocessed_hr_data = (
    xr.open_dataset(file_preprocessed_era5),
    xr.open_dataset(file_preprocessed_cerra),
)

### Data Splitting

In [None]:
# split_data hands back six datasets: low-res train/val/test followed by
# high-res train/val/test.
(
    lr_train_data, lr_val_data, lr_test_data,
    hr_train_data, hr_val_data, hr_test_data,
) = split_data(preprocessed_lr_data, preprocessed_hr_data)

# Bundle every split as a [low-res, high-res] pair.
train_data, val_data, test_data = (
    [lr_train_data, hr_train_data],
    [lr_val_data, hr_val_data],
    [lr_test_data, hr_test_data],
)

### Utility Function for Running the Model

In [None]:
def run_model_pipeline(normalization_type, train_data, val_data, model_setup, filename_suffix,
                       test_data=None, stats_path=None, results_dir=None):
    """Train a model, predict on the test split, evaluate, plot and save the results.

    Args:
        normalization_type: Passed to ``DownscalingPipeline``.
        train_data: Pair ``[lr, hr]`` handed to the training ``DataGenerator``.
        val_data: Pair ``[lr, hr]`` handed to the validation ``DataGenerator``.
        model_setup: Dict providing ``batch_size``, ``scheduler_type``,
            ``learning_rate_value``, ``num_epochs``, ``loss_type`` and
            ``filters``. The whole dict is also passed to ``save_to_json``.
        filename_suffix: Suffix forwarded to the plotting and saving helpers.
        test_data: Optional pair ``[lr, hr]`` used for prediction and
            evaluation. Defaults to the notebook globals ``lr_test_data`` and
            ``hr_test_data``.
        stats_path: Optional statistics file forwarded to ``predict`` and
            ``denormalize``. Defaults to the notebook global ``stats_file``.
        results_dir: Optional output location forwarded to ``save_to_json``.
            Defaults to the notebook global ``result_path``.

    Returns:
        None. Results are produced through the pipeline, visualizer and
        ``save_to_json`` calls.
    """
    # Fall back to the notebook-level globals so existing five-argument calls
    # keep working, while new callers can pass their inputs explicitly.
    if test_data is None:
        test_data = [lr_test_data, hr_test_data]
    if stats_path is None:
        stats_path = stats_file
    if results_dir is None:
        results_dir = result_path
    lr_test, hr_test = test_data[0], test_data[1]

    pipeline = DownscalingPipeline(normalization_type)

    train_data_generator = DataGenerator(train_data[0], train_data[1], model_setup['batch_size'])
    val_data_generator = DataGenerator(val_data[0], val_data[1], model_setup['batch_size'])

    # The fitted model is kept inside the pipeline; the return value is not needed here.
    pipeline.fit_model(
        train_generator = train_data_generator,
        val_generator = val_data_generator,
        scheduler_type = model_setup['scheduler_type'],
        learning_rate_value = model_setup['learning_rate_value'],
        num_epochs = model_setup['num_epochs'],
        loss_type = model_setup['loss_type'],
        filters = model_setup['filters']
        #show_summary = True
    )

    pipeline.show_training_history(filename_suffix)

    # Predict unseen data
    result = pipeline.predict(lr_test, stats_path)

    # Evaluate the prediction against the denormalized high-resolution test data
    hr_test_denormalized = pipeline.denormalize(hr_test, stats_path)
    metric_results = pipeline.evaluate_prediction(hr_test_denormalized, result)

    visualizer = EvaluationVisualization()
    visualizer.spatial_plots(hr_test_denormalized, result, filename_suffix)
    visualizer.difference_maps(hr_test_denormalized, result, filename_suffix)
    visualizer.histograms(hr_test_denormalized, result, filename_suffix)

    # Persist the setup, the loss curves and the metrics
    history = pipeline.get_history()
    save_to_json(filename_suffix, model_setup, history['loss'], history['val_loss'], metric_results, results_dir)

### Hyperparameter Optimization

#### Parameter Setting

In [None]:
# Bookkeeping used when naming result files
filename_suffix = ''
last_idx = 100

# Candidate values for each hyperparameter
normalization_types = ['standardized_anomalies', 'min_max']
scheduler_types = ['step_decay', 'exponential_decay', 'time_decay']
learning_rate_values = [0.1, 0.01, 0.001, 0.0001]
loss_types = ['mse', 'mae', 'huber_loss']
num_epochs_list = [4, 10, 15, 20, 30, 50]
batch_sizes = [8, 16, 32, 64]
initial_filters = [16, 32, 56, 64]

# Configuration helper for the U-Net model; its generate_filters() method is
# used further down to turn an initial filter count into the `filters` setting.
model_configuration = UNetModelConfiguration()

### Hyperparameter Optimization Test

In [None]:
# Counter appended to the result file suffix; reset for this trial.
last_idx = 0

# One hand-picked entry from each candidate list.
normalization_type = normalization_types[0]
scheduler_type = scheduler_types[0]
loss_type = loss_types[2]
learning_rate_value = learning_rate_values[2]
num_epochs = num_epochs_list[0]
batch_size = batch_sizes[2]

# Derive the filter setting from the chosen initial filter count.
initial_filter = initial_filters[1]
filters = model_configuration.generate_filters(initial_filter)

# Everything in this dict is handed to run_model_pipeline as `model_setup`.
model_setup = {
    'scheduler_type': scheduler_type,
    'learning_rate_value': learning_rate_value,
    'num_epochs': num_epochs,
    'batch_size': batch_size,
    'loss_type': loss_type,
    'filters': filters,
    'activation_function': 'tanh',
    'note': '10y, cropped area',
}

# ------------ step_decay ------------
filename_suffix = f'10y_sa_testing_{last_idx}'
run_model_pipeline(normalization_type, train_data, val_data, model_setup, filename_suffix)
last_idx = last_idx + 1

### Save Results to Google Drive
- model_and_results_SUFFIX.json
- histogram_plot_SUFFIX.png
- difference_plot_SUFFIX.png
- spatial_plot_SUFFIX.png
- training_history_plot_SUFFIX.png

In [None]:
!cp -r /content/bac_temp_downscaling/results/ /content/drive/MyDrive/bac_results

!cp -r /content/bac_temp_downscaling/results/ /content/drive/MyDrive/bac_results

!cp -r /content/bac_temp_downscaling/results/ /content/drive/MyDrive/bac_results