In [None]:
"""
    Extract the AutoKeras search results (per-trial scores and the best model) as the basis for additional runs


"""

In [None]:
import pdb
import time
import json
import os.path
import numpy as np
import pandas as pd
import datetime as dt
import multiprocess as mp
# ML related 
import torch
from torch import nn
from torch.utils.data import DataLoader
import tensorflow as tf
import autokeras as ak
from tensorflow.keras.models import load_model

# plot related
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import matplotlib.ticker as ticker
import matplotlib.gridspec as gridspec

from glob import glob
from tqdm import tqdm
from random import Random
from collections import OrderedDict
# from multiprocessing import Pool

In [None]:
#%% paths of source files

# folder holding the input data files
path_work_fd = './data'
# output folders (not referenced further down in this notebook)
path_figsav = './nel_daily'
path_mdsav = './md_res'
# root folder of the AutoKeras runs, i.e. <path_ak>/<run name>/best_model and
# <path_ak>/<run name>/trial_*; it is used by the cells that read the trials
# and load the best model but was never defined.
# NOTE(review): './ak_sup' is assumed from the run layout 'ak_sup/slnn_run1/'
# - adjust if the runs are stored elsewhere.
path_ak = './ak_sup'


In [None]:
#%% parameters
# full list of raw parameter names
parms_raw = [
    'year', 'month', 'dayno', 'ut', 'slt', 'gdalt', 'gdlat', 'glon',
    'fbar', 'f10.7', 'ap', 'ap3', 'bxgsm', 'bygsm', 'bzgsm', 'nel', 'dne',
    'ti', 'dti', 'te', 'dte', 'vo', 'dvo',
]

# reduced parameter subsets
parms_pnn = ['year', 'dayno', 'ut', 'f10.7', 'ap3', 'nel']
parms_use = [
    'year', 'month', 'dayno', 'ut', 'slt', 'gdalt', 'gdlat', 'glon',
    'f10.7', 'ap3', 'nel', 'dne',
]

# same names as parms_pnn, kept as an independent list object
parms_inv = list(parms_pnn)
# model inputs (day-of-year and UT enter as sin/cos pairs), the inputs that
# are scaled by their maximum, and the model target
parms_input = ['year', 'doy_sin', 'doy_cos', 'ut_sin', 'ut_cos', 'f10.7', 'ap3']
parms_inp_norm = ['year', 'f10.7', 'ap3']
parms_output = ['nel']

# years held out of the training set for validation and for testing
list_yr_val = [2010, 2015]
list_yr_test = [2007, 2012]

# location of Millstone ISR
isr_lat = 42.61
isr_lon = 288.51
# longitude distance from 360 deg expressed in hours (360 deg <-> 24 h);
# presumably the UT - SLT offset at the radar site
diff_utslt = abs(isr_lon - 360) / 360 * 24



In [None]:
#%% confine to specified GPU device [please adjust accordingly]

# first CUDA device when torch can see one, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device("cpu")


In [None]:
#%% prepare data
# read the ISR table stored in feather format
df_isr = pd.read_feather(os.path.join(path_work_fd, 'isr_hourly_ver1.lz4'))

# index the table by its UT timestamp; the time-related inputs are derived from it
df_isr.index = pd.to_datetime(df_isr['timestamp_ut'])
# prepare all the needed input parameters
df_isr['year'] = df_isr.index.year
df_isr['dayno'] = df_isr.index.dayofyear
df_isr['ut'] = df_isr.index.hour + df_isr.index.minute/60

# cyclic encoding of dayno and ut; sin/cos are shifted and scaled from [-1, 1] to [0, 1]
# NOTE(review): a fixed 365-day period is used, so dayno 366 of a leap year
# wraps slightly past one cycle - left unchanged, confirm this is intended
doy_sin = (np.sin(df_isr['dayno']/365 * 2*np.pi)+1)/2
doy_cos = (np.cos(df_isr['dayno']/365 * 2*np.pi)+1)/2
ut_sin = (np.sin(df_isr['ut']/24 * 2*np.pi)+1)/2
ut_cos = (np.cos(df_isr['ut']/24 * 2*np.pi)+1)/2

# work on a deep copy so that df_isr keeps the un-normalized values
df_isr_norm = df_isr.copy(deep=True)
df_isr_norm['doy_sin'] = doy_sin
df_isr_norm['doy_cos'] = doy_cos
df_isr_norm['ut_sin'] = ut_sin
df_isr_norm['ut_cos'] = ut_cos
# normalize year, F10.7 and Ap3 by their maximum over the whole table.
# The columns are replaced as a whole instead of using `.loc[:, cols] /= ...`:
# dividing the integer 'year' column in place relies on pandas silently
# upcasting it to float, which is deprecated in recent pandas versions.
# NOTE(review): the maxima are taken over all years, including the
# validation/test years - presumably this has to match the normalization used
# for the AutoKeras search, so it is left unchanged; confirm.
df_isr_norm[parms_inp_norm] = df_isr_norm[parms_inp_norm] / df_isr[parms_inp_norm].max()

# split training/validation/test by calendar year
years = df_isr_norm.index.year
is_val = years.isin(list_yr_val)
is_test = years.isin(list_yr_test)
df_isr_train_norm = df_isr_norm.loc[~(is_val | is_test)]
df_isr_val_norm = df_isr_norm.loc[is_val]
df_isr_test_norm = df_isr_norm.loc[is_test]

# input (X) and target (y) arrays of each subset as plain numpy arrays
# (no DataLoader is built here)
train_X = df_isr_train_norm.loc[:, parms_input].values
train_y = df_isr_train_norm.loc[:, parms_output].values

val_X = df_isr_val_norm.loc[:, parms_input].values
val_y = df_isr_val_norm.loc[:, parms_output].values

test_X = df_isr_test_norm.loc[:, parms_input].values
test_y = df_isr_test_norm.loc[:, parms_output].values


In [None]:
'''
    details of the AutoKeras run
    ak_sup/slnn_run1/
        best_model/
            assets/
            variables/
                variables.data-...
                variables.index
            fingerprint.pb
            keras_metadata.pb
            saved_model.pb
        
        trial_xxx/
            checkpoint
            checkpoint.data-...
            checkpoint.index
            pipeline
            trial.json


'''
mdname_ak = 'slnn_run1'

# collect the score and best step of every trial from its saved trial.json
# (sorted so that the order of the trials is reproducible)
path_trials = os.path.join(path_ak, mdname_ak, 'trial_*', 'trial.json')

list_trial = []
list_score = []
list_bstep = []

for trial in sorted(glob(path_trials)):
    with open(trial) as f:
        file_json = json.load(f)

    # a trial without a score (null in the json, presumably a failed or
    # unfinished trial) is skipped: a None entry would turn list_score into an
    # object array on which np.argmin cannot compare values
    if file_json.get('score') is None:
        continue

    list_trial.append(trial)
    list_score.append(file_json['score'])
    list_bstep.append(file_json['best_step'])

# fail here with a clear message instead of later inside np.argmin
if not list_score:
    raise RuntimeError(f'no scored trial found for pattern: {path_trials}')

# list_trial, list_score and list_bstep stay aligned entry by entry
list_score = np.array(list_score, dtype=float)
list_bstep = np.array(list_bstep)

    

In [None]:
# the best model is just the trial with best (minimum) score
idx_min = np.argmin(list_score)
# idx_min is a position in list_trial/list_score and not necessarily the id in
# the trial folder name, so the folder of the selected trial is reported too
print(idx_min, list_score[idx_min], list_bstep[idx_min],
      os.path.basename(os.path.dirname(list_trial[idx_min])))


In [None]:
# load the best_model exported by the AutoKeras run.
# ak.CUSTOM_OBJECTS is passed so that AutoKeras-specific layers are restored
# as their real classes, as recommended by AutoKeras for exported models.
md_best = load_model(os.path.join(path_ak, mdname_ak, 'best_model'),
                     custom_objects=ak.CUSTOM_OBJECTS)


In [None]:
# summary() prints the layer table itself and returns None, so it is called
# directly; wrapping it in print() would add a stray "None" line
md_best.summary()
# configuration of the optimizer stored with the loaded model
print(md_best.optimizer.get_config())


In [None]:
# md_best.load_weights(os.path.join(path_ak, mdname_ak, f'trial_{idx_min:03d}', 'checkpoint'))
