In [1]:
import numpy as np
import netCDF4
import h5netcdf
import xarray as xr
import sys
import tensorflow as tf
from tensorflow import keras
import pickle
from os.path import join
sys.path.append('/home/samuel.varga/projects/deep_learning/')
sys.path.append('/home/samuel.varga/python_packages/fronts/')
from deep_learning.training_utils import load_rotation, convert_to_tf, resize_neural_net
from deep_learning.deep_networks import create_U_net_classifier_2D
from custom_losses import brier_skill_score, critical_success_index, fractions_skill_score

2024-01-19 13:44:51.600231: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-01-19 13:44:56.261223: I tensorflow/core/util/port.cc:104] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
from keras import backend as K
import gc
from numba import cuda
outdir='/work/samuel.varga/projects/2to6_hr_severe_wx/DEEP_LEARNING/'

# Binary Classification

In [3]:
batch_size=2048
target_column='any_severe__36km'
rotation=0
X_train, y_train, mean, variance = load_rotation(join('/work/samuel.varga/data/2to6_hr_severe_wx/DEEP_LEARNING/',f'wofs_dl_severe__2to6hr__rot_{rotation}__training_data.nc'), rotation, target_column)
X_val, y_val = load_rotation(join('/work/samuel.varga/data/2to6_hr_severe_wx/DEEP_LEARNING/',f'wofs_dl_severe__2to6hr__rot_{rotation}__validation_data.nc'), rotation, target_column)
X_test, y_test = load_rotation(join('/work/samuel.varga/data/2to6_hr_severe_wx/DEEP_LEARNING/',f'wofs_dl_severe__2to6hr__testing_data.nc'), None, target_column)
print(np.shape(X_train))
print(np.shape(y_train))

#Convert to tf dataset
train_ds = convert_to_tf((X_train,np.expand_dims(y_train, axis=-1)), batch_size)
val_ds = convert_to_tf((X_val, np.expand_dims(y_val, axis=-1)), batch_size)
test_ds=convert_to_tf((X_test[None,:,:,:], np.expand_dims(y_test[None,:,:,:], axis=-1)))

Training path detected - loading scaling
(6350, 16, 16, 63)
(6350, 16, 16)


2024-01-19 13:46:23.271896: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 AVX512F AVX512_VNNI AVX512_BF16 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-01-19 13:46:23.540961: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1613] Created device /job:localhost/replica:0/task:0/device:GPU:0 with 79086 MB memory:  -> device: 0, name: NVIDIA A100 80GB PCIe, pci bus id: 0000:21:00.0, compute capability: 8.0


# Multiclass Classification

In [77]:
batch_size=2048
target_column=['any_severe__36km','wind_severe__36km','hail_severe__36km','tornado_severe__36km']
rotation=0
X_train, y_train, mean, variance = load_rotation(join('/work/samuel.varga/data/2to6_hr_severe_wx/DEEP_LEARNING/',f'wofs_dl_severe__2to6hr__rot_{rotation}__training_data.nc'), rotation, target_column)
X_val, y_val = load_rotation(join('/work/samuel.varga/data/2to6_hr_severe_wx/DEEP_LEARNING/',f'wofs_dl_severe__2to6hr__rot_{rotation}__validation_data.nc'), rotation, target_column)
X_test, y_test = load_rotation(join('/work/samuel.varga/data/2to6_hr_severe_wx/DEEP_LEARNING/',f'wofs_dl_severe__2to6hr__testing_data.nc'), None, target_column)

print(np.shape(X_train))
print(np.shape(y_train))
#Convert to tf dataset

train_ds = convert_to_tf((X_train, np.reshape(np.array([y_train[v] for v in y_train.keys()]), (6350,16,16,4))), batch_size)
val_ds = convert_to_tf((X_val, np.reshape(np.array([y_val[v] for v in y_val.keys()]), (1530,16,16,4))), batch_size)
test_ds=convert_to_tf((X_test[None,:,:,:], np.reshape(np.array([y_test[v] for v in y_test.keys()]), (3410,16,16,4))[None,:,:,:]))

Training path detected - loading scaling
(6350, 16, 16, 63)
()


In [17]:
#U-net architectural parameters
i=2
conv_filters=[i*n for n in [32,64,128,256]]
conv_size=[4,3,2,2]
max_pool=[2,2,2,2]
conv_layers =[{'filters': f, 'kernel_size': (s), 'pool_size': (p), 'strides': (p)} if p > 1
                   else {'filters': f, 'kernel_size': (s), 'pool_size': None, 'strides': None}
                   for s, f, p, in zip(conv_size, conv_filters, max_pool)]
args={'lrate':1e-4, 'loss':'binary_crossentropy','activation_conv':'relu','activation_out':'sigmoid',
     'p_spatial_dropout':0.15, 'filters':conv_filters, 'size':conv_size, 'pool':max_pool, 'shape':(16,16),
     'rotation':rotation,'target_column':target_column, 'i':i}
thresholds= [0.15]
metrics=[tf.keras.metrics.SparseCategoricalAccuracy(), tf.keras.metrics.MeanSquaredError(name='Brier score'),
    tf.keras.metrics.AUC(name='auc'), tf.keras.metrics.AUC(name='prc', curve='PR'),     
         tf.keras.metrics.FalseNegatives(thresholds=thresholds), tf.keras.metrics.FalsePositives(thresholds=thresholds),
         tf.keras.metrics.MeanAbsoluteError(), tf.keras.metrics.TrueNegatives(thresholds=thresholds),
         tf.keras.metrics.TruePositives(thresholds=thresholds)]

In [18]:
#Create U-net
u_net = create_U_net_classifier_2D(image_size=args['shape'], nchannels=63, n_classes=1, conv_layers=conv_layers, p_spatial_dropout=args['p_spatial_dropout'], metrics=metrics,
                               lrate=args['lrate'], loss=args['loss'], activation_conv=args['activation_conv'], activation_out='sigmoid',
                                      normalization=(mean, variance))

Model: "model_2"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Input (InputLayer)             [(None, 16, 16, 63)  0           []                               
                                ]                                                                 
                                                                                                  
 normalization_2 (Normalization  (None, 16, 16, 63)  0           ['Input[0][0]']                  
 )                                                                                                
                                                                                                  
 Encode_0_0 (Conv2D)            (None, 16, 16, 64)   64576       ['normalization_2[0][0]']        
                                                                                            

 De_Sp_Dr_2_1 (SpatialDropout2D  (None, 4, 4, 256)   0           ['Decode_2_1[0][0]']             
 )                                                                                                
                                                                                                  
 Decode_2_0 (Conv2D)            (None, 4, 4, 256)    262400      ['De_Sp_Dr_2_1[0][0]']           
                                                                                                  
 De_Sp_Dr_2_0 (SpatialDropout2D  (None, 4, 4, 256)   0           ['Decode_2_0[0][0]']             
 )                                                                                                
                                                                                                  
 Decode_Upsample_1 (UpSampling2  (None, 8, 8, 256)   0           ['De_Sp_Dr_2_0[0][0]']           
 D)                                                                                               
          

In [19]:
#Callbacks
early_stopping_cb =keras.callbacks.EarlyStopping(patience=25, restore_best_weights=True,
                                                min_delta=0.001, monitor='val_loss')
tensorboard_cb= keras.callbacks.TensorBoard(log_dir=join(outdir,'logs'), histogram_freq=1)

In [20]:
train_ds

<PrefetchDataset element_spec=(TensorSpec(shape=(None, 16, 16, 63), dtype=tf.float64, name=None), TensorSpec(shape=(None, 16, 16, 1), dtype=tf.int64, name=None))>

In [21]:
#Learn the model
history = u_net.fit(train_ds, epochs=100, verbose=True, validation_data = val_ds,
    callbacks=[early_stopping_cb, tensorboard_cb])

Epoch 1/100


2024-01-19 13:49:10.332325: E tensorflow/core/grappler/optimizers/meta_optimizer.cc:954] layout failed: INVALID_ARGUMENT: Size of values 0 does not match size of permutation 4 @ fanin shape inmodel_2/En_Sp_Dr_0_0/dropout/SelectV2-2-TransposeNHWCToNCHW-LayoutOptimizer


Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100


Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100


Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100


Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100


Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100


Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100


In [22]:
def save_results(args, u_net, history, train_ds, val_ds, test_ds):
    fbase = f"{args['target_column']}_Rot_{args['rotation']}_{args['shape'][0]}_{args['shape'][1]}_lrate_{args['lrate']}_spatial_dropout_{args['p_spatial_dropout']}_i_{args['i']}_filters_{args['filters']}_size_{args['size']}_pool_{args['pool']}_loss_{args['loss']}"
    results = {}
    results['args'] = args
    #results['predict_val'] = u_net.predict(val_ds)
    results['predict_val_eval'] = u_net.evaluate(val_ds)
    print(results['predict_val_eval'])

    if test_ds is not None:
        results['predict_test']=u_net.predict(test_ds)
        results['predict_test_eval']=u_net.evaluate(test_ds)

    #results['predict_train']=u_net.predict(train_ds)
    results['predict_train_eval']=u_net.evaluate(train_ds)
    results['history']=history.history
    results['fname_base']=fbase

    #Save results
    with open(join(join(outdir, 'results'), f'{fbase}_results.pkl'),'wb') as fp:
        pickle.dump(results, fp)

    #save model
    if False:
        u_net.save(join(join(outdir, 'models'), f'{fbase}_model'))
    #print(np.max(results['predict_test']))
    #print(np.mean(results['predict_test']))
    print(fbase)
    return None
save_results(args, u_net, history,  train_ds, val_ds, test_ds)

[0.13525579869747162, 0.9495965838432312, 0.03801508992910385, 0.9056456089019775, 0.3891534209251404, 5377.0, 37296.0, 0.07715847343206406, 334642.0, 14365.0]
any_severe__36km_Rot_0_16_16_lrate_0.0001_spatial_dropout_0.15_i_2_filters_[64, 128, 256, 512]_size_[4, 3, 2, 2]_pool_[2, 2, 2, 2]_loss_binary_crossentropy


In [None]:
#Hyperparam search
from itertools import product
for rotation, p_s, lrate, cs, i, loss in product([0,1], [0.01, 0.1], [0.01, 0.1], ([2,1,2,1],), (1,2), ['binary_crossentropy']):
    X_train, y_train, mean, variance = load_rotation(join('/work/samuel.varga/data/2to6_hr_severe_wx/DEEP_LEARNING/',f'wofs_dl_severe__2to6hr__rot_{rotation}__training_data'), rotation, target_column)
    X_val, y_val = load_rotation(join('/work/samuel.varga/data/2to6_hr_severe_wx/DEEP_LEARNING/',f'wofs_dl_severe__2to6hr__rot_{rotation}__validation_data'), rotation, target_column)
    print(np.shape(X_train))
    print(np.shape(y_train))
    print(f'Validation Base Rate:{np.mean(y_val)}')
    print(f'Validation non-event Rate:{1-np.mean(y_val)}')
    #Convert to tf dataset
    train_ds = convert_to_tf((X_train,np.expand_dims(y_train, axis=-1)), batch_size)
    val_ds = convert_to_tf((X_val,np.expand_dims(y_val, axis=-1)), batch_size)
    
    #U-net architectural parameters
    conv_filters=[n*i for n in [32,64,128,256]]
    max_pool=[2,2,2,2]
    conv_layers =[{'filters': f, 'kernel_size': (s), 'pool_size': (p), 'strides': (p)} if p > 1
                       else {'filters': f, 'kernel_size': (s), 'pool_size': None, 'strides': None}
                       for s, f, p, in zip(cs, conv_filters, max_pool)]
    args={'lrate':lrate, 'loss':loss,'activation_conv':'relu','activation_out':'sigmoid',
         'p_spatial_dropout':p_s, 'filters':conv_filters, 'size':cs, 'pool':max_pool, 'shape':(16,16),
         'rotation':rotation,'target_column':target_column, 'i':i}
##
    #create u-net
    #Pick better loss/activation functions
    u_net = create_U_net_classifier_2D(image_size=args['shape'], nchannels=63, n_classes=1, conv_layers=conv_layers, p_spatial_dropout=args['p_spatial_dropout'], metrics=metrics,
                               lrate=args['lrate'], loss=args['loss'], activation_conv=args['activation_conv'], activation_out=args['activation_out'],
                                      normalization=(mean, variance))
##
    early_stopping_cb =keras.callbacks.EarlyStopping(patience=30, restore_best_weights=True,
                                                    min_delta=0.001, monitor='val_loss')
    tensorboard_cb= keras.callbacks.TensorBoard(log_dir=join(outdir,f'logs/Rot_{rotation}_p_s_{p_s}_lrate_{lrate}_cs_{cs}_i_{i}_loss_{loss}'), histogram_freq=1)
    history = u_net.fit(train_ds, epochs=100, verbose=True, validation_data = val_ds,
        callbacks=[early_stopping_cb, tensorboard_cb])
    
    #save_results(args, u_net, history, train_ds, val_ds, test_ds)

In [7]:
K.clear_session()
gc.collect()
#gpu = cuda.get_current_device()
#gpu.reset()
cuda.close()