In [1]:
#
# NOTE: This machine learning program is for predicting TC formation, using
#       input dataset in the NETCDF format. The program treats different
#       2D input fields as different channels of an image. This specific
#       program requires a set of 12 2D-variables (12-channel image) and
#       consists of three stages
#       - Stage 1: reading NETCDF input and generating (X,y) data with a
#                  given image size, which are then saved by pickle;
#       - Stage 2: import the saved pickle (X,y) pair and build a CNN model
#                  with a given training/validation ratio, and then save
#                  the trained model under tcg_CNN.model.
#       - Stage 3: import the trained model from Stage 2, and make a list
#                  of predictions from normalized test data.
#
# INPUT: This Stage 3 script reads in the CNN trained model "tcg_CNN.model"
#        that is generated from Step 2.
#
#        Remarks: Note that the input data for this script must be the
#        same as in Step 1, with the standard 19 vertical
#        levels 1000, 975, 950, 925, 900, 850, 800, 750, 700, 650, 600,
#        550, 500, 450, 400, 350, 300, 250, 200. Also, all field vars must
#        be resized to cover an area of 30x30 around the TC center for the
#        positive data cases.
#        Similar to Step 2, this Step 3 also needs a large memory
#        allocation so that it can run properly.
#
# OUTPUT: A list of probability forecasts with the same dimension as the
#        number of input 12-channel images.
#
# HIST: - 01, Nov 22: Created by CK
#       - 02, Nov 22: Modified to optimize it
#       - 05, Jun 23: Rechecked and added F1 score function for a list of model
#       - 12, Jun 23: Customized for ResNet from the CNN functional model
#
# AUTH: Chanh Kieu (Indiana University, Bloomington. Email: ckieu@iu.edu)
#
#==========================================================================
import cv2
import tensorflow as tf
import os
from tqdm import tqdm
import netCDF4
import numpy as np

2023-06-13 11:28:04.865740: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.


In [2]:
#
# Function to return input data as a numpy array
#
def prepare(filepath):
    """Read one NETCDF sample and return it as a normalized 12-channel image.

    Each channel is a 2D field taken from the file (one vertical level of a
    3D variable, or a surface variable), transposed so that the array is
    indexed [x, y], resized to IMG_SIZE x IMG_SIZE with cv2.resize, and then
    divided by the largest absolute value found in that channel.

    Parameters
    ----------
    filepath : str
        Path of the NETCDF file to read. It must contain the variables
        absvprs, rhprs, pressfc, tmpprs, tmpsfc, ugrdprs, vgrdprs, hgtprs
        and vvelprs.

    Returns
    -------
    numpy.ndarray
        Array of shape (1, IMG_SIZE, IMG_SIZE, 12), i.e. a batch holding a
        single image, ready to be passed to model.predict.

    Raises
    ------
    KeyError
        If one of the required variables is missing from the file.
    """
    IMG_SIZE = 30
    #
    # (variable name, vertical-level index) of every output channel, listed in
    # channel order. A level of None marks a 2D surface field.
    #
    # NOTE(review): the pressure labels below are the original author's. They
    # are not consistent with each other (index 3 is labelled both 900 mb and
    # 850 mb), and if the 19-level list in the file header is 0-indexed then
    # indices 1, 3, 15, 17 would be 975, 925, 350, 250 mb. The indices are
    # deliberately left unchanged because they presumably have to match what
    # was used to build the training data - confirm against Stage 1.
    #
    channel_sources = [
        ('absvprs', 1),      # abs vort at 950 mb
        ('rhprs', 7),        # RH at 750 mb
        ('pressfc', None),   # surface pressure
        ('tmpprs', 15),      # temperature at 400 mb
        ('tmpsfc', None),    # surface temperature
        ('ugrdprs', 3),      # u-wind at 900 mb
        ('ugrdprs', 17),     # u-wind at 300 mb
        ('vgrdprs', 3),      # v-wind at 900 mb
        ('vgrdprs', 17),     # v-wind at 300 mb
        ('hgtprs', 3),       # geopotential at 850 mb
        ('vvelprs', 3),      # w-wind at 900 mb
        ('vvelprs', 17),     # w-wind at 300 mb
    ]
    number_channels = len(channel_sources)
    fields = []
    # The context manager guarantees the file is closed, even when a variable
    # is missing; this function is called once per image so a leaked handle
    # per call adds up quickly.
    with netCDF4.Dataset(filepath) as f:
        for name, level in channel_sources:
            variable = f.variables[name]
            # Read the whole horizontal slab in one request instead of one
            # element at a time.
            if level is None:
                slab = variable[:, :]
            else:
                slab = variable[level, :, :]
            # Masked (missing) points become NaN, which is what the
            # element-wise float assignment produced.
            slab = np.ma.filled(np.ma.asarray(slab).astype(np.float64), np.nan)
            # File layout is [y, x]; the image is stored as [x, y].
            fields.append(slab.T)
    a2 = np.stack(fields, axis=-1)
    new_array = cv2.resize(a2, (IMG_SIZE, IMG_SIZE))
    #
    # normalize the data: scale each channel by its largest absolute value
    #
    for var in range(number_channels):
        scale = np.abs(new_array[:,:,var]).max()
        # An all-zero channel has nothing to scale; dividing would turn the
        # whole channel into NaN.
        if scale != 0:
            new_array[:,:,var] = new_array[:,:,var]/scale
    out_array = np.reshape(new_array, (-1, IMG_SIZE, IMG_SIZE, number_channels))
    return out_array

In [10]:
#
# build an F1-score function for later use
#
def F1_score(y_true,y_prediction,true_class,true_threshold):
    """Compute F1, recall and precision of thresholded predictions.

    A sample counts as a true positive case when its label equals
    true_class, and as a predicted positive when its score is greater than
    or equal to true_threshold.

    Parameters
    ----------
    y_true : sequence
        True labels, one per sample.
    y_prediction : sequence
        Predicted scores, one per sample, in the same order as y_true.
    true_class : object
        Label value that marks the positive class.
    true_threshold : float
        Minimum score for a prediction to count as positive.

    Returns
    -------
    tuple of float, or None
        (F1, Recall, Precision). Any score whose denominator is zero (for
        example recall when there are no positive samples) is reported as
        0.0. None is returned, after printing a message, when the two inputs
        have different lengths.
    """
    T = len(y_true)
    if len(y_prediction) != T:
        print("Prediction and true label arrays have different size. Stop")
        return None
    TP = 0
    FN = 0
    FP = 0
    for truth, score in zip(y_true, y_prediction):
        predicted_positive = score >= true_threshold
        if truth == true_class:
            if predicted_positive:
                TP += 1
            else:
                FN += 1
        elif predicted_positive:
            FP += 1
    # Every ratio is guarded: an empty input, a test set without positive
    # samples, or a model that never predicts the positive class would
    # otherwise raise ZeroDivisionError.
    F1 = 2.*TP/(2.*TP + FP + FN) if (TP + FP + FN) > 0 else 0.
    Recall = TP/float(TP+FN) if (TP + FN) > 0 else 0.
    Precision = TP/float(TP+FP) if (TP + FP) > 0 else 0.
    return F1, Recall, Precision

In [None]:
#
# loop thru all best-saved CNN trained models and make a prediction. Note that prediction is applied one by one instead 
# of a batch input. 
#
DATADIR = "/N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing"
bestmodels = ["tcg_ResNet22.model","tcg_ResNet40.model"]
CATEGORIES = ["neg", "pos"]
# Largest number of files evaluated in each category folder. The cap is
# applied per category: a single counter shared by both folders would let a
# large "neg" folder use up the whole budget and leave "pos" with one sample,
# which makes the F1 score meaningless.
MAX_PER_CATEGORY = 1001
F1_performance = []
for bestmodel in bestmodels:
    model = tf.keras.models.load_model(bestmodel)
    prediction_total = 0     # files successfully evaluated for this model
    prediction_yes = 0       # files predicted as TC formation
    prediction_history = []  # predicted probability of each evaluated file
    truth_history = []       # matching true label (1 = pos, 0 = neg)
    for category in CATEGORIES:
        path = os.path.join(DATADIR,category)
        truth = 1 if category == "pos" else 0
        category_total = 0
        for img in tqdm(os.listdir(path)):
            img_dir = os.path.join(path, img)
            try:
                print('Processing image:', img_dir)
                prediction = model.predict([prepare(img_dir)])
                probability = prediction[0][0]
                label = round(probability)
                print("TC formation prediction is",prediction,label,CATEGORIES[label])
            except Exception as e:
                # Best effort: one unreadable file must not stop the whole
                # evaluation, but it must not disappear silently either.
                print('Skipping image', img_dir, 'because of error:', repr(e))
                continue
            # Both histories are updated together, only after the prediction
            # succeeded, so they always stay aligned.
            prediction_history.append(probability)
            truth_history.append(truth)
            if label == 1:
                prediction_yes += 1
            prediction_total += 1
            category_total += 1
            if category_total >= MAX_PER_CATEGORY:
                break
    #
    # Compute F1 score for each best model now
    #
    print(prediction_history)
    F1_performance.append([bestmodel,F1_score(truth_history,prediction_history,1,0.5)])

  0%|                                                                                                                                                                  | 0/54 [00:00<?, ?it/s]

Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210717_00_00_20.5_183.5.nc


  2%|██▊                                                                                                                                                       | 1/54 [00:04<03:54,  4.42s/it]

TC formation prediction is [[0.00014702]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210728_06_00_11.7_307.5.nc


  4%|█████▋                                                                                                                                                    | 2/54 [00:08<03:27,  4.00s/it]

TC formation prediction is [[0.00015427]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210830_00_00_58.2_348.5.nc


  6%|████████▌                                                                                                                                                 | 3/54 [00:12<03:22,  3.97s/it]

TC formation prediction is [[0.00427262]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210705_00_00_18.2_175.3.nc


  7%|███████████▍                                                                                                                                              | 4/54 [00:15<03:15,  3.90s/it]

TC formation prediction is [[0.00034354]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210805_00_00_31.6_197.4.nc


  9%|██████████████▎                                                                                                                                           | 5/54 [00:19<03:07,  3.83s/it]

TC formation prediction is [[0.00107187]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210519_00_00_-5.2_266.3.nc


 11%|█████████████████                                                                                                                                         | 6/54 [00:23<03:00,  3.77s/it]

TC formation prediction is [[0.00362061]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210612_00_00_18.2_160.0.nc


 13%|███████████████████▉                                                                                                                                      | 7/54 [00:27<02:57,  3.78s/it]

TC formation prediction is [[0.00344648]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210728_00_00_32.6_167.8.nc


 15%|██████████████████████▊                                                                                                                                   | 8/54 [00:30<02:52,  3.76s/it]

TC formation prediction is [[0.00233121]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210914_00_00_22.7_341.4.nc


 17%|█████████████████████████▋                                                                                                                                | 9/54 [00:34<02:47,  3.72s/it]

TC formation prediction is [[8.01071e-05]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210904_12_00_12.6_310.7.nc


 19%|████████████████████████████▎                                                                                                                            | 10/54 [00:37<02:41,  3.68s/it]

TC formation prediction is [[0.07584675]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210916_18_00_62.0_340.5.nc


 20%|███████████████████████████████▏                                                                                                                         | 11/54 [00:41<02:40,  3.72s/it]

TC formation prediction is [[0.30751792]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210922_18_00_11.8_197.5.nc


 22%|██████████████████████████████████                                                                                                                       | 12/54 [00:45<02:34,  3.68s/it]

TC formation prediction is [[0.00173608]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210621_00_00_11.2_197.4.nc


 24%|████████████████████████████████████▊                                                                                                                    | 13/54 [00:48<02:29,  3.65s/it]

TC formation prediction is [[1.2833103e-05]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210806_12_00_15.0_308.0.nc


 26%|███████████████████████████████████████▋                                                                                                                 | 14/54 [00:52<02:29,  3.73s/it]

TC formation prediction is [[0.00347058]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210929_00_00_57.0_338.0.nc


 28%|██████████████████████████████████████████▌                                                                                                              | 15/54 [00:56<02:27,  3.77s/it]

TC formation prediction is [[0.00882059]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210508_12_00_12.7_304.4.nc


 30%|█████████████████████████████████████████████▎                                                                                                           | 16/54 [01:00<02:21,  3.73s/it]

TC formation prediction is [[0.01316509]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210906_06_00_14.0_188.3.nc


 31%|████████████████████████████████████████████████▏                                                                                                        | 17/54 [01:04<02:18,  3.74s/it]

TC formation prediction is [[0.07262429]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210923_12_00_62.7_336.6.nc


 33%|███████████████████████████████████████████████████                                                                                                      | 18/54 [01:07<02:14,  3.75s/it]

TC formation prediction is [[0.7846691]] 1 pos
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210613_12_00_31.4_330.8.nc


 35%|█████████████████████████████████████████████████████▊                                                                                                   | 19/54 [01:11<02:09,  3.69s/it]

TC formation prediction is [[0.7613915]] 1 pos
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210724_00_00_25.0_199.5.nc


 37%|████████████████████████████████████████████████████████▋                                                                                                | 20/54 [01:15<02:05,  3.70s/it]

TC formation prediction is [[0.00018952]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210831_00_00_11.2_335.7.nc


 39%|███████████████████████████████████████████████████████████▌                                                                                             | 21/54 [01:18<02:02,  3.73s/it]

TC formation prediction is [[0.56215626]] 1 pos
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210922_18_00_48.4_147.8.nc


 41%|██████████████████████████████████████████████████████████████▎                                                                                          | 22/54 [01:22<01:58,  3.69s/it]

TC formation prediction is [[0.0772523]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210603_18_00_10.8_304.4.nc


 43%|█████████████████████████████████████████████████████████████████▏                                                                                       | 23/54 [01:26<01:54,  3.69s/it]

TC formation prediction is [[0.20249963]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210807_18_00_46.6_346.1.nc


 44%|████████████████████████████████████████████████████████████████████                                                                                     | 24/54 [01:30<01:52,  3.74s/it]

TC formation prediction is [[0.00015181]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210626_12_00_28.2_345.1.nc


 46%|██████████████████████████████████████████████████████████████████████▊                                                                                  | 25/54 [01:33<01:49,  3.78s/it]

TC formation prediction is [[0.01974428]] 0 neg
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210823_00_00_11.6_318.5.nc


 48%|█████████████████████████████████████████████████████████████████████████▋                                                                               | 26/54 [01:37<01:45,  3.76s/it]

TC formation prediction is [[0.7775186]] 1 pos
Processing image: /N/slate/ckieu/deep-learning/data/ncep_binary_30x30_00h/testing/neg/20210616_06_00_13.3_314.6.nc


In [None]:
#
# Print out the F1 performance of all models
#
print("========================================")
print("Summary of the ResNet model performance:")
# Iterate over the results that were actually produced rather than over
# bestmodels: if the evaluation cell was interrupted, F1_performance is shorter
# than bestmodels and indexing by position would raise IndexError.
for performance in F1_performance:
    print("Model, F1, Recall, Presision are:", performance)
#print("Truth is:", truth_history)
#print("Predi is:", np.round(prediction_history))