# NetCDF File Calculations and Manipulations

### Open NetCDF file, extract values, create new NetCDF file, print contents of said file

In [8]:
import xarray as xr

track_file = "trajectories.CHEY.VR28.NATL.REF.CAM5.4CLM5.0.dtime900.nc"

# Open the track file inside a context manager so the handle is released even
# if printing or reading one of the variables raises. The extracted arrays are
# used after the file is closed, exactly as before.
with xr.open_dataset(track_file) as DS:
    print(DS)                      # summary of dimensions, variables and attributes
    lons = DS.clon.values          # (stormID, time) arrays, per the printed summary
    lats = DS.clat.values
    max_w = DS.vmax_2D.values
    time = DS.time_str.values

<xarray.Dataset>
Dimensions:   (stormID: 1526, time: 131)
Dimensions without coordinates: stormID, time
Data variables:
    clon      (stormID, time) float32 ...
    clat      (stormID, time) float32 ...
    min_p     (stormID, time) float32 ...
    vmax_2D   (stormID, time) float32 ...
    time_str  (stormID, time) int32 ...
    seasons   (stormID) float64 ...
Attributes:
    description:    Results from tempestextremes StitchNodes reformatted to N...
    author:         A. M. Stansfield
    creation_date:  2021-05-07


In [10]:
# All four variables share the same two dimensions.
track_dims = ["stormID", "time"]

# Variables copied from the arrays extracted from the source track file.
data_vars = {
    "clon": (track_dims, lons),
    "clat": (track_dims, lats),
    "vmax_2D": (track_dims, max_w),
    "time_str": (track_dims, time),
}

# Global attributes describing the new file.
attrs = {
    "description": "Simple NetCDF file with updated longitude, latitude, maximum wind speed, and times for all storms that fit a certain criteria",
    "author": "Justin Willson",
    "creation_date": "2021-07-08",
}

new_ds = xr.Dataset(data_vars=data_vars, attrs=attrs)

print(new_ds)

# Writing to disk is intentionally left disabled; uncomment to create test.nc.
#new_ds.to_netcdf("test.nc")

<xarray.Dataset>
Dimensions:   (stormID: 1526, time: 131)
Dimensions without coordinates: stormID, time
Data variables:
    clon      (stormID, time) float32 111.72689 111.72689 110.57981 ... nan nan
    clat      (stormID, time) float32 5.70914 5.70914 5.430289 ... nan nan nan
    vmax_2D   (stormID, time) float32 13.81685 15.68618 21.85947 ... nan nan nan
    time_str  (stormID, time) int32 1984122618 1984122700 ... -2147483647
Attributes:
    description:    Simple NetCDF file with updated longitude, latitude, maxi...
    author:         Justin Willson
    creation_date:  2021-07-07


In [None]:
# Re-open the written file to inspect its contents. The context manager closes
# the file once the summary has been printed (the original never closed it).
# NOTE: "test.nc" must already exist on disk for this cell to run.
with xr.open_dataset("test.nc") as test_ds:
    print(test_ds)

### Program to find total storms, total data points, and 95th-percentile wind speeds in a TC track file

In [12]:
import xarray as xr
import numpy as np
import statistics

track_file1 = "IBTrACS.NA.v04r00.19852014.storms.nc"
#track_file1 = "100km_analysis/IBTrACS.NA.v04r00.landfalling.storms.100km.buffer.pts.nc"
track_file2 = "300km_analysis/RCP85.COMB.NA.landfalling.storms.300km.buffer.pts.nc"

# Read the wind-speed array for the file being analysed (track_file2). The
# context manager closes the file even if reading the variable fails.
with xr.open_dataset(track_file2) as DS:
    max_w = DS.vmax_2D.values

nstorms, ntimes = np.shape(max_w)  #get number of storms and times

# Count the storms that have at least one real data point. A point is treated
# as real when its wind speed is > 0, the same rule the rest of this cell uses
# to "remove 0 entries". For non-negative, zero-padded rows this matches the
# previous `sum(row) == 0` test, but it is vectorised and does not rely on a
# floating-point sum being exactly zero (and ignores NaN padding).
has_data = np.any(max_w > 0, axis=1)
new_nstorms = int(np.count_nonzero(has_data))
print("nstorms = " + str(new_nstorms))

# Collect every valid wind speed across all storms into one 1-D array.
# The boolean mask drops both the 0 padding and any NaN entries (NaN > 0 is
# False). The original loop tested `np.isnan == True`, which compares the
# function object itself to True and therefore never skipped anything.
total_array = max_w[max_w > 0]
# Optional extras kept from the original for reference:
#median_int_all = np.median(total_array)    #calculate median
#median_int_all = np.median(total_array)* 0.51444444444444  #calculate median (if IBTrACS)
print("Num data points = " + str(len(total_array)))
#print("median int (all) = " + str(median_int_all))
percentile95_int_all = np.percentile(total_array, 95)
print("95th percentile int (all) = " + str(percentile95_int_all))

# Maximum wind speed of each storm's points in the buffer region.
# Non-positive and NaN entries are replaced by 0 before taking the row maximum,
# so the result does not depend on where NaN values sit in a row (the builtin
# max() returns NaN whenever the first element of the row is NaN).
valid_w = np.where(max_w > 0, max_w, 0)
max_array = valid_w.max(axis=1)
max_array = max_array[max_array > 0]      #remove storms with no valid points
#max_array = max_array * 0.51444444444444  #convert to m/s if file is IBTrACS
#median_int_max = np.median(max_array)     #calculate median
#print("median int (max) = " + str(median_int_max))
percentile95_int_max = np.percentile(max_array, 95)
print("95th percentile int (max) = " + str(percentile95_int_max))

# Average wind speed of each storm's points in the buffer region.
# Storms with no valid (> 0) points are skipped outright instead of being
# recorded as 0 and filtered out afterwards; the resulting values are the same.
avg_list = []
for storm in max_w:
    valid_pts = storm[storm > 0]          #remove 0 entries
    if len(valid_pts) > 0:
        avg_list.append(statistics.mean(valid_pts))

avg_array = np.asarray(avg_list)
#avg_array = avg_array * 0.51444444444444  #convert to m/s if file is IBTrACS
#median_int_avg = np.median(avg_array)     #calculate median
#print("median int (avg) = " + str(median_int_avg))
percentile95_int_avg = np.percentile(avg_array, 95)
print("95th percentile int (avg) = " + str(percentile95_int_avg))

nstorms = 210
Num data points = 1259
95th percentile int (all) = 61.614412307739215
95th percentile int (max) = 68.36997909545893
95th percentile int (avg) = 50.92854245200988


### Program to calculate the percentage of storms above 50 m/s using a function

In [5]:
import xarray as xr
import numpy as np
from tc_analysis.tc_functions import *

# Available track files, grouped by buffer distance. Only track_file13 and
# track_file14 are read in this cell.
track_file1 = "100km_analysis/IBTrACS.NA.v04r00.landfalling.storms.100km.buffer.pts.nc"
track_file2 = "100km_analysis/REF.COMB.NA.landfalling.storms.100km.buffer.pts.nc"
track_file3 = "100km_analysis/RCP45.COMB.NA.landfalling.storms.100km.buffer.pts.nc"
track_file4 = "100km_analysis/RCP85.COMB.NA.landfalling.storms.100km.buffer.pts.nc"

track_file5 = "200km_analysis/IBTrACS.NA.v04r00.landfalling.storms.200km.buffer.pts.nc"
track_file6 = "200km_analysis/REF.COMB.NA.landfalling.storms.200km.buffer.pts.nc"
track_file7 = "200km_analysis/RCP45.COMB.NA.landfalling.storms.200km.buffer.pts.nc"
track_file8 = "200km_analysis/RCP85.COMB.NA.landfalling.storms.200km.buffer.pts.nc"

track_file9 = "300km_analysis/IBTrACS.NA.v04r00.landfalling.storms.300km.buffer.pts.nc"
track_file10 = "300km_analysis/REF.COMB.NA.landfalling.storms.300km.buffer.pts.nc"
track_file11 = "300km_analysis/RCP45.COMB.NA.landfalling.storms.300km.buffer.pts.nc"
track_file12 = "300km_analysis/RCP85.COMB.NA.landfalling.storms.300km.buffer.pts.nc"

track_file13 = "IBTrACS.NA.v04r00.19852014.storms.nc"
track_file14 = "REF003.NA.storms.nc"

IBTRACS_TO_MS = 0.51444444444444   # factor applied to IBTrACS wind speeds to get m/s
THRESHOLD_MS = 50.0                # intensity threshold passed to intensity_prob

# Context managers close each file even if reading the variable raises.
with xr.open_dataset(track_file13) as DS:
    max_w1 = DS.vmax_2D.values * IBTRACS_TO_MS

with xr.open_dataset(track_file14) as DS:
    max_w = DS.vmax_2D.values

# intensity_prob comes from tc_analysis.tc_functions; its result is scaled by
# 100 here to express it as a percentage.
percent1 = intensity_prob(max_w1, THRESHOLD_MS, direction='greater')*100
percent2 = intensity_prob(max_w, THRESHOLD_MS, direction='greater')*100
print(percent1)
print(percent2)

0.4895608351331893
2.601794016813037


### Program to calculate the translation speed of the points in a specified file

In [2]:
import xarray as xr
import numpy as np
import math
from tc_analysis.tc_functions import *

track_file1 = "100km_analysis/IBTrACS.NA.v04r00.landfalling.storms.100km.buffer.pts.nc"
track_file2 = "100km_analysis/REF.COMB.NA.landfalling.storms.100km.buffer.pts.nc"
track_file3 = "100km_analysis/RCP45.COMB.NA.landfalling.storms.100km.buffer.pts.nc"
track_file4 = "100km_analysis/RCP85.COMB.NA.landfalling.storms.100km.buffer.pts.nc"

track_file5 = "200km_analysis/IBTrACS.NA.v04r00.landfalling.storms.200km.buffer.pts.nc"
track_file6 = "200km_analysis/REF.COMB.NA.landfalling.storms.200km.buffer.pts.nc"
track_file7 = "200km_analysis/RCP45.COMB.NA.landfalling.storms.200km.buffer.pts.nc"
track_file8 = "200km_analysis/RCP85.COMB.NA.landfalling.storms.200km.buffer.pts.nc"

track_file9 = "300km_analysis/IBTrACS.NA.v04r00.landfalling.storms.300km.buffer.pts.nc"
track_file10 = "300km_analysis/REF.COMB.NA.landfalling.storms.300km.buffer.pts.nc"
track_file11 = "300km_analysis/RCP45.COMB.NA.landfalling.storms.300km.buffer.pts.nc"
track_file12 = "300km_analysis/RCP85.COMB.NA.landfalling.storms.300km.buffer.pts.nc"

file_list = [[track_file1, track_file2, track_file3, track_file4],     #create list of track files
             [track_file5, track_file6, track_file7, track_file8],
             [track_file9, track_file10, track_file11, track_file12]]

file_array = np.asarray(file_list)               #convert list to array for better indexing

file_groups = np.shape(file_array)[0]            #calculate number of file groups(buffers) and files per group
files = np.shape(file_array)[1]

ts_list = [[] for _ in range(file_groups)]       #one list of per-file distributions per buffer group

# Object array holding one translation-speed list per file. It is filled cell
# by cell because the lists have different lengths; calling np.asarray on such
# a ragged nested list raises ValueError on NumPy >= 1.24.
ts_array = np.empty((file_groups, files), dtype=object)

expected_diff = np.timedelta64(6, 'h')           #set expected difference to 6 hours
hours = 6.0                                      #numerical value of expected time diff between pts


def _to_datetime64(raw_time, is_ibtracs):
    """Convert one raw ``time_str`` entry to ``np.datetime64``.

    IBTrACS entries are byte strings that are decoded and parsed directly.
    Entries from the other files are integers whose decimal digits are sliced
    as YYYYMMDDHH and parsed with minutes and seconds set to zero.
    """
    if is_ibtracs:
        return np.datetime64(raw_time.decode('UTF-8'))
    digits = str(raw_time)
    stamp = digits[0:4] + '-' + digits[4:6] + '-' + digits[6:8] + ' ' + digits[8:10] + ':00:00'
    return np.datetime64(stamp)


for i in range(file_groups):                     #iterate through every track file buffer group
    for j in range(files):                       #iterate through every track file in group
        track_path = file_array[i, j]
        # Decide the time format once per file. A single if/else replaces the two
        # independent `count(...) == 1` / `== 0` tests, which could leave
        # time1/time2 holding values from a previous iteration.
        is_ibtracs = 'IBTrACS' in track_path
        ts_dist = []                             #initialize ts distribution list

        with xr.open_dataset(track_path) as DS:  #open file and extract arrays; closed on exit
            lons = DS.clon.values
            lats = DS.clat.values
            time = DS.time_str.values

        nstorms = np.shape(lons)[0]              #get number of storms

        for k in range(nstorms):
            lon_mask = lons[k, :] < 0            #only keep legitimate TC track points
            lon_array = lons[k, lon_mask]
            lat_array = lats[k, lats[k, :] > 0]

            if len(lon_array) != len(lat_array): #raise error if not all points have both a lon and lat coord
                # Report the storm index k (the original reported the file-group index i).
                raise ValueError('lat/lon lengths are not equal at storm ' + str(k))

            # Select times with the same mask as the longitudes so each kept point
            # is paired with its own timestamp. The original indexed the first
            # len(lon_array) columns, which is equivalent only when the valid
            # points come first in the row.
            time_array = time[k, lon_mask]

            # Consecutive pairs only; the last point has no successor.
            for m in range(len(lon_array) - 1):
                time1 = _to_datetime64(time_array[m], is_ibtracs)
                time2 = _to_datetime64(time_array[m + 1], is_ibtracs)

                time_diff = np.timedelta64(time2-time1, 'h')  #get difference in times

                if time_diff == expected_diff:                #all time differences should equal expected difference
                    track_pt1 = (lon_array[m], lat_array[m])
                    track_pt2 = (lon_array[m+1], lat_array[m+1])
                    # get_distance comes from tc_analysis.tc_functions; per the original
                    # comment the quotient is a translation speed in km/h (TODO confirm units).
                    ts = get_distance(track_pt1, track_pt2) / hours
                    ts_dist.append(ts)

        ts_list[i].append(ts_dist)     #append distribution into the group's list
        ts_array[i, j] = ts_dist

for i in range(file_groups):           #print medians
    for j in range(files):
        print(file_array[i,j] + ": " + str(np.median(ts_array[i,j]).round(2)))

100km_analysis/IBTrACS.NA.v04r00.landfalling.storms.100km.buffer.pts.nc: 13.07
100km_analysis/REF.COMB.NA.landfalling.storms.100km.buffer.pts.nc: 13.31
100km_analysis/RCP45.COMB.NA.landfalling.storms.100km.buffer.pts.nc: 13.5
100km_analysis/RCP85.COMB.NA.landfalling.storms.100km.buffer.pts.nc: 13.49
200km_analysis/IBTrACS.NA.v04r00.landfalling.storms.200km.buffer.pts.nc: 14.87
200km_analysis/REF.COMB.NA.landfalling.storms.200km.buffer.pts.nc: 14.68
200km_analysis/RCP45.COMB.NA.landfalling.storms.200km.buffer.pts.nc: 15.59
200km_analysis/RCP85.COMB.NA.landfalling.storms.200km.buffer.pts.nc: 15.25
300km_analysis/IBTrACS.NA.v04r00.landfalling.storms.300km.buffer.pts.nc: 15.65
300km_analysis/REF.COMB.NA.landfalling.storms.300km.buffer.pts.nc: 15.88
300km_analysis/RCP45.COMB.NA.landfalling.storms.300km.buffer.pts.nc: 16.8
300km_analysis/RCP85.COMB.NA.landfalling.storms.300km.buffer.pts.nc: 16.81


### Program to combine 3 NetCDF files into a new file

In [7]:
import xarray as xr
import numpy as np

track_file1 = "200km_analysis/RCP85.NA.landfalling.storms.200km.buffer.pts.nc"
track_file2 = "200km_analysis/RCP85002.NA.landfalling.storms.200km.buffer.pts.nc"
track_file3 = "200km_analysis/RCP85003.NA.landfalling.storms.200km.buffer.pts.nc"

# Read the four track variables from every input file. One loop replaces three
# copy-pasted load sections, and the context manager closes each file even if
# a read fails.
tracks = []
for track_path in (track_file1, track_file2, track_file3):
    with xr.open_dataset(track_path) as ds:
        tracks.append((ds.clon.values, ds.clat.values,
                       ds.vmax_2D.values, ds.time_str.values))

nstorms = [np.shape(file_lons)[0] for file_lons, _, _, _ in tracks]
ntimes = [np.shape(file_lons)[1] for file_lons, _, _, _ in tracks]

# Combined arrays: storms from all files stacked along the first axis, with the
# time axis sized for the longest file. Shorter rows keep their zero padding.
comb_shape = (sum(nstorms), max(ntimes))
comb_lons = np.zeros(comb_shape)
comb_lats = np.zeros(comb_shape)
comb_maxw = np.zeros(comb_shape)
comb_time = np.zeros(comb_shape, dtype = np.int32)

# Copy each file's block into place with slice assignment instead of
# element-by-element Python loops; n is the first free row in the combined arrays.
n = 0
for (file_lons, file_lats, file_maxw, file_time), ns, nt in zip(tracks, nstorms, ntimes):
    comb_lons[n:n+ns, :nt] = file_lons
    comb_lats[n:n+ns, :nt] = file_lats
    comb_maxw[n:n+ns, :nt] = file_maxw
    comb_time[n:n+ns, :nt] = file_time
    n = n + ns

Make a new NetCDF file from results

In [8]:
todays_date = "2021-07-28"

# Both dimensions are shared by every combined variable.
comb_dims = ["stormID", "time"]

# Combined arrays produced by the previous cell.
comb_vars = {
    "clon": (comb_dims, comb_lons),
    "clat": (comb_dims, comb_lats),
    "vmax_2D": (comb_dims, comb_maxw),
    "time_str": (comb_dims, comb_time),
}

# Global attributes recorded in the output file.
comb_attrs = {
    "description": "NetCDF file that combines the buffer points from all 3 RCP85 ensembles.",
    "author": "Justin Willson",
    "creation_date": todays_date,
}

new_ds = xr.Dataset(data_vars=comb_vars, attrs=comb_attrs)

print(new_ds)

# Write the combined dataset to disk.
new_ds.to_netcdf("200km_analysis/RCP85.COMB.NA.landfalling.storms.200km.buffer.pts.nc")

<xarray.Dataset>
Dimensions:   (stormID: 191, time: 107)
Dimensions without coordinates: stormID, time
Data variables:
    clon      (stormID, time) float64 -84.19 -80.92 -79.49 ... 0.0 0.0 0.0
    clat      (stormID, time) float64 25.62 31.24 32.61 32.94 ... 0.0 0.0 0.0
    vmax_2D   (stormID, time) float64 29.81 27.25 30.81 24.43 ... 0.0 0.0 0.0
    time_str  (stormID, time) int32 2070020800 2070020812 2070020818 ... 0 0 0
Attributes:
    description:    NetCDF file that combines the buffer points from all 3 RC...
    author:         Justin Willson
    creation_date:  2021-07-28
