# REF Simulation Analysis

This notebook contains the code used to analyze a typical NetCDF track-file dataset. The REF simulation is a historical simulation covering 1985-2014. Several other datasets were analyzed in the same manner, so their Jupyter notebooks are not included in this repository: IBTrACS (historical observations, 1985-2014), RCP4.5 (a global warming scenario, 2070-2100), and RCP8.5 (a stronger global warming scenario, 2070-2100).

### Information for NetCDF File Creation

In [8]:
# Metadata shared by the NetCDF-writing cells in this notebook.
# Date string stored in each output file's `creation_date` attribute.
todays_date = "2021-09-06"
# Buffer-distance label concatenated into the `description` attributes; keep it
# in sync with the buffer shapefile and output paths used by the analysis cells.
distance = "200 km "
# Name of the source track file quoted at the end of each `description` attribute.
original_file = "trajectories.CHEY.VR28.NATL.REF.CAM5.4CLM5.0.dtime900.003.nc"

### Program to get information about a NetCDF file using Xarray

In [2]:
import numpy as np
import xarray as xr

# Open the raw track file so its contents can be inspected interactively.
track_file = "trajectories.CHEY.VR28.NATL.REF.CAM5.4CLM5.0.dtime900.nc" 
DS = xr.open_dataset(track_file) 

# Uncomment to print a summary of the whole dataset.
#print(DS)

# Uncomment to print an individual variable of the dataset.
#print(DS.clon)
#print(DS.clat)
#print(DS.min_p)
#print(DS.vmax_2D)
#print(DS.time_str)
#print(DS.seasons)

# Uncomment to spot-check the time stamps of individual storms (indexed as
# [storm, time step]) and the `seasons` values.
#time = DS.time_str.values
#print(time[1,0])
#print(time[1,1])
#print(time[1525,0])
#year = DS.seasons.values
#print(year)

# Release the file handle once the inspection is done.
DS.close()

# Storm counts and year ranges noted for each REF track file:
#.nc total storms: 1526    // years: 1984-2014 (discard first storm for 1985)
#002.nc total storms: 1536 // years: 1985-2014
#003.nc total storms: 1444 // years: 1985-2014

### Program to filter out North Atlantic TCs from Global Dataset

In [None]:
# "trajectories.CHEY.VR28.NATL.REF.CAM5.4CLM5.0.dtime900.nc" 1-1525
# "trajectories.CHEY.VR28.NATL.REF.CAM5.4CLM5.0.dtime900.002.nc" all
# "trajectories.CORI.VR28.NATL.REF.CAM5.4CLM5.0.dtime900.003.nc" all

import numpy as np
import xarray as xr
import geopandas as gpd
from shapely.geometry import LineString, Point
from tc_analysis.tc_functions import *

# Range of storm IDs to keep. For the first REF file the very first storm is
# discarded so that the record starts in 1985; the other files use every storm.
first_tc = 1
last_tc = 1525
# NOTE(review): if `stormID` has no coordinate values in the input file, xarray
# may treat this slice positionally (end-exclusive), which would silently drop
# storm 1525 -- confirm against the shape printed below.
storm_slice = slice(first_tc, last_tc)

original_file = "trajectories.CHEY.VR28.NATL.REF.CAM5.4CLM5.0.dtime900.nc"

# Read the selected storms into memory. The context manager closes the file
# even if one of the reads raises.
with xr.open_dataset(original_file) as DS:
    lons = DS.clon.sel(stormID=storm_slice).values      # only storms from 1985-2014
    lats = DS.clat.sel(stormID=storm_slice).values
    max_w = DS.vmax_2D.sel(stormID=storm_slice).values
    time = DS.time_str.sel(stormID=storm_slice).values

print(np.shape(lons))

# Project helper: filter out non-North Atlantic TCs.
lons, lats, max_w, time = ref_na_filter(lons, lats, max_w, time)

# Project helper: change lon range to -180 to 180, also removes possible
# horizontal lines.
lons = change_lon_range(lons)

Make NetCDF file of Results

In [None]:
# Bundle the filtered tracks into a new dataset indexed by (stormID, time) and
# write it to disk. The description states the REF period (1985-2014).
new_ds = xr.Dataset(
    data_vars={
        "clon": (["stormID", "time"], lons),
        "clat": (["stormID", "time"], lats),
        "vmax_2D": (["stormID", "time"], max_w),
        "time_str": (["stormID", "time"], time),
    },
    attrs={
        "description": (
            "Simple NetCDF file with updated longitude, latitude, maximum wind "
            "speed, and times for all NA storms between 1985 and 2014 from the file "
            + original_file
        ),
        "author": "Justin Willson",
        "creation_date": todays_date,
    },
)

print(new_ds)

new_ds.to_netcdf("REF.NA.storms.nc")

### Program to find landfalling TCs and those that come within ___ km of the eastern US coast

In [14]:
import numpy as np
import xarray as xr
import geopandas as gpd
from shapely.geometry import LineString, Point
from tc_analysis.tc_functions import *

# Read every variable of the storm file into memory. The context manager
# closes the file even if one of the reads raises.
track_file = "REF003.NA.storms.nc"
with xr.open_dataset(track_file) as DS:
    lons = DS.clon.values
    lats = DS.clat.values
    max_w = DS.vmax_2D.values
    time = DS.time_str.values

shapefile1 = "shapefiles/eastern_us.shp"          # eastern US land shapefile
shapefile2 = "shapefiles/eastern_us_300km.shp"    # eastern US 300 km shapefile

print(np.shape(lons))

#lons1, lats1, max_w1, time1 = landfall_filter(lons, lats, max_w, time, shapefile1)  #get landfalling storms
# Project helper: keep the storms selected by the 300 km shapefile.
lons2, lats2, max_w2, time2 = landfall_filter(lons, lats, max_w, time, shapefile2)

(398, 107)
(111, 107)


Make NetCDF files of results

In [15]:
# Disabled: writing the landfalling-storm file is switched off together with the
# commented-out landfall_filter call that would produce lons1/lats1/max_w1/time1.
# The code is kept inside a string literal so that it does not execute.
"""
new_ds1 = xr.Dataset(
        data_vars = dict(
        clon = (["stormID", "time"], lons1),
        clat = (["stormID", "time"], lats1),
        vmax_2D = (["stormID", "time"], max_w1),
        time_str = (["stormID", "time"], time1)
        ),
        attrs = dict(
        description = "Simple NetCDF file with updated longitude, latitude, maximum wind speed, and times for all NA landfalling storms between 1985 and 2014 from the file " + original_file,
        author = "Justin Willson",
        creation_date = todays_date
        ),
)

print(new_ds1)

new_ds1.to_netcdf("REF.NA.landfalling.storms.nc")
"""

# Storms returned by the buffer-shapefile filter, indexed by (stormID, time).
# NOTE(review): the distance quoted in the description comes from the
# module-level `distance` label -- make sure it matches the buffer used for
# this output file (300 km here).
new_ds2 = xr.Dataset(
    data_vars={
        "clon": (["stormID", "time"], lons2),
        "clat": (["stormID", "time"], lats2),
        "vmax_2D": (["stormID", "time"], max_w2),
        "time_str": (["stormID", "time"], time2),
    },
    attrs={
        # strip() + explicit spaces keep the sentence readable whether or not
        # `distance` carries its own leading/trailing blanks.
        "description": (
            "Simple NetCDF file with updated longitude, latitude, maximum wind "
            "speed, and times for all NA storms that come within "
            + distance.strip()
            + " of the eastern US between 1985 and 2014 from the file "
            + original_file
        ),
        "author": "Justin Willson",
        "creation_date": todays_date,
    },
)

print(new_ds2)

new_ds2.to_netcdf("300km_analysis/REF003.NA.landfalling.storms.300km.nc")

<xarray.Dataset>
Dimensions:   (stormID: 111, time: 107)
Dimensions without coordinates: stormID, time
Data variables:
    clon      (stormID, time) float32 -84.447815 -83.232025 ... nan nan
    clat      (stormID, time) float32 30.064241 30.36162 30.824593 ... nan nan
    vmax_2D   (stormID, time) float32 16.27279 15.26212 12.32979 ... nan nan nan
    time_str  (stormID, time) int32 1985061506 1985061512 ... -2147483647
Attributes:
    description:    Simple NetCDF file with updated longitude, latitude, maxi...
    author:         Justin Willson
    creation_date:  2021-07-27


### Program to find the points on land, in the ocean, and in the ___ km buffer for the storms that come within ___ km of the eastern US coast

In [9]:
import numpy as np
import xarray as xr
import geopandas as gpd
from shapely.geometry import LineString, Point
from tc_analysis.tc_functions import *

# Read every variable of the 200 km landfalling-storm file into memory. The
# context manager closes the file even if one of the reads raises.
track_file = "200km_analysis/REF003.NA.landfalling.storms.200km.nc"
with xr.open_dataset(track_file) as DS:
    lons = DS.clon.values              # storms from 1985-2014
    lats = DS.clat.values
    max_w = DS.vmax_2D.values
    time = DS.time_str.values

shapefile1 = "shapefiles/eastern_us.shp"          # eastern US land shapefile
shapefile2 = "shapefiles/eastern_us_200km.shp"    # eastern US 200 km shapefile

print(np.shape(lons))

# The three steps below are chained project helpers: the ocean points feed the
# buffer selection, whose result is then cleaned up.
#lons1, lats1, max_w1, time1 = get_landfall_points(lons, lats, max_w, time, shapefile1)  #get points on land
lons2, lats2, max_w2, time2 = get_ocean_points(lons, lats, max_w, time, shapefile1)         # get points outside of land
lons3, lats3, max_w3, time3 = get_landfall_points(lons2, lats2, max_w2, time2, shapefile2)  # get points in buffer
lons3, lats3, max_w3, time3 = remove_unwanted_points(lons3, lats3, max_w3, time3)           # remove points not in Atlantic

(98, 107)
(98, 107)
(98, 107)


Make NetCDF files of results

In [10]:
# Disabled: writing the on-land points is switched off together with the
# commented-out get_landfall_points call that would produce
# lons1/lats1/max_w1/time1. The code is kept inside a string literal so that it
# does not execute.
"""
new_ds1 = xr.Dataset(
        data_vars = dict(
        clon = (["stormID", "time"], lons1),
        clat = (["stormID", "time"], lats1),
        vmax_2D = (["stormID", "time"], max_w1),
        time_str = (["stormID", "time"], time1)
        ),
        attrs = dict(
        description = "Simple NetCDF file with updated longitude, latitude, maximum wind speed, and times for all storm points that are on land between 1985 and 2014 from the file " + original_file,
        author = "Justin Willson",
        creation_date = todays_date
        ),
)

print(new_ds1)

new_ds1.to_netcdf("REF.NA.landfalling.storms.100km.land.pts.nc")
"""

# Ocean points (lons2/lats2/max_w2/time2), indexed by (stormID, time).
new_ds2 = xr.Dataset(
    data_vars={
        "clon": (["stormID", "time"], lons2),
        "clat": (["stormID", "time"], lats2),
        "vmax_2D": (["stormID", "time"], max_w2),
        "time_str": (["stormID", "time"], time2),
    },
    attrs={
        "description": (
            "Simple NetCDF file with updated longitude, latitude, maximum wind "
            "speed, and times for all storm points that are in the ocean "
            "between 1985 and 2014 from the file "
            + original_file
        ),
        "author": "Justin Willson",
        "creation_date": todays_date,
    },
)

print(new_ds2)

new_ds2.to_netcdf("200km_analysis/REF003.NA.landfalling.storms.200km.ocean.pts.nc")

# Buffer points (lons3/lats3/max_w3/time3), indexed by (stormID, time).
new_ds3 = xr.Dataset(
    data_vars={
        "clon": (["stormID", "time"], lons3),
        "clat": (["stormID", "time"], lats3),
        "vmax_2D": (["stormID", "time"], max_w3),
        "time_str": (["stormID", "time"], time3),
    },
    attrs={
        # strip() + explicit spaces keep the sentence readable whether or not
        # `distance` carries its own leading/trailing blanks.
        "description": (
            "Simple NetCDF file with updated longitude, latitude, maximum wind "
            "speed, and times for all landfalling storms in the eastern U.S. "
            "between 1985 and 2014 from the file "
            + original_file
            + ". Only points where the storm is within "
            + distance.strip()
            + " of land are included."
        ),
        "author": "Justin Willson",
        "creation_date": todays_date,
    },
)

print(new_ds3)

new_ds3.to_netcdf("200km_analysis/REF003.NA.landfalling.storms.200km.buffer.pts.nc")

<xarray.Dataset>
Dimensions:   (stormID: 98, time: 107)
Dimensions without coordinates: stormID, time
Data variables:
    clon      (stormID, time) float32 -79.3074 -78.31302 -77.02615 ... nan nan
    clat      (stormID, time) float32 31.225912 31.267113 31.506567 ... nan nan
    vmax_2D   (stormID, time) float32 14.28621 19.68029 19.727 ... 0.0 0.0 0.0
    time_str  (stormID, time) int32 1985061606 1985061612 1985061618 ... 0 0 0
Attributes:
    description:    Simple NetCDF file with updated longitude, latitude, maxi...
    author:         Justin Willson
    creation_date:  2021-07-28
<xarray.Dataset>
Dimensions:   (stormID: 98, time: 107)
Dimensions without coordinates: stormID, time
Data variables:
    clon      (stormID, time) float32 -79.3074 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
    clat      (stormID, time) float32 31.225912 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
    vmax_2D   (stormID, time) float32 14.28621 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0
    time_str  (stormID, time) int32 1985061606 0 0

### Program to make a quick plot of results to confirm accuracy

In [9]:
import numpy as np
import xarray as xr
import geopandas as gpd
from shapely.geometry import LineString, Point
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
import matplotlib.patches as mpatches
import cartopy.crs as ccrs
import cartopy.feature as cf
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
from tc_analysis.tc_functions import *

# Read the 300 km buffer-point file into memory. The context manager closes the
# file even if one of the reads raises.
track_file = "300km_analysis/REF003.NA.landfalling.storms.300km.buffer.pts.nc"
with xr.open_dataset(track_file) as DS:
    lons = DS.clon.values          # storms from 1985-2014
    lats = DS.clat.values
    max_w = DS.vmax_2D.values      # only needed by the *_byintensity calls below
    time = DS.time_str.values

gdf = gpd.read_file("shapefiles/eastern_us.shp")   # open shapefile and create geodataframe
gdf = gdf.to_crs("EPSG:4326")                      # reproject to EPSG:4326 (lat/lon)

plt.figure(figsize=(12,7))
ax = plt.axes(projection=ccrs.PlateCarree())
ax.set_extent([-63, -115, 21, 55], crs=ccrs.PlateCarree())   # plot eastern U.S.
ax.set_title('REF NA Basin 1985-2014', fontsize=20)

# Invisible gridlines (alpha=0) are drawn only to get labelled axes.
gl = ax.gridlines(crs=ccrs.PlateCarree(), draw_labels=True, alpha=0)
# NOTE(review): newer cartopy releases appear to name this attribute
# `top_labels` -- confirm against the installed version.
gl.xlabels_top = False
#gl.ylabels_right = False
#gl.xlocator = mticker.FixedLocator([-180, -45, 0, 45, 180])
#gl.ylocator = mticker.FixedLocator([-90, -45, 0, 45, 90])
gl.xformatter = LONGITUDE_FORMATTER
gl.yformatter = LATITUDE_FORMATTER
gl.xlabel_style = {'size': 14}
gl.ylabel_style = {'size': 14}

# Legend: one colour patch per intensity category, in display order.
legend_entries = [
    ('y', 'TD'),
    ('g', 'TS'),
    ('c', 'Cat1'),
    ('b', 'Cat2'),
    ('r', 'Cat3'),
    ('k', 'Cat4/5'),
]
legend_handles = [mpatches.Patch(color=colour, label=category)
                  for colour, category in legend_entries]
plt.legend(handles=legend_handles, loc='upper left')

gdf.plot(ax=ax, color='w', edgecolor='k')               # plot the shapefile

#plot_tc_trajectories(lons, lats)                       #plot TCs
plot_tc_points(lons, lats)

#ref_plot_trajectories_byintensity(lons, lats, max_w, ax)   #plot TC trajectories by intensity
#ref_plot_points_byintensity(lons, lats, max_w, ax)         #plot TC trajectory points by intensity

plt.savefig("Aref300.png")                                  # save figure in the same directory
plt.close()

### Program to create histograms of the distributions of all intensities, average intensity, and maximum intensity

In [5]:
import numpy as np
import xarray as xr
import statistics
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import geopandas as gpd
from shapely.geometry import LineString, Point

# Read the 100 km buffer-point file into memory. The context manager closes the
# file even if one of the reads raises.
track_file = "REF003.NA.landfalling.storms.100km.buffer.pts.nc"
with xr.open_dataset(track_file) as DS:
    lons = DS.clon.values         # storms from 1985-2014
    lats = DS.clat.values
    max_w = DS.vmax_2D.values
    time = DS.time_str.values

nstorms = np.shape(max_w)[0]  # number of storms (rows)
ntimes = np.shape(max_w)[1]   # number of time steps (columns)

# Only strictly positive wind speeds count as data; zeros are dropped, and NaN
# entries compare False so they are dropped as well.
has_wind = max_w > 0
storm_has_data = has_wind.any(axis=1)      # storms with at least one valid point
winds = np.where(has_wind, max_w, 0)       # non-data entries zeroed for the reductions

# Max wind speed of each storm's points; storms without any valid point are
# left out. Masking first makes the result independent of where NaNs sit.
max_array = winds.max(axis=1)[storm_has_data]

# Mean of the valid wind speeds of each storm, over ALL storms (an earlier
# version of this step stopped one storm short). The sum is accumulated in
# float64 to limit rounding error.
avg_array = (winds.sum(axis=1, dtype=np.float64)[storm_has_data]
             / has_wind.sum(axis=1)[storm_has_data])

# Every valid wind speed of every storm, in storm-major order.
total_array = max_w[has_wind]

fig, ax = plt.subplots(3, 1, figsize=(7,10))   # plot these values in a 3 part histogram

bin_intervals = np.arange(0, 100, 10)          # bin edges 0, 10, ..., 90

panels = [
    (total_array, 'REF003: Distribution of all Intensities (<= 100 km)'),
    (avg_array, 'Distribution of Average Intensities (<= 100 km)'),
    (max_array, 'Distribution of Maximum Intensities (<= 100 km)'),
]
for axis, (values, title) in zip(ax, panels):
    axis.hist(values, bins=bin_intervals, edgecolor='k')
    axis.set_ylabel('Frequency')
    axis.set_title(title)

ax[2].set_xlabel('Wind Speed (m/s)')           # x label on the bottom panel only

plt.savefig("ref003_hist.png")
plt.close()