In [1]:
#Notebook to filter and preprocess the storms in the IBTrACS dataset.
#Successor to Explore_IBTrACS_v2.ipynb.

#10-19-20: adding regional tags
#10-22-20: adding Southern Hemisphere tags; genesis lat/lon; adjusting region boundaries
#10-29-20: adding genesis year and month variables
#11-05-20: adding variable for the Southern Hemisphere season

In [2]:
#What filtering to do? 

#Drop non-USA variables
#Make sure storm reaches at least TS intensity, otherwise delete the whole line
#Make sure times are every 6 hours so that ACE, density, etc. can be calculated correctly


In [3]:
#Seems like this intricate calculation cannot be vectorized and can only be done in a loop 

In [None]:
#How did Jeff do this? 
#In script: JS_C180v2_ParamSensTest_ObsDiffMaps.m
#Had a file called '/discover/nobackup/jdstron1/Observations_MonClimo.mat'
#So must have saved that from somewhere else.
#Should be in the file Observations_Compilation.m on sc9. 
#Need to download the sc9 files.

#Downloaded this one of Jeff's scripts. 
#Looks like used the lon_wmo, lat_wmo, time_wmo variables from v03r10 
#(which had a slightly different filename)
#I have v04r00.
#He did NOT attempt to filter out times that weren't 6-hourly, 
#or storms that didn't reach 34 knots.

#Unless he got the data already filtered by someone else. 
#Can't find these wmo variables in the dataset I have.

In [2]:
import xarray as xr
import pandas as pd
import numpy as np

In [3]:
#Open the IBTrACS v04r00 NetCDF file (path is relative to the notebook's working directory)
ds_IB = xr.open_dataset('IBTrACS.ALL.v04r00.nc')

In [4]:
#Drop non-US variables, and other irrelevant variables with datetime coordinates.
#Uses drop_vars, because Dataset.drop() with a list of variable names is deprecated.
#Names are grouped by their prefix; the order is irrelevant to the result.
vars_to_drop = [
    #tokyo_*
    'tokyo_lat', 'tokyo_lon', 'tokyo_grade', 'tokyo_wind', 'tokyo_pres',
    'tokyo_r50_dir', 'tokyo_r50_long', 'tokyo_r50_short',
    'tokyo_r30_dir', 'tokyo_r30_long', 'tokyo_r30_short', 'tokyo_land',
    #cma_*
    'cma_lat', 'cma_lon', 'cma_cat', 'cma_wind', 'cma_pres',
    #hko_*
    'hko_lat', 'hko_lon', 'hko_cat', 'hko_wind', 'hko_pres',
    #newdelhi_*
    'newdelhi_lat', 'newdelhi_lon', 'newdelhi_grade', 'newdelhi_wind', 'newdelhi_pres',
    'newdelhi_ci', 'newdelhi_dp', 'newdelhi_poci',
    #reunion_*
    'reunion_lat', 'reunion_lon', 'reunion_type', 'reunion_wind', 'reunion_pres',
    'reunion_tnum', 'reunion_ci', 'reunion_rmw', 'reunion_r34', 'reunion_r50', 'reunion_r64',
    #bom_*
    'bom_lat', 'bom_lon', 'bom_type', 'bom_wind', 'bom_pres', 'bom_tnum', 'bom_ci',
    'bom_rmw', 'bom_r34', 'bom_r50', 'bom_r64', 'bom_roci', 'bom_poci', 'bom_eye',
    'bom_pos_method', 'bom_pres_method',
    #nadi_*
    'nadi_lat', 'nadi_lon', 'nadi_cat', 'nadi_wind', 'nadi_pres',
    #wellington_*
    'wellington_lat', 'wellington_lon', 'wellington_wind', 'wellington_pres',
    #ds824_*
    'ds824_lat', 'ds824_lon', 'ds824_stage', 'ds824_wind', 'ds824_pres',
    #td9636_*
    'td9636_lat', 'td9636_lon', 'td9636_stage', 'td9636_wind', 'td9636_pres',
    #td9635_*
    'td9635_lat', 'td9635_lon', 'td9635_wind', 'td9635_pres', 'td9635_roci',
    #neumann_*
    'neumann_lat', 'neumann_lon', 'neumann_class', 'neumann_wind', 'neumann_pres',
    #mlc_*
    'mlc_lat', 'mlc_lon', 'mlc_class', 'mlc_wind', 'mlc_pres',
    #Gust variables
    'bom_gust', 'bom_gust_per', 'reunion_gust', 'reunion_gust_per',
    #Land proximity
    'dist2land', 'landfall',
    #usa_* variables that are not needed (usa_lat/lon/wind/pres/status/agency are kept)
    'usa_r34', 'usa_r50', 'usa_r64', 'usa_sshs', 'usa_poci', 'usa_roci', 'usa_rmw',
    'usa_eye', 'usa_gust', 'usa_seahgt', 'usa_searad',
    #Storm motion and type
    'storm_speed', 'storm_dir', 'nature',
    #'wmo_wind', 'wmo_agency' and 'wmo_pres' are deliberately NOT dropped
    #Bookkeeping / identifiers
    'track_type', 'main_track_sid', 'iflag', 'basin', 'subbasin', 'iso_time',
    'usa_atcf_id', 'usa_record',
]
ds_IB_slim = ds_IB.drop_vars(vars_to_drop)

In [5]:
#Subset for 1980-2000: a storm is kept when the year of its first track point is in that range.
#Done in two steps so that ds_since80 (1980 onwards) is available as well.
first_year_all = ds_IB_slim['time.year'].isel(date_time=0)
ds_since80 = ds_IB_slim.isel(storm=(first_year_all >= 1980).values)

first_year_since80 = ds_since80['time.year'].isel(date_time=0)
ds_80_00 = ds_since80.isel(storm=(first_year_since80 < 2001).values)

In [6]:
#print(ds_80_00['usa_wind'])
#print(ds_80_00['usa_record'].description) #This variable isn't necessary, things like landfall time
#for i in np.arange(2000):
    #print(np.mod(ds_80_00['time.hour'].data[i,:], 6))
    #Looks like almost all the storms were 3-hourly, but some also have weird hours in the middle. 

In [7]:
#for i in np.arange(100):
#    print(ds_80_00['iso_time'].data[i,:])
#Still 3-hourly, just a different datetime format

In [8]:
#print(ds_80_00)

In [9]:
#print(ds_80_00.isel(storm=0)['time'])

In [10]:
#print(ds_80_00['usa_wind'])

In [11]:
#Keep only the synoptic (00, 06, 12, 18z) records of each storm, move them to the front of the
#date_time axis and pad the tail with missing values so that every array keeps its shape.
#Then calculate the lifetime-maximum wind and drop storms that never reach 34 knots
#(storms without any usa_wind are removed by the same .where() call).

numStorms = len(ds_80_00.storm)
numDT = len(ds_80_00.date_time)
#print(numStorms) #2398

#Time-varying variables that are filtered
track_var_names = ['time', 'usa_lat', 'usa_lon', 'usa_wind', 'usa_pres', 'usa_status',
                   'usa_agency', 'wmo_wind', 'wmo_pres', 'wmo_agency', 'lat', 'lon']


def _missing_value(dtype):
    """Return the padding value that matches a NumPy dtype.

    Datetimes are padded with NaT, floats with NaN and string-like arrays with an
    empty string. Writing NaN into a fixed-width bytes array would store the text
    b'na'/b'nan' instead of an empty value.

    Raises TypeError for any other dtype kind.
    """
    if dtype.kind == 'M':
        return np.datetime64('NaT')
    if dtype.kind == 'f':
        return np.nan
    if dtype.kind == 'U':
        return ''
    if dtype.kind in ('S', 'O'):
        return b''
    raise TypeError('No missing value defined for dtype ' + str(dtype))


#The row/column indexing below assumes every variable shares the dims of 'time'
time_dims = ds_80_00['time'].dims
for name in track_var_names:
    if ds_80_00[name].dims != time_dims:
        raise ValueError(name + ' has dims ' + str(ds_80_00[name].dims)
                         + ', expected ' + str(time_dims))

#Synoptic mask: hour divisible by 6 AND minute equal to 0, so that off-hour records such as
#06:30 are rejected too. The time stamps carry a tiny decoding offset (e.g. ...T00:00:00.000040448),
#hence the rounding to the nearest second. NaT slots give NaN hour/minute, which compare False.
rounded_time = ds_80_00['time'].dt.round('s')
keep_mask = ((rounded_time.dt.hour % 6 == 0) & (rounded_time.dt.minute == 0)).values

#Work on in-memory copies so that ds_80_00 itself is never modified
filtered = {name: np.array(ds_80_00[name].values) for name in track_var_names}

for i in np.arange(numStorms):
#for i in np.arange(30):
    if np.mod(i, 50) == 0:
        print('Processing storm: '+ str(i))
    keep_i = keep_mask[i]
    n_keep = int(keep_i.sum())
    for values in filtered.values():
        #Boolean indexing returns a copy, so overlapping source and target slices are safe
        values[i, :n_keep] = values[i, keep_i]
        values[i, n_keep:] = _missing_value(values.dtype)

#Deep copy so the processed dataset shares no memory with ds_80_00, then swap in the new arrays
ds_80_00_processed = ds_80_00.copy(deep=True)
for name, values in filtered.items():
    ds_80_00_processed[name].values = values

##Calculate maximum wind for each storm (as a new variable)
ds_80_00_processed['max_wind'] = ds_80_00_processed['usa_wind'].max(dim='date_time')

#Drop storms where max wind < 34 knots, using dataset.where
#Some storms apparently don't have ANY values for usa_wind, maybe had no US reporting at all (duplicates?)
#Should drop these too. e.g. np.nan > 34 yields False so this will work for those too.
ds_80_00_processed_34 = ds_80_00_processed.where(ds_80_00_processed['max_wind'] >= 34, drop=True)



Processing storm: 0
Processing storm: 50
Processing storm: 100
Processing storm: 150
Processing storm: 200
Processing storm: 250
Processing storm: 300
Processing storm: 350
Processing storm: 400
Processing storm: 450
Processing storm: 500
Processing storm: 550
Processing storm: 600
Processing storm: 650
Processing storm: 700
Processing storm: 750
Processing storm: 800
Processing storm: 850
Processing storm: 900
Processing storm: 950
Processing storm: 1000
Processing storm: 1050
Processing storm: 1100
Processing storm: 1150
Processing storm: 1200
Processing storm: 1250
Processing storm: 1300
Processing storm: 1350
Processing storm: 1400
Processing storm: 1450
Processing storm: 1500
Processing storm: 1550
Processing storm: 1600
Processing storm: 1650
Processing storm: 1700
Processing storm: 1750
Processing storm: 1800
Processing storm: 1850
Processing storm: 1900
Processing storm: 1950
Processing storm: 2000
Processing storm: 2050
Processing storm: 2100
Processing storm: 2150
Processing 

In [12]:
#This isn't so bad: seems to take about 6 seconds per 100 storms, or 1000 per minute.

In [13]:

#####   Add Additional Variables   #####


In [14]:
#Longitude wrapped into the range 0 to 360
ds_80_00_processed_34['usa_lon360'] = (ds_80_00_processed_34['usa_lon'] + 360) % 360

In [15]:
#Regional flags: one boolean mask per region, evaluated at every track point from
#usa_lat and usa_lon360. Comparisons with NaN are False, so missing points belong to no region.

track_lat = ds_80_00_processed_34['usa_lat']
track_lon = ds_80_00_processed_34['usa_lon360']

#Latitude bands used for the sloping basin boundaries
lat_0_to_8 = np.abs(track_lat - 4) <= 4
lat_0_to_24 = np.abs(track_lat - 12) <= 12

#Sloping boundaries (longitude as a linear function of latitude)
ni_wp_boundary = (-3./4.)*track_lat + 105   #between North Indian and Western Pacific
ep_na_boundary = (-7./4.)*track_lat + 295   #between Eastern Pacific and North Atlantic

#Northern hemisphere
ds_80_00_processed_34['in_NH'] = track_lat >= 0

#Southern hemisphere
ds_80_00_processed_34['in_SH'] = track_lat < 0

#North Indian ocean
ds_80_00_processed_34['in_NI'] = (track_lon >= 35) & (
    ((track_lat > 8) & (track_lon < 99))
    | (lat_0_to_8 & (track_lon < ni_wp_boundary)))

#Western Pacific
ds_80_00_processed_34['in_WP'] = (track_lon < 200) & (
    ((track_lat > 8) & (track_lon >= 99))
    | (lat_0_to_8 & (track_lon >= ni_wp_boundary)))

#Eastern Pacific
ds_80_00_processed_34['in_EP'] = (track_lon >= 200) & (
    ((track_lat > 24) & (track_lon < 253))
    | (lat_0_to_24 & (track_lon < ep_na_boundary)))

#North Atlantic
ds_80_00_processed_34['in_NA'] = (
    ((track_lat > 24) & (track_lon >= 253))
    | (lat_0_to_24 & (track_lon >= ep_na_boundary)))

#South Indian
ds_80_00_processed_34['in_SI'] = ((track_lon >= 25) & (track_lon < 105)) & (track_lat < 0)

#Australian region
ds_80_00_processed_34['in_AUS'] = ((track_lon >= 105) & (track_lon < 165)) & (track_lat < 0)

#South Pacific
ds_80_00_processed_34['in_SP'] = ((track_lon >= 165) & (track_lon < 290)) & (track_lat < 0)

#South Atlantic
ds_80_00_processed_34['in_SA'] = ((track_lon >= 290) & (track_lon < 360)) & (track_lat < 0)

In [16]:
#Genesis lat/lon, based on first non-nan latitude (including 360 degree lon)
#Genesis region flags, based on that lat/lon
#The values themselves are filled in by a later loop through the storms

#Preallocate one per-storm variable for each genesis quantity.
#Each variable gets its own freshly computed array, so later in-place writes cannot alias.
for gen_name in ['gen_lat', 'gen_lon', 'gen_lon360']:
    ds_80_00_processed_34[gen_name] = ds_80_00_processed_34['numobs']*0 #This should have no nans

#Region flags start out as all False
for region in ['NH', 'SH', 'NI', 'WP', 'EP', 'NA', 'SI', 'AUS', 'SP', 'SA']:
    first_point_flag = ds_80_00_processed_34['in_NH'].isel(date_time=0)
    ds_80_00_processed_34['gen_'+region] = first_point_flag*False

for gen_name in ['gen_month', 'gen_year']:
    ds_80_00_processed_34[gen_name] = ds_80_00_processed_34['numobs']*0

In [17]:
#np.array([True, False, True])*False #False, False, False. Yes, this preallocation method should work.

In [18]:
#print(ds_80_00_processed_34)

In [19]:
#Fill the preallocated gen_* variables: position, month and year of each storm's first valid
#track point, plus the region flags evaluated at that position.

def _genesis_region_flags(lat, lon360):
    """Return a dict {region: bool} for a single position.

    lat is the latitude and lon360 the longitude on a 0-360 range. The boundaries are
    the same expressions that define the per-point in_* flags.
    """
    return {
        'NH': lat >= 0,
        'SH': lat < 0,
        'NI': np.logical_and(lon360>=35,
                             np.logical_or(np.logical_and(lat>8, lon360<99),
                                           np.logical_and(np.abs(lat-4)<=4,
                                                          lon360<((-3./4.)*lat+105)))),
        'WP': np.logical_and(lon360<200,
                             np.logical_or(np.logical_and(lat>8, lon360>=99),
                                           np.logical_and(np.abs(lat-4)<=4,
                                                          lon360>=((-3./4.)*lat+105)))),
        'EP': np.logical_and(lon360>=200,
                             np.logical_or(np.logical_and(lat>24, lon360<253),
                                           np.logical_and(np.abs(lat-12)<=12,
                                                          lon360<((-7./4.)*lat+295)))),
        'NA': np.logical_or(np.logical_and(lat>24, lon360>=253),
                            np.logical_and(np.abs(lat-12)<=12,
                                           lon360>=((-7./4.)*lat+295))),
        'SI': np.logical_and(np.logical_and(lon360>=25, lon360<105), lat<0),
        'AUS': np.logical_and(np.logical_and(lon360>=105, lon360<165), lat<0),
        'SP': np.logical_and(np.logical_and(lon360>=165, lon360<290), lat<0),
        'SA': np.logical_and(np.logical_and(lon360>=290, lon360<360), lat<0),
    }


#Pull the full arrays once. Evaluating e.g. ds['time.month'] inside the loop would recompute
#it for every storm on each pass.
all_lat = ds_80_00_processed_34['usa_lat'].values
all_lon = ds_80_00_processed_34['usa_lon'].values
all_lon360 = ds_80_00_processed_34['usa_lon360'].values
all_month = ds_80_00_processed_34['time.month'].values
all_year = ds_80_00_processed_34['time.year'].values

gen_values = {name: [] for name in ['gen_lat', 'gen_lon', 'gen_lon360', 'gen_month', 'gen_year']}
gen_flags = {region: [] for region in ['NH', 'SH', 'NI', 'WP', 'EP', 'NA', 'SI', 'AUS', 'SP', 'SA']}

for i in np.arange(len(ds_80_00_processed_34.storm)):
    if np.mod(i,50) == 0:
        print('Adding genesis info for storm: ' + str(i))

    #Genesis = first track point where both latitude and longitude are present, so that
    #lat, lon, month and year all describe the same point
    has_position = ~np.isnan(all_lat[i]) & ~np.isnan(all_lon[i])
    if not has_position.any():
        raise ValueError('Storm ' + str(i) + ' has no valid usa_lat/usa_lon point; '
                         'cannot determine its genesis')
    first = int(np.argmax(has_position))

    gen_lat_i = all_lat[i, first]
    gen_lon360_i = all_lon360[i, first]

    gen_values['gen_lat'].append(gen_lat_i)
    gen_values['gen_lon'].append(all_lon[i, first])
    gen_values['gen_lon360'].append(gen_lon360_i)
    gen_values['gen_month'].append(all_month[i, first])
    gen_values['gen_year'].append(all_year[i, first])

    for region, flag in _genesis_region_flags(gen_lat_i, gen_lon360_i).items():
        gen_flags[region].append(flag)

#Write the results into the preallocated variables in place (this keeps their dtypes)
for gen_name, values in gen_values.items():
    ds_80_00_processed_34[gen_name][:] = np.asarray(values)
for region, flags in gen_flags.items():
    ds_80_00_processed_34['gen_'+region][:] = np.asarray(flags, dtype=bool)



Adding genesis info for storm: 0
Adding genesis info for storm: 50
Adding genesis info for storm: 100
Adding genesis info for storm: 150
Adding genesis info for storm: 200
Adding genesis info for storm: 250
Adding genesis info for storm: 300
Adding genesis info for storm: 350
Adding genesis info for storm: 400
Adding genesis info for storm: 450
Adding genesis info for storm: 500
Adding genesis info for storm: 550
Adding genesis info for storm: 600
Adding genesis info for storm: 650
Adding genesis info for storm: 700
Adding genesis info for storm: 750
Adding genesis info for storm: 800
Adding genesis info for storm: 850
Adding genesis info for storm: 900
Adding genesis info for storm: 950
Adding genesis info for storm: 1000
Adding genesis info for storm: 1050
Adding genesis info for storm: 1100
Adding genesis info for storm: 1150
Adding genesis info for storm: 1200
Adding genesis info for storm: 1250
Adding genesis info for storm: 1300
Adding genesis info for storm: 1350
Adding genesis 

In [22]:
#Southern Hemisphere season based on genesis month and year:
#months 1-6 give gen_year - 0.5, months 7-12 give gen_year + 0.5.
#Really only need this for the genesis time for the statistics I'm doing. More complex to do it
#for every data point.
in_second_half = np.floor(ds_80_00_processed_34['gen_month']/6.5)   #0 for months 1-6, 1 for months 7-12
ds_80_00_processed_34['gen_sh_season'] = ds_80_00_processed_34['gen_year'] - 0.5 + in_second_half

In [23]:
#Show the dataset summary, including the derived variables added above
print(ds_80_00_processed_34)

<xarray.Dataset>
Dimensions:        (date_time: 360, storm: 1817)
Coordinates:
    time           (storm, date_time) datetime64[ns] 1980-01-01T00:00:00.000040448 ... NaT
    lat            (storm, date_time) float32 -12.5 -11.914368 -11.5 ... nan nan
    lon            (storm, date_time) float32 172.5 172.41243 172.5 ... nan nan
Dimensions without coordinates: date_time, storm
Data variables:
    numobs         (storm) float32 41.0 79.0 69.0 129.0 ... 48.0 61.0 103.0
    sid            (storm) object b'1980001S13173' ... b'2000366S09068'
    season         (storm) float32 1980.0 1980.0 1980.0 ... 2000.0 2000.0 2001.0
    number         (storm) float64 1.0 3.0 5.0 9.0 ... 109.0 112.0 113.0 114.0
    name           (storm) object b'PENI' b'PAUL' b'AMY' ... b'SOULIK' b'ANDO'
    source_usa     (storm) object b'bsh051980.txt' ... b'bsh042001.txt'
    source_jma     (storm) object b'' b'' ... b''
    source_cma     (storm) object b'' b'' ... b''
    source_hko     (storm) object b'' b'' ...

In [23]:

#####   Save as NetCDF   #####


In [25]:
#Write the processed dataset to NetCDF. The commented lines are earlier dated saves, kept as a
#record of what each version contained; only the last line is active.
#ds_80_00_processed_34.to_netcdf('data_IBTrACS/IBTrACS_1980_2000_6h_min34_saved_20201014.nc')
#ds_80_00_processed_34.to_netcdf('data_IBTrACS/IBTrACS_1980_2000_6h_min34_saved_20201019.nc') #With regional tags
#ds_80_00_processed_34.to_netcdf('data_IBTrACS/IBTrACS_1980_2000_6h_min34_saved_20201022.nc') #With southern hemisphere and genesis regions
#ds_80_00_processed_34.to_netcdf('data_IBTrACS/IBTrACS_1980_2000_6h_min34_saved_20201029.nc') #With genesis month and year #Accidentally overwrote with the next one
ds_80_00_processed_34.to_netcdf('data_IBTrACS/IBTrACS_1980_2000_6h_min34_saved_20201105.nc') #With genesis SH season

In [21]:

#####   Explore output   #####


In [26]:
#print(ds_80_00_processed_34)
#Wind up with 1817 after excluding TDs and storms without US winds reported 

In [27]:
#Inspect the filtered usa_wind values (one row per storm, padded with NaN at the end)
print(ds_80_00_processed_34['usa_wind'].data)

[[25. 25. 25. ... nan nan nan]
 [nan 25. 25. ... nan nan nan]
 [40. 40. 40. ... nan nan nan]
 ...
 [30. 30. 30. ... nan nan nan]
 [20. 25. 25. ... nan nan nan]
 [30. 20. 20. ... nan nan nan]]


In [28]:
print(ds_80_00_processed_34['usa_pres'].data)
#The part of the array shown is all NaN: is usa_pres missing for every storm?
#NOTE: wmo_pres, wmo_wind and wmo_agency are kept in the dataset (they are commented out of the
#drop list), so they are available as an alternative.

[[nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 ...
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [nan nan nan ... nan nan nan]]


In [29]:
#Inspect wmo_wind after filtering: the raw values first, then the DataArray with its metadata
print(ds_80_00_processed_34['wmo_wind'].data)
print(ds_80_00_processed_34['wmo_wind'])
#Some nans for WMO where USA has stuff; also nans for pressure in these cases

[[nan nan nan ... nan nan nan]
 [25. 25. 30. ... nan nan nan]
 [20. 25. 30. ... nan nan nan]
 ...
 [25. 25. 25. ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [25. 25. 25. ... nan nan nan]]
<xarray.DataArray 'wmo_wind' (storm: 1817, date_time: 360)>
array([[nan, nan, nan, ..., nan, nan, nan],
       [25., 25., 30., ..., nan, nan, nan],
       [20., 25., 30., ..., nan, nan, nan],
       ...,
       [25., 25., 25., ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [25., 25., 25., ..., nan, nan, nan]], dtype=float32)
Coordinates:
    time     (storm, date_time) datetime64[ns] 1980-01-01T00:00:00.000040448 ... NaT
    lat      (storm, date_time) float32 -12.5 -11.914368 -11.5 ... nan nan nan
    lon      (storm, date_time) float32 172.5 172.41243 172.5 ... nan nan nan
Dimensions without coordinates: storm, date_time
Attributes:
    long_name:              Maximum sustained wind speed from Official WMO ag...
    units:                  kts
    coverage_content_type: 

In [30]:
print(ds_80_00['wmo_wind'].data)
print(ds_80_00['wmo_wind'])
#Same inspection on ds_80_00, the 1980-2000 subset before storms below 34 knots were dropped

[[nan nan nan ... nan nan nan]
 [nan nan 29. ... nan nan nan]
 [25. 25. 30. ... nan nan nan]
 ...
 [25. 25. 25. ... nan nan nan]
 [nan nan nan ... nan nan nan]
 [25. 25. 25. ... nan nan nan]]
<xarray.DataArray 'wmo_wind' (storm: 2398, date_time: 360)>
array([[nan, nan, nan, ..., nan, nan, nan],
       [nan, nan, 29., ..., nan, nan, nan],
       [25., 25., 30., ..., nan, nan, nan],
       ...,
       [25., 25., 25., ..., nan, nan, nan],
       [nan, nan, nan, ..., nan, nan, nan],
       [25., 25., 25., ..., nan, nan, nan]], dtype=float32)
Coordinates:
    time     (storm, date_time) datetime64[ns] 1980-01-01T00:00:00.000040448 ... NaT
    lat      (storm, date_time) float32 ...
    lon      (storm, date_time) float32 ...
Dimensions without coordinates: storm, date_time
Attributes:
    long_name:              Maximum sustained wind speed from Official WMO ag...
    units:                  kts
    coverage_content_type:  physicalMeasurement


In [31]:
#Inspect wmo_pres after filtering: the whole array, then only the first 20 storms
print(ds_80_00_processed_34['wmo_pres'].data)
print(ds_80_00_processed_34['wmo_pres'].isel(storm=np.arange(20)).data)

[[  nan   nan   nan ...   nan   nan   nan]
 [1000. 1001.  999. ...   nan   nan   nan]
 [ 998.  997.  995. ...   nan   nan   nan]
 ...
 [1006. 1006. 1006. ...   nan   nan   nan]
 [1002. 1002. 1000. ...   nan   nan   nan]
 [1002. 1002. 1002. ...   nan   nan   nan]]
[[  nan   nan   nan ...   nan   nan   nan]
 [1000. 1001.  999. ...   nan   nan   nan]
 [ 998.  997.  995. ...   nan   nan   nan]
 ...
 [  nan   nan   nan ...   nan   nan   nan]
 [  nan   nan   nan ...   nan   nan   nan]
 [  nan   nan  997. ...   nan   nan   nan]]


In [32]:
#Show the time coordinate attached to usa_lat
print(ds_80_00_processed_34['usa_lat'].time)

<xarray.DataArray 'time' (storm: 1817, date_time: 360)>
array([['1980-01-01T00:00:00.000040448', '1980-01-01T06:00:00.000040448',
        '1980-01-01T12:00:00.000040448', ...,                           'NaT',
                                  'NaT',                           'NaT'],
       ['1980-01-02T18:00:00.000040448', '1980-01-03T00:00:00.000040448',
        '1980-01-03T06:00:00.000040448', ...,                           'NaT',
                                  'NaT',                           'NaT'],
       ['1980-01-04T12:00:00.000040448', '1980-01-04T18:00:00.000040448',
        '1980-01-05T00:00:00.000040448', ...,                           'NaT',
                                  'NaT',                           'NaT'],
       ...,
       ['2000-12-23T06:00:00.000040448', '2000-12-23T12:00:00.000040448',
        '2000-12-23T18:00:00.000040448', ...,                           'NaT',
                                  'NaT',                           'NaT'],
       ['2000-12-28T

In [33]:
#print(np.mod(usa_lat_temp['time.hour'].isel(date_time=1).data, 6))

In [34]:
#Spot-check the first 10 time stamps of the first 100 storms.
#6-hourly filtering seems to have worked.
kept_times = ds_80_00_processed_34['time']
for storm_idx in range(100):
    print(kept_times.isel(storm=storm_idx, date_time=slice(0, 10)).data)

['1980-01-01T00:00:00.000040448' '1980-01-01T06:00:00.000040448'
 '1980-01-01T12:00:00.000040448' '1980-01-01T18:00:00.000040448'
 '1980-01-02T00:00:00.000040448' '1980-01-02T06:00:00.000040448'
 '1980-01-02T12:00:00.000040448' '1980-01-02T18:00:00.000040448'
 '1980-01-03T00:00:00.000040448' '1980-01-03T06:00:00.000040448']
['1980-01-02T18:00:00.000040448' '1980-01-03T00:00:00.000040448'
 '1980-01-03T06:00:00.000040448' '1980-01-03T12:00:00.000040448'
 '1980-01-03T18:00:00.000040448' '1980-01-04T00:00:00.000040448'
 '1980-01-04T06:00:00.000040448' '1980-01-04T12:00:00.000040448'
 '1980-01-04T18:00:00.000040448' '1980-01-05T00:00:00.000040448']
['1980-01-04T12:00:00.000040448' '1980-01-04T18:00:00.000040448'
 '1980-01-05T00:00:00.000040448' '1980-01-05T06:00:00.000040448'
 '1980-01-05T12:00:00.000040448' '1980-01-05T18:00:00.000040448'
 '1980-01-06T00:00:00.000040448' '1980-01-06T06:00:00.000040448'
 '1980-01-06T12:00:00.000040448' '1980-01-06T18:00:00.000040448']
['1980-01-15T06:00:00.

In [35]:

#####   Datetime testing below here   #####


In [36]:
#t = np.empty(3, dtype='datetime64[ns]')

In [37]:
#t

In [38]:
#t[0] = np.datetime64('NaT')

In [39]:
#t