In [1]:
import numpy as np
import pandas as pd
#import xarray as xr
import matplotlib.pyplot as plt
import sys
import os
from utils import lsdiag_read_utils, SHIPS_preprocess, RT_SHIPS_preprocess
import json

### `get_dyn_predictors(df,BASIN,HR_first,HR_last,is_INTERP=True,calc_POT=False)`<a name="get_dyn_predictors"></a>
This function calls `create_SHIPS_predictors_dyn` from `RT_SHIPS_preprocess.py`. `create_SHIPS_predictors_dyn` selects the specified dynamical predictors (here, we use generalized shear (`SHRG`), upper-level divergence (`D200`), lower-level vorticity (`Z850`), maximum potential intensity (`VMPI`), change in wind speed (`DELV`), mid-level relative humidity (`RHMD`), sea surface temperature (`RSST` or `NSST`, depending on basin), and ocean heat content (`RHCN` or `NOHC`, depending on basin). We also retain identifying information (`CASE`, `NAME`, `DATE_full`, `DTL`, `TIME`); and we muptiply `Z850` by -1 if we're making predictions in the Southern Hemisphere.  
# 

In [2]:
# ### `get_dyn_predictors(df,BASIN,HR_first,HR_last,is_INTERP=True,calc_POT=False)`<a name="get_dyn_predictors"></a>
# This function calls `create_SHIPS_predictors_dyn` from `RT_SHIPS_preprocess.py`. `create_SHIPS_predictors_dyn` selects the specified dynamical predictors (here, we use generalized shear (`SHRG`), upper-level divergence (`D200`), lower-level vorticity (`Z850`), maximum potential intensity (`VMPI`), change in wind speed (`DELV`), mid-level relative humidity (`RHMD`), sea surface temperature (`RSST` or `NSST`, depending on basin), and ocean heat content (`RHCN` or `NOHC`, depending on basin). We also retain identifying information (`CASE`, `NAME`, `DATE_full`, `DTL`, `TIME`); and we muptiply `Z850` by -1 if we're making predictions in the Southern Hemisphere.  
# 
# <b>Inputs:</b>
# * `df`: Dataframe containing contents of `lsdiag` file after scaling/land masking [Pandas dataframe]
# * `BASIN`: Basin in which we are making our predictions (relevant for selection of `SST` and `OHC` variables, as West Pacific and S. Hem use different ones) [str]
# * `HR_first`: first forecast time (should almost always be -12, needed for persistence [int]
# * `HR_last`: last forecast time (24, 48, 72, etc depending on forecast) [int]
# * `is_INTERP`: will interpolation between missing values be performed? default is True [boolean]
# * `calc_POT`: will we calculate `POT` ($VMPI - I_0$) or use `MPI`? default is False [boolean]
# 
# <b>Outputs:</b>
# * `SHIPS_dyn_out`: Dataframe containing only dynamical predictors [Pandas dataframe]

# In[6]:
def get_dyn_predictors_lsdiag(df,BASIN,HR_first,HR_last,is_INTERP=True,calc_POT=False):
    #
    FORE_use = np.arange(HR_first,HR_last+1,6)
    nsst_basin = ['ATLANTIC','EAST_PACIFIC','CENTRAL_PACIFIC']
    #
    if ibasin in nsst_basin:
        SST_sel = 'NSST'
        OHC_sel = 'NOHC'
    else:
        SST_sel = 'RSST'
        OHC_sel = 'RHCN'
   # if 'NOHC' in df.columns:
        #OHC_sel = 'NOHC'
    #else:
        #OHC_sel = 'RHCN'
    PREDICTORS_sel = ['ATCFID','CASE','DATE_full','VMAX','MSLP','DTL','TIME','SHRG','D200','Z850','VMPI','DELV','RHMD',SST_sel,OHC_sel]
    predictand_name = 'VMAX'
    #
    SHIPS_dyn_out = RT_SHIPS_preprocess.create_SHIPS_predictors_dyn(df,
                            PREDICTORS_sel,predictand_name,is_INTERP,FORE_use,calc_POT)
    # Multiply Z850 by =1 if in SH
    if BASIN == 'SOUTHERN_HEM':
        # print('multiply by -1 for SH')
        # print('before multiplying, Z850 mean is ',SHIPS_dyn_out['Z850'].mean())
        SHIPS_dyn_out['Z850'] = -1*SHIPS_dyn_out['Z850']
        # print('after multiplying, Z850 mean is ',SHIPS_dyn_out['Z850'].mean())
    # If we are in SH or WPac, rename RSST/RCHN to NSST/NOHC to match
    if ((BASIN == 'SOUTHERN_HEM') | (BASIN == 'WEST_PACIFIC')):
        SHIPS_dyn_out = SHIPS_dyn_out.rename(columns={'RSST':'NSST','RHCN':'NOHC'})
    # Rename DELV to DELV =12
    #SHIPS_dyn_out = SHIPS_dyn_out.rename(columns={'DELV':'DELV -12'})
    return SHIPS_dyn_out


In [3]:
# ### `get_IR_predictors(df,HR_first,HR_last)`<a name="get_IR_predictors"></a>
# This function calls `create_SHIPS_predictors_IR` from `RT_SHIPS_preprocess.py`. `create_SHIPS_predictors_IR` selects the specified infrared (IR) predictors (here, we use GOES brightness temp (`GOES Tb`), the standard deviation of GOES brightness temp (`s(GOES Tb)`), the cold pixel percentage below -50C (`pct < -50 C`), and the storm size estimator (`storm size`), as well as the first for principal components of the IR imageray (`PC1`, `PC2`, `PC3`, and `PC4`). We also retain identifying information (`CASE`, `NAME`, `DATE_full`, `DTL`, `TIME`). Note that our first four quantities come from `IR00`, while the last four come from `PC00`. We use `IRM1` and `IRM3` (and `PCM1`/`PCM3` for the PC analysis), respectively, if an IR image is not available close to the forecast time. 
# 
# <b>Inputs:</b>
# * `df`: Dataframe containing contents of `lsdiag` file after scaling/land masking [Pandas dataframe]
# * `HR_first`: first forecast time (should almost always be -12, needed for persistence [int]
# * `HR_last`: last forecast time (24, 48, 72, etc depending on forecast) [int]
# 
# <b>Outputs:</b>
# * `SHIPS_IR_out`: Dataframe containing only IR predictors [Pandas dataframe]
# * `IR00_var_names`: list of strings containing IR variable names (needed to put dataframes back together) [list of strings]
# * `PC00_var_names`: list of strings containing PC variable names (needed to put dataframes back together) [list of strings]
def get_IR_predictors_lsdiag(df,HR_first,HR_last):
    #
    FORE_use = np.arange(HR_first,HR_last+1,6)
    #
    predictors_sel_IR = ['ATCFID','CASE','DATE_full','TIME','IR00','IRM1','IRM3','PC00','PCM1','PCM3']
    # Identify time indices for desired IR variables (recall they are NOT time series)
    IR00_time_ind = [6,12,54,108]
    IR00_var_names = ['GOES Tb','s(GOES Tb)','pct < -50C','storm size']
    PC00_time_ind = [0,6,12,18]
    PC00_var_names = ['PC1','PC2','PC3','PC4']
    #
    #warnings.simplefilter(action='ignore',category=pd.errors.PerformanceWarning)
    SHIPS_IR_out = RT_SHIPS_preprocess.create_SHIPS_predictors_IR(df,
                        predictors_sel_IR,FORE_use,IR00_time_ind,IR00_var_names,PC00_time_ind,PC00_var_names)
    return SHIPS_IR_out,IR00_var_names,PC00_var_names

Load scale factors for SHIPS data.  We need to rescale some of the SHIPS predictors as many of them have been scaled up to make their values ints instead of floats. 

In [4]:
with open('SHIPS_factors.txt') as f:
    SHIPS_factors = f.read()
SHIPS_js = json.loads(SHIPS_factors)
SHIPS_js

{'TIME': [1, 'hr', 'time rel to current case'],
 'VMAX': [1, 'kt', 'max surf wind'],
 'MSLP': [1, 'hPa', 'min slp'],
 'TYPE': [1, 'n/a', 'storm type'],
 'HIST': [1, 'n/a', 'storm hist'],
 'DELV': [1, 'kt', 'intensity change (rel to 0)'],
 'INCV': [1, 'kt', '6 hr intensity change'],
 'LAT': [10, 'deg W', 'storm lat'],
 'LON': [10, 'deg N', 'storm lon'],
 'CSST': [10, 'C', 'clim. SST'],
 'CD20': [1, 'm', 'clim. depth 20C isotherm'],
 'CD26': [1, 'm', 'clim. depth 26C isotherm'],
 'COHC': [1, 'kJ-cm^-2', 'clim. ocean heat content'],
 'DTL': [1, 'km', 'dist. nearest land'],
 'OAGE': [10, 'hr', 'ocean age'],
 'NAGE': [10, 'hr', 'normalized ocean age'],
 'RSST': [10, 'C', 'Reynolds SST'],
 'DSST': [10, 'C', 'daily Reynolds SST'],
 'DSTA': [10, 'C', 'daily av. Renolds SST'],
 'PHCN': [1, 'kJ-cm^-2', 'est. ocean heat content'],
 'U200': [10, 'kt', '200 hPa U, r=200-800km'],
 'U20C': [10, 'kt', '200 hPa U, r=0-500km'],
 'V20C': [10, 'kt', '200 hPa V, r=0-500km'],
 'E000': [10, 'K', '1000 hPa th

We'll processs each basin separately.  So select a basin (`ATLANTIC`, `EAST_PACIFIC`, `WEST_PACIFIC`, or `SOUTHERN_HEM`)

In [5]:
# BASIN = 'SOUTHERN_HEM'
#
yr_start = 2019
yr_end = 2021
fname_load = 'DATA/processed/realtime/v2_SHIPS_realtime_predictors_{yr_start}-{yr_end}_ALL_basins.csv'.format(yr_start=yr_start,
                                                                                yr_end=yr_end)
SHIPS_loadx = pd.read_csv(fname_load)
SHIPS_loadx = SHIPS_loadx.drop(columns='Unnamed: 0')
allowed_stormno = ['01','02','03','04','05','06','07','08','09','10','11','12','13','14','15','16','17','18','19',
                  '20','21','22','23','24','25','26','27','28','29','30','31','32','33','34','35','36','37','38',
                  '39','40','41','42','43','44','45','46','47','48','49']
SHIPS_load = SHIPS_loadx[SHIPS_loadx['ATCFID'].astype(str).str[2:4].isin(allowed_stormno)]

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


In [6]:
SHIPS_load.set_index(['TIME','ATCFID']).xs((0,'WP192019')).sort_values(by='DATE_full')


  return runner(coro)


Unnamed: 0_level_0,Unnamed: 1_level_0,ATCF Basin,MSLP,CASE,VMAX,DATE_full,DELV,LAT,LON,LON_AGE,CSST,...,PW21,PC00,EC00,ECM3,SH3C,SD3C,SG3C,SH8C,SD8C,SG8C
TIME,ATCFID,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
0,WP192019,WP,9999,1464,25,2019-09-27 06:00:00,9999,141,1370,E,296.0,...,,,,,,,,,,
0,WP192019,WP,9999,3030,30,2019-09-27 12:00:00,9999,145,1355,E,296.0,...,,,,,,,,,,
0,WP192019,WP,9999,2790,35,2019-09-27 18:00:00,9999,149,1345,E,296.0,...,,,,,,,,,,
0,WP192019,WP,9999,662,40,2019-09-28 00:00:00,9999,161,1322,E,295.0,...,,,,,,,,,,
0,WP192019,WP,9999,719,45,2019-09-28 06:00:00,9999,173,1305,E,293.0,...,,,,,,,,,,
0,WP192019,WP,9999,1891,50,2019-09-28 12:00:00,9999,178,1289,E,293.0,...,,,,,,,,,,
0,WP192019,WP,9999,2341,50,2019-09-28 18:00:00,9999,180,1275,E,292.0,...,,,,,,,,,,
0,WP192019,WP,9999,2207,55,2019-09-29 00:00:00,9999,186,1265,E,292.0,...,,,,,,,,,,
0,WP192019,WP,9999,1634,55,2019-09-29 06:00:00,9999,193,1256,E,290.0,...,,,,,,,,,,
0,WP192019,WP,9999,1669,65,2019-09-29 12:00:00,9999,202,1247,E,289.0,...,,,,,,,,,,


Now, we rescale the SHIPS predictors.  We take the scaling factors from the `json` file we created earlier and divide a given predictor by its scaling factor (assuming it has one; not all predictors have scaling factors). 

In [7]:
if 'LON_AGE' in SHIPS_load.columns:
    SHIPS_load = SHIPS_load.drop(columns={'LON_AGE'})
#df = SHIPS_load.apply(pd.to_numeric)
df = SHIPS_load
col_names = df.columns
i_col = 'CASE'
for i_col in col_names:
    print('feature is ',i_col)
    if i_col in SHIPS_js.keys():
        print("yay")
        factor = SHIPS_js[i_col][0]
        print('divide by ',factor)
        #
        df[i_col] = df[i_col].apply(pd.to_numeric,errors='coerce')
        
        df[i_col] = df[i_col]/factor
    else:
        print("nay")

feature is  ATCF Basin
nay
feature is  MSLP
yay
divide by  1
feature is  ATCFID
nay
feature is  CASE
nay
feature is  VMAX
yay
divide by  1
feature is  DATE_full
nay
feature is  TIME
yay
divide by  1
feature is  DELV
yay
divide by  1
feature is  LAT
yay
divide by  10
feature is  LON
yay
divide by  10
feature is  CSST
yay
divide by  10
feature is  DTL
yay
divide by  1
feature is  RSST
yay
divide by  10
feature is  RSST_AGE
nay
feature is  CD20
yay
divide by  1
feature is  CD26
yay
divide by  1
feature is  COHC
yay
divide by  1
feature is  RHCN
yay
divide by  1
feature is  RHCN_AGE
nay
feature is  PHCN
yay
divide by  1
feature is  IR00
yay
divide by  1
feature is  IRM3
yay
divide by  1
feature is  U200
yay
divide by  10
feature is  U20C
yay
divide by  10
feature is  V20C
yay
divide by  10
feature is  E000
yay
divide by  10
feature is  EPOS
yay
divide by  10
feature is  ENEG
yay
divide by  10
feature is  EPSS
yay
divide by  10
feature is  ENSS
yay
divide by  10
feature is  RHLO
yay
divide 

In [8]:
df.set_index(['ATCFID','TIME']).xs(('WP192019',0))

  return runner(coro)


Unnamed: 0_level_0,Unnamed: 1_level_0,ATCF Basin,MSLP,CASE,VMAX,DATE_full,DELV,LAT,LON,CSST,DTL,...,PW21,PC00,EC00,ECM3,SH3C,SD3C,SG3C,SH8C,SD8C,SG8C
ATCFID,TIME,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
WP192019,0.0,WP,9999.0,131,40.0,2019-10-03 12:00:00,9999.0,38.4,133.1,21.9,313.0,...,,,,,,,,,,
WP192019,0.0,WP,9999.0,309,40.0,2019-10-02 18:00:00,9999.0,35.6,127.7,23.1,-84.0,...,,,,,,,,,,
WP192019,0.0,WP,9999.0,362,40.0,2019-10-03 00:00:00,9999.0,37.5,129.6,21.4,40.0,...,,,,,,,,,,
WP192019,0.0,WP,9999.0,607,45.0,2019-10-02 06:00:00,9999.0,33.3,124.7,23.4,207.0,...,,,,,,,,,,
WP192019,0.0,WP,9999.0,610,90.0,2019-09-30 18:00:00,9999.0,26.3,122.7,26.9,154.0,...,,,,,,,,,,
WP192019,0.0,WP,9999.0,662,40.0,2019-09-28 00:00:00,9999.0,16.1,132.2,29.5,843.0,...,,,,,,,,,,
WP192019,0.0,WP,9999.0,696,50.0,2019-10-03 18:00:00,9999.0,39.2,134.4,20.6,297.0,...,,,,,,,,,,
WP192019,0.0,WP,9999.0,719,45.0,2019-09-28 06:00:00,9999.0,17.3,130.5,29.3,772.0,...,,,,,,,,,,
WP192019,0.0,WP,9999.0,834,50.0,2019-10-04 00:00:00,9999.0,39.1,136.1,21.2,198.0,...,,,,,,,,,,
WP192019,0.0,WP,9999.0,986,40.0,2019-10-03 06:00:00,9999.0,38.0,131.8,21.5,241.0,...,,,,,,,,,,


We have a few options for dealing with land:
1.  <code>SIMPLE_MASK</code>: Mask out all cases where <code>DTL</code> at <code>TIME = 0</code> or <code>TIME = max_time</code> is less than some <code>DTL_thresh</code> (normally, <code>DTL_thresh = 100 km</code>)
2.  <code>SIMPLE_w_INT</code>: Apply mask if <code>DTL</code> for a given case is less than <code>DTL_thresh</code> at any point between <code>TIME = 0</code> and <code>TIME = max_time</code>. 
3.  <code>SCALAR_MASK</code>: For all cases where <code>DTL</code> at <code>TIME = 0</code> or <code>TIME = max_time</code> is less than some <code>DTL_thresh</code>, multiply the <code>DTL</code> by a scaling factor of 0.1 and use this <code>DTL_scalar</code> to reduce all SHIPS predictors accordingly. If <code>DTL</code> <= 0, scaling factor is 0. 
4.  <code>SCALAR_MASK_w_INT</code>: As in 3, but apply if <code>DTL <= DTL_thresh</code> at any point between <code>TIME = 0</code> and <code>TIME = max_time</code>. 
5.  <code>no_mask</code>: do not mask out over land.
    
<i>Note to self</i>:  Types 1, 2, and 5 have been implemented; 3 and 4 are future work

Apply desired mask (note: this step can take a few minutes)

In [9]:
mask_TYPE = 'SIMPLE_MASK'
to_IND = ['ATCFID','CASE','DATE_full','TIME']
SHIPS_mask = SHIPS_preprocess.apply_land_mask(df,mask_TYPE,to_IND)
# SHIPS_mask.set_index(['ATCFID','TIME']).xs(('AL012020',0))
#SHIPS_mask.set_index(['ATCF Basin','ATCFID','TIME']).xs(0,level=2).xs('WP').sort_values(by='DATE_full').plot(x='DATE_full',y='VMAX')

applying mask


Select specific years and forecast hours to include.  Default settings are:
* <b>years</b>: 2005-most recent year with full SHIPS data (usually `current_year - 1`, unless it's very early in the year and the best-tracks haven't been made available).  
* <b>hours</b>: -12 hours to 24 hours (could go out to 48 or 72 if desired)

In [10]:
YR_first = 2005
YR_last = 2022
HR_first = -12
HR_last = 24
#
YEARS_use = np.arange(YR_first,YR_last,1)
FORE_use = np.arange(HR_first,HR_last+1,6)

Trim to desired years.  Hold off on trimming hours for now; the way the IR predictors are arranged means we need forecast hours past 24. 

In [11]:
#SHIPS_mask = SHIPS_mask#.reset_index()
SHIPS_trim_yrs = SHIPS_mask[pd.to_datetime(SHIPS_mask['DATE_full']).dt.year.isin(YEARS_use)]

In [12]:
SHIPS_trim_yrs['BASIN'] = SHIPS_trim_yrs['ATCFID'].astype(str).str[0:2]
SHIPS_trim_yrs['BASIN'] = SHIPS_trim_yrs['BASIN'].replace({'WP':'WEST_PACIFIC','AL':'ATLANTIC','EP':'EAST_PACIFIC',
                                                          'SH':'SOUTHERN_HEM','CP':'CENTRAL_PACIFIC'})
SHIPS_trim_yrs = SHIPS_trim_yrs[SHIPS_trim_yrs['BASIN']!='IO']

Identify key predictors we want to use.  We'll differentiate between dynamical predictors and IR predictors because we process them a little differently.  Note that <code>CASE</code>, <code>NAME</code>, <code>DATE_full</code>, and <code>TIME</code> are included in both datasets for identification purposes.

For now, we use the following dynamical predictors:
* <code>SHRG</code>: generalized midlevel shear [kt$*$10]
* <code>D200</code>: upper level divergence [s$^{-1} \cdot 10^7$]
* <code>Z850</code>: lower level vorticity [s$^{-1} \cdot 10^7$]
* <code>VMAX</code>: maximum wind speed [kt] (use <code>VMAX</code> at time = 0)
* <code>VMPI</code>: maximum potential intensity [kt] (calculated based on <code>VMAX$_{t=0}$</code>
* <code>DELV</code>: change in intensity over previous 12 hours [kt] (use <code>DELV</code> from time = -12-0)
* <code>RHMD</code>: mid-level relative humidity [%]
* <code>NSST / RSST </code> (depends on <code>BASIN</code>): sea surface temperature [C*10]
* <code>NOHC / RHCN </code> (depends on <code>BASIN</code>): ocean heat content [J/kg-C if NOHC, kJ/cm$^2$ if RHCN]

If desired, we calculate an additional predictor, <code>POT</code> (potential intensity), by subtracting actual maximum winds from maximum potential intensity,
$$ POT_{time=i} = VMPI_{time=i} - VMAX_{time=0}$$

Otherwise, we'll use $VMPI$ and $VMAX_{time=0}$ as separate predictors, along with SST.

So now we select desired dynamical predictors.  We will keep only storm type 1 (for tropical cyclones), mask missing values (demarcated by 9999s), and calculate <code>POT</code>.  Finally, if we select <code>is_interp = True</code>, we'll linearly interpolate over missing values for our dynamic predictors. 

In [13]:
SHIPS_trim_yrs.columns

Index(['ATCF Basin', 'MSLP', 'ATCFID', 'CASE', 'VMAX', 'DATE_full', 'TIME',
       'DELV', 'LAT', 'LON',
       ...
       'PC00', 'EC00', 'ECM3', 'SH3C', 'SD3C', 'SG3C', 'SH8C', 'SD8C', 'SG8C',
       'BASIN'],
      dtype='object', length=154)

In [14]:
SHIPS_dyn_out = pd.DataFrame()
for ibasin in SHIPS_trim_yrs['BASIN'].unique():
    SHIPS_sel = SHIPS_trim_yrs.set_index(['BASIN']).xs(ibasin)
    
    iships = get_dyn_predictors_lsdiag(SHIPS_sel,ibasin,-12,24,is_INTERP=True,calc_POT=True)
    if ibasin == 'SOUTHERN_HEM':
        print('multiply by -1 for SH')
        print('before multiplying, Z850 mean is ',iships['Z850'].mean())
        iships['Z850'] = -1*iships['Z850']
        print('after multiplying, Z850 mean is ',iships['Z850'].mean())
    iships['BASIN'] = ibasin
    SHIPS_dyn_out = SHIPS_dyn_out.append(iships.reset_index())

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


interpolating over missing values


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


interpolating over missing values
multiply by -1 for SH
before multiplying, Z850 mean is  -2.657181102362211e-06
after multiplying, Z850 mean is  2.657181102362211e-06


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


interpolating over missing values


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


interpolating over missing values


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  self._setitem_single_block(indexer, value, name)


interpolating over missing values


In [15]:
import warnings
warnings.simplefilter(action='ignore',category=pd.errors.PerformanceWarning)

The IR predictors are derived from GOES data and are not actually time-dependent. They are calculated based on GOES data close to the time of each case.  <code>IR00</code> predictors are generally related to GOES Ch4 brightness temperatures averaged over some radial area (relative to the storm center).  <code>IRM1</code> and <code>IRM3</code> are the same as <code>IR00</code> but derived from satellite data taken 1.5 and 3 hours earlier, respectively, than <code>IR00</code>. If <code>IR00</code> is missing, we can fill it in with <code>IRM1</code>.  If <code>IRM1</code> is missing, we can use <code>IRM3</code>.  If all three are missing for a given case, we will use <code>NaN</code> values. 

The <code>PC00</code> predictors are principal components and related variables from IR imagery. Again, <code>PC00</code> is derived from satellite imagery and is not time-dependent. Regarding <code>PCM1</code> and <code>PCM3</code>, we will follow the same procedure as the <code>IRM1</code> and <code>IRM3</code>.  

Ultimately, we're only going to use a few IR predictors.  Since they are not time-dependent, we'll populate the <code>time</code> dimension with the same values of <code>IR00</code>, for example.  The variables we are interested in for our actual predictions are:
* <code>IR00</code> (and <code>IRM1/IRM3</code> as needed):
    * Average GOES Ch4* brightness temperature from r=0-200 km [deg C * 10], stored at t = +6 hours
    * Standard deviation of GOES Ch4 brightness temperature from r=0-200 km [deg C * 10], stored at t = +12 hours
    * Percent area of GOES Ch4 brightness temperature < -50 C for r = 50-200 km [%], stored at t = +54 hours
    * Storm size paramter, stored at t = +108 hours
* <code>PC00</code> (and <code>PCM1/PCM3</code> as needed):
    * First four principal components of IR imagery, stored at 0, +6, +12, and +18 hours, respectively
    

*note to self, Ch4 refers to Channel 4 in the old GOES.  What this actually refers to is the 10.3 um band, which is channel 13 on the current versions of GOES

In [16]:
SHIPS_IR_out = pd.DataFrame()
for ibasin in SHIPS_trim_yrs['BASIN'].unique():
    SHIPS_sel = SHIPS_trim_yrs.set_index(['BASIN']).xs(ibasin)
    
    iships,IR00_var_names,PC00_var_names = get_IR_predictors_lsdiag(SHIPS_sel,-12,24)
    iships['BASIN'] = ibasin
    SHIPS_IR_out = SHIPS_IR_out.append(iships.reset_index())

SHIPS_IR_out = create_SHIPS_predictors_IR(SHIPS_trim_yrs,
                        predictors_sel_IR,FORE_use,IR00_time_ind,IR00_var_names,PC00_time_ind,PC00_var_names)

Combine into one dataframe

In [17]:
SHIPS_process_all = SHIPS_dyn_out
SHIPS_IR_out = SHIPS_IR_out.reset_index()
SHIPS_process_all[IR00_var_names] = SHIPS_IR_out[IR00_var_names]
SHIPS_process_all[PC00_var_names] = SHIPS_IR_out[PC00_var_names]

In [18]:
foo = SHIPS_process_all[SHIPS_process_all['TIME']==0]


In [19]:
#SHIPS_process_all['CYCLONE NO'] = SHIPS_process_all.ATCFID.str[2:4]
SHIPS_process_all.to_csv('DATA/processed/realtime/SHIPS_realtime_predictors_2019-2021_ALL_basins.csv')

Get <code>n</code> hour change in <code>VMAX</code> (intensity change). Specify start of period, end of period, and length of period, in hours. We'll calculate the SHIPS model forecast <code>n</code>-hour change in <code>VMAX</code>, and the actual <code>VMAX</code> 24 hours later. SHIPS predictors are released in 6-hour intervals, so <code>n_hours</code> should be in 6-hourly increments.  

In [20]:
foo = SHIPS_process_all.set_index(['ATCFID','TIME','DATE_full']).xs(0,level=1).sort_index()
dvnew = pd.DataFrame(index=foo.index,columns=['d_V'])
for atcfi in SHIPS_process_all['ATCFID'].unique().tolist():
    f1 = foo.xs(atcfi)
    dates = f1.index
    first_date = dates[0]
    last_date = dates[-1]
    date_range = pd.date_range(first_date,last_date,freq='6H')
    for idate in date_range:
        d0 = idate
        d4 = pd.to_datetime(d0) + pd.Timedelta(1,'D')
        if (str(d4) in dates) & (str(d0) in dates):
            d_v = f1.loc[str(d4),'VMAX'] - f1.loc[str(d0),'VMAX']
            #print(d0,d_v)
            dvnew.loc[atcfi,str(d0)]['d_V'] = d_v
        #elif str(d0) in dates:
            #dvnew.loc[atcfi,str(d0)]['d_V'] = np.nan
foo['d_V'] = dvnew['d_V']

In [21]:
dvnew['basin'] = dvnew.index.get_level_values(0).str[0:2]
100*(foo[foo['d_V']>=30].reset_index().groupby(['BASIN'])['d_V'].count()/foo.reset_index().groupby(['BASIN'])['d_V'].count())



BASIN
ATLANTIC            6.698002
CENTRAL_PACIFIC          NaN
EAST_PACIFIC        6.928105
SOUTHERN_HEM        8.631579
WEST_PACIFIC       10.803324
Name: d_V, dtype: float64

Save