In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

##### Load RF model

In [2]:
def load_RF_model(yr_start,yr_end,mask,stand_str,RI_thresh,weights,scoring,RF_score='Random Forest'):
    """Read the stored validation predictions of the random-forest run.

    Every argument is formatted into the CSV file name. The column
    'Y test' of the file is renamed to 'Y true'.

    Returns
    -------
    pandas.DataFrame
        Contents of the predictions CSV with the renamed column.
    """
    # NOTE(review): there is no trailing separator here, so this string is
    # glued directly onto 'PREDICTED_Y_vals' and acts as a file-name prefix.
    # Confirm this is intended (alternative: '~/SHIPS/SHIPS_clean/Model_Results/VALIDATION/').
    location = '~/SHIPS/SHIPS_clean/Model_Results/VALIDATION/ALL_2019-2021_ONLY'
    name_template = ('_{RF_score}_SHIPS_VALID_RI_vs_no_RI_{yr_start}-{yr_end}_{mask}_{stand_str}'
                     '_RI_thresh_{RI_thresh}weights_{weights}_wts_{scoring}.csv')
    name_fields = dict(RF_score=RF_score,yr_start=yr_start,yr_end=yr_end,mask=mask,
                       stand_str=stand_str,RI_thresh=RI_thresh,weights=weights,scoring=scoring)
    csv_path = location + 'PREDICTED_Y_vals' + name_template.format(**name_fields)
    return pd.read_csv(csv_path).rename(columns={'Y test':'Y true'})

In [3]:
def load_LR_model(yr_start,yr_end,mask,stand_str,RI_thresh,weights,scoring):
    """Read the stored validation predictions of the logistic-regression run.

    Only the 'lbfgs' solver results are loaded. Every argument is formatted
    into the CSV file name, and the column 'Y test' is renamed to 'Y true'.

    Returns
    -------
    pandas.DataFrame
        Contents of the predictions CSV with the renamed column.
    """
    solver = 'lbfgs'
    # NOTE(review): there is no trailing separator here, so this string is
    # glued directly onto 'PREDICTED_Y_vals' and acts as a file-name prefix.
    # Confirm this is intended (alternative: '~/SHIPS/SHIPS_clean/Model_Results/VALIDATION/').
    location = '~/SHIPS/SHIPS_clean/Model_Results/VALIDATION/ALL_2019-2021_ONLY'
    name_template = ('_{model}_SHIPS_VALID_RI_vs_no_RI_{yr_start}-{yr_end}_{mask}_{stand_str}'
                     '_RI_thresh_{RI_thresh}weights_{weights}_wts_{scoring}.csv')
    name_fields = dict(model=solver,yr_start=yr_start,yr_end=yr_end,mask=mask,
                       stand_str=stand_str,RI_thresh=RI_thresh,weights=weights,scoring=scoring)
    csv_path = location + 'PREDICTED_Y_vals' + name_template.format(**name_fields)
    return pd.read_csv(csv_path).rename(columns={'Y test':'Y true'})

In [4]:
# Run settings. The first seven are passed to load_RF_model / load_LR_model,
# where they are formatted into the name of the predictions CSV.
# NOTE(review): yr_end is 2022 while the e-deck files, titles and figure names
# in this notebook say 2019-2021 -- confirm which period the results cover.
yr_start = 2019
yr_end = 2022
mask = 'SIMPLE_MASK'
stand_str = 'STANDARDIZED'
# Also used as the 24-h VMAX-change threshold when labelling best-track rows.
RI_thresh = 30
weights = 'default'
scoring = 'f1_weighted'
# File format (and extension) used when figures are saved.
fig_format = 'pdf'

In [5]:
# Both loaders take the same leading arguments, so bundle them once.
_run_args = (yr_start,yr_end,mask,stand_str,RI_thresh,weights,scoring)
y_pred_RF = load_RF_model(*_run_args,RF_score='Random Forest')
y_pred_LR = load_LR_model(*_run_args)
# Stack the random-forest and logistic-regression predictions row-wise.
y_pred_res = pd.concat([y_pred_RF,y_pred_LR])

##### Load SHIPS-RII

In [6]:
def load_edecks_FRIA(yr_start,yr_end):
    """Read the FRIA e-deck CSV for the given year range.

    Parameters
    ----------
    yr_start, yr_end
        Formatted into the file name
        'VALIDATION_data/edecks/FRIA_etracks_RI_{yr_start}-{yr_end}.csv'.

    Returns
    -------
    pandas.DataFrame
        The file contents plus an 'ATCF BASIN' column holding a copy of the
        'BASIN' column as read from the file.
    """
    edeck_fria = pd.read_csv('VALIDATION_data/edecks/FRIA_etracks_RI_{yr_start}-{yr_end}.csv'.format(
        yr_start=yr_start,yr_end=yr_end))
    # Keep the basin code exactly as stored in the file under a separate name.
    edeck_fria['ATCF BASIN'] = edeck_fria['BASIN']
    # The frame was previously built and then discarded (no return statement).
    return edeck_fria
                            

In [7]:
def load_edecks(yr):
    """Read one year of best-track and e-deck CSV files.

    Best track: a fixed set of columns is dropped, 'DATE' is reduced to a
    calendar date, 'YEAR' and 'ATCFID' (basin code + cyclone number + year)
    are added, and two-letter basin codes are replaced by long names.
    E-deck: only a 'YEAR' column is added.

    Returns
    -------
    tuple of pandas.DataFrame
        (best_track, e_deck) for the requested year.
    """
    dropped_cols = ['Unnamed: 0','TECHNUM','DEPTH','SEASCODE','WINDCODE','SEAS','SEAS1','SEAS2',
                    'P Outer','R Outer','SUBREGION','MAXSEAS','INITIALS','SEAS3','SEAS4']
    basin_long_names = {'AL':'ATLANTIC','EP':'EAST_PACIFIC','WP':'WEST_PACIFIC',
                        'SH':'SOUTHERN_HEM','CP':'CENTRAL_PACIFIC'}

    ibest_track = pd.read_csv('VALIDATION_data/processed/best_tracks_{year_sel}.csv'.format(year_sel=yr))
    ie_deck = pd.read_csv('VALIDATION_data/edecks/etracks_RI_{year_sel}.csv'.format(year_sel=yr))
    ie_deck['YEAR'] = pd.to_datetime(ie_deck['DATE']).dt.year

    ibest_track = ibest_track.drop(columns=dropped_cols)
    ibest_track['DATE'] = pd.to_datetime(ibest_track['DATE']).dt.date
    track_year = pd.to_datetime(ibest_track['DATE']).dt.year
    ibest_track['YEAR'] = track_year
    # The ID must be built from the two-letter code, i.e. before the rename below.
    ibest_track['ATCFID'] = ibest_track['BASIN'] + ibest_track['CYCLONE NO'] + track_year.astype(str)
    ibest_track['BASIN'] = ibest_track['BASIN'].replace(basin_long_names)
    return ibest_track,ie_deck

#### Read in the FRIA edecks

In [8]:
# FRIA e-decks: add the forecast year, expand basin codes to long names,
# drop bookkeeping columns and rename the probability column to '30/24'.
fria_basin_names = {'AL':'ATLANTIC','EP':'EAST_PACIFIC','CP':'CENTRAL_PACIFIC',
                    'WP':'WEST_PACIFIC','SH':'SOUTHERN_HEM'}
edeck_fria = pd.read_csv('VALIDATION_data/edecks/FRIA_etracks_RI_2019-2021.csv')
edeck_fria = (
    edeck_fria
    .assign(YEAR=pd.to_datetime(edeck_fria['DATE']).dt.year,
            BASIN=edeck_fria['BASIN'].replace(fria_basin_names))
    .drop(columns=['Initials','Unnamed: 0'])
    .rename(columns={'ProbItem':'30/24'})
)

#### Read in the RIPA edecks

In [9]:
# RIPA e-decks: same preparation as the FRIA file (year column, long basin
# names, bookkeeping columns dropped, probability column renamed to '30/24').
ripa_basin_names = {'EP':'EAST_PACIFIC','WP':'WEST_PACIFIC','SH':'SOUTHERN_HEM'}
edeck_ripa = pd.read_csv('VALIDATION_data/edecks/RIPA_etracks_RI_2019-2021.csv')
edeck_ripa = (
    edeck_ripa
    .assign(YEAR=pd.to_datetime(edeck_ripa['DATE']).dt.year,
            BASIN=edeck_ripa['BASIN'].replace(ripa_basin_names))
    .drop(columns=['Initials','Unnamed: 0'])
    .rename(columns={'ProbItem':'30/24'})
)
# Show which techniques the file contains.
edeck_ripa['Tech'].unique()

array(['LDA0', 'LOGR', 'RAPC'], dtype=object)

In [10]:
# Load every selected year and stack the yearly frames.
# The pieces are collected in lists and concatenated once: DataFrame.append
# no longer exists in pandas >= 2.0 and re-copies the data on every call.
yr_sel = [2019,2020,2021]
best_track_parts = []
e_deck_parts = []
for iyr in yr_sel:
    ibt,ied = load_edecks(iyr)
    e_deck_parts.append(ied)
    best_track_parts.append(ibt)
e_deck = pd.concat(e_deck_parts)
best_track = pd.concat(best_track_parts)
# Same clean-up as applied to the FRIA / RIPA e-decks.
e_deck = e_deck.drop(columns=['Initials','Unnamed: 0'])
e_deck = e_deck.rename(columns={'ProbItem':'30/24'})
# Keep only the best-track rows whose RAD value is 34.
best_track = best_track[best_track['RAD']==34]

##### Get 24-hour changes in VMAX for best tracks
Get 24-hour changes in VMAX. Mask out rows where the date difference to the row 24 hours later is not exactly 1 day (i.e., the shifted row belongs to a different case).

In [11]:
def bdeck_change(bdeck,dt=24,init_hr=0):
    """Add `dt`-hour changes of VMAX and MSLP to a best-track table.

    Parameters
    ----------
    bdeck : pandas.DataFrame
        Needs the columns 'ATCFID', 'TIME', 'DATE', 'VMAX', 'MSLP' and
        'CYCLONE NO'. Rows are assumed to be 6-hourly and in time order
        within each 'ATCFID' (the shift below is `dt/6` rows).
    dt : int, default 24
        Lead time in hours; also used in the new column names.
    init_hr : int, default 0
        Unused; kept so existing calls keep working.

    Returns
    -------
    pandas.DataFrame
        Copy of `bdeck` indexed by ('ATCFID', 'TIME') with the extra columns
        'd{dt}_VMAX' and 'd{dt}_MSLP'. Rows whose 'CYCLONE NO' is not one of
        '01'..'50' are removed. A change is NaN when no row of the same storm
        exists `dt/6` rows later or when the 'DATE' difference to that row
        is not exactly `dt` hours.
    """
    d_hr = int(dt/6)
    ships = bdeck.set_index(['ATCFID','TIME'])
    pred_num = ['VMAX','MSLP']
    # Shift inside each storm. A plain shift pairs the last rows of one storm
    # with the first rows of the next one, and the date check below cannot
    # catch that when the two rows happen to be one day apart.
    ships_dt = ships[['DATE'] + pred_num].groupby(level='ATCFID',sort=False).shift(-d_hr)
    date_diff = pd.to_datetime(ships_dt['DATE']) - pd.to_datetime(ships['DATE'])
    diff = ships_dt[pred_num] - ships[pred_num]
    # For the default dt=24 this is the original "exactly one day" condition.
    diff = diff.where(date_diff == pd.Timedelta(hours=dt))
    best_track_diffs = ships.copy()
    best_track_diffs[['d{dt}_VMAX'.format(dt=dt),'d{dt}_MSLP'.format(dt=dt)]] = diff[pred_num]
    # Remove invests, etc: keep only cyclone numbers '01' .. '50'.
    cyclone_nos_allowed = ['{:02d}'.format(num) for num in range(1,51)]
    best_track_diffs = best_track_diffs[best_track_diffs['CYCLONE NO'].isin(cyclone_nos_allowed)]
    return best_track_diffs

In [14]:
# 24-hour VMAX / MSLP changes for the (RAD == 34) best-track rows.
bdeck_diffs = bdeck_change(best_track)
# Show the cyclone numbers present before filtering; bdeck_change keeps only '01'-'50'.
best_track['CYCLONE NO'].unique()

array(['A4', '01', '12', '08', '11', '91', '19', '90', '07', 'A3', '13',
       'D2', 'C0', '05', 'E6', '98', '97', 'C5', 'B9', '16', 'C7', 'C2',
       '06', '81', '93', '15', 'A9', '03', '02', '09', 'B0', '96', 'C3',
       'A2', '18', 'A6', '25', '30', 'C6', '14', 'C9', 'D6', 'A1', '24',
       '22', '95', 'C1', '20', '80', '17', '04', '92', '29', 'B3', 'D8',
       'C8', 'D1', 'B5', 'A8', '99', '28', 'D3', '10', 'C4', 'B2', 'B1',
       'D4', 'A5', 'F1', 'B6', 'D0', 'B8', '21', '23', 'E1', '26', '27',
       'A7', '94', 'B7', 'B4', 'A0', 'H0', 'E5', 'F9', 'E0', 'F0', 'D7',
       '31', 'E7', 'I0', 'D9', 'G0', 'E9', 'E4', 'D5', 'E3', 'E8'],
      dtype=object)

##### Bin e-deck RI probabilities by 10s 
Round the e-deck RI probabilities to the nearest 10; probabilities of 0–5% are placed in the 5% bin. We select the desired intensity change (`Intensity Change`) and read the RI probability (`ProbItem`) forecast for it, keeping only forecasts valid over the 24-hour window (`RIstartTAU = 0`, `RIstopTAU = 24`).

Then, we will find corresponding best tracks for each probability bin.  We will count up the number of days that actually saw RI. 

In [None]:
def bin_edecks(edeck,tech_sel,RI_thresh=30,t0=0,dt=24):
    """Select e-deck forecasts and bin their RI probability to tens.

    Rows are kept when 'RIstartTAU' == t0, 'RIstopTAU' == dt, 'Tech' is in
    `tech_sel`, 'Intensity Change' == RI_thresh and 'CYCLONE NO' <= 50.

    Added / rewritten columns:
      * '30/24'     cast to float
      * 'Prob. RI'  '30/24' rounded to the nearest ten; set to 5 where '30/24' <= 5
      * 'TIME'      hour of 'DATE'
      * 'DATE'      calendar date of the original 'DATE'
      * 'DATE_full' 'DATE' plus 'TIME' hours

    Returns
    -------
    pandas.DataFrame
    """
    in_window = (edeck['RIstartTAU']==t0) & (edeck['RIstopTAU']==dt)
    selected = edeck.where(in_window).dropna(how='all')
    wanted = selected['Tech'].isin(tech_sel) & (selected['Intensity Change']==RI_thresh)
    selected = selected[wanted]

    prob = selected['30/24'].astype(float)
    binned = prob.round(-1).mask(prob <= 5, 5)
    selected = selected.assign(**{'30/24': prob, 'Prob. RI': binned})

    # Remove invests (cyclone no > 50)
    selected = selected[selected['CYCLONE NO']<=50]

    stamp = pd.to_datetime(selected['DATE'])
    selected = selected.assign(TIME=stamp.dt.hour, DATE=stamp.dt.date)
    selected['DATE_full'] = pd.to_datetime(selected['DATE']) + pd.to_timedelta(selected['TIME'],'hours')
    return selected

In [None]:
# Stack the yearly e-decks with the FRIA and RIPA e-decks (row-wise).
edeck_ALL = pd.concat([e_deck,edeck_fria,edeck_ripa],axis=0)
# 'Tech' values to evaluate; they are renamed for display further down
# (RIOC -> OP-CON, RIOD -> SHIPS-RII, LDA0 -> RIPA).
tech_sel = ['RIOC','RIOD','LDA0','FRIA']
# Filter to these techniques and bin the probabilities (default 30 kt / 0-24 h).
edeck_sel = bin_edecks(edeck_ALL,tech_sel)

In [None]:
# Inspection only: non-null counts per column for every basin / year.
edeck_ALL.groupby(['BASIN','YEAR']).count()
#edeck_fria.groupby(['BASIN','YEAR']).count()

#### Trim to dates we actually predicted

In [None]:
# Keep only the e-deck forecasts whose valid time also appears in the ML
# predictions. The explicit copy makes edeck_trim an independent frame, so
# assigning columns to it later does not trigger SettingWithCopyWarning.
# NOTE(review): this relies on y_pred_res['DATE_full'] being comparable with
# the Timestamp values in edeck_sel['DATE_full'] -- confirm its dtype.
edeck_trim = edeck_sel[edeck_sel['DATE_full'].isin(y_pred_res['DATE_full'])].copy()

In [None]:
# Expand the remaining two-letter basin codes to the long names used elsewhere.
edeck_trim = edeck_trim.assign(BASIN=edeck_trim['BASIN'].replace({'AL':'ATLANTIC','EP':'EAST_PACIFIC'}))

##### Reliability for e-decks

In [None]:
# Reliability of the e-deck techniques: for every model and probability bin,
# match the forecasts with best-track rows on (ATCFID, DATE, TIME) and
# compute, per basin, the share of matched cases with d24_VMAX >= RI_thresh.
edeck = edeck_trim
pct_range = edeck['Prob. RI'].unique().tolist()

# Best-track truth table. It depends on neither the model nor the bin, so it
# is built once instead of inside the inner loop.
bdeck_sel = bdeck_diffs.reset_index().set_index(['ATCFID','DATE','TIME'])
bdeck_sel['Y true'] = (bdeck_sel['d24_VMAX'] >= RI_thresh).astype(int)
b_ind = set(bdeck_sel.index.to_list())

edeck_reliability_parts = []
for i_model in tech_sel:
    print(i_model)
    # Boolean selection: yields an empty frame (instead of a KeyError) when a
    # technique has no rows.
    i_e_decks_trim = edeck[edeck['Tech'] == i_model]
    for ipct in pct_range:
        xedeck_sel = i_e_decks_trim[i_e_decks_trim['Prob. RI'] == ipct].set_index(['ATCFID','DATE','TIME'])
        if len(xedeck_sel) == 0:
            continue
        # .loc needs a list here; pandas >= 2.0 rejects a set as an indexer.
        ind_both = list(b_ind & set(xedeck_sel.index.to_list()))
        if not ind_both:
            continue
        #
        bdeck_trim = bdeck_sel.loc[ind_both]
        xedeck_trim = xedeck_sel.loc[ind_both]
        total_days = xedeck_trim.reset_index().groupby(['BASIN']).count()
        no_RI = bdeck_trim.groupby(['BASIN'])['Y true'].sum()
        # A list keeps the column order fixed; a set has arbitrary order and
        # is refused by the DataFrame constructor in pandas >= 2.0.
        i_df = pd.DataFrame(index=total_days.index,columns=['Observed Pct','Predicted Pct'])
        if len(no_RI) == 0:
            i_df['Observed Pct'] = 0
            i_df['Observed No. RI'] = 0
        else:
            i_df['Observed Pct'] = 100*(no_RI/total_days['V'])
            i_df['Observed No. RI'] = no_RI
        #
        i_df['Predicted Pct'] = ipct
        i_df['Observed No Total'] = total_days['V']
        i_df['Model'] = i_model
        edeck_reliability_parts.append(i_df)

# One concat at the end replaces DataFrame.append (removed in pandas 2.0).
reliability_edecks = pd.concat(edeck_reliability_parts) if edeck_reliability_parts else pd.DataFrame()

reliability_edecks

In [None]:
# Scratch cell: relies on `edeck` and `i_model` left over from the reliability
# loop above, so it selects the rows of the last model that loop visited.
i_e_decks_trim = edeck.set_index(['Tech']).xs(i_model).reset_index()

In [None]:
# Replace the e-deck technique codes by the labels used in the figures.
edeck_model_labels = {'RIOC':'OP-CON','RIOD':'SHIPS-RII','LDA0':'RIPA'}
reliability_edecks['Model'] = reliability_edecks['Model'].replace(edeck_model_labels)


In [None]:
# Scratch cell: overwrites the `edeck_sel` returned by bin_edecks with the
# single-model frame from the previous scratch cell, re-indexed by bin / basin / case.
edeck_sel = i_e_decks_trim.set_index(['Prob. RI','BASIN','ATCFID','DATE','TIME'])

In [None]:
# Scale the class-1 probability by 100.
# NOTE: this overwrites the column in place, so running the cell a second
# time multiplies by 100 again -- reload y_pred_res before re-running.
y_pred_res['Y pred probab (class: 1)'] = y_pred_res['Y pred probab (class: 1)']*100

In [None]:
# Bin the scaled probability to the nearest ten (same rounding as bin_edecks).
y_pred_res['RI prob'] = (y_pred_res['Y pred probab (class: 1)']).round(-1)

In [None]:
# Bins of 5 or less (i.e. values that rounded down to 0) are relabelled as 5.
y_pred_res['RI prob'] = y_pred_res['RI prob'].mask(y_pred_res['RI prob'] <= 5, 5)

In [None]:
# Reliability of the ML models: for every probability bin, the share of
# cases with 'Y true' == 1, per predicted basin and model.
pct_range = y_pred_res['RI prob'].unique().tolist()
ml_group_cols = ['Predicted Basin','Model']
ml_reliability_parts = []
for ipct in pct_range:
    # The 100 % bin is left out.
    if ipct == 100:
        continue
    # Boolean selection always yields a frame, even when one row matches.
    y_sel = y_pred_res[y_pred_res['RI prob'] == ipct]
    total = y_sel.groupby(ml_group_cols)['CASE'].count()
    RI = y_sel[y_sel['Y true']==1].groupby(ml_group_cols)['CASE'].count()
    # A list keeps the column order fixed; a set has arbitrary order and is
    # refused by the DataFrame constructor in pandas >= 2.0.
    i_df = pd.DataFrame(index=total.index,columns=['Observed Pct','Predicted Pct'])
    if len(RI) == 0:
        i_df['Observed Pct'] = 0
        i_df['Observed No. RI'] = 0
    else:
        # Groups without any RI case are absent from `RI` and become NaN here.
        i_df['Observed Pct'] = 100*(RI/total)
        i_df['Observed No. RI'] = RI
    i_df['Predicted Pct'] = ipct
    i_df['Observed No Total'] = total
    ml_reliability_parts.append(i_df)

# One concat at the end replaces DataFrame.append (removed in pandas 2.0).
reliability = pd.concat(ml_reliability_parts) if ml_reliability_parts else pd.DataFrame()

In [None]:
# Missing entries (groups without any RI case) count as zero.
reliability = reliability.fillna(0)


In [None]:
# Relabel the model and basin names, then restore the (basin, model) index.
reliability = reliability.reset_index()
reliability = reliability.assign(
    **{'Model': reliability['Model'].replace({'lbfgs':'Logistic Reg.'}),
       'Predicted Basin': reliability['Predicted Basin'].replace({'SOUTH_PACIFIC':'SOUTHERN_HEM'})}
).set_index(['Predicted Basin','Model'])


In [None]:
# The bare expression below is not the last statement of the cell, so it is
# evaluated but not displayed.
reliability#.xs(('ALL','Random Forest')).sort_values(['Predicted Pct'])
# Basin names present in the ML reliability table, in order of appearance.
basin_ALL = reliability.reset_index()['Predicted Basin'].unique().tolist()


In [None]:
# Give the e-deck table the same (basin, model) index as the ML table ...
reliability_edecks = reliability_edecks.reset_index()
reliability_edecks = reliability_edecks.rename(columns={'BASIN':'Predicted Basin'})
reliability_edecks = reliability_edecks.set_index(['Predicted Basin','Model'])
# ... and stack both tables.
reliability_ALL = pd.concat([reliability,reliability_edecks])
# NOTE(review): bins are percentages, so `< 1000` only removes rows with a
# missing 'Predicted Pct'. The ML loop skips the 100 % bin explicitly --
# confirm whether `< 100` was intended here.
keep_rows = reliability_ALL['Predicted Pct'] < 1000
reliability_ALL = reliability_ALL[keep_rows]

In [None]:
# Consensus of Random Forest and SHIPS-RII: average the two reliability
# tables per (basin, probability bin), treating zeros as missing.
RF_consensus = reliability_ALL.xs(('Random Forest'),level=1).reset_index().sort_values('Predicted Pct')
# RF_consensus = RF_consensus[RF_consensus['Predicted Basin'].isin(['ATLANTIC','EAST_PACIFIC'])]
RF_consensus = RF_consensus.set_index(['Predicted Basin','Predicted Pct'])
# Zero out (or create) the East Pacific 5 % row; zeros become NaN below, so
# this bin does not contribute Random Forest values to the average.
RF_consensus.loc[('EAST_PACIFIC',5),:] = 0
RII_consensus = reliability_ALL.xs(('SHIPS-RII'),level=1).reset_index().set_index(['Predicted Basin','Predicted Pct'])
#RII_consensus.loc[('ATLANTIC',50),:] = 0
#RII_consensus.loc[('ATLANTIC',80),:] = 0
#RII_consensus.loc[('EAST_PACIFIC',70),:] = 0
consensus_stack = pd.concat((RF_consensus.replace(0,np.nan),RII_consensus.replace(0,np.nan)))
# groupby(level=...).mean() replaces DataFrame.mean(level=...), which was
# removed in pandas 2.0; sort=False keeps the order of first appearance.
RF_RII_consensus = consensus_stack.groupby(level=[0,1],sort=False).mean()
RF_RII_consensus = RF_RII_consensus.reset_index().set_index(['Predicted Basin'])


In [None]:
# Basin shown in the single-basin reliability figure.
basin_sel = 'ATLANTIC'
# Colours are given as lists: a set literal has no defined order, so the
# colour each model received could change from one kernel session to the next.
palette = sns.color_palette(['xkcd:red orange','xkcd:cornflower blue','xkcd:leaf green','xkcd:sky blue',
                             'xkcd:slate grey','xkcd:dark orange'])

pal_reduced = sns.color_palette(['xkcd:leaf green','xkcd:goldenrod','xkcd:slate grey','xkcd:dark orange'])
# The full palette is used for the Atlantic and East Pacific, the reduced one otherwise.
if basin_sel in ('EAST_PACIFIC','ATLANTIC'):
    pal_use = palette
else:
    pal_use = pal_reduced
fig1,ax1 = plt.subplots(1,1,figsize=(10,8))
# 1:1 reference line
ax1.plot([0,111.5],[0,111.5],linewidth=3,color='xkcd:black')

# One point and one line per model for the selected basin.
basin_models = reliability_ALL.xs(basin_sel).reset_index().sort_values('Model')
sns.scatterplot(data=basin_models,x='Predicted Pct',y='Observed Pct',hue='Model',
                palette=pal_use,ax=ax1,s=150,alpha=0.9)
sns.lineplot(data=basin_models,x='Predicted Pct',y='Observed Pct',
             hue='Model',palette=pal_use,ax=ax1,linewidth=3,legend=False)

# The RF / SHIPS-RII consensus is drawn for the Atlantic and East Pacific only.
if basin_sel in ('ATLANTIC','EAST_PACIFIC'):
    basin_consensus = RF_RII_consensus.xs(basin_sel)
    sns.scatterplot(data=basin_consensus,x='Predicted Pct',y='Observed Pct',color='xkcd:magenta',ax=ax1,
                    s=170,alpha=0.9,label='ML-CON')
    sns.lineplot(data=basin_consensus.reset_index(),x='Predicted Pct',y='Observed Pct',
                 color='xkcd:magenta',ax=ax1,linewidth=3,legend=False)

# Axis range leaves room above 100 for the sample-count labels.
ax1.set_ylim([-0.5,111.5])
ax1.set_xlim([-0.5,111.5])
ax1.set_xticks(pct_range)
ax1.set_yticks(np.arange(0,101,10))
for axis_name in ('y','x'):
    ax1.tick_params(axis=axis_name,labelsize=14)
ax1.legend(fontsize=13,loc='lower right')
ax1.set_xlabel('Predicted RI Probability',fontsize=17)
ax1.set_ylabel('Observed RI Probability',fontsize=17)
plt_nums = reliability_ALL.xs(basin_sel).sort_values(['Predicted Pct']).reset_index().set_index(['Predicted Pct'])
ax1.grid()
# 
# Write the number of cases per probability bin above the plot, one row of
# labels per ML model.
totals = reliability_ALL.xs(basin_sel).reset_index().sort_values('Predicted Pct').set_index(['Model','Predicted Pct'])
models_list = reliability_ALL.reset_index()['Model'].unique().tolist()

# NOTE(review): label colours come from seaborn's *current default* palette,
# not from `pal_use`, so they need not match the plotted lines -- confirm.
current_pal = sns.color_palette()
full_palette_basin = (basin_sel == 'ATLANTIC') | (basin_sel == 'EAST_PACIFIC')
# model -> (y position of the label row, text colour)
label_style = {
    'Random Forest': (101+5, current_pal[-2] if full_palette_basin else current_pal[-0]),
    'Logistic Reg.': (97+5, current_pal[2] if full_palette_basin else current_pal[1]),
}
for i in ['Random Forest','Logistic Reg.']:
    mod_sel = totals.xs(i)
    yval, i_color = label_style[i]
    for i_pct in pct_range:
        # Bins without any case for this model are labelled 0.
        imod = mod_sel.xs(i_pct)['Observed No Total'].astype(int) if i_pct in mod_sel.index else 0
        x_text = i_pct-4 if i_pct == 5 else i_pct -2
        ax1.text(x_text,yval,imod,color=i_color,fontsize=15,weight='semibold')
# Title names the basin; the figure is written to Figures/ with the basin in
# the file name and `fig_format` as both extension and output format.
ax1.set_title('Observed vs Predicted RI Probability, 2019-2021, {basin_sel}'.format(basin_sel=basin_sel),fontsize=24)
fig1.tight_layout()
fig1.savefig('Figures/reliability_ML_models_ALL_2019_2021_{basin_sel}.{fig_format}'.format(basin_sel=basin_sel,
                                                                                          fig_format=fig_format),
                format=fig_format,bbox_inches='tight',dpi=350)

In [None]:
# Basins shown in the four-panel figure, in panel order.
basin_use = ['ATLANTIC','EAST_PACIFIC','WEST_PACIFIC','SOUTHERN_HEM']
# A list (not a set) so that the colour order is the order written here.
palette = sns.color_palette(['xkcd:red orange','xkcd:cornflower blue','xkcd:leaf green','xkcd:goldenrod'])
# Remove the logistic-regression rows. A boolean filter is used instead of
# drop(): it gives the same result on the first run and is a no-op (rather
# than a KeyError) when the cell is run again.
reliability_ALL = reliability_ALL[reliability_ALL.index.get_level_values(1) != 'Logistic Reg.']

In [None]:
# Inspection only: combined reliability table after removing 'Logistic Reg.'.
reliability_ALL

In [None]:
## ALL 4 in one plot
# The palette entries are addressed by position further down (palette[0] ...
# palette[4]), so they must be given as a list: a set literal has no defined
# order and the colours would change between kernel sessions.
palette = sns.color_palette(['xkcd:red orange','xkcd:cornflower blue','xkcd:leaf green','xkcd:goldenrod',
                             'xkcd:slate grey'])
# Panel labels, one per basin in `basin_use`.
lett = ['a)','b)','c)','d)']
fig1,ax1 = plt.subplots(2,2,figsize=(28,24))
# model -> (y position of its sample-count label row, text colour).
# Colours use the same palette positions as the `use_pal` lists below.
count_label_style = {
    'Random Forest': (101+7, palette[3]),
    'RIPA': (101+4, palette[2]),
    'FRIA': (101+1, palette[0]),
    'OP-CON': (101+4, palette[1]),
    'SHIPS-RII': (101+1, palette[4]),
}
for i in np.arange(0,4):
    basin_sel = basin_use[i]
    if basin_sel in ('WEST_PACIFIC','SOUTHERN_HEM'):
        # All models are shown for these two basins.
        use_pal = [palette[0],palette[2],palette[3]]
        data_use = reliability_ALL
    else:
        # RIPA and FRIA are left out for the Atlantic / East Pacific panels.
        use_pal = [palette[1],palette[3],palette[4]]
        data_use = reliability_ALL.drop(index=('RIPA'),level=1)
        data_use = data_use.drop(index=('FRIA'),level=1)
    ax_sel = ax1.flatten()[i]
    # 1:1 reference line
    ax_sel.plot([0,118.5],[0,118.5],linewidth=3,color='xkcd:black')
    panel_models = data_use.xs(basin_sel).reset_index().sort_values('Model')
    sns.scatterplot(data=panel_models,x='Predicted Pct',y='Observed Pct',hue='Model',
                    palette=use_pal,ax=ax_sel,s=150,alpha=0.9)
    sns.lineplot(data=panel_models,x='Predicted Pct',y='Observed Pct',
                 hue='Model',palette=use_pal,ax=ax_sel,linewidth=3,legend=False)
    # The RF / SHIPS-RII consensus is drawn for the Atlantic and East Pacific only.
    if basin_sel in ('ATLANTIC','EAST_PACIFIC'):
        sns.scatterplot(data=RF_RII_consensus.xs(basin_sel),x='Predicted Pct',y='Observed Pct',color='xkcd:magenta',
                        ax=ax_sel,s=170,alpha=0.9,label='ML-CON')
        sns.lineplot(data=RF_RII_consensus.xs(basin_sel).reset_index(),x='Predicted Pct',y='Observed Pct',
                     color='xkcd:magenta',ax=ax_sel,linewidth=3,legend=False)
    ax_sel.set_ylim([-0.5,111.5])
    ax_sel.set_xlim([-0.5,111.5])
    ax_sel.set_xticks(pct_range)
    ax_sel.set_yticks(np.arange(0,101,10))
    ax_sel.tick_params(axis='y',labelsize=17)
    ax_sel.tick_params(axis='x',labelsize=17)
    ax_sel.legend(fontsize=15,loc='lower right')
    ax_sel.set_xlabel('Predicted RI Probability',fontsize=20)
    ax_sel.set_ylabel('Observed RI Probability',fontsize=20)
    plt_nums = reliability_ALL.xs(basin_sel).sort_values(['Predicted Pct']).reset_index().set_index(['Predicted Pct'])
    ax_sel.grid()
    ax_sel.set_title('{lett} {basin}'.format(lett=lett[i],basin=basin_sel),fontsize=30)
    # Sample counts per probability bin, written above the panel.
    totals = reliability_ALL.xs(basin_sel).reset_index().sort_values('Predicted Pct').set_index(['Model','Predicted Pct'])
    models_list = reliability_ALL.reset_index()['Model'].unique().tolist()
    models_count = data_use.xs(basin_sel).reset_index()['Model'].unique()
    # The model loop has its own variable; it used to overwrite the panel index `i`.
    for model_name in models_count:
        if model_name not in count_label_style:
            # No label row defined for this model: skip it instead of reusing
            # the position / colour of the previous model.
            continue
        yval, i_color = count_label_style[model_name]
        mod_sel = totals.xs(model_name)
        for i_pct in pct_range:
            if i_pct in mod_sel.index:
                imod = mod_sel.xs(i_pct)['Observed No Total'].astype(int)
            else:
                # Bins without any case for this model are labelled 0.
                imod = 0
            ax_sel.text((i_pct-4 if i_pct == 5 else i_pct -2),yval,imod,color=i_color,fontsize=17,weight='semibold')
    
# Overall title and export of the four-panel figure.
fig1.suptitle('Observed vs Predicted RI Probability, 2019-2021',fontsize=42,y=1.01)
fig1.tight_layout()
all_basins_path = 'Figures/reliability_ML_models_ALL_2019_2021_ALL_BASINS.{fig_format}'.format(fig_format=fig_format)
fig1.savefig(all_basins_path,format=fig_format,bbox_inches='tight',dpi=350)
    

In [None]:
# Inspection only: palette left over from the last panel drawn by the loop above.
use_pal

In [None]:
# Inspection only: models that have reliability rows for the Atlantic.
reliability_ALL.xs('ATLANTIC').reset_index()['Model'].unique()

In [None]:
# Inspection only: non-null counts per technique for 'ATCF BASIN' == 'EP'.
# NOTE(review): no executed cell in this notebook creates 'ATCF BASIN'
# (only the uncalled load_edecks_FRIA does) -- presumably it comes from the
# e-deck CSV files; confirm.
edeck_ALL.groupby(['ATCF BASIN','Tech']).count().xs('EP')

In [None]:
# Inspection only: SHIPS-RII reliability rows for the East Pacific, by bin.
reliability_ALL.xs(('EAST_PACIFIC','SHIPS-RII')).sort_values(by='Predicted Pct')

In [None]:
# Inspection only: SHIPS-RII reliability rows for the Atlantic, by bin.
reliability_ALL.xs(('ATLANTIC','SHIPS-RII')).sort_values(by='Predicted Pct')