In [6]:
import numpy as np
import xarray as xr
import pandas as pd
import datetime
import glob

Find occurrences of N/A in the text files for numeric quantities

In [7]:
def find_string_indices(list_in,s_in='N/A'):
    matched_indexes = []
    i = 0
    length = len(list_in)
    while i < length:
        if s_in == list_in[i]:
            matched_indexes.append(i)
        i += 1
    return matched_indexes

#### Get list of all realtime files for desired year
Note: original realtime files were obtained from Mark DeMaria via `rammftp`, and copied locally.

In [8]:
yr_sel = 2020
fdir = 'VALIDATION_data/realtime/{yr_sel}/'.format(yr_sel=yr_sel)
all_files = glob.glob(fdir+'*.txt')
no_files = len(all_files)
fname_test = all_files[0]
print(fname_test)

VALIDATION_data/realtime/2020/20121018CP8520_ships.txt


#### Read in each file one at a time

Relevant information:

* TIME
* LAND (KM)
* Prob of RI for 25 kt RI threshold
* Prob of RI for 30 kt RI threshold
* Prob of RI for 35 kt RI threshold
* LAT (DEG N)
* LONG (DEG W)
* Name
* Case No.
* DATE
* TIME
* Storm Type
* V (KT): intensity
* Technique: Which model is it? SHIPS-RII, DTOPS, Consensus

Read in information from header file (`ATCF ID`, `BASIN`, `Cyclone No`, `Date_full`)

In [9]:
lines

['                                 *                  GFS version                   *\n',
 '                                 * EAST PACIFIC 2020 SHIPS INTENSITY FORECAST     *\n',
 '                                 * IR SAT DATA AVAILABLE,       OHC AVAILABLE     *\n',
 '                                 *  TESTERIC    CP852020  12/10/20  18 UTC        *\n',
 '\n',
 'TIME (HR)          0     6    12    18    24    36    48    60    72    84    96   108   120   132   144   156   168\n',
 'V (KT) NO LAND    20    19    17    15   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A\n',
 'V (KT) LAND       20    19    17    15   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A\n',
 'V (KT) LGEM       20    19    18    17    16   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A\n',
 'Storm Type      TROP  TROP  TROP  TROP  TROP  TROP  TROP   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A   N/A\n',
 '\n',
 'SHEAR (KT

In [10]:
fn_test_full = open(fname_test)
lines = fn_test_full.readlines()
## ATCF ID is line 3, element 2
## Date is line 3, element 3
## Time is line 3, element 4
iline = 3
line_sel = lines[iline].split()
atcf_id = line_sel[2]
date = line_sel[3]
time = line_sel[4]
if atcf_id[0] == 'A':
    basin = 'ATLANTIC'
elif ((atcf_id[0] == 'C' )|(atcf_id[0] == 'E')):
    basin = 'EAST_PACIFIC'
cyc_no = int(atcf_id[2:4])
date_full = pd.to_datetime(date)+pd.Timedelta(int(time),'H')
print(atcf_id,date,basin,cyc_no,date_full)

CP852020 12/10/20 EAST_PACIFIC 85 2020-12-10 18:00:00


Create output file with pre-defined columns

In [11]:
RI_prob_df_ALL = pd.DataFrame(columns={'ATCF ID','BASIN','Cyclone No','Date_full','TIME','DTL (km)','Storm Type',
                                       'Lat (N)','Lon (W)','V (kt)','Technique',
                                       'Pr RI (20/12)','Pr RI (25/24)','Pr RI (30/24)','Pr RI (35/24)',
                                       'Pr RI (40/24)','Pr RI (45/36)','Pr RI (55/48)','Pr RI (65/72)'})
column_names = ['ATCF ID','BASIN','Cyclone No','Date_full','TIME','DTL (km)','Storm Type',
                                       'Lat (N)','Lon (W)','V (kt)','Technique',
                                       'Pr RI (20/12)','Pr RI (25/24)','Pr RI (30/24)','Pr RI (35/24)',
                                       'Pr RI (40/24)','Pr RI (45/36)','Pr RI (55/48)','Pr RI (65/72)']
RI_prob_df_ALL = RI_prob_df_ALL.reindex(columns=column_names)


Read in the forecast variables: `TIME`, `V`, `Storm Type`, `Land`, `Lat`, `Lon`. We'll read in `SHIPS-RII`, `Consensus`, and `DTOPS` separately. Get `ATCF ID`, `BASIN`, `Cyclone number`, and `Date` from the header. We'll grab forecasts up to 72 hours for now, even though we'll probably focus on hours 0-24. 

We read in line by line and primarily rely on the `startswith` method. For numeric quantities, we want to convert the numbers in the text file from strings to floats or ints (depending on the number). For some quantities, we occasionally run into `N/A`, so we use the `find_string_indices` to check for that.  If we have N/As, we grab only the numbers and then pad the array with `-9999` using `np.pad`.  Note that `LAT` and `LON` can also have `xx.x` (meaning they are over land) so we check for that as well. 

This is very clunky but since it's a line-by-line reading, there's not a lot to be done. 

In [12]:
for ino in np.arange(0,no_files):
    # Select file and create dataframe for this file
    ifile = all_files[ino]
    print(ifile)
    i_RI_prof_df = pd.DataFrame(columns=column_names)
    # Read in file line by line
    with open(ifile) as fn:
        for line in fn:
            ### Get time
            if line.startswith("TIME (HR)"):
                time = [int(i) for i in line.split()[2:11]]
                i_RI_prof_df['TIME'] = time
                # print(time)
            ### Get V
            elif line.startswith("V (KT) NO LAND"):
                # Check for N/As
                matched_ind = find_string_indices(line.split()[0:13])
                # If we have N/As, get only the numbers and pad in the index 
                if (len(matched_ind)>=1):
                    iv = [float(i) for i in line.split()[4:matched_ind[0]]]
                    gap = len(time) - len(iv)
                    v = np.pad(iv,(0,gap),'constant',constant_values=-9999)
                # Otherwise, just get the right numbers that correspond to forecast hours we are interested in. 
                elif not matched_ind:
                    v = [float(i) for i in line.split()[4:13]]
                i_RI_prof_df['V (kt)'] = v
                #v24 = v[3] - v[0]
                #i_RI_prof_df['d_24'] = v24
                # print(v)
            ### Get storm type
            elif line.startswith("Storm Type"):
                stype = line.split()[2:11]
                i_RI_prof_df['Storm Type'] = stype
                #print(stype)
            ### Get distance from land
            elif line.startswith("LAND (KM)"):
                # Check for N/As
                matched_ind = find_string_indices(line.split()[0:11])
                # If we have N/As, get only the numbers and pad in the index 
                if (len(matched_ind) >= 1):
                    idtl = [float(i) for i in line.split()[2:matched_ind[0]]]
                    gap = len(time) - len(idtl)
                    dtl = np.pad(idtl,(0,gap),'constant',constant_values=-9999)
                 # Otherwise, just get the right numbers that correspond to forecast hours we are interested in. 
                else:
                    dtl = [float(i) for i in line.split()[2:11]]
                i_RI_prof_df['DTL (km)'] = dtl
                #print(dtl)
            elif line.startswith("LAT (DEG N)"):
                # Check for N/As and xx.x
                matched_ind = find_string_indices(line.split()[0:12])
                x_ind = find_string_indices(line.split()[0:12],'xx.x')
                # If we have N/As, get only the numbers and pad in the index 
                if (len(matched_ind) > 0):
                    ilat = [float(i) for i in line.split()[3:matched_ind[0]]]
                    gap = len(time) - len(ilat)
                    lat = np.pad(ilat,(0,gap),'constant',constant_values=-9999)
                # If we have xx.x, get only the numbers and pad in the index 
                elif (len(x_ind) > 0):
                    ilat = [float(i) for i in line.split()[3:x_ind[0]]]
                    gap = len(time) - len(ilat)
                    lat = np.pad(ilat,(0,gap),'constant',constant_values=-9999)
                 # Otherwise, just get the right numbers that correspond to forecast hours we are interested in. 
                else:
                    lat = [float(i) for i in line.split()[3:12]]
                i_RI_prof_df['Lat (N)'] = lat
                #print(lat)
            elif line.startswith("LONG(DEG W)"):
                # Check for N/As and xxx.x
                matched_ind = find_string_indices(line.split()[0:11])
                x_ind = find_string_indices(line.split()[0:12],'xxx.x')
                # If we have N/As, get only the numbers and pad in the index 
                if (len(matched_ind) > 0):
                    ilon = [float(i) for i in line.split()[2:matched_ind[0]]]
                    gap = len(time) - len(ilon)
                    lon = np.pad(ilon,(0,gap),'constant',constant_values=-9999)
                # If we have xxx.x, get only the numbers and pad in the index     
                elif (len(x_ind) > 0):
                    ilon = [float(i) for i in line.split()[2:x_ind[0]]]
                    gap = len(time) - len(ilon)
                    lon = np.pad(ilon,(0,gap),'constant',constant_values=-9999)
                 # Otherwise, just get the right numbers that correspond to forecast hours we are interested in. 
                else:
                    lon = [float(i) for i in line.split()[2:11]]
                i_RI_prof_df['Lon (W)'] = lon
                #print(lon)
            # Now, get the Pr(RI) calculated by SHIPS-RII
            elif line.startswith("   SHIPS-RII:"):
                i_RI_prof_df['Technique'] = 'SHIPS-RII'
                i_RI_prof_df['Pr RI (20/12)'] = line.split()[1]
                i_RI_prof_df['Pr RI (25/24)'] = line.split()[2]
                i_RI_prof_df['Pr RI (30/24)'] = line.split()[3]
                i_RI_prof_df['Pr RI (35/24)'] = line.split()[4]
                i_RI_prof_df['Pr RI (40/24)'] = line.split()[5]
                i_RI_prof_df['Pr RI (45/36)'] = line.split()[6]
                i_RI_prof_df['Pr RI (55/48)'] = line.split()[7]
                i_RI_prof_df['Pr RI (65/72)'] = line.split()[8]
            #
    ## Now we get the information from the header (Date, time, ATCF ID, basin)
    fn_test_full = open(ifile)
    lines = fn_test_full.readlines()
    ## ATCF ID is line 3, element 2
    ## Date is line 3, element 3
    ## Time is line 3, element 4
    ## Make sure the file actually exists
    if (len(lines) < 3):
        continue
    else:
        iline = 3
        line_sel = lines[iline].split()
        atcf_id = line_sel[2]
        date = line_sel[3]
        time = line_sel[4]
        if atcf_id[0] == 'A':
            basin = 'ATLANTIC'
        elif ((atcf_id[0] == 'C' )|(atcf_id[0] == 'E')):
            basin = 'EAST_PACIFIC'
        cyc_no = int(atcf_id[2:4])
        date_full = pd.to_datetime(date)+pd.Timedelta(int(time),'H')
        print(atcf_id,date,basin,cyc_no,date_full)
        i_RI_prof_df['ATCF ID'] = atcf_id
        i_RI_prof_df['BASIN'] = basin
        i_RI_prof_df['Cyclone No'] = cyc_no
        i_RI_prof_df['Date_full'] = date_full
        # Add this file to full dataframe
        RI_prob_df_ALL = RI_prob_df_ALL.append(i_RI_prof_df)

VALIDATION_data/realtime/2020/20121018CP8520_ships.txt
CP852020 12/10/20 EAST_PACIFIC 85 2020-12-10 18:00:00
VALIDATION_data/realtime/2020/20102006AL2720_ships.txt
AL272020 10/20/20 ATLANTIC 27 2020-10-20 06:00:00
VALIDATION_data/realtime/2020/20111612AL3120_ships.txt
AL312020 11/16/20 ATLANTIC 31 2020-11-16 12:00:00
VALIDATION_data/realtime/2020/20110406EP2020_ships.txt
EP202020 11/04/20 EAST_PACIFIC 20 2020-11-04 06:00:00
VALIDATION_data/realtime/2020/20111500AL3020_ships.txt
AL302020 11/15/20 ATLANTIC 30 2020-11-15 00:00:00
VALIDATION_data/realtime/2020/20111712EP9820_ships.txt
EP982020 11/17/20 EAST_PACIFIC 98 2020-11-17 12:00:00
VALIDATION_data/realtime/2020/20100412AL9220_ships.txt
AL922020 10/04/20 ATLANTIC 92 2020-10-04 12:00:00
VALIDATION_data/realtime/2020/20102206AL2720_ships.txt
AL272020 10/22/20 ATLANTIC 27 2020-10-22 06:00:00
VALIDATION_data/realtime/2020/20110800AL2920_ships.txt
AL292020 11/08/20 ATLANTIC 29 2020-11-08 00:00:00
VALIDATION_data/realtime/2020/20103012AL962

AL902020 12/01/20 ATLANTIC 90 2020-12-01 12:00:00
VALIDATION_data/realtime/2020/20092118AL2020_ships.txt
AL202020 09/21/20 ATLANTIC 20 2020-09-21 18:00:00
VALIDATION_data/realtime/2020/20100412EP1820_ships.txt
EP182020 10/04/20 EAST_PACIFIC 18 2020-10-04 12:00:00
VALIDATION_data/realtime/2020/20121712EP8020_ships.txt
EP802020 12/17/20 EAST_PACIFIC 80 2020-12-17 12:00:00
VALIDATION_data/realtime/2020/20092100AL1720_ships.txt
AL172020 09/21/20 ATLANTIC 17 2020-09-21 00:00:00
VALIDATION_data/realtime/2020/20092812EP9420_ships.txt
EP942020 09/28/20 EAST_PACIFIC 94 2020-09-28 12:00:00
VALIDATION_data/realtime/2020/20102818AL2820_ships.txt
AL282020 10/28/20 ATLANTIC 28 2020-10-28 18:00:00
VALIDATION_data/realtime/2020/20101212AL9320_ships.txt
AL932020 10/12/20 ATLANTIC 93 2020-10-12 12:00:00
VALIDATION_data/realtime/2020/20092206EP1720_ships.txt
EP172020 09/22/20 EAST_PACIFIC 17 2020-09-22 06:00:00
VALIDATION_data/realtime/2020/20111506AL3120_ships.txt
AL312020 11/15/20 ATLANTIC 31 2020-11-1

AL932020 10/13/20 ATLANTIC 93 2020-10-13 18:00:00
VALIDATION_data/realtime/2020/20112812AL9920_ships.txt
AL992020 11/28/20 ATLANTIC 99 2020-11-28 12:00:00
VALIDATION_data/realtime/2020/20092312EP1720_ships.txt
EP172020 09/23/20 EAST_PACIFIC 17 2020-09-23 12:00:00
VALIDATION_data/realtime/2020/20101100EP9620_ships.txt
EP962020 10/11/20 EAST_PACIFIC 96 2020-10-11 00:00:00
VALIDATION_data/realtime/2020/20102606AL2820_ships.txt
AL282020 10/26/20 ATLANTIC 28 2020-10-26 06:00:00
VALIDATION_data/realtime/2020/20110418EP2020_ships.txt
EP202020 11/04/20 EAST_PACIFIC 20 2020-11-04 18:00:00
VALIDATION_data/realtime/2020/20101406AL9320_ships.txt
AL932020 10/14/20 ATLANTIC 93 2020-10-14 06:00:00
VALIDATION_data/realtime/2020/20111100AL3020_ships.txt
AL302020 11/11/20 ATLANTIC 30 2020-11-11 00:00:00
VALIDATION_data/realtime/2020/20100918EP9620_ships.txt
EP962020 10/09/20 EAST_PACIFIC 96 2020-10-09 18:00:00
VALIDATION_data/realtime/2020/20100818AL2620_ships.txt
AL262020 10/08/20 ATLANTIC 26 2020-10-0

AL202020 09/20/20 ATLANTIC 20 2020-09-20 18:00:00
VALIDATION_data/realtime/2020/20092500AL2220_ships.txt
AL222020 09/25/20 ATLANTIC 22 2020-09-25 00:00:00
VALIDATION_data/realtime/2020/20110218EP9720_ships.txt
EP972020 11/02/20 EAST_PACIFIC 97 2020-11-02 18:00:00
VALIDATION_data/realtime/2020/20101112EP9620_ships.txt
EP962020 10/11/20 EAST_PACIFIC 96 2020-10-11 12:00:00
VALIDATION_data/realtime/2020/20092206AL2220_ships.txt
AL222020 09/22/20 ATLANTIC 22 2020-09-22 06:00:00
VALIDATION_data/realtime/2020/20102518AL2820_ships.txt
AL282020 10/25/20 ATLANTIC 28 2020-10-25 18:00:00
VALIDATION_data/realtime/2020/20092118AL1720_ships.txt
AL172020 09/21/20 ATLANTIC 17 2020-09-21 18:00:00
VALIDATION_data/realtime/2020/20092006AL2220_ships.txt
AL222020 09/20/20 ATLANTIC 22 2020-09-20 06:00:00
VALIDATION_data/realtime/2020/20102912AL2820_ships.txt
AL282020 10/29/20 ATLANTIC 28 2020-10-29 12:00:00
VALIDATION_data/realtime/2020/20111506AL3020_ships.txt
AL302020 11/15/20 ATLANTIC 30 2020-11-15 06:00:

EP982020 11/15/20 EAST_PACIFIC 98 2020-11-15 06:00:00
VALIDATION_data/realtime/2020/20100818EP9620_ships.txt
EP962020 10/08/20 EAST_PACIFIC 96 2020-10-08 18:00:00
VALIDATION_data/realtime/2020/20110900AL2920_ships.txt
AL292020 11/09/20 ATLANTIC 29 2020-11-09 00:00:00
VALIDATION_data/realtime/2020/20110212AL2920_ships.txt
AL292020 11/02/20 ATLANTIC 29 2020-11-02 12:00:00
VALIDATION_data/realtime/2020/20100100EP1820_ships.txt
EP182020 10/01/20 EAST_PACIFIC 18 2020-10-01 00:00:00
VALIDATION_data/realtime/2020/20100312AL2520_ships.txt
AL252020 10/03/20 ATLANTIC 25 2020-10-03 12:00:00
VALIDATION_data/realtime/2020/20100700EP1920_ships.txt
EP192020 10/07/20 EAST_PACIFIC 19 2020-10-07 00:00:00
VALIDATION_data/realtime/2020/20101706AL9420_ships.txt
AL942020 10/17/20 ATLANTIC 94 2020-10-17 06:00:00
VALIDATION_data/realtime/2020/20102000AL2720_ships.txt
AL272020 10/20/20 ATLANTIC 27 2020-10-20 00:00:00
VALIDATION_data/realtime/2020/20101312AL9320_ships.txt
AL932020 10/13/20 ATLANTIC 93 2020-10-1

VALIDATION_data/realtime/2020/20101018EP9620_ships.txt
EP962020 10/10/20 EAST_PACIFIC 96 2020-10-10 18:00:00
VALIDATION_data/realtime/2020/20111600AL3120_ships.txt
AL312020 11/16/20 ATLANTIC 31 2020-11-16 00:00:00
VALIDATION_data/realtime/2020/20102212AL2720_ships.txt
AL272020 10/22/20 ATLANTIC 27 2020-10-22 12:00:00
VALIDATION_data/realtime/2020/20100900EP9620_ships.txt
EP962020 10/09/20 EAST_PACIFIC 96 2020-10-09 00:00:00
VALIDATION_data/realtime/2020/20092106AL2020_ships.txt
AL202020 09/21/20 ATLANTIC 20 2020-09-21 06:00:00
VALIDATION_data/realtime/2020/20111412AL3120_ships.txt
AL312020 11/14/20 ATLANTIC 31 2020-11-14 12:00:00
VALIDATION_data/realtime/2020/20100500AL2520_ships.txt
AL252020 10/05/20 ATLANTIC 25 2020-10-05 00:00:00
VALIDATION_data/realtime/2020/20092112AL2220_ships.txt
AL222020 09/21/20 ATLANTIC 22 2020-09-21 12:00:00
VALIDATION_data/realtime/2020/20092206AL2020_ships.txt
AL202020 09/22/20 ATLANTIC 20 2020-09-22 06:00:00
VALIDATION_data/realtime/2020/20101212EP1920_sh

#### Same but for Consensus instead of SHIPS-RII

In [13]:
RIC_prob_df_ALL = pd.DataFrame(columns={'ATCF ID','BASIN','Cyclone No','Date_full','TIME','DTL (km)','Storm Type',
                                       'Lat (N)','Lon (W)','V (kt)','Technique',
                                       'Pr RI (20/12)','Pr RI (25/24)','Pr RI (30/24)','Pr RI (35/24)',
                                       'Pr RI (40/24)','Pr RI (45/36)','Pr RI (55/48)','Pr RI (65/72)'})
column_names = ['ATCF ID','BASIN','Cyclone No','Date_full','TIME','DTL (km)','Storm Type',
                                       'Lat (N)','Lon (W)','V (kt)','Technique',
                                       'Pr RI (20/12)','Pr RI (25/24)','Pr RI (30/24)','Pr RI (35/24)',
                                       'Pr RI (40/24)','Pr RI (45/36)','Pr RI (55/48)','Pr RI (65/72)']
RIC_prob_df_ALL = RIC_prob_df_ALL.reindex(columns=column_names)


In [14]:
for ino in np.arange(0,no_files):
    # Select file and create dataframe for this file
    ifile = all_files[ino]
    print(ifile)
    i_RI_prof_df = pd.DataFrame(columns=column_names)
    # Read in file line by line
    with open(ifile) as fn:
        for line in fn:
            ### Get time
            if line.startswith("TIME (HR)"):
                time = [int(i) for i in line.split()[2:11]]
                i_RI_prof_df['TIME'] = time
                # print(time)
            ### Get V
            elif line.startswith("V (KT) NO LAND"):
                # Check for N/As
                matched_ind = find_string_indices(line.split()[0:13])
                # If we have N/As, get only the numbers and pad in the index 
                if (len(matched_ind)>=1):
                    iv = [float(i) for i in line.split()[4:matched_ind[0]]]
                    gap = len(time) - len(iv)
                    v = np.pad(iv,(0,gap),'constant',constant_values=-9999)
                # Otherwise, just get the right numbers that correspond to forecast hours we are interested in. 
                elif not matched_ind:
                    v = [float(i) for i in line.split()[4:13]]
                i_RI_prof_df['V (kt)'] = v
                #v24 = v[3] - v[0]
                #i_RI_prof_df['d_24'] = v24
                # print(v)
            ### Get storm type
            elif line.startswith("Storm Type"):
                stype = line.split()[2:11]
                i_RI_prof_df['Storm Type'] = stype
                #print(stype)
            ### Get distance from land
            elif line.startswith("LAND (KM)"):
                # Check for N/As
                matched_ind = find_string_indices(line.split()[0:11])
                # If we have N/As, get only the numbers and pad in the index 
                if (len(matched_ind) >= 1):
                    idtl = [float(i) for i in line.split()[2:matched_ind[0]]]
                    gap = len(time) - len(idtl)
                    dtl = np.pad(idtl,(0,gap),'constant',constant_values=-9999)
                 # Otherwise, just get the right numbers that correspond to forecast hours we are interested in. 
                else:
                    dtl = [float(i) for i in line.split()[2:11]]
                i_RI_prof_df['DTL (km)'] = dtl
                #print(dtl)
            elif line.startswith("LAT (DEG N)"):
                # Check for N/As and xx.x
                matched_ind = find_string_indices(line.split()[0:12])
                x_ind = find_string_indices(line.split()[0:12],'xx.x')
                # If we have N/As, get only the numbers and pad in the index 
                if (len(matched_ind) > 0):
                    ilat = [float(i) for i in line.split()[3:matched_ind[0]]]
                    gap = len(time) - len(ilat)
                    lat = np.pad(ilat,(0,gap),'constant',constant_values=-9999)
                # If we have xx.x, get only the numbers and pad in the index 
                elif (len(x_ind) > 0):
                    ilat = [float(i) for i in line.split()[3:x_ind[0]]]
                    gap = len(time) - len(ilat)
                    lat = np.pad(ilat,(0,gap),'constant',constant_values=-9999)
                 # Otherwise, just get the right numbers that correspond to forecast hours we are interested in. 
                else:
                    lat = [float(i) for i in line.split()[3:12]]
                i_RI_prof_df['Lat (N)'] = lat
                #print(lat)
            elif line.startswith("LONG(DEG W)"):
                # Check for N/As and xxx.x
                matched_ind = find_string_indices(line.split()[0:11])
                x_ind = find_string_indices(line.split()[0:12],'xxx.x')
                # If we have N/As, get only the numbers and pad in the index 
                if (len(matched_ind) > 0):
                    ilon = [float(i) for i in line.split()[2:matched_ind[0]]]
                    gap = len(time) - len(ilon)
                    lon = np.pad(ilon,(0,gap),'constant',constant_values=-9999)
                # If we have xxx.x, get only the numbers and pad in the index     
                elif (len(x_ind) > 0):
                    ilon = [float(i) for i in line.split()[2:x_ind[0]]]
                    gap = len(time) - len(ilon)
                    lon = np.pad(ilon,(0,gap),'constant',constant_values=-9999)
                 # Otherwise, just get the right numbers that correspond to forecast hours we are interested in. 
                else:
                    lon = [float(i) for i in line.split()[2:11]]
                i_RI_prof_df['Lon (W)'] = lon
                #print(lon)
            # Now, get the Pr(RI) calculated by SHIPS-RII
            elif line.startswith("   Consensus:"):
                i_RI_prof_df['Technique'] = 'Consensus'
                i_RI_prof_df['Pr RI (20/12)'] = line.split()[1]
                i_RI_prof_df['Pr RI (25/24)'] = line.split()[2]
                i_RI_prof_df['Pr RI (30/24)'] = line.split()[3]
                i_RI_prof_df['Pr RI (35/24)'] = line.split()[4]
                i_RI_prof_df['Pr RI (40/24)'] = line.split()[5]
                i_RI_prof_df['Pr RI (45/36)'] = line.split()[6]
                i_RI_prof_df['Pr RI (55/48)'] = line.split()[7]
                i_RI_prof_df['Pr RI (65/72)'] = line.split()[8]
            #
    ## Now we get the information from the header (Date, time, ATCF ID, basin)
    fn_test_full = open(ifile)
    lines = fn_test_full.readlines()
    ## ATCF ID is line 3, element 2
    ## Date is line 3, element 3
    ## Time is line 3, element 4
    ## Make sure the file actually exists
    if (len(lines) < 3):
        continue
    else:
        iline = 3
        line_sel = lines[iline].split()
        atcf_id = line_sel[2]
        date = line_sel[3]
        time = line_sel[4]
        if atcf_id[0] == 'A':
            basin = 'ATLANTIC'
        elif ((atcf_id[0] == 'C' )|(atcf_id[0] == 'E')):
            basin = 'EAST_PACIFIC'
        cyc_no = int(atcf_id[2:4])
        date_full = pd.to_datetime(date)+pd.Timedelta(int(time),'H')
        print(atcf_id,date,basin,cyc_no,date_full)
        i_RI_prof_df['ATCF ID'] = atcf_id
        i_RI_prof_df['BASIN'] = basin
        i_RI_prof_df['Cyclone No'] = cyc_no
        i_RI_prof_df['Date_full'] = date_full
        # Add this file to full dataframe
        RIC_prob_df_ALL = RIC_prob_df_ALL.append(i_RI_prof_df)

VALIDATION_data/realtime/2020/20121018CP8520_ships.txt
CP852020 12/10/20 EAST_PACIFIC 85 2020-12-10 18:00:00
VALIDATION_data/realtime/2020/20102006AL2720_ships.txt
AL272020 10/20/20 ATLANTIC 27 2020-10-20 06:00:00
VALIDATION_data/realtime/2020/20111612AL3120_ships.txt
AL312020 11/16/20 ATLANTIC 31 2020-11-16 12:00:00
VALIDATION_data/realtime/2020/20110406EP2020_ships.txt
EP202020 11/04/20 EAST_PACIFIC 20 2020-11-04 06:00:00
VALIDATION_data/realtime/2020/20111500AL3020_ships.txt
AL302020 11/15/20 ATLANTIC 30 2020-11-15 00:00:00
VALIDATION_data/realtime/2020/20111712EP9820_ships.txt
EP982020 11/17/20 EAST_PACIFIC 98 2020-11-17 12:00:00
VALIDATION_data/realtime/2020/20100412AL9220_ships.txt
AL922020 10/04/20 ATLANTIC 92 2020-10-04 12:00:00
VALIDATION_data/realtime/2020/20102206AL2720_ships.txt
AL272020 10/22/20 ATLANTIC 27 2020-10-22 06:00:00
VALIDATION_data/realtime/2020/20110800AL2920_ships.txt
AL292020 11/08/20 ATLANTIC 29 2020-11-08 00:00:00
VALIDATION_data/realtime/2020/20103012AL962

EP942020 09/28/20 EAST_PACIFIC 94 2020-09-28 12:00:00
VALIDATION_data/realtime/2020/20102818AL2820_ships.txt
AL282020 10/28/20 ATLANTIC 28 2020-10-28 18:00:00
VALIDATION_data/realtime/2020/20101212AL9320_ships.txt
AL932020 10/12/20 ATLANTIC 93 2020-10-12 12:00:00
VALIDATION_data/realtime/2020/20092206EP1720_ships.txt
EP172020 09/22/20 EAST_PACIFIC 17 2020-09-22 06:00:00
VALIDATION_data/realtime/2020/20111506AL3120_ships.txt
AL312020 11/15/20 ATLANTIC 31 2020-11-15 06:00:00
VALIDATION_data/realtime/2020/20112806AL9920_ships.txt
AL992020 11/28/20 ATLANTIC 99 2020-11-28 06:00:00
VALIDATION_data/realtime/2020/20111412EP9820_ships.txt
EP982020 11/14/20 EAST_PACIFIC 98 2020-11-14 12:00:00
VALIDATION_data/realtime/2020/20100600EP1920_ships.txt
EP192020 10/06/20 EAST_PACIFIC 19 2020-10-06 00:00:00
VALIDATION_data/realtime/2020/20102500AL2720_ships.txt
AL272020 10/25/20 ATLANTIC 27 2020-10-25 00:00:00
VALIDATION_data/realtime/2020/20100418EP9520_ships.txt
EP952020 10/04/20 EAST_PACIFIC 95 2020-

AL302020 11/11/20 ATLANTIC 30 2020-11-11 00:00:00
VALIDATION_data/realtime/2020/20100918EP9620_ships.txt
EP962020 10/09/20 EAST_PACIFIC 96 2020-10-09 18:00:00
VALIDATION_data/realtime/2020/20100818AL2620_ships.txt
AL262020 10/08/20 ATLANTIC 26 2020-10-08 18:00:00
VALIDATION_data/realtime/2020/20111000AL9720_ships.txt
AL972020 11/10/20 ATLANTIC 97 2020-11-10 00:00:00
VALIDATION_data/realtime/2020/20100806EP1920_ships.txt
EP192020 10/08/20 EAST_PACIFIC 19 2020-10-08 06:00:00
VALIDATION_data/realtime/2020/20102412AL2720_ships.txt
AL272020 10/24/20 ATLANTIC 27 2020-10-24 12:00:00
VALIDATION_data/realtime/2020/20102800AL2820_ships.txt
AL282020 10/28/20 ATLANTIC 28 2020-10-28 00:00:00
VALIDATION_data/realtime/2020/20092800EP9420_ships.txt
EP942020 09/28/20 EAST_PACIFIC 94 2020-09-28 00:00:00
VALIDATION_data/realtime/2020/20110218AL2920_ships.txt
AL292020 11/02/20 ATLANTIC 29 2020-11-02 18:00:00
VALIDATION_data/realtime/2020/20112906AL9020_ships.txt
AL902020 11/29/20 ATLANTIC 90 2020-11-29 06

AL222020 09/24/20 ATLANTIC 22 2020-09-24 18:00:00
VALIDATION_data/realtime/2020/20100618EP1920_ships.txt
EP192020 10/06/20 EAST_PACIFIC 19 2020-10-06 18:00:00
VALIDATION_data/realtime/2020/20100218EP1820_ships.txt
EP182020 10/02/20 EAST_PACIFIC 18 2020-10-02 18:00:00
VALIDATION_data/realtime/2020/20102306AL2720_ships.txt
AL272020 10/23/20 ATLANTIC 27 2020-10-23 06:00:00
VALIDATION_data/realtime/2020/20101200EP1920_ships.txt
EP192020 10/12/20 EAST_PACIFIC 19 2020-10-12 00:00:00
VALIDATION_data/realtime/2020/20102106AL2720_ships.txt
AL272020 10/21/20 ATLANTIC 27 2020-10-21 06:00:00
VALIDATION_data/realtime/2020/20100706EP1920_ships.txt
EP192020 10/07/20 EAST_PACIFIC 19 2020-10-07 06:00:00
VALIDATION_data/realtime/2020/20092300AL2220_ships.txt
AL222020 09/23/20 ATLANTIC 22 2020-09-23 00:00:00
VALIDATION_data/realtime/2020/20092400AL2020_ships.txt
AL202020 09/24/20 ATLANTIC 20 2020-09-24 00:00:00
VALIDATION_data/realtime/2020/20121712AL8020_ships.txt
AL802020 12/17/20 ATLANTIC 80 2020-12-1

VALIDATION_data/realtime/2020/20111706AL3120_ships.txt
AL312020 11/17/20 ATLANTIC 31 2020-11-17 06:00:00
VALIDATION_data/realtime/2020/20111406EP9820_ships.txt
EP982020 11/14/20 EAST_PACIFIC 98 2020-11-14 06:00:00
VALIDATION_data/realtime/2020/20100200EP1820_ships.txt
EP182020 10/02/20 EAST_PACIFIC 18 2020-10-02 00:00:00
VALIDATION_data/realtime/2020/20111400EP9820_ships.txt
EP982020 11/14/20 EAST_PACIFIC 98 2020-11-14 00:00:00
VALIDATION_data/realtime/2020/20112818AL9920_ships.txt
AL992020 11/28/20 ATLANTIC 99 2020-11-28 18:00:00
VALIDATION_data/realtime/2020/20100118EP1820_ships.txt
EP182020 10/01/20 EAST_PACIFIC 18 2020-10-01 18:00:00
VALIDATION_data/realtime/2020/20092112AL1720_ships.txt
AL172020 09/21/20 ATLANTIC 17 2020-09-21 12:00:00
VALIDATION_data/realtime/2020/20092100AL2020_ships.txt
AL202020 09/21/20 ATLANTIC 20 2020-09-21 00:00:00
VALIDATION_data/realtime/2020/20102500AL2820_ships.txt
AL282020 10/25/20 ATLANTIC 28 2020-10-25 00:00:00
VALIDATION_data/realtime/2020/20111306A

VALIDATION_data/realtime/2020/20110818AL9720_ships.txt
AL972020 11/08/20 ATLANTIC 97 2020-11-08 18:00:00
VALIDATION_data/realtime/2020/20110718AL2920_ships.txt
AL292020 11/07/20 ATLANTIC 29 2020-11-07 18:00:00
VALIDATION_data/realtime/2020/20111400AL3020_ships.txt
AL302020 11/14/20 ATLANTIC 30 2020-11-14 00:00:00
VALIDATION_data/realtime/2020/20111006AL2920_ships.txt
AL292020 11/10/20 ATLANTIC 29 2020-11-10 06:00:00
VALIDATION_data/realtime/2020/20091918AL2220_ships.txt
AL222020 09/19/20 ATLANTIC 22 2020-09-19 18:00:00
VALIDATION_data/realtime/2020/20111512AL3020_ships.txt
AL302020 11/15/20 ATLANTIC 30 2020-11-15 12:00:00
VALIDATION_data/realtime/2020/20111500EP9820_ships.txt
EP982020 11/15/20 EAST_PACIFIC 98 2020-11-15 00:00:00
VALIDATION_data/realtime/2020/20113006AL9020_ships.txt
AL902020 11/30/20 ATLANTIC 90 2020-11-30 06:00:00
VALIDATION_data/realtime/2020/20102518AL2720_ships.txt
AL272020 10/25/20 ATLANTIC 27 2020-10-25 18:00:00
VALIDATION_data/realtime/2020/20111800EP2120_ships.

AL202020 09/22/20 ATLANTIC 20 2020-09-22 18:00:00
VALIDATION_data/realtime/2020/20111300AL9820_ships.txt
AL982020 11/13/20 ATLANTIC 98 2020-11-13 00:00:00
VALIDATION_data/realtime/2020/20101106AL2620_ships.txt
AL262020 10/11/20 ATLANTIC 26 2020-10-11 06:00:00
VALIDATION_data/realtime/2020/20100812EP1920_ships.txt
EP192020 10/08/20 EAST_PACIFIC 19 2020-10-08 12:00:00
VALIDATION_data/realtime/2020/20111012AL2920_ships.txt
AL292020 11/10/20 ATLANTIC 29 2020-11-10 12:00:00
VALIDATION_data/realtime/2020/20101100AL2620_ships.txt
AL262020 10/11/20 ATLANTIC 26 2020-10-11 00:00:00
VALIDATION_data/realtime/2020/20092300EP1720_ships.txt
EP172020 09/23/20 EAST_PACIFIC 17 2020-09-23 00:00:00
VALIDATION_data/realtime/2020/20110406AL2920_ships.txt
AL292020 11/04/20 ATLANTIC 29 2020-11-04 06:00:00
VALIDATION_data/realtime/2020/20101012AL2620_ships.txt
AL262020 10/10/20 ATLANTIC 26 2020-10-10 12:00:00
VALIDATION_data/realtime/2020/20092018EP1720_ships.txt
EP172020 09/20/20 EAST_PACIFIC 17 2020-09-20 18

Combine into one dataframe and remove percentage signs from RI probabilities

In [15]:
RI_prob_full = RI_prob_df_ALL.append(RIC_prob_df_ALL)
RI_prob_full['Pr RI (20/12)'] = RI_prob_full['Pr RI (20/12)'].str.rstrip("%").astype(float)
RI_prob_full['Pr RI (25/24)'] = RI_prob_full['Pr RI (25/24)'].str.rstrip("%").astype(float)
RI_prob_full['Pr RI (30/24)'] = RI_prob_full['Pr RI (30/24)'].str.rstrip("%").astype(float)
RI_prob_full['Pr RI (35/24)'] = RI_prob_full['Pr RI (35/24)'].str.rstrip("%").astype(float)
RI_prob_full['Pr RI (40/24)'] = RI_prob_full['Pr RI (40/24)'].str.rstrip("%").astype(float)
RI_prob_full['Pr RI (45/36)'] = RI_prob_full['Pr RI (45/36)'].str.rstrip("%").astype(float)
RI_prob_full['Pr RI (55/48)'] = RI_prob_full['Pr RI (55/48)'].str.rstrip("%").astype(float)
RI_prob_full['Pr RI (65/72)'] = RI_prob_full['Pr RI (65/72)'].str.rstrip("%").astype(float)

In [18]:
foo = RI_prob_full.set_index(['ATCF ID','Date_full']).xs(('AL092020','2020-08-27 18:00:00'))
import seaborn as sns
sns.lineplot(data=foo.reset_index(),x='TIME',y='Pr RI (30/24)',hue='Technique')

  return runner(coro)


KeyError: 'AL092020'

In [19]:
RI_prob_full

Unnamed: 0,ATCF ID,BASIN,Cyclone No,Date_full,TIME,DTL (km),Storm Type,Lat (N),Lon (W),V (kt),Technique,Pr RI (20/12),Pr RI (25/24),Pr RI (30/24),Pr RI (35/24),Pr RI (40/24),Pr RI (45/36),Pr RI (55/48),Pr RI (65/72)
0,CP852020,EAST_PACIFIC,85,2020-12-10 18:00:00,0,2396.0,TROP,10.0,179.0,20.0,SHIPS-RII,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,CP852020,EAST_PACIFIC,85,2020-12-10 18:00:00,6,2501.0,TROP,9.9,180.1,19.0,SHIPS-RII,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,CP852020,EAST_PACIFIC,85,2020-12-10 18:00:00,12,2654.0,TROP,9.8,181.7,17.0,SHIPS-RII,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,CP852020,EAST_PACIFIC,85,2020-12-10 18:00:00,18,2822.0,TROP,9.8,183.5,15.0,SHIPS-RII,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,CP852020,EAST_PACIFIC,85,2020-12-10 18:00:00,24,2867.0,TROP,9.9,185.4,-9999.0,SHIPS-RII,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4,AL272020,ATLANTIC,27,2020-10-21 12:00:00,24,1467.0,TROP,30.8,60.7,83.0,Consensus,3.0,1.9,1.4,0.2,0.0,0.2,0.1,0.0
5,AL272020,ATLANTIC,27,2020-10-21 12:00:00,36,1314.0,TROP,32.1,61.3,83.0,Consensus,3.0,1.9,1.4,0.2,0.0,0.2,0.1,0.0
6,AL272020,ATLANTIC,27,2020-10-21 12:00:00,48,1180.0,TROP,33.4,61.3,85.0,Consensus,3.0,1.9,1.4,0.2,0.0,0.2,0.1,0.0
7,AL272020,ATLANTIC,27,2020-10-21 12:00:00,60,1050.0,TROP,34.6,61.4,85.0,Consensus,3.0,1.9,1.4,0.2,0.0,0.2,0.1,0.0


Get 24-hour intensity change (necessary for RI)

In [17]:
RI_prob_xx = RI_prob_full.replace(-9999,np.nan)
RI_prob_xx = RI_prob_xx.drop_duplicates()
# Add second copy of dates (for shifting)
RI_prob_xx['Date var'] = RI_prob_xx['Date_full']
# Sort by relevant indices
RI_prob_xx = RI_prob_xx.sort_values(['Technique','BASIN','ATCF ID','Date_full','TIME'],ascending=True)
RI_x0 = RI_prob_xx.set_index(['Technique','BASIN','ATCF ID','Date_full','TIME']).xs(0,level=4)
# Shift by 4 to get 24 hours ahead
RI_x4 = RI_x0.shift(-4)
d24 = RI_x4['V (kt)'] - RI_x0['V (kt)']
date_diff = RI_x4['Date var'] - RI_x0['Date var']
# Drop end of forecast period
diff = d24.where(date_diff == pd.Timedelta(1,'D')).dropna(how='all')

In [12]:
RI_prob_full['d_24'] = diff.reset_index()['V (kt)']

Save realtime data

In [13]:
RI_prob_full.to_csv('VALIDATION_data/realtime/SHIPS_realtime_{yr_sel}.csv'.format(yr_sel=yr_sel))

In [14]:
RI_prob_full.groupby(['BASIN','ATCF ID','Date_full','TIME']).apply(lambda x: x['V (kt)'] - x['V (kt)'].shift(-4))

BASIN         ATCF ID   Date_full            TIME   
ATLANTIC      AL172020  2020-09-20 00:00:00  0     0   NaN
                                                   0   NaN
                                             6     1   NaN
                                                   1   NaN
                                             12    2   NaN
                                                        ..
EAST_PACIFIC  EP982020  2020-11-17 18:00:00  48    6   NaN
                                             60    7   NaN
                                                   7   NaN
                                             72    8   NaN
                                                   8   NaN
Name: V (kt), Length: 9216, dtype: float64

In [15]:
xx = RI_prob_full.replace(-9999,np.nan)
xx = xx.drop_duplicates()
xx['Date var'] = xx['Date_full']
xx = xx.sort_values(['Technique','BASIN','ATCF ID','Date_full','TIME'],ascending=True)

In [16]:
yy = xx.set_index(['Technique','BASIN','ATCF ID','Date_full','TIME']).xs(0,level=4)

In [17]:
yy2 = yy.shift(-4)
yy3 = yy2['V (kt)'] - yy['V (kt)']

In [18]:
date_diff = yy2['Date var'] - yy['Date var']
diff = yy3.where(date_diff == pd.Timedelta(1,'D')).dropna(how='all')