In [1]:
import numpy as np
import xarray as xr
import pandas as pd
import datetime
import glob

Find occurrences of N/A in the text files for numeric quantities

In [2]:
def find_string_indices(list_in,s_in='N/A'):
    matched_indexes = []
    i = 0
    length = len(list_in)
    while i < length:
        if s_in == list_in[i]:
            matched_indexes.append(i)
        i += 1
    return matched_indexes

#### Get list of all realtime files for desired year
Note: original realtime files were obtained from Mark DeMaria via `rammftp`, and copied locally.

In [3]:
yr_sel = 2021
fdir = 'VALIDATION_data/realtime/{yr_sel}/'.format(yr_sel=yr_sel)
all_files = glob.glob(fdir+'*.txt')
no_files = len(all_files)
fname_test = all_files[0]
print(fname_test)

VALIDATION_data/realtime/2021/21091512AL1421_ships.txt


#### Read in each file one at a time

Relevant information:

* TIME
* LAND (KM)
* Prob of RI for 25 kt RI threshold
* Prob of RI for 30 kt RI threshold
* Prob of RI for 35 kt RI threshold
* LAT (DEG N)
* LONG (DEG W)
* Name
* Case No.
* DATE
* TIME
* Storm Type
* V (KT)
* Technique: Which model is it? SHIPS-RII, DTOPS, Consensus

Read in information from header file (`ATCF ID`, `BASIN`, `Cyclone No`, `Date_full`)

In [4]:
fn_test_full = open(fname_test)
lines = fn_test_full.readlines()
## ATCF ID is line 3, element 2
## Date is line 3, element 3
## Time is line 3, element 4
iline = 3
line_sel = lines[iline].split()
atcf_id = line_sel[2]
date = line_sel[3]
time = line_sel[4]
if atcf_id[0] == 'A':
    basin = 'ATLANTIC'
elif ((atcf_id[0] == 'C' )|(atcf_id[0] == 'E')):
    basin = 'EAST_PACIFIC'
cyc_no = int(atcf_id[2:4])
date_full = pd.to_datetime(date)+pd.Timedelta(int(time),'H')
print(atcf_id,date,basin,cyc_no,date_full)

AL142021 09/15/21 ATLANTIC 14 2021-09-15 12:00:00


Create output file with pre-defined columns

In [5]:
RI_prob_df_ALL = pd.DataFrame(columns={'ATCF ID','BASIN','Cyclone No','Date_full','TIME','DTL (km)','Storm Type',
                                       'Lat (N)','Lon (W)','V (kt)','Technique',
                                       'Pr RI (20/12)','Pr RI (25/24)','Pr RI (30/24)','Pr RI (35/24)',
                                       'Pr RI (40/24)','Pr RI (45/36)','Pr RI (55/48)','Pr RI (65/72)'})
column_names = ['ATCF ID','BASIN','Cyclone No','Date_full','TIME','DTL (km)','Storm Type',
                                       'Lat (N)','Lon (W)','V (kt)','Technique',
                                       'Pr RI (20/12)','Pr RI (25/24)','Pr RI (30/24)','Pr RI (35/24)',
                                       'Pr RI (40/24)','Pr RI (45/36)','Pr RI (55/48)','Pr RI (65/72)']
RI_prob_df_ALL = RI_prob_df_ALL.reindex(columns=column_names)


Read in the forecast variables: `TIME`, `V`, `Storm Type`, `Land`, `Lat`, `Lon`. We'll read in `SHIPS-RII`, `Consensus`, and `DTOPS` separately. Get `ATCF ID`, `BASIN`, `Cyclone number`, and `Date` from the header. We'll grab forecasts up to 72 hours for now, even though we'll probably focus on hours 0-24. 

We read in line by line and primarily rely on the `startswith` method. For numeric quantities, we want to convert the numbers in the text file from strings to floats or ints (depending on the number). For some quantities, we occasionally run into `N/A`, so we use the `find_string_indices` to check for that.  If we have N/As, we grab only the numbers and then pad the array with `-9999` using `np.pad`.  Note that `LAT` and `LON` can also have `xx.x` (meaning they are over land) so we check for that as well. 

This is very clunky but since it's a line-by-line reading, there's not a lot to be done. 

In [6]:
for ino in np.arange(0,no_files):
    # Select file and create dataframe for this file
    ifile = all_files[ino]
    print(ifile)
    i_RI_prof_df = pd.DataFrame(columns=column_names)
    # Read in file line by line
    with open(ifile) as fn:
        for line in fn:
            ### Get time
            if line.startswith("TIME (HR)"):
                time = [int(i) for i in line.split()[2:11]]
                i_RI_prof_df['TIME'] = time
                # print(time)
            ### Get V
            elif line.startswith("V (KT) NO LAND"):
                # Check for N/As
                matched_ind = find_string_indices(line.split()[0:13])
                # If we have N/As, get only the numbers and pad in the index 
                if (len(matched_ind)>=1):
                    iv = [float(i) for i in line.split()[4:matched_ind[0]]]
                    gap = len(time) - len(iv)
                    v = np.pad(iv,(0,gap),'constant',constant_values=-9999)
                # Otherwise, just get the right numbers that correspond to forecast hours we are interested in. 
                elif not matched_ind:
                    v = [float(i) for i in line.split()[4:13]]
                i_RI_prof_df['V (kt)'] = v
                # print(v)
            ### Get storm type
            elif line.startswith("Storm Type"):
                stype = line.split()[2:11]
                i_RI_prof_df['Storm Type'] = stype
                #print(stype)
            ### Get distance from land
            elif line.startswith("LAND (KM)"):
                # Check for N/As
                matched_ind = find_string_indices(line.split()[0:11])
                # If we have N/As, get only the numbers and pad in the index 
                if (len(matched_ind) >= 1):
                    idtl = [float(i) for i in line.split()[2:matched_ind[0]]]
                    gap = len(time) - len(idtl)
                    dtl = np.pad(idtl,(0,gap),'constant',constant_values=-9999)
                 # Otherwise, just get the right numbers that correspond to forecast hours we are interested in. 
                else:
                    dtl = [float(i) for i in line.split()[2:11]]
                i_RI_prof_df['DTL (km)'] = dtl
                #print(dtl)
            elif line.startswith("LAT (DEG N)"):
                # Check for N/As and xx.x
                matched_ind = find_string_indices(line.split()[0:12])
                x_ind = find_string_indices(line.split()[0:12],'xx.x')
                # If we have N/As, get only the numbers and pad in the index 
                if (len(matched_ind) > 0):
                    ilat = [float(i) for i in line.split()[3:matched_ind[0]]]
                    gap = len(time) - len(ilat)
                    lat = np.pad(ilat,(0,gap),'constant',constant_values=-9999)
                # If we have xx.x, get only the numbers and pad in the index 
                elif (len(x_ind) > 0):
                    ilat = [float(i) for i in line.split()[3:x_ind[0]]]
                    gap = len(time) - len(ilat)
                    lat = np.pad(ilat,(0,gap),'constant',constant_values=-9999)
                 # Otherwise, just get the right numbers that correspond to forecast hours we are interested in. 
                else:
                    lat = [float(i) for i in line.split()[3:12]]
                i_RI_prof_df['Lat (N)'] = lat
                #print(lat)
            elif line.startswith("LONG(DEG W)"):
                # Check for N/As and xxx.x
                matched_ind = find_string_indices(line.split()[0:11])
                x_ind = find_string_indices(line.split()[0:12],'xxx.x')
                # If we have N/As, get only the numbers and pad in the index 
                if (len(matched_ind) > 0):
                    ilon = [float(i) for i in line.split()[2:matched_ind[0]]]
                    gap = len(time) - len(ilon)
                    lon = np.pad(ilon,(0,gap),'constant',constant_values=-9999)
                # If we have xxx.x, get only the numbers and pad in the index     
                elif (len(x_ind) > 0):
                    ilon = [float(i) for i in line.split()[2:x_ind[0]]]
                    gap = len(time) - len(ilon)
                    lon = np.pad(ilon,(0,gap),'constant',constant_values=-9999)
                 # Otherwise, just get the right numbers that correspond to forecast hours we are interested in. 
                else:
                    lon = [float(i) for i in line.split()[2:11]]
                i_RI_prof_df['Lon (W)'] = lon
                #print(lon)
            # Now, get the Pr(RI) calculated by SHIPS-RII
            elif line.startswith("   SHIPS-RII:"):
                i_RI_prof_df['Technique'] = 'SHIPS-RII'
                i_RI_prof_df['Pr RI (20/12)'] = line.split()[1]
                i_RI_prof_df['Pr RI (25/24)'] = line.split()[2]
                i_RI_prof_df['Pr RI (30/24)'] = line.split()[3]
                i_RI_prof_df['Pr RI (35/24)'] = line.split()[4]
                i_RI_prof_df['Pr RI (40/24)'] = line.split()[5]
                i_RI_prof_df['Pr RI (45/36)'] = line.split()[6]
                i_RI_prof_df['Pr RI (55/48)'] = line.split()[7]
                i_RI_prof_df['Pr RI (65/72)'] = line.split()[8]
            #
    ## Now we get the information from the header (Date, time, ATCF ID, basin)
    fn_test_full = open(ifile)
    lines = fn_test_full.readlines()
    ## ATCF ID is line 3, element 2
    ## Date is line 3, element 3
    ## Time is line 3, element 4
    ## Make sure the file actually exists
    if (len(lines) < 3):
        continue
    else:
        iline = 3
        line_sel = lines[iline].split()
        atcf_id = line_sel[2]
        date = line_sel[3]
        time = line_sel[4]
        if atcf_id[0] == 'A':
            basin = 'ATLANTIC'
        elif ((atcf_id[0] == 'C' )|(atcf_id[0] == 'E')):
            basin = 'EAST_PACIFIC'
        cyc_no = int(atcf_id[2:4])
        date_full = pd.to_datetime(date)+pd.Timedelta(int(time),'H')
        print(atcf_id,date,basin,cyc_no,date_full)
        i_RI_prof_df['ATCF ID'] = atcf_id
        i_RI_prof_df['BASIN'] = basin
        i_RI_prof_df['Cyclone No'] = cyc_no
        i_RI_prof_df['Date_full'] = date_full
        # Add this file to full dataframe
        RI_prob_df_ALL = RI_prob_df_ALL.append(i_RI_prof_df)

VALIDATION_data/realtime/2021/21091512AL1421_ships.txt
AL142021 09/15/21 ATLANTIC 14 2021-09-15 12:00:00
VALIDATION_data/realtime/2021/21092106AL1521_ships.txt
AL152021 09/21/21 ATLANTIC 15 2021-09-21 06:00:00
VALIDATION_data/realtime/2021/21090912AL1221_ships.txt
AL122021 09/09/21 ATLANTIC 12 2021-09-09 12:00:00
VALIDATION_data/realtime/2021/21093006AL1821_ships.txt
AL182021 09/30/21 ATLANTIC 18 2021-09-30 06:00:00
VALIDATION_data/realtime/2021/21102418EP1721_ships.txt
EP172021 10/24/21 EAST_PACIFIC 17 2021-10-24 18:00:00
VALIDATION_data/realtime/2021/21103000AL9421_ships.txt
AL942021 10/30/21 ATLANTIC 94 2021-10-30 00:00:00
VALIDATION_data/realtime/2021/21101018AL9321_ships.txt
AL932021 10/10/21 ATLANTIC 93 2021-10-10 18:00:00
VALIDATION_data/realtime/2021/21082912AL1121_ships.txt
AL112021 08/29/21 ATLANTIC 11 2021-08-29 12:00:00
VALIDATION_data/realtime/2021/21091812AL9721_ships.txt
AL972021 09/18/21 ATLANTIC 97 2021-09-18 12:00:00
VALIDATION_data/realtime/2021/21082618AL0921_ships.

VALIDATION_data/realtime/2021/21083012AL0921_ships.txt
AL092021 08/30/21 ATLANTIC 9 2021-08-30 12:00:00
VALIDATION_data/realtime/2021/21090918AL1221_ships.txt
AL122021 09/09/21 ATLANTIC 12 2021-09-09 18:00:00
VALIDATION_data/realtime/2021/21092000AL1721_ships.txt
AL172021 09/20/21 ATLANTIC 17 2021-09-20 00:00:00
VALIDATION_data/realtime/2021/21100400AL2021_ships.txt
AL202021 10/04/21 ATLANTIC 20 2021-10-04 00:00:00
VALIDATION_data/realtime/2021/21090706EP9621_ships.txt
EP962021 09/07/21 EAST_PACIFIC 96 2021-09-07 06:00:00
VALIDATION_data/realtime/2021/21091506AL9621_ships.txt
AL962021 09/15/21 ATLANTIC 96 2021-09-15 06:00:00
VALIDATION_data/realtime/2021/21102506EP1721_ships.txt
EP172021 10/25/21 EAST_PACIFIC 17 2021-10-25 06:00:00
VALIDATION_data/realtime/2021/21082612AL0921_ships.txt
AL092021 08/26/21 ATLANTIC 9 2021-08-26 12:00:00
VALIDATION_data/realtime/2021/21091900AL9521_ships.txt
AL952021 09/19/21 ATLANTIC 95 2021-09-19 00:00:00
VALIDATION_data/realtime/2021/21110606AL2121_ship

AL962021 09/15/21 ATLANTIC 96 2021-09-15 12:00:00
VALIDATION_data/realtime/2021/21100218AL2021_ships.txt
AL202021 10/02/21 ATLANTIC 20 2021-10-02 18:00:00
VALIDATION_data/realtime/2021/21092118AL1521_ships.txt
AL152021 09/21/21 ATLANTIC 15 2021-09-21 18:00:00
VALIDATION_data/realtime/2021/21091218AL1421_ships.txt
AL142021 09/12/21 ATLANTIC 14 2021-09-12 18:00:00
VALIDATION_data/realtime/2021/21091800AL9521_ships.txt
AL952021 09/18/21 ATLANTIC 95 2021-09-18 00:00:00
VALIDATION_data/realtime/2021/21090600EP9621_ships.txt
EP962021 09/06/21 EAST_PACIFIC 96 2021-09-06 00:00:00
VALIDATION_data/realtime/2021/21090700EP9621_ships.txt
EP962021 09/07/21 EAST_PACIFIC 96 2021-09-07 00:00:00
VALIDATION_data/realtime/2021/21103018AL9521_ships.txt
AL952021 10/30/21 ATLANTIC 95 2021-10-30 18:00:00
VALIDATION_data/realtime/2021/21110512EP9421_ships.txt
EP942021 11/05/21 EAST_PACIFIC 94 2021-11-05 12:00:00
VALIDATION_data/realtime/2021/21092718AL9021_ships.txt
AL902021 09/27/21 ATLANTIC 90 2021-09-27 18

EP142021 08/27/21 EAST_PACIFIC 14 2021-08-27 12:00:00
VALIDATION_data/realtime/2021/21091712AL9621_ships.txt
AL962021 09/17/21 ATLANTIC 96 2021-09-17 12:00:00
VALIDATION_data/realtime/2021/21092418AL1921_ships.txt
AL192021 09/24/21 ATLANTIC 19 2021-09-24 18:00:00
VALIDATION_data/realtime/2021/21082706AL0921_ships.txt
AL092021 08/27/21 ATLANTIC 9 2021-08-27 06:00:00
VALIDATION_data/realtime/2021/21091600AL1421_ships.txt
AL142021 09/16/21 ATLANTIC 14 2021-09-16 00:00:00
VALIDATION_data/realtime/2021/21101300EP1621_ships.txt
EP162021 10/13/21 EAST_PACIFIC 16 2021-10-13 00:00:00
VALIDATION_data/realtime/2021/21103018AL9421_ships.txt
AL942021 10/30/21 ATLANTIC 94 2021-10-30 18:00:00
VALIDATION_data/realtime/2021/21091518AL9521_ships.txt
AL952021 09/15/21 ATLANTIC 95 2021-09-15 18:00:00
VALIDATION_data/realtime/2021/21091700AL1421_ships.txt
AL142021 09/17/21 ATLANTIC 14 2021-09-17 00:00:00
VALIDATION_data/realtime/2021/21090312AL1221_ships.txt
AL122021 09/03/21 ATLANTIC 12 2021-09-03 12:00:0

AL192021 09/25/21 ATLANTIC 19 2021-09-25 12:00:00
VALIDATION_data/realtime/2021/21092712AL1821_ships.txt
AL182021 09/27/21 ATLANTIC 18 2021-09-27 12:00:00
VALIDATION_data/realtime/2021/21092206AL1621_ships.txt
AL162021 09/22/21 ATLANTIC 16 2021-09-22 06:00:00
VALIDATION_data/realtime/2021/21083106AL1021_ships.txt
AL102021 08/31/21 ATLANTIC 10 2021-08-31 06:00:00
VALIDATION_data/realtime/2021/21090306AL1221_ships.txt
AL122021 09/03/21 ATLANTIC 12 2021-09-03 06:00:00
VALIDATION_data/realtime/2021/21090218AL9121_ships.txt
AL912021 09/02/21 ATLANTIC 91 2021-09-02 18:00:00
VALIDATION_data/realtime/2021/21090612AL1221_ships.txt
AL122021 09/06/21 ATLANTIC 12 2021-09-06 12:00:00
VALIDATION_data/realtime/2021/21090200AL9121_ships.txt
AL912021 09/02/21 ATLANTIC 91 2021-09-02 00:00:00
VALIDATION_data/realtime/2021/21102206EP9221_ships.txt
EP922021 10/22/21 EAST_PACIFIC 92 2021-10-22 06:00:00
VALIDATION_data/realtime/2021/21090112AL0921_ships.txt
AL092021 09/01/21 ATLANTIC 9 2021-09-01 12:00:00
VA

VALIDATION_data/realtime/2021/21101106AL9321_ships.txt
AL932021 10/11/21 ATLANTIC 93 2021-10-11 06:00:00
VALIDATION_data/realtime/2021/21092406AL9921_ships.txt
AL992021 09/24/21 ATLANTIC 99 2021-09-24 06:00:00
VALIDATION_data/realtime/2021/21101100AL9221_ships.txt
AL922021 10/11/21 ATLANTIC 92 2021-10-11 00:00:00
VALIDATION_data/realtime/2021/21092906AL9021_ships.txt
AL902021 09/29/21 ATLANTIC 90 2021-09-29 06:00:00
VALIDATION_data/realtime/2021/21091318EP9721_ships.txt
EP972021 09/13/21 EAST_PACIFIC 97 2021-09-13 18:00:00
VALIDATION_data/realtime/2021/21091812AL1521_ships.txt
AL152021 09/18/21 ATLANTIC 15 2021-09-18 12:00:00
VALIDATION_data/realtime/2021/21082900AL9721_ships.txt
AL972021 08/29/21 ATLANTIC 97 2021-08-29 00:00:00
VALIDATION_data/realtime/2021/21091212AL1421_ships.txt
AL142021 09/12/21 ATLANTIC 14 2021-09-12 12:00:00
VALIDATION_data/realtime/2021/21090100AL1021_ships.txt
AL102021 09/01/21 ATLANTIC 10 2021-09-01 00:00:00
VALIDATION_data/realtime/2021/21090200AL0921_ships.

AL142021 09/14/21 ATLANTIC 14 2021-09-14 18:00:00
VALIDATION_data/realtime/2021/21092712AL1621_ships.txt
AL162021 09/27/21 ATLANTIC 16 2021-09-27 12:00:00
VALIDATION_data/realtime/2021/21082906AL1021_ships.txt
AL102021 08/29/21 ATLANTIC 10 2021-08-29 06:00:00
VALIDATION_data/realtime/2021/21100106AL1821_ships.txt
AL182021 10/01/21 ATLANTIC 18 2021-10-01 06:00:00
VALIDATION_data/realtime/2021/21091100AL9321_ships.txt
AL932021 09/11/21 ATLANTIC 93 2021-09-11 00:00:00
VALIDATION_data/realtime/2021/21110200AL2121_ships.txt
AL212021 11/02/21 ATLANTIC 21 2021-11-02 00:00:00
VALIDATION_data/realtime/2021/21082706AL9721_ships.txt
AL972021 08/27/21 ATLANTIC 97 2021-08-27 06:00:00
VALIDATION_data/realtime/2021/21102512EP1721_ships.txt
EP172021 10/25/21 EAST_PACIFIC 17 2021-10-25 12:00:00
VALIDATION_data/realtime/2021/21090818AL1321_ships.txt
AL132021 09/08/21 ATLANTIC 13 2021-09-08 18:00:00
VALIDATION_data/realtime/2021/21100918AL9221_ships.txt
AL922021 10/09/21 ATLANTIC 92 2021-10-09 18:00:00
V

AL162021 09/22/21 ATLANTIC 16 2021-09-22 12:00:00
VALIDATION_data/realtime/2021/21091306AL9521_ships.txt
AL952021 09/13/21 ATLANTIC 95 2021-09-13 06:00:00
VALIDATION_data/realtime/2021/21092006AL1621_ships.txt
AL162021 09/20/21 ATLANTIC 16 2021-09-20 06:00:00
VALIDATION_data/realtime/2021/21090300AL9221_ships.txt
AL922021 09/03/21 ATLANTIC 92 2021-09-03 00:00:00
VALIDATION_data/realtime/2021/21091106EP1521_ships.txt
EP152021 09/11/21 EAST_PACIFIC 15 2021-09-11 06:00:00
VALIDATION_data/realtime/2021/21091218AL9321_ships.txt
AL932021 09/12/21 ATLANTIC 93 2021-09-12 18:00:00
VALIDATION_data/realtime/2021/21090706AL1221_ships.txt
AL122021 09/07/21 ATLANTIC 12 2021-09-07 06:00:00
VALIDATION_data/realtime/2021/21090612AL9121_ships.txt
AL912021 09/06/21 ATLANTIC 91 2021-09-06 12:00:00
VALIDATION_data/realtime/2021/21092012AL9821_ships.txt
AL982021 09/20/21 ATLANTIC 98 2021-09-20 12:00:00
VALIDATION_data/realtime/2021/21102306EP1721_ships.txt
EP172021 10/23/21 EAST_PACIFIC 17 2021-10-23 06:00:

#### Same but for Consensus instead of SHIPS-RII

In [7]:
RIC_prob_df_ALL = pd.DataFrame(columns={'ATCF ID','BASIN','Cyclone No','Date_full','TIME','DTL (km)','Storm Type',
                                       'Lat (N)','Lon (W)','V (kt)','Technique',
                                       'Pr RI (20/12)','Pr RI (25/24)','Pr RI (30/24)','Pr RI (35/24)',
                                       'Pr RI (40/24)','Pr RI (45/36)','Pr RI (55/48)','Pr RI (65/72)'})
column_names = ['ATCF ID','BASIN','Cyclone No','Date_full','TIME','DTL (km)','Storm Type',
                                       'Lat (N)','Lon (W)','V (kt)','Technique',
                                       'Pr RI (20/12)','Pr RI (25/24)','Pr RI (30/24)','Pr RI (35/24)',
                                       'Pr RI (40/24)','Pr RI (45/36)','Pr RI (55/48)','Pr RI (65/72)']
RIC_prob_df_ALL = RIC_prob_df_ALL.reindex(columns=column_names)


In [8]:
for ino in np.arange(0,no_files):
    # Select file and create dataframe for this file
    ifile = all_files[ino]
    print(ifile)
    i_RI_prof_df = pd.DataFrame(columns=column_names)
    # Read in file line by line
    with open(ifile) as fn:
        for line in fn:
            ### Get time
            if line.startswith("TIME (HR)"):
                time = [int(i) for i in line.split()[2:11]]
                i_RI_prof_df['TIME'] = time
                # print(time)
            ### Get V
            elif line.startswith("V (KT) NO LAND"):
                # Check for N/As
                matched_ind = find_string_indices(line.split()[0:13])
                # If we have N/As, get only the numbers and pad in the index 
                if (len(matched_ind)>=1):
                    iv = [float(i) for i in line.split()[4:matched_ind[0]]]
                    gap = len(time) - len(iv)
                    v = np.pad(iv,(0,gap),'constant',constant_values=-9999)
                # Otherwise, just get the right numbers that correspond to forecast hours we are interested in. 
                elif not matched_ind:
                    v = [float(i) for i in line.split()[4:13]]
                i_RI_prof_df['V (kt)'] = v
                # print(v)
            ### Get storm type
            elif line.startswith("Storm Type"):
                stype = line.split()[2:11]
                i_RI_prof_df['Storm Type'] = stype
                #print(stype)
            ### Get distance from land
            elif line.startswith("LAND (KM)"):
                # Check for N/As
                matched_ind = find_string_indices(line.split()[0:11])
                # If we have N/As, get only the numbers and pad in the index 
                if (len(matched_ind) >= 1):
                    idtl = [float(i) for i in line.split()[2:matched_ind[0]]]
                    gap = len(time) - len(idtl)
                    dtl = np.pad(idtl,(0,gap),'constant',constant_values=-9999)
                 # Otherwise, just get the right numbers that correspond to forecast hours we are interested in. 
                else:
                    dtl = [float(i) for i in line.split()[2:11]]
                i_RI_prof_df['DTL (km)'] = dtl
                #print(dtl)
            elif line.startswith("LAT (DEG N)"):
                # Check for N/As and xx.x
                matched_ind = find_string_indices(line.split()[0:12])
                x_ind = find_string_indices(line.split()[0:12],'xx.x')
                # If we have N/As, get only the numbers and pad in the index 
                if (len(matched_ind) > 0):
                    ilat = [float(i) for i in line.split()[3:matched_ind[0]]]
                    gap = len(time) - len(ilat)
                    lat = np.pad(ilat,(0,gap),'constant',constant_values=-9999)
                # If we have xx.x, get only the numbers and pad in the index 
                elif (len(x_ind) > 0):
                    ilat = [float(i) for i in line.split()[3:x_ind[0]]]
                    gap = len(time) - len(ilat)
                    lat = np.pad(ilat,(0,gap),'constant',constant_values=-9999)
                 # Otherwise, just get the right numbers that correspond to forecast hours we are interested in. 
                else:
                    lat = [float(i) for i in line.split()[3:12]]
                i_RI_prof_df['Lat (N)'] = lat
                #print(lat)
            elif line.startswith("LONG(DEG W)"):
                # Check for N/As and xxx.x
                matched_ind = find_string_indices(line.split()[0:11])
                x_ind = find_string_indices(line.split()[0:12],'xxx.x')
                # If we have N/As, get only the numbers and pad in the index 
                if (len(matched_ind) > 0):
                    ilon = [float(i) for i in line.split()[2:matched_ind[0]]]
                    gap = len(time) - len(ilon)
                    lon = np.pad(ilon,(0,gap),'constant',constant_values=-9999)
                # If we have xxx.x, get only the numbers and pad in the index     
                elif (len(x_ind) > 0):
                    ilon = [float(i) for i in line.split()[2:x_ind[0]]]
                    gap = len(time) - len(ilon)
                    lon = np.pad(ilon,(0,gap),'constant',constant_values=-9999)
                 # Otherwise, just get the right numbers that correspond to forecast hours we are interested in. 
                else:
                    lon = [float(i) for i in line.split()[2:11]]
                i_RI_prof_df['Lon (W)'] = lon
                #print(lon)
            # Now, get the Pr(RI) calculated by SHIPS-RII
            elif line.startswith("   Consensus:"):
                i_RI_prof_df['Technique'] = 'Consensus'
                i_RI_prof_df['Pr RI (20/12)'] = line.split()[1]
                i_RI_prof_df['Pr RI (25/24)'] = line.split()[2]
                i_RI_prof_df['Pr RI (30/24)'] = line.split()[3]
                i_RI_prof_df['Pr RI (35/24)'] = line.split()[4]
                i_RI_prof_df['Pr RI (40/24)'] = line.split()[5]
                i_RI_prof_df['Pr RI (45/36)'] = line.split()[6]
                i_RI_prof_df['Pr RI (55/48)'] = line.split()[7]
                i_RI_prof_df['Pr RI (65/72)'] = line.split()[8]
            #
    ## Now we get the information from the header (Date, time, ATCF ID, basin)
    fn_test_full = open(ifile)
    lines = fn_test_full.readlines()
    ## ATCF ID is line 3, element 2
    ## Date is line 3, element 3
    ## Time is line 3, element 4
    ## Make sure the file actually exists
    if (len(lines) < 3):
        continue
    else:
        iline = 3
        line_sel = lines[iline].split()
        atcf_id = line_sel[2]
        date = line_sel[3]
        time = line_sel[4]
        if atcf_id[0] == 'A':
            basin = 'ATLANTIC'
        elif ((atcf_id[0] == 'C' )|(atcf_id[0] == 'E')):
            basin = 'EAST_PACIFIC'
        cyc_no = int(atcf_id[2:4])
        date_full = pd.to_datetime(date)+pd.Timedelta(int(time),'H')
        print(atcf_id,date,basin,cyc_no,date_full)
        i_RI_prof_df['ATCF ID'] = atcf_id
        i_RI_prof_df['BASIN'] = basin
        i_RI_prof_df['Cyclone No'] = cyc_no
        i_RI_prof_df['Date_full'] = date_full
        # Add this file to full dataframe
        RIC_prob_df_ALL = RIC_prob_df_ALL.append(i_RI_prof_df)

VALIDATION_data/realtime/2021/21091512AL1421_ships.txt
AL142021 09/15/21 ATLANTIC 14 2021-09-15 12:00:00
VALIDATION_data/realtime/2021/21092106AL1521_ships.txt
AL152021 09/21/21 ATLANTIC 15 2021-09-21 06:00:00
VALIDATION_data/realtime/2021/21090912AL1221_ships.txt
AL122021 09/09/21 ATLANTIC 12 2021-09-09 12:00:00
VALIDATION_data/realtime/2021/21093006AL1821_ships.txt
AL182021 09/30/21 ATLANTIC 18 2021-09-30 06:00:00
VALIDATION_data/realtime/2021/21102418EP1721_ships.txt
EP172021 10/24/21 EAST_PACIFIC 17 2021-10-24 18:00:00
VALIDATION_data/realtime/2021/21103000AL9421_ships.txt
AL942021 10/30/21 ATLANTIC 94 2021-10-30 00:00:00
VALIDATION_data/realtime/2021/21101018AL9321_ships.txt
AL932021 10/10/21 ATLANTIC 93 2021-10-10 18:00:00
VALIDATION_data/realtime/2021/21082912AL1121_ships.txt
AL112021 08/29/21 ATLANTIC 11 2021-08-29 12:00:00
VALIDATION_data/realtime/2021/21091812AL9721_ships.txt
AL972021 09/18/21 ATLANTIC 97 2021-09-18 12:00:00
VALIDATION_data/realtime/2021/21082618AL0921_ships.

VALIDATION_data/realtime/2021/21091506AL9621_ships.txt
AL962021 09/15/21 ATLANTIC 96 2021-09-15 06:00:00
VALIDATION_data/realtime/2021/21102506EP1721_ships.txt
EP172021 10/25/21 EAST_PACIFIC 17 2021-10-25 06:00:00
VALIDATION_data/realtime/2021/21082612AL0921_ships.txt
AL092021 08/26/21 ATLANTIC 9 2021-08-26 12:00:00
VALIDATION_data/realtime/2021/21091900AL9521_ships.txt
AL952021 09/19/21 ATLANTIC 95 2021-09-19 00:00:00
VALIDATION_data/realtime/2021/21110606AL2121_ships.txt
AL212021 11/06/21 ATLANTIC 21 2021-11-06 06:00:00
VALIDATION_data/realtime/2021/21092006AL7221_ships.txt
AL722021 09/20/21 ATLANTIC 72 2021-09-20 06:00:00
VALIDATION_data/realtime/2021/21092418AL9921_ships.txt
AL992021 09/24/21 ATLANTIC 99 2021-09-24 18:00:00
VALIDATION_data/realtime/2021/21091118AL9321_ships.txt
AL932021 09/11/21 ATLANTIC 93 2021-09-11 18:00:00
VALIDATION_data/realtime/2021/21090818AL1221_ships.txt
AL122021 09/08/21 ATLANTIC 12 2021-09-08 18:00:00
VALIDATION_data/realtime/2021/21091906AL1621_ships.t

EP942021 11/05/21 EAST_PACIFIC 94 2021-11-05 12:00:00
VALIDATION_data/realtime/2021/21092718AL9021_ships.txt
AL902021 09/27/21 ATLANTIC 90 2021-09-27 18:00:00
VALIDATION_data/realtime/2021/21110812EP1821_ships.txt
EP182021 11/08/21 EAST_PACIFIC 18 2021-11-08 12:00:00
VALIDATION_data/realtime/2021/21092300AL1821_ships.txt
AL182021 09/23/21 ATLANTIC 18 2021-09-23 00:00:00
VALIDATION_data/realtime/2021/21102800AL9421_ships.txt
AL942021 10/28/21 ATLANTIC 94 2021-10-28 00:00:00
VALIDATION_data/realtime/2021/21092118AL9821_ships.txt
AL982021 09/21/21 ATLANTIC 98 2021-09-21 18:00:00
VALIDATION_data/realtime/2021/21092812AL9121_ships.txt
AL912021 09/28/21 ATLANTIC 91 2021-09-28 12:00:00
VALIDATION_data/realtime/2021/21092606AL1821_ships.txt
AL182021 09/26/21 ATLANTIC 18 2021-09-26 06:00:00
VALIDATION_data/realtime/2021/21110700AL2121_ships.txt
AL212021 11/07/21 ATLANTIC 21 2021-11-07 00:00:00
VALIDATION_data/realtime/2021/21092118AL1621_ships.txt
AL162021 09/21/21 ATLANTIC 16 2021-09-21 18:00:

AL142021 09/17/21 ATLANTIC 14 2021-09-17 00:00:00
VALIDATION_data/realtime/2021/21090312AL1221_ships.txt
AL122021 09/03/21 ATLANTIC 12 2021-09-03 12:00:00
VALIDATION_data/realtime/2021/21110306EP9321_ships.txt
EP932021 11/03/21 EAST_PACIFIC 93 2021-11-03 06:00:00
VALIDATION_data/realtime/2021/21082806AL9821_ships.txt
AL982021 08/28/21 ATLANTIC 98 2021-08-28 06:00:00
VALIDATION_data/realtime/2021/21092006AL1721_ships.txt
AL172021 09/20/21 ATLANTIC 17 2021-09-20 06:00:00
VALIDATION_data/realtime/2021/21082918EP1421_ships.txt
EP142021 08/29/21 EAST_PACIFIC 14 2021-08-29 18:00:00
VALIDATION_data/realtime/2021/21082618AL9821_ships.txt
AL982021 08/26/21 ATLANTIC 98 2021-08-26 18:00:00
VALIDATION_data/realtime/2021/21092212AL1521_ships.txt
AL152021 09/22/21 ATLANTIC 15 2021-09-22 12:00:00
VALIDATION_data/realtime/2021/21091718AL9621_ships.txt
AL962021 09/17/21 ATLANTIC 96 2021-09-17 18:00:00
VALIDATION_data/realtime/2021/21082718EP1421_ships.txt
EP142021 08/27/21 EAST_PACIFIC 14 2021-08-27 18

EP922021 10/22/21 EAST_PACIFIC 92 2021-10-22 06:00:00
VALIDATION_data/realtime/2021/21090112AL0921_ships.txt
AL092021 09/01/21 ATLANTIC 9 2021-09-01 12:00:00
VALIDATION_data/realtime/2021/21091200EP9721_ships.txt
EP972021 09/12/21 EAST_PACIFIC 97 2021-09-12 00:00:00
VALIDATION_data/realtime/2021/21101306EP1621_ships.txt
EP162021 10/13/21 EAST_PACIFIC 16 2021-10-13 06:00:00
VALIDATION_data/realtime/2021/21090406AL1221_ships.txt
AL122021 09/04/21 ATLANTIC 12 2021-09-04 06:00:00
VALIDATION_data/realtime/2021/21111006EP1821_ships.txt
EP182021 11/10/21 EAST_PACIFIC 18 2021-11-10 06:00:00
VALIDATION_data/realtime/2021/21091206AL9321_ships.txt
AL932021 09/12/21 ATLANTIC 93 2021-09-12 06:00:00
VALIDATION_data/realtime/2021/21100712AL9221_ships.txt
AL922021 10/07/21 ATLANTIC 92 2021-10-07 12:00:00
VALIDATION_data/realtime/2021/21091118AL9421_ships.txt
AL942021 09/11/21 ATLANTIC 94 2021-09-11 18:00:00
VALIDATION_data/realtime/2021/21110300AL2121_ships.txt
AL212021 11/03/21 ATLANTIC 21 2021-11-03

VALIDATION_data/realtime/2021/21102812AL9421_ships.txt
AL942021 10/28/21 ATLANTIC 94 2021-10-28 12:00:00
VALIDATION_data/realtime/2021/21091706AL9621_ships.txt
AL962021 09/17/21 ATLANTIC 96 2021-09-17 06:00:00
VALIDATION_data/realtime/2021/21101106AL9321_ships.txt
AL932021 10/11/21 ATLANTIC 93 2021-10-11 06:00:00
VALIDATION_data/realtime/2021/21092406AL9921_ships.txt
AL992021 09/24/21 ATLANTIC 99 2021-09-24 06:00:00
VALIDATION_data/realtime/2021/21101100AL9221_ships.txt
AL922021 10/11/21 ATLANTIC 92 2021-10-11 00:00:00
VALIDATION_data/realtime/2021/21092906AL9021_ships.txt
AL902021 09/29/21 ATLANTIC 90 2021-09-29 06:00:00
VALIDATION_data/realtime/2021/21091318EP9721_ships.txt
EP972021 09/13/21 EAST_PACIFIC 97 2021-09-13 18:00:00
VALIDATION_data/realtime/2021/21091812AL1521_ships.txt
AL152021 09/18/21 ATLANTIC 15 2021-09-18 12:00:00
VALIDATION_data/realtime/2021/21082900AL9721_ships.txt
AL972021 08/29/21 ATLANTIC 97 2021-08-29 00:00:00
VALIDATION_data/realtime/2021/21091212AL1421_ships.

VALIDATION_data/realtime/2021/21103106AL2121_ships.txt
AL212021 10/31/21 ATLANTIC 21 2021-10-31 06:00:00
VALIDATION_data/realtime/2021/21092806AL9021_ships.txt
AL902021 09/28/21 ATLANTIC 90 2021-09-28 06:00:00
VALIDATION_data/realtime/2021/21091418AL1421_ships.txt
AL142021 09/14/21 ATLANTIC 14 2021-09-14 18:00:00
VALIDATION_data/realtime/2021/21092712AL1621_ships.txt
AL162021 09/27/21 ATLANTIC 16 2021-09-27 12:00:00
VALIDATION_data/realtime/2021/21082906AL1021_ships.txt
AL102021 08/29/21 ATLANTIC 10 2021-08-29 06:00:00
VALIDATION_data/realtime/2021/21100106AL1821_ships.txt
AL182021 10/01/21 ATLANTIC 18 2021-10-01 06:00:00
VALIDATION_data/realtime/2021/21091100AL9321_ships.txt
AL932021 09/11/21 ATLANTIC 93 2021-09-11 00:00:00
VALIDATION_data/realtime/2021/21110200AL2121_ships.txt
AL212021 11/02/21 ATLANTIC 21 2021-11-02 00:00:00
VALIDATION_data/realtime/2021/21082706AL9721_ships.txt
AL972021 08/27/21 ATLANTIC 97 2021-08-27 06:00:00
VALIDATION_data/realtime/2021/21102512EP1721_ships.txt


VALIDATION_data/realtime/2021/21092800AL9021_ships.txt
AL902021 09/28/21 ATLANTIC 90 2021-09-28 00:00:00
VALIDATION_data/realtime/2021/21101018AL9221_ships.txt
AL922021 10/10/21 ATLANTIC 92 2021-10-10 18:00:00
VALIDATION_data/realtime/2021/21110700EP1821_ships.txt
EP182021 11/07/21 EAST_PACIFIC 18 2021-11-07 00:00:00
VALIDATION_data/realtime/2021/21100306AL2021_ships.txt
AL202021 10/03/21 ATLANTIC 20 2021-10-03 06:00:00
VALIDATION_data/realtime/2021/21091806AL9521_ships.txt
AL952021 09/18/21 ATLANTIC 95 2021-09-18 06:00:00
VALIDATION_data/realtime/2021/21100112EP9021_ships.txt
EP902021 10/01/21 EAST_PACIFIC 90 2021-10-01 12:00:00
VALIDATION_data/realtime/2021/21110606EP1821_ships.txt
EP182021 11/06/21 EAST_PACIFIC 18 2021-11-06 06:00:00
VALIDATION_data/realtime/2021/21083112AL1021_ships.txt
AL102021 08/31/21 ATLANTIC 10 2021-08-31 12:00:00
VALIDATION_data/realtime/2021/21091118EP9721_ships.txt
EP972021 09/11/21 EAST_PACIFIC 97 2021-09-11 18:00:00
VALIDATION_data/realtime/2021/21110900E

AL102021 08/30/21 ATLANTIC 10 2021-08-30 06:00:00
VALIDATION_data/realtime/2021/21090806AL1221_ships.txt
AL122021 09/08/21 ATLANTIC 12 2021-09-08 06:00:00
VALIDATION_data/realtime/2021/21092206AL1721_ships.txt
AL172021 09/22/21 ATLANTIC 17 2021-09-22 06:00:00
VALIDATION_data/realtime/2021/21082906AL0921_ships.txt
AL092021 08/29/21 ATLANTIC 9 2021-08-29 06:00:00
VALIDATION_data/realtime/2021/21090212AL1221_ships.txt
AL122021 09/02/21 ATLANTIC 12 2021-09-02 12:00:00
VALIDATION_data/realtime/2021/21091600AL9621_ships.txt
AL962021 09/16/21 ATLANTIC 96 2021-09-16 00:00:00
VALIDATION_data/realtime/2021/21101318EP1621_ships.txt
EP162021 10/13/21 EAST_PACIFIC 16 2021-10-13 18:00:00
VALIDATION_data/realtime/2021/21092506AL1821_ships.txt
AL182021 09/25/21 ATLANTIC 18 2021-09-25 06:00:00
VALIDATION_data/realtime/2021/21091906AL1721_ships.txt
AL172021 09/19/21 ATLANTIC 17 2021-09-19 06:00:00
VALIDATION_data/realtime/2021/21092306AL1821_ships.txt
AL182021 09/23/21 ATLANTIC 18 2021-09-23 06:00:00
VA

Combine into one dataframe and remove percentage signs from RI probabilities

In [9]:
RI_prob_full = RI_prob_df_ALL.append(RIC_prob_df_ALL)
RI_prob_full['Pr RI (20/12)'] = RI_prob_full['Pr RI (20/12)'].str.rstrip("%").astype(float)
RI_prob_full['Pr RI (25/24)'] = RI_prob_full['Pr RI (25/24)'].str.rstrip("%").astype(float)
RI_prob_full['Pr RI (30/24)'] = RI_prob_full['Pr RI (30/24)'].str.rstrip("%").astype(float)
RI_prob_full['Pr RI (35/24)'] = RI_prob_full['Pr RI (35/24)'].str.rstrip("%").astype(float)
RI_prob_full['Pr RI (40/24)'] = RI_prob_full['Pr RI (40/24)'].str.rstrip("%").astype(float)
RI_prob_full['Pr RI (45/36)'] = RI_prob_full['Pr RI (45/36)'].str.rstrip("%").astype(float)
RI_prob_full['Pr RI (55/48)'] = RI_prob_full['Pr RI (55/48)'].str.rstrip("%").astype(float)
RI_prob_full['Pr RI (65/72)'] = RI_prob_full['Pr RI (65/72)'].str.rstrip("%").astype(float)

In [10]:
#foo = RI_prob_full.set_index(['ATCF ID','Date_full']).xs(('AL092021','2021-08-27 18:00:00'))
#import seaborn as sns
#sns.lineplot(data=foo.reset_index(),x='TIME',y='Pr RI (30/24)',hue='Technique')

Save realtime data

In [11]:
RI_prob_full.to_csv('VALIDATION_data/realtime/SHIPS_realtime_{yr_sel}.csv'.format(yr_sel=yr_sel))