<a href="https://colab.research.google.com/github/m-wessler/nbm-verification/blob/main/NBM_Text_Product_Parser_MWE.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [3]:
import numpy as np
import pandas as pd

from tqdm import tqdm
from datetime import datetime, timedelta

In [4]:
# Fixed forecast length (15 periods, NBE)
forecast_periods = {'NBE':15,}
fixed_columns = {'NBE':59,}

filename = 'blend_nbetx.t01z.txt'

with open(filename, 'r') as rfp:
    file_data = np.array(rfp.readlines())

In [5]:
agg_df = pd.DataFrame()
init_datetime = None

starts = np.where(np.char.find(file_data, "NBE") != -1)[0]
ends = np.append((starts-1)[1:], len(file_data)-1)

for start, end in tqdm(zip(starts, ends),
                       total=len(starts), desc=f'Parsing {filename}'):

    site_block = file_data[start:end]

    # ONLY PROCESS DATETIME ON FIRST ENTRY, consistent throughout file
    if init_datetime == None:
        site_id, prod_id, init_date, init_hour, timezone = [
            site_block[0].split(' ')[i] for i in [1, 5, 10, 12, 13]]

        init_datetime = datetime.strptime(
            f'{init_date} {init_hour}', '%m/%d/%Y %H%M')

        # First column is ALWAYS the next day (NBE)
        fhr0_datetime = (init_datetime + timedelta(days=1)).replace(
            hour=int(site_block[2][8:10]))

    else:
        site_id = site_block[0].split(' ')[1]

    datetime_index = [fhr0_datetime + timedelta(hours=12*i)
        for i in range(0, forecast_periods[prod_id])]

    site_matrix = {'datetime':datetime_index,
                   'site_id':[site_id]*forecast_periods[prod_id]}

    for line in site_block[3:]:
        var_id = line[:4]
        var_raw = line[7:67].rstrip()
        var_data = [var_raw[i:i+3] for i in range(0, fixed_columns[prod_id], 4)]
        site_matrix[var_id] = var_data

    agg_df = pd.concat([agg_df,
        pd.DataFrame(site_matrix).set_index(['datetime', 'site_id'])])

agg_df.sort_index(inplace=True)
agg_df

Parsing blend_nbetx.t01z.txt: 100%|██████████| 9589/9589 [18:24<00:00,  8.68it/s]


Unnamed: 0_level_0,Unnamed: 1_level_0,FHR,TXN,XND,TMP,TSD,DPT,DSD,SKY,SSD,WDR,...,PZR,PSN,PPL,PRA,S12,SLV,I12,S24,SOL,SWH
datetime,site_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2024-05-16,086092,23,90,3,82,4,73,3,69,22,23,...,0,0,0,21,0,120,0,,54,2
2024-05-16,188557,23,65,2,63,2,60,2,78,27,3,...,0,0,0,4,0,100,0,,12,1
2024-05-16,220792,23,85,2,82,2,68,2,11,25,31,...,0,0,0,0,0,95,0,,68,0
2024-05-16,2A1,23,71,1,65,2,61,2,58,35,30,...,0,0,0,29,0,110,0,,30,
2024-05-16,2M2,23,76,1,71,2,63,2,26,22,31,...,0,0,0,1,0,100,0,,44,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-23,ZNPU1,191,87,2,83,4,24,4,24,17,24,...,0,0,0,0,0,94,0,,64,
2024-05-23,ZNRN5,191,77,1,74,4,12,4,12,9,24,...,0,0,0,0,0,100,0,,67,
2024-05-23,ZONM8,191,58,6,56,5,36,3,64,22,26,...,0,0,0,53,0,69,0,,5,
2024-05-23,ZSFO1,191,82,4,76,4,62,4,42,24,25,...,0,0,0,52,0,100,0,,13,


In [10]:
output_csv_file = f'blend_nbetx.{datetime.strftime(init_datetime, "%Y%m%d")}.t13z.csv'
agg_df.to_csv(output_csv_file)

In [12]:
pd.read_csv(output_csv_file).set_index(['datetime', 'site_id'])

  pd.read_csv(output_csv_file).set_index(['datetime', 'site_id'])


Unnamed: 0_level_0,Unnamed: 1_level_0,FHR,TXN,XND,TMP,TSD,DPT,DSD,SKY,SSD,WDR,...,PZR,PSN,PPL,PRA,S12,SLV,I12,S24,SOL,SWH
datetime,site_id,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1
2024-05-16 00:00:00,086092,23,90.0,3.0,82.0,4.0,73.0,3.0,69.0,22.0,23.0,...,0.0,0.0,0.0,21.0,0.0,120.0,0.0,,54.0,2.0
2024-05-16 00:00:00,188557,23,65.0,2.0,63.0,2.0,60.0,2.0,78.0,27.0,3.0,...,0.0,0.0,0.0,4.0,0.0,100.0,0.0,,12.0,1.0
2024-05-16 00:00:00,220792,23,85.0,2.0,82.0,2.0,68.0,2.0,11.0,25.0,31.0,...,0.0,0.0,0.0,0.0,0.0,95.0,0.0,,68.0,0.0
2024-05-16 00:00:00,2A1,23,71.0,1.0,65.0,2.0,61.0,2.0,58.0,35.0,30.0,...,0.0,0.0,0.0,29.0,0.0,110.0,0.0,,30.0,
2024-05-16 00:00:00,2M2,23,76.0,1.0,71.0,2.0,63.0,2.0,26.0,22.0,31.0,...,0.0,0.0,0.0,1.0,0.0,100.0,0.0,,44.0,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2024-05-23 00:00:00,ZNPU1,191,87.0,2.0,83.0,4.0,24.0,4.0,24.0,17.0,24.0,...,0.0,0.0,0.0,0.0,0.0,94.0,0.0,,64.0,
2024-05-23 00:00:00,ZNRN5,191,77.0,1.0,74.0,4.0,12.0,4.0,12.0,9.0,24.0,...,0.0,0.0,0.0,0.0,0.0,100.0,0.0,,67.0,
2024-05-23 00:00:00,ZONM8,191,58.0,6.0,56.0,5.0,36.0,3.0,64.0,22.0,26.0,...,0.0,0.0,0.0,53.0,0.0,69.0,0.0,,5.0,
2024-05-23 00:00:00,ZSFO1,191,82.0,4.0,76.0,4.0,62.0,4.0,42.0,24.0,25.0,...,0.0,0.0,0.0,52.0,0.0,100.0,0.0,,13.0,
