Data can be download from the following link
https://search.earthdata.nasa.gov/portal/ghrc/search?fsm0=Atmospheric%20Electricity&fst0=Atmosphere

* Last modified 7-18-2022

In [2]:
import re
import pandas as pd
import string

In [3]:
fname = "NALMA_220712_235000_0600.dat"

In [4]:
# Match string
expr_data ='([^+-]\d+.\d+)'\
    '(\s+[+-]?\d+[.]?\d+)'\
        '(\s+[+-]?\d+[.]?\d+)'\
            '(\s+[+-]?\d+[.]?\d+)'\
                '(\s+[+-]?\d+[.]?\d+)'\
                    '(\s+[+-]?\d+[.]?\d+)'\
                        '(\s[0-9a-zA-Z]*)'

expr_station_info = '(?<=(?:Sta_info):[\s*])'\
    '([A-Z]?)(\D+)([+-]?\d+[.]?\d+)'\
        '(\s+[+-]\d+[.]\d+)'\
            '(\s+\d+[.]?\d+)'\
                '(\s+\d+[.]?\d+)'\
                    '(\s+\d+)(\s+\d+)'
expr_station_data = '(?<=(?:Sta_data):[\s*])'\
    '([A-Z]?)(\D+)'\
        '(\d+)(\s+\d+)'\
            '(\s+\d+)'\
                '(\s+\d+)'\
                    '(\s+\d+[.]?\d+)'\
                        '(\s+\d+[.]?\d+)'\
                            '(\s+\w+)'

expr = [expr_data,expr_station_info,expr_station_data]

In [5]:

def extract_NALMA(expr,fname):

    with open(fname,mode='rt') as f:
        nalma_data = f.read()
        get_data = re.findall(expr[0], nalma_data)
        df = pd.DataFrame(get_data,columns=['Time (UT sec of day)','Latitude','Longitude', 'Altitude (m)', 'reduced chi^2', 'P(dBW)','mask'])
        df = df.astype({'Time (UT sec of day)':'float32',
                        'Latitude':'float32',
                        'Longitude':'float32',
                        'Altitude (m)':'float32',
                        'reduced chi^2':'float32',
                        'P(dBW)':'float32',
                        'mask':'object'});

        station_info = re.findall(expr[1],nalma_data)
        stinfo_ = pd.DataFrame(station_info)

        station_data = re.findall(expr[2],nalma_data)
        stdata_ = pd.DataFrame(station_data)

        df_stinfo = pd.concat([stinfo_,stdata_],axis=1,join='outer',ignore_index=True).drop(columns=[8,9])

        df_stinfo.rename(columns={0:'id',1:'name',2:'lat(d)',
        3:'lon(d)',4:'alt(m)',5:'delay(ns)',
        6:'board_rev',7:'rec_ch',10:'win(us)',
        11:'dec_win(us)',12:'data_ver',13:'rms_error(ns)',
        14:'sources(%)',15:'<P/P_m>',16:'active'},inplace= True)

        return df,df_stinfo
        

In [6]:
df,df_stinfo = extract_NALMA(expr,fname)

In [7]:
df

Unnamed: 0,Time (UT sec of day),Latitude,Longitude,Altitude (m),reduced chi^2,P(dBW),mask
0,85800.070312,35.747196,-86.212585,9738.049805,0.00,5.1,0x009f
1,85800.093750,35.715305,-86.261909,16402.820312,2.31,-0.8,0x00bf
2,85800.109375,35.738178,-86.221741,9756.089844,0.20,2.0,0x009f
3,85800.109375,35.752926,-86.243515,24828.080078,3.53,2.7,0x003f
4,85800.117188,35.746391,-86.207420,9738.559570,0.44,1.2,0x009f
...,...,...,...,...,...,...,...
112500,86399.953125,35.933285,-85.762215,13189.690430,0.83,11.4,0x1ac1
112501,86399.976562,35.562408,-86.206833,11683.780273,2.77,10.6,0x14c6
112502,86399.976562,35.973660,-86.051941,9976.280273,1.93,5.1,0x08db
112503,86399.976562,35.985226,-86.050087,17343.919922,2.73,9.3,0x1a8f


In [8]:
df_stinfo

Unnamed: 0,id,name,lat(d),lon(d),alt(m),delay(ns),board_rev,rec_ch,win(us),dec_win(us),data_ver,rms_error(ns),sources(%),<P/P_m>,active
0,A,firetower,34.8092586,-87.0357225,207.62,720,3,3,80,10,70,62622,55.7,0.19,A
1,B,boeing,34.6433808,-86.7714025,174.4,822,3,3,80,10,70,94928,84.4,0.08,A
2,C,annex,34.7253536,-86.6449781,198.3,815,3,3,80,12,70,30618,27.2,1.33,A
3,D,keel,34.6656331,-86.3586129,486.5,999,3,3,80,10,70,101487,90.2,0.8,A
4,E,mtsano,34.7455622,-86.5126506,507.4,962,3,3,80,10,70,32050,28.5,0.37,A
5,F,ardmore,34.9836028,-86.8392592,257.13,26,3,3,80,10,70,13804,12.3,0.31,A
6,G,aamu,34.8996936,-86.5578487,218.6,945,3,3,80,10,70,38976,34.6,1.99,A
7,H,green,34.6121906,-86.5196873,465.2,954,3,3,80,10,70,99650,88.6,0.55,A
8,I,owen,34.7860321,-86.8247285,229.8,891,3,3,0,0,70,0,0.0,0.0,
9,J,hospital,34.5231382,-86.9681644,213.7,918,3,3,80,10,70,66658,59.2,5.88,A
