- textloader
- date: 2020-08-06
- maintainer: YZK

In [41]:
# %%bash
# jupyter nbconvert --to script textloader.ipynb

[NbConvertApp] Converting notebook textloader.ipynb to script
[NbConvertApp] Writing 19088 bytes to textloader.py


In [87]:
from datetime import datetime
import calendar
import codecs
import logging
import math
import os
import re
import sys
import multiprocessing as mp

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import linear_model 

try:
    thisd = os.path.dirname(os.path.realpath(__file__))
    sys.path.append(thisd)
    import widgets as wg
    from parallel import runFunctionsInParallel
except Exception as E:
    logging.warning(E)
    if __name__ == "__main__":
        import widgets as wg
        from parallel import runFunctionsInParallel
    else:
        import lib.widgets as wg
        from lib.parallel import runFunctionsInParallel

# print(os.environ['SHELL'])



# Build the smarker table (smarker.csv) for the gridding map

In [252]:
# --- Inputs for mnf2onef -------------------------------------------------
# Variable name; used in directory and file names.
vname = "RR"
# Optional sub-directory name (defined here, not passed to the call below).
subd = "MON"

# First and last year (inclusive) to merge.
tperiod = [1998, 2019]
# tperiod = [2019, 2019]

# Station list and root directory of the monthly input files.
stnf = "/home/yuzhe/CODE/ProgramT1/GRDTools/SRC/RES/GI/RR_analysis_grid_stationlist.txt"
ind = "/NAS-129/users1/T1/DATA/Preprocess/MON"

# The merged table is written below the input root, in the variable's folder.
outd = "{}/{}".format(ind, vname)
def mnf2onef(ind, outd, stnf, tperiod, vname, subd=None, fprefix="MONTHLY_", lb=0):
    """Merge per-station, per-year monthly text files into one "smarker" table.

    For every station listed in `stnf` and every year in `tperiod`, the file
    ``{ind}/[{subd}/]{vname}/{YYYY}/{fprefix}{vname}_{YYYY}_{stnid}.txt`` is
    read as two whitespace-separated columns (YYYYmm, obs).  The merged table
    is written to ``{outd}/smarker.csv`` and returned.

    Parameters
    ----------
    ind : str
        Root directory of the monthly input files.
    outd : str
        Directory that receives ``smarker.csv``; created when missing.
    stnf : str
        Station-list file, read with ``textloader.getGI``.
    tperiod : sequence of int
        ``[start_year, end_year]``, both inclusive.
    vname : str
        Variable name used in directory and file names.
    subd : str, optional
        Extra directory level between `ind` and `vname`; None or "" for none.
    fprefix : str
        File-name prefix of the monthly files.
    lb : number
        Values below this bound at stations whose "id" equals 1 are reported
        with a warning; they are still stored.

    Returns
    -------
    pandas.DataFrame
        One column per station; rows "lon", "lat", "elev" followed by one row
        per month labelled "YYYYmm".  Months without data hold -999.
    """
    # Known case: C1I440 (Xingaokou) is both a target station and a neighbour
    # of another station; the gap-filling step may have missed its values
    # (because id=1 or id=2).

    if subd is None:
        subd = ""

    stninfo = textloader.getGI(stnf)
    stnid = stninfo["stnid"].values
    stninfo = stninfo.set_index("stnid")

    syear, eyear = tperiod[0], tperiod[1]
    years = list(range(syear, eyear + 1))
    months = list(range(1, 13))
    nstn = len(stnid)
    nyear = len(years)

    monthly = np.full((nstn, nyear, 12), -999.0)
    dtimes = ["{0:04d}{1:02d}".format(Y_, m_) for Y_ in years for m_ in months]

    logs1 = "mnf2onef: stnid = {}, obs = {} ({}) < {}, cxnid = {}, t1 = {}, t2 = {}"
    for iidx, id_ in enumerate(stnid):
        for yidx, Y_ in enumerate(years):

            if len(subd) == 0:
                inf = "{0}/{1}/{2}/{3}{1}_{2}_{4}.txt".format(ind, vname, Y_, fprefix, id_)
            else:
                inf = "{0}/{1}/{2}/{3}/{4}{2}_{3}_{5}.txt".format(ind, subd, vname, Y_, fprefix, id_)

            if not os.path.exists(inf):
                logging.warning("mnf2onef: {} doesn't exist!".format(inf))
                continue

            # atleast_1d: a one-line file is returned by loadtxt as a 0-d record.
            arr = np.atleast_1d(
                np.loadtxt(inf, dtype={"names": ("YYYYmm", "obs"), "formats": ("i4", "f8")})
            )
            # Look months up by their YYYYmm stamp instead of by row position,
            # so short or gappy files neither crash nor shift later months.
            by_month = {int(rec["YYYYmm"]): float(rec["obs"]) for rec in arr}

            for midx, m_ in enumerate(months):
                Ym = Y_ * 100 + m_
                if Ym not in by_month:
                    continue
                val = by_month[Ym]
                if val < lb and stninfo.loc[id_]["id"] == 1:
                    meta = stninfo.loc[id_]
                    logging.warning(logs1.format(id_, val, Ym, lb, meta["id"], meta["t1"], meta["t2"]))
                monthly[iidx, yidx, midx] = val

    # Rows: station id, lon, lat, elev, then one row per month (stations in columns).
    outdf = pd.DataFrame(
        np.vstack([np.reshape(stnid, (1, -1)),
                   np.reshape(stninfo["lon"].values, (1, -1)),
                   np.reshape(stninfo["lat"].values, (1, -1)),
                   np.reshape(stninfo["elev"].values, (1, -1)),
                   np.reshape(monthly, (nstn, nyear * 12)).T])
    )
    outdf.index = ["stnid", "lon", "lat", "elev"] + dtimes
    # Promote the station-id row to column labels, then drop it from the body.
    outdf.columns = outdf.loc["stnid"]
    outdf = outdf.drop(index=["stnid"])

    os.makedirs(outd, exist_ok=True)
    outdf.to_csv("{}/smarker.csv".format(outd))

    return outdf


# Merge all monthly station files of `vname` into one table (also written to
# "{outd}/smarker.csv"); `subd` is left at its default.
mnd = mnf2onef(ind=ind, outd=outd, stnf=stnf, tperiod=tperiod, vname=vname)



In [253]:
# Show the merged table returned by mnf2onef (bare last expression -> rich display).
mnd

stnid,466900,466920,467490,466940,466990,467660,467590,467440,467080,467610,...,C0R270,C0S730,C0U750,C0W110,C0W120,C0W130,C0W140,C0W150,C0W160,C0W170
lon,121.449,121.515,120.684,121.74,121.613,121.155,120.746,120.316,121.757,121.373,...,120.362,121.475,121.953,119.981,119.468,119.323,118.409,118.329,119.45,120.484
lat,25.1649,25.0377,24.1457,25.1333,23.9751,22.7522,22.0039,22.566,24.764,23.0975,...,22.3322,22.6672,24.8417,25.9652,23.5626,23.4001,24.4883,24.4577,24.9944,26.3588
elev,19,6.3,84,26.7,16,9,22.1,2.3,7.2,33.5,...,51,110,398,16,58,18,21,42,49,42
199801,185.6,138.7,123.2,244.2,55,17.9,24,56.3,86.8,50.5,...,18.5,135.5,-999,-999,-999,-999,-999,-999,-999,-999
199802,422.3,424.5,372.8,541.5,195,121.3,13,71.1,263.8,95,...,48.5,95,-999,-999,-999,-999,-999,-999,-999,-999
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
201908,192,212.4,768,173.3,289,216.4,839.6,726.2,218,437.4,...,795.5,211.5,78.5,14,164.5,138,84,72.5,23,42.5
201909,657,377.1,133.5,659.1,228,202.1,356.5,247,514.6,158.7,...,144,238.5,209.5,16,0,0.5,7.5,2,15.5,79.5
201910,32.5,27.1,11.5,152.3,157,25.3,28,52.5,228.7,117,...,40,89.5,50,2.5,0.5,0.5,0,0,0,17
201911,46.5,13.8,0,219,109.5,8.4,2.5,2,136.1,40.1,...,7.5,72,65,0,0,4,0,0,0,4


In [213]:
# Load the station list as a DataFrame indexed by the "#SN" column.
stninfo = textloader.getGI(stnf)

# Bare last expression -> the notebook renders the table.
stninfo

Unnamed: 0_level_0,stnid,lon,lat,elev,t1,t2,id,chname
#SN,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,466900,121.4489,25.1649,19.0,1911010101,2018051099,1,淡水
2,466920,121.5149,25.0377,6.3,1911010101,2018051099,1,臺北
3,467490,120.6841,24.1457,84.0,1911010101,2018051099,1,臺中
4,466940,121.7405,25.1333,26.7,1911010101,2018051099,1,基隆
5,466990,121.6133,23.9751,16.0,1911010101,2018051099,1,花蓮
...,...,...,...,...,...,...,...,...
330,C0W130,119.3228,23.4001,18.0,2016070101,2018051099,2,花嶼
331,C0W140,118.4088,24.4883,21.0,2016100501,2018051099,2,金沙
332,C0W150,118.3290,24.4577,42.0,2016100501,2018051099,2,金寧
333,C0W160,119.4496,24.9944,49.0,2016100501,2018051099,2,烏坵


In [220]:
# Stack the four station attributes into one 2-D array, one attribute per row.
np.vstack([stninfo[col].values.reshape(1, -1)
           for col in ("stnid", "lon", "lat", "elev")])


(4, 334)

In [73]:
class textloader():
    def __init__(self, ind, outd="output", logd="log"):
        self.ind  = ind
        self.outd = outd
        
    @staticmethod
    def get_id(fname):
        try:
            bincode = codecs.open(fname, mode="r", encoding="big5")  # default encoding='utf-8', unicode transformation format
        except Exception as e :
            logging.error("textloader-getid-11: ", e)
            sys.exit(-1)

        stnlist_ = pd.read_csv(bincode, sep="\s+", header=0) 
        stnlist_.columns = ["#SN", "stnid", "lon", "lat", "elev", "t1", "t2", "id", "chname"]
#         return(stnlist_["stnid"].values)
        return(stnlist_["stnid"].values.tolist())
    
    @staticmethod
    def getGI(fname):
        try:
            bincode = codecs.open(fname, mode="r", encoding="big5")  # default encoding='utf-8', unicode transformation format
        except Exception as e :
            logging.error("textloader-getid-11: ", e)
            sys.exit(-1)

        stnlist_ = pd.read_csv(bincode, sep="\s+", header=0) 
        stnlist_.columns = ["#SN", "stnid", "lon", "lat", "elev", "t1", "t2", "id", "chname"]
    #         return(stnlist_["stnid"].values)
        stnlist_.set_index("#SN", inplace=True)
        return(stnlist_)
    
    
    
    
    @staticmethod
    def cmtval(ind, outd, stnid, syr, eyr, hrfp="hr_rr", mnfp="mn_rr", showarn=True):
        '''
            calculating cmt val for precp (hourly to monthly)
            inf: old hr format, header is stn_id, yyyymmdd, 01, ..., 24
            hrfp: prefix of hourly data (input) 
            mnfp: prefix of monthly data (output)
        '''
        
        mday = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
        
        hrfprefix = hrfp
        mnfprefix = mnfp 

        MNDA = np.ndarray((len(stnid), 12), dtype=np.float64)  # monthly data array
        NHR  = np.ndarray((len(stnid), 12), dtype=np.int64)    # number of hours in month

        MNDA.fill(0.0)
        NHR.fill(0)

        years = [syr + idx for idx in range(eyr - syr + 1)]

        miss_val = []
        for idx, id_ in enumerate(stnid):
            for YYYY in years:
                
                fname = "{0}_{1}_{2}.txt".format(hrfprefix, str(YYYY), id_)
                inf = os.path.join(ind, str(YYYY), fname)

                if not os.path.exists(inf):
                    if showarn:
                        logging.warning("textloader-cmtval-50: {} doesn't exist!".format(inf))
                    continue

                df = pd.read_csv(inf, header=0, sep="\s+")
                df["date"] = pd.to_datetime(df["yyyymmdd"], format="%Y%m%d")
                df.set_index("date", inplace=True)
                df.drop(["stn_id", "yyyymmdd"], axis=1, inplace=True)

                fid = open("{0}/MON/{1}_{2}_{3}.txt".format(outd.strip(), mnfprefix, str(YYYY), id_), "w")

                for mm in range(12):
                    fid.write("{0:04d}{1:02d} ".format(YYYY, mm + 1))
                    t1 = "{0:04d}{1:02d}01".format(YYYY, mm + 1)
                    t2 = "{0:04d}{1:02d}{2:02d}".format(YYYY, mm + 1, mday[mm])
                    if df.index[-1].is_leap_year and mm + 1 == 2:  # for February in leap year
                        t2 = "{0:04d}{1:02d}{2:02d}".format(YYYY, mm + 1, mday[mm] + 1)

                    submat = df.loc[t1:t2].values
                    missing_idx = np.where(submat < 0)
                    submat_ = submat[np.where(submat >= 0)]
                    if submat_.size <= 0:
                        MNDA[idx, mm] += 0.0
                        NHR[idx, mm] += 0
                        fid.write("{0:>8.1f} {1:>8d}\n".format(-999, 0))
                    else:
                        MNDA[idx, mm] += np.nansum(submat_)
                        NHR[idx, mm] += 1
                        fid.write("{0:>8.1f} {1:>8d}\n".format(np.nansum(submat_), submat_.size))

                fid.close()

                nparr = df.values
                miss_val_ = np.unique(nparr[nparr < 0])
                for val_ in miss_val_:
                    if val_ not in miss_val:
                        miss_val.append(val_)

        # cmtval output
        if not os.path.exists(outd + "/CV"):
            os.makedirs(outd + "/CV")

        with open("{0}/CV/CV_M_RR.txt".format(outd.strip()), "w") as fid:
            fid.write("#stnId ")
            for mm in range(12):
                fid.write("      {0:>02d} ".format(mm + 1))
            fid.write("\n")
            for idx, id_ in enumerate(stnid): 
                fid.write("{0:6s} ".format(id_))
                for mm in range(12):
                    if NHR[idx, mm] > 0:
                        MNDA[idx, mm] = MNDA[idx, mm] / float(NHR[idx, mm])
                    else:
                        MNDA[idx, mm] = -999.0
                    fid.write("{0:>8.1f} ".format(MNDA[idx, mm]))
                fid.write("\n")

        with open("{0}/CV/NCV_M_RR.txt".format(outd.strip()), "w") as fid:
            fid.write("#stnId ")
            for mm in range(12):
                fid.write("      {0:>02d} ".format(mm + 1))
            fid.write("\n")
            for idx, id_ in enumerate(stnid): 
                fid.write("{0:6s} ".format(id_))
                for mm in range(12):
                    if NHR[idx, mm] > 0:
                        fid.write("{0:>8d} ".format(NHR[idx, mm]))
                    else:
                        fid.write("{0:>8d} ".format(-999))
                fid.write("\n")

        return [MNDA, NHR, miss_val]
    
    @staticmethod
    def _fdf(fname):
#         fname = "{0}/{1}/{2}_{1}_{3}.txt".format(ind, YYYY, hrfp, id_)
        if not os.path.exists(fname):
            logging.warning("textloader-hrf-140: {} doesn't exist!".format(fname))
            return None

        df = pd.read_csv(fname, header=0, sep="\s+")
        df["date"] = pd.to_datetime(df["yyyymmdd"], format="%Y%m%d")
        df.set_index("date", inplace=True)
        df.drop(["stn_id", "yyyymmdd"], axis=1, inplace=True)
        return df
    
    
    @staticmethod
    def hrf(ind, stnid, sdtime, edtime, hrfp="hr_rr", lb=0.0):

        '''
            load hourly data 
        '''
        
        syr = math.floor(sdtime / 10**6)
        eyr = math.floor(edtime / 10**6)

        YYYYmmddHH0000 = wg.YYYYmmddHH0000(syr, eyr, 1)
        YYYYmmddHH_ = [math.floor(YmdH / 10**4) for YmdH in YYYYmmddHH0000]

        sidx = YYYYmmddHH_.index(sdtime)
        eidx = YYYYmmddHH_.index(edtime)
        YYYYmmddHH = YYYYmmddHH_[sidx:(eidx + 1)]
        logging.info("textloader-hrf-138: sdtime: {}, edtime: {}, nsample: {}".format(YYYYmmddHH[0], YYYYmmddHH[-1], len(YYYYmmddHH)))

        # %%timeit -n 1

        nyr = eyr - syr + 1
        nstn = len(stnid)
        nsample = len(YYYYmmddHH)

        
        obs_ = np.ndarray(shape=(nstn, nyr, 366, 24))  # datetime to array index, don't use datetime to search index in YYYYmmddHH (not efficient)
        obs_.fill(-999)
        
        obs = np.ndarray(shape=(nstn, nsample))
        obs.fill(-999)

        years = [syr + i for i in range(nyr)]

        for ididx, id_ in enumerate(stnid):
#             dfs = []
            for yidx, YYYY in enumerate(years):
                fname = "{0}/{1}/{2}_{1}_{3}.txt".format(ind, YYYY, hrfp, id_)
                if not os.path.exists(fname):
                    logging.warning("textloader-hrf-156: {} doesn't exist!".format(fname))
                    continue

                df = pd.read_csv(fname, header=0, sep="\s+")
#                 df["date"] = pd.to_datetime(df["yyyymmdd"], format="%Y%m%d")
                df["date"] = df["yyyymmdd"]

                df.set_index("date", inplace=True)

                df.drop(["stn_id", "yyyymmdd"], axis=1, inplace=True)
#                 dfs.append(df)

#                 dtimes = df.index.values
                dtimes = df.index.tolist()

                for dtime in dtimes:
                  
                    Y_, m_, d_, H_ = wg.datetime_split(dtime, hour_system=1) 

                    for H_ in range(24):
                        ttuple = datetime.strptime("{0:04d}{1:02d}{2:02d}".format(Y_, m_, d_), "%Y%m%d").timetuple()
#                         logging.debug("hrf-209: {0}, {1:04d}-{2:02d}-{3:02d} {4:02d}:00:00".format(dtime, Y_, m_, d_, H_))
                        obs_[ididx, yidx, ttuple[7] - 1, H_] = df.loc[dtime][H_]
                    
            # reshape
            idx = 0
            dtidx = 0
            for yidx, YYYY in enumerate(years):
                yday = 365
                if calendar.isleap(YYYY):
                    yday += 1
                logging.debug("hrf-219: check yday={0}, dtidx={1}".format(yday, dtidx))

                for didx in range(yday):
                    for hidx in range(24):
                        YmdH = YYYYmmddHH_[idx]
                        idx += 1
                        if YYYYmmddHH[0] <= YmdH and YmdH <= YYYYmmddHH[-1]:
                            obs[ididx, dtidx] = obs_[ididx, yidx, didx, hidx]
                            dtidx += 1

                             
#             if len(dfs) == 0:
#                 continue

#             df = pd.concat(dfs, axis=0)

#             dtidx = 0
#             for YmdH in YYYYmmddHH:
#                 Ymd = math.floor(YmdH / 100.0)
#                 HH = YmdH - Ymd * 100 - 1
#                 obs[ididx, dtidx] = df.loc[str(Ymd)][HH]
#                 dtidx += 1

        quantity = np.where(obs >= lb)[0].shape[0] / obs.size

        logging.info("textloader-hrf-quantity: {}".format(quantity))

        return [obs, stnid, YYYYmmddHH, quantity]
    
    @staticmethod
    def mnf(inf, stnid, sdtime, edtime, vname="VNAME", outd=None, cvd=None, lb=0.0, nmn=10):

        '''
            load monthly data "db_data_{vname}.txt and 
                (1) reformat it to "{outd}/MON/{YYYY}/MONTHLY_{vname}_{YYYY}_{id_}.txt" if {outd} is not None
                (2) calculate cmt val and output to {cvd} if {cvd} is not None
        '''
        
        syr  = math.floor(sdtime / 10**2)
        eyr  = math.floor(edtime / 10**2)
        nyr  = eyr - syr + 1
        nstn = len(stnid)

        YYYYmm_ = wg.YYYYmm(syr, eyr)
        sidx = YYYYmm_.index(sdtime)
        eidx = YYYYmm_.index(edtime)
        YYYYmm = YYYYmm_[sidx:(eidx + 1)]
        nsample = len(YYYYmm)

        # ndarray declare
        cv = np.ndarray((nstn, nyr, 12))
        cv.fill(-999)

        obs = np.ndarray((nstn, nsample))
        obs.fill(-999)

        # load db data
        df = pd.read_csv(inf, sep="\s+", header=0)
        df.set_index("datetime", inplace=True)

        dfdt = df.index.to_list()
        sidx = dfdt.index(sdtime)
        eidx = dfdt.index(edtime)

        for ididx, id_ in enumerate(stnid):
            if id_ in df.columns:
                obs[ididx, :] = df[id_].values[sidx:(eidx + 1)]
                for Ym in YYYYmm:
                    Y_ = math.floor(Ym / 100.0)
                    m_ = Ym - Y_ * 100
                    idx = dfdt.index(Ym)
                    cv[ididx, Y_ - syr, m_ - 1] = df.loc[Ym][id_]
        
        if outd is not None:
            for yridx in range(nyr):
                Y_ = syr + yridx
                
                if not os.path.exists("{}/MON/{}/{}".format(outd, vname, Y_)):
                    os.makedirs("{}/MON/{}/{}".format(outd, vname, Y_))
                
                for ididx, id_ in enumerate(stnid):
                    with open("{0}/MON/{1}/{2}/MONTHLY_{1}_{2}_{3}.txt".format(outd, vname, Y_, id_), "w") as fid:
                        for m_ in range(12):
                            fid.write("{0:04d}{1:02d}{2:9.1f}\n".format(Y_, m_ + 1, cv[ididx, yridx, m_]))

        if cvd is not None:
            cv[np.where(cv < lb)] = np.nan
            cmt_val = np.ndarray((nstn, 12))
            cmt_val.fill(-999)
            mncounter = np.zeros((nstn, 12), dtype=np.int32)
            for ididx in range(nstn):
                cmt_val[ididx, :] = np.nanmean(cv[ididx, :, :], axis=0)
                mncounter[ididx, :] = (~np.isnan(cv[ididx, :, :])).sum(axis=0)

            cmt_val[np.isnan(cmt_val)] = -999

            cmt_val[np.where(mncounter < nmn)] = -999
            
            if not os.path.exists(cvd):
                os.makedirs(cvd)

            cvfid  = open("{0}/CV_MN_{1}.txt".format(cvd, vname), "w")
            ncvfid = open("{0}/NCV_MN_{1}.txt".format(cvd, vname), "w")

            cvfid.write("#stnId")
            ncvfid.write("#stnId")
            for mnidx in range(12):
                cvfid.write("{0:7s}{1:02d}".format(" ", mnidx + 1))
                ncvfid.write("{0:7s}{1:02d}".format(" ", mnidx + 1))
            cvfid.write("\n")
            ncvfid.write("\n")

            for ididx in range(nstn):
                cvfid.write(stnid[ididx])
                ncvfid.write(stnid[ididx])
                for mnidx in range(12):
                    cvfid.write("{0:9.1f}".format(cmt_val[ididx, mnidx]))
                    ncvfid.write("{0:9d}".format(mncounter[ididx, mnidx]))
                cvfid.write("\n")
                ncvfid.write("\n")

            cvfid.close()
            ncvfid.close()

        quantity = np.where(obs >= lb)[0].shape[0] / obs.size

        logging.info("textloader-mnf-quantity: {}".format(quantity))

        return [obs, stnid, YYYYmm, quantity]

    @staticmethod
    def ossef(ind, stnid, nvar, syr, eyr, fprefix = "OSSE_"):

        '''
            load output of osse for hourly, daily and monthly data
        '''

        nyr = eyr - syr + 1
        Ymd = wg.YYYYmmdd(syr, eyr)
        ndt = len(Ymd)

        HH = [1 + i for i in range(24)]
        YYYY = [syr + i for i in range(nyr)]

        osseH = np.ndarray((24, 366, nyr, nstn, nvar))
        osseH.fill(-999)
        logging.info("textloader-ossef-298: load osse for hourly data")
        for dt_idx, Ymd_ in enumerate(Ymd):
            dtuple = datetime.strptime(str(Ymd_), "%Y%m%d").timetuple()
            Y_ = dtuple[0] 
            M_ = dtuple[1]
            wday = dtuple[6]
            wg.display_progress(dt_idx, ndt, 2)
            for H_ in HH:
                df = pd.read_csv("{0}/{1}{2}{3:02d}.txt".format(ind, fprefix, Ymd_, H_), sep="\s+", header=None)
                if nvar == 3:
                    df.columns = ["id", "lon", "lat", "elev", "obs", "est", "thmval"]
                else:
                    df.columns = ["id", "lon", "lat", "elev", "obs", "est"]
                df.set_index("id", inplace=True)
                for id_idx, id_ in enumerate(stnid):
                    osseH[H_ - 1, wday - 1, Y_ - syr, id_idx, 0] = df.loc[id_]["obs"]
                    osseH[H_ - 1, wday - 1, Y_ - syr, id_idx, 1] = df.loc[id_]["est"]
                    if nvar == 3:
                        osseH[H_ - 1, wday - 1, Y_ - syr, id_idx, 2] = df.loc[id_]["thmval"]

        osseD = np.ndarray((366, nyr, nstn, nvar))
        osseD.fill(-999)
        logging.info("textloader-ossef-320: load osse for daily data")
        for dt_idx, Ymd_ in enumerate(Ymd):
            dtuple = datetime.strptime(str(Ymd_), "%Y%m%d").timetuple()
            Y_ = dtuple[0] 
            M_ = dtuple[1]
            wday = dtuple[6]
            wg.display_progress(dt_idx, ndt, 2)
            df = pd.read_csv("{0}/{1}{2}.txt".format(ind, fprefix, Ymd_), sep="\s+", header=None)
            if nvar == 3:
                df.columns = ["id", "lon", "lat", "elev", "obs", "est", "thmval"]
            else:
                df.columns = ["id", "lon", "lat", "elev", "obs", "est"]
            df.set_index("id", inplace=True)
            for id_idx, id_ in enumerate(stnid):
                osseD[wday - 1, Y_ - syr, id_idx, 0] = df.loc[id_]["obs"]
                osseD[wday - 1, Y_ - syr, id_idx, 1] = df.loc[id_]["est"]
                if nvar == 3:
                    osseD[wday - 1, Y_ - syr, id_idx, 2] = df.loc[id_]["thmval"]

        osseM = np.ndarray((12, nyr, nstn, nvar))
        osseM.fill(-999)
        lcounter = 0  # for progress display
        logging.info("textloader-ossef-342: load osse for monthly data")
        for yr_idx, Y_ in enumerate(YYYY):
            for m_ in range(12):
                wg.display_progress(lcounter, nyr * 12, 2)
                lcounter += 1
                df = pd.read_csv("{0}/{1}{2}{3:02d}.txt".format(ind, fprefix, Y_, m_ + 1), sep="\s+", header=None)
                if nvar == 3:
                    df.columns = ["id", "lon", "lat", "elev", "obs", "est", "thmval"]
                else:
                    df.columns = ["id", "lon", "lat", "elev", "obs", "est"]
                df.set_index("id", inplace=True)
                for id_idx, id_ in enumerate(stnid):
                    osseM[m_, yr_idx, id_idx, 0] = df.loc[id_]["obs"]
                    osseM[m_, yr_idx, id_idx, 1] = df.loc[id_]["est"]
                    if nvar == 3:
                        osseM[m_, yr_idx, id_idx, 2] = df.loc[id_]["thmval"]

        return [osseH, osseD, osseM]

# staticmethod test

In [86]:
# Manual smoke test of the textloader static methods against the NAS data set.
# Run inside the notebook this rebinds kernel-wide names (ind, outd, stninfo,
# stnid, sdtime, ...) that other cells also use, so re-run those cells afterwards.
if __name__ == "__main__":
    
    logging.getLogger().setLevel(logging.DEBUG)

    # cmtval
    # Settings for the hourly -> monthly climate-value step (the call itself is disabled).
    ind = "/NAS-129/users1/T1/DATA/YY/ORG/HR1/RR"
    outd = "/NAS-129/users1/T1/DATA/YY/ORG"
    stninfo = "/home/yuzhe/CODE/ProgramT1/GRDTools/SRC/RES/GI/RR_analysis_grid_stationlist.txt"
    fprefix = "hr_rr"
    syr = 1998
    eyr = 2018
    # stnid = textloader.get_id(stninfo)
    # MNDA, NHR, miss_val = textloader.cmtval(ind, outd, stnid, syr, eyr, hrfp=fprefix, showarn=False)
    
    # load monthly data (format: all in one) and calculate climate values
    stninfo = "/home/yuzhe/CODE/ProgramT1/GRDTools/SRC/RES/GI/RR_analysis_grid_stationlist.txt"
    # Alternative input tables; only the uncommented one is used.
#     inf = "/NAS-129/users1/T1/DATA/YY/ORG/MON/db_data_Precp.txt"
#     inf = "/NAS-129/users1/T1/DATA/YY/ORG/MON/Precp_1981_1997.txt"
#     inf = "/NAS-129/users1/T1/DATA/YY/ORG/MON/Precp_1998_2019.txt"
#     inf = "/NAS-129/users1/T1/DATA/YY/ORG/MON/Precp_1981_2019.txt"
    inf = "/NAS-129/users1/T1/DATA/YY/CXN/MON/Precp_1981_2019.txt"

#     outd = '/NAS-129/users1/T1/DATA/YY/ORG'
    outd = '/NAS-129/users1/T1/DATA/YY/CXN'
    cvd = '/NAS-129/users1/T1/DATA/YY/ORG/CV'  # 1998 ~ 2018
#     cvd = '/NAS-129/users1/T1/DATA/YY/ORG/CV/CV_1981_1997'
#     cvd = '/NAS-129/users1/T1/DATA/YY/ORG/CV/CV_1998_2019'

#     sdtime = 198101
#     edtime = 201912
    
    # First pass: 1998-2018, writes the per-year monthly files and the climate values.
    sdtime = 199801
    edtime = 201812
    stnid = textloader.get_id(stninfo)
    obs, stnid, YYYYmm, quantity = textloader.mnf(inf, stnid, sdtime=sdtime, edtime=edtime, vname="RR", outd=outd, cvd=cvd, lb=0.0)
    
    # cvd=None
    # Second pass: full 1981-2019 range, rewrites the monthly files only (no climate values).
    sdtime = 198101
    edtime = 201912        
    obs, stnid, YYYYmm, quantity = textloader.mnf(inf, stnid, sdtime=sdtime, edtime=edtime, vname="RR", outd=outd, cvd=None, lb=0.0)

    # 2. hrf test
    # Only the inputs are prepared; the hrf call below is commented out.
    ind = "/NAS-129/users1/T1/DATA/YY/ORG/HR1/RR"
    sdtime = 1998010101
    edtime = 1999013124
    hrfp = "hr_rr"
    stnid = textloader.get_id(stninfo)
#     obs, stnid, YYYYmmddHH, quantity = textloader.hrf(ind, stnid, sdtime=sdtime, edtime=edtime, hrfp=hrfp)
    
    # 3. ossef 
    # NOTE(review): the return value of get_pentad is discarded - confirm the call is still needed.
    wg.get_pentad(20120302, 123)    
    ind = "/NAS-129/users1/T1/DATA/GRD/OSSE/UNC/OSSE/PP"
    gid = "/NAS-129/users1/T1/DATA/YY/GI/PP_analysis_grid_stationlist.txt"
    syr = 1998
    eyr = 2017
    nvar = 3
#     stnid = textloader.get_id(gid)
#     osseH, osseD, osseM = textloader.ossef(ind, stnid, nvar, 2011, 2011)

INFO:root:textloader-mnf-quantity: 0.9541868643665051
INFO:root:textloader-mnf-quantity: 0.6772864527355545
