# Generation of APEX Weather Files
This notebook contains scripts to format climate data to .dly and .hly files for APEX.

In [6]:
import os
import requests
import pandas as pd
from io import StringIO
import datetime
import shutil
import subprocess
import numpy as np

def convert_julian(year,doy):
    date = datetime.datetime(year, 1, 1) + datetime.timedelta(days=doy - 1)
    return date.month, date.day

## From NASA POWER

### Load data using API

In [57]:
# set parameters for API search

# edit these
timestep = 'daily' # make sure that param_subset matches
longitude = '-80.0598'
latitude = '40.443'
start = '20130101'
end = '20221231'
elevation = '372.8'

# data needed for hly and dly files
params_hly = [
    'PRECTOTCORR'
]
    
params_dly = [
    'T2M_MAX',
    'T2M_MIN',
    'RH2M',
    'ALLSKY_SFC_SW_DWN',
    'PRECTOTCORR',
    'WS10M'
]
# USE PARAMS
param_subset = ','.join(params_hly) # change argument based on timestep used

In [60]:
url = f'https://power.larc.nasa.gov//api/temporal/{timestep}/point?parameters={param_subset}&community=AG&longitude={longitude}&latitude={latitude}&start={start}&end={end}&format=CSV'

response = requests.get(url)

In [61]:
print(url)

https://power.larc.nasa.gov//api/temporal/daily/point?parameters=PRECTOTCORR&community=AG&longitude=-80.0598&latitude=40.443&start=20130101&end=20221231&format=CSV


In [55]:
data = response.text
print(data)

-BEGIN HEADER-
NASA/POWER Source Native Resolution Hourly Data 
Dates (month/day/year): 01/01/2013 through 12/31/2022 in LST
Location: Latitude  40.443   Longitude -80.0598 
Elevation from MERRA-2: Average for 0.5 x 0.625 degree lat/lon region = 332.12 meters
The value for missing source data that cannot be computed or is outside of the sources availability range: -999 
Parameter(s): 
PRECTOTCORR     MERRA-2 Precipitation Corrected (mm/hour) 
-END HEADER-
YEAR,MO,DY,HR,PRECTOTCORR
2013,1,1,0,2.47
2013,1,1,1,2.35
2013,1,1,2,2.61
2013,1,1,3,2.84
2013,1,1,4,2.78
2013,1,1,5,3.45
2013,1,1,6,4.32
2013,1,1,7,6.05
2013,1,1,8,4.56
2013,1,1,9,2.82
2013,1,1,10,1.65
2013,1,1,11,1.89
2013,1,1,12,2.04
2013,1,1,13,1.8
2013,1,1,14,0.96
2013,1,1,15,0.42
2013,1,1,16,0.46
2013,1,1,17,0.42
2013,1,1,18,0.42
2013,1,1,19,0.62
2013,1,1,20,0.33
2013,1,1,21,0.13
2013,1,1,22,0.07
2013,1,1,23,0.06
2013,1,2,0,0.03
2013,1,2,1,0.02
2013,1,2,2,0.01
2013,1,2,3,0.0
2013,1,2,4,0.0
2013,1,2,5,0.0
2013,1,2,6,0.0
2013,1,2,

### .DLY format

In [None]:

def nasa2apex(nasa_data, dly_fp, dly_fn, wxpm_fp, hly, spinup, dat_update):
    
    '''
    formats nasa power data to APEX .dly
    
    parameters:
        nasa_data: data downloaded using API above
        output_fp: directory path to output .dly
        output_fn: filename for output .dly 
        wpxpm_fp: for monthly (wp1) generation, fp to wxpm executable ("None" to override)
                *note: file must contain both wxpm.exe and wxpmrun.DAT
        spinup[y/n]: y duplicates first year of data 4x to account for model spinup time ("None" to override)
        dat_update[y/n]: y updates .DAT file with date from API ("None" to override)
                *note: need to have loaded API data before running this function (uses variables from API search)
    ''' 

    # read data into df
    dly_df = pd.read_csv(StringIO(nasa_data), delimiter=',', skiprows=14)

    # add columns MONTH and DAY
    dly_df['MONTH'], dly_df['DAY'] = zip(*dly_df.apply(lambda row: convert_julian(int(row['YEAR']), int(row['DOY'])), axis=1))
    
    # set column names to match .dly
    dly_colnames = {'T2M_MAX':'TMAX',
                    'T2M_MIN':'TMIN',
                    'PRECTOTCORR':'PRCP',
                    'RH2M':'RH',
                    'ALLSKY_SFC_SW_DWN':'SRAD',
                    'WS10M':'WSPD'}
    dly_df = dly_df.rename(columns=dly_colnames)
    
    # rearrange columns
    dly_df = dly_df[['YEAR','MONTH','DAY','SRAD','TMAX','TMIN','PRCP','RH','WSPD']]
    
    # build path to .dly file
    dly_path = os.path.join(dly_fp, dly_fn)

    # write .dly file 
    with open(dly_path, 'w') as file:
        for _, row in dly_df.iterrows():
            file.write(f'{int(row['YEAR']):6d} {int(row['MONTH']):3d} {int(row['DAY']):3d} {float(row['SRAD']):5.1f} {float(row['TMAX']):5.1f} {float(row['TMIN']):5.1f} {float(row['PRCP']):5.1f} {float(row['RH']):5.1f} {float(row['WSPD']):5.1f}\n')

    
    # generate wp1 file (if specified)
    if wxpm_fp is not None:
       dly2wp1(dly_fp, dly_fn, wxpm_fp)

    # generate .hly file via equal disaggregation (if specified)
    if hly is not None:
        dly2hly(dly_fp, dly_fn)
    
    # duplicate first year 4x for spinup (if specified)
    if spinup is not None:
        add_spinup(dly_fp, dly_fn)
        if hly is not None:
            hly_fn = dly_fn.replace('.dly', '.hly')
            add_spinup(dly_fp, hly_fn)
        
    # write to .DAT files
    

nasa_data = data
dly_fp = 'C:\\APEX\\apex1501-20241028\\other tests'
dly_fn = 'test1.dly'
wxpm_fp = 'C:\\APEX\\wxpm-03082019\\'
nasa2apex(nasa_data, dly_fp, dly_fn, wxpm_fp, 1, 1)

In [15]:
dly_df = pd.read_csv(StringIO(data), delimiter=',', skiprows=14)

def dly2wp1(dly_fp, dly_fn, wxpm_fp):
    
    '''
    converts .dly file to .wp1 using wxpm
    
    parameters:
        dly_fp: path to file containing source .dly (and destination for wp1)
        dly_fn: .dly filename
        wxpm_fp: path to file containing wxpmrun.dat and wxpm.exe
    '''
    
    # set file paths
    src_dly = os.path.join(dly_fp, dly_fn)
    wxpm_dly = os.path.join(wxpm_fp, dly_fn)
    wxpm_run = os.path.join(wxpm_fp, 'wxpmrun.dat')
    wxpm_exe = os.path.join(wxpm_fp, 'wxpm.exe')
    wp1_fn = os.path.splitext(dly_fn)[0] + '.wp1'
    out_wp1 = os.path.join(dly_fp, wp1_fn)

    # copy DLY file into WXPM folder
    shutil.copy(src_dly, wxpm_dly)

    # update wxpmrun.dat
    df = pd.read_csv(src_dly)
    yr1 = df.iloc[:,0].min() # get start year of data
    base_name = os.path.splitext(dly_fn)[0] # get name of dly file without extension
    with open(wxpm_run, 'r') as f:
        lines = f.readlines()
    lines[0] = f"{base_name} {yr1}\n"
    with open(wxpm_run, 'w') as f:
        f.writelines(lines)

    # run wxpm.exe
    subprocess.run([wxpm_exe], cwd=wxpm_fp)

    # copy generated wp1 to dly folder
    shutil.copy(os.path.join(wxpm_fp, wp1_fn), out_wp1)


In [25]:

dly_fp = 'C:\\APEX\\apex1501-20241028\\other tests'
dly_fn = 'TX1136.dly'
wxpm_fp = 'C:\\APEX\\wxpm-03082019'

dly2wp1(dly_fp, dly_fn, wxpm_fp)

#### .DLY and HLY with spinup time (4x first year)

In [35]:
def add_spinup(fp,fn):
    
    '''
    adds spinup time to .dly or .hly file (4x first year)
    
    parameters:
        fp: path to weather file
        fn: name of weather file
    '''

    file_path = os.path.join(fp, fn)

    # read data in as df
    weather_df = pd.read_csv(file_path, header=None, sep='\s+')
    if file_path.split('.')[1] == 'dly':
        weather_df.columns =['YEAR','MONTH','DAY','SRAD','TMAX','TMIN','PRCP','RH','WSPD']
        file_type = 'dly'
    if file_path.split('.')[1] == 'hly':
        weather_df.columns =['YEAR','MONTH','DAY','HOUR','RFDT']
        file_type = 'hly'

    # get first year of data (column 1)
    yr1 = weather_df.iloc[:,0].min()

    # duplicate first year 3x
    yr1_data = weather_df[weather_df.iloc[:,0] == yr1]
    duplicated_data = pd.concat([yr1_data]*3, ignore_index=True)

    # add duplicated data to existing file
    weather_df = pd.concat([duplicated_data, weather_df], ignore_index=True)

    # write updated file
    # new file name keeps the same name (and extension) but adds '_spinup'
    spinup_fn = fn.split('.')[0] + '_spinup.' + fn.split('.')[1]
    spinup_path = os.path.join(fp, spinup_fn)

    # check file extension of 
    with open(spinup_path, 'w') as file:
        for _, row in weather_df.iterrows():
            if file_type == 'dly':
                file.write(f'{int(row['YEAR']):6d} {int(row['MONTH']):3d} {int(row['DAY']):3d} {float(row['SRAD']):5.1f} {float(row['TMAX']):5.1f} {float(row['TMIN']):5.1f} {float(row['PRCP']):5.1f} {float(row['RH']):5.1f} {float(row['WSPD']):5.1f}\n')
            if file_type == 'hly':
                file.write(f'{int(row['YEAR']):4d} {int(row['MONTH']):4d} {int(row['DAY']):4d} {int(row['HOUR']):10d} {float(row['RFDT']):10f}\n')
                

  weather_df = pd.read_csv(file_path, header=None, sep='\s+')


### .HLY format

#### .HLY from .DLY

In [22]:
def dly2hly(dly_fp, dly_fn):

    # set path to dly file
    dly_path = os.path.join(dly_fp, dly_fn)

    # read dly file into df
    dly_df = pd.read_csv(dly_path, header=None, sep='\s+')
    dly_colnames = ['YEAR','MONTH','DAY','SRAD','TMAX','TMIN','PRCP','RH','WSPD']
    dly_df.columns = dly_colnames

    # create hly df where each row is duplicated 24 times
    hly_df = dly_df.loc[dly_df.index.repeat(24)].copy()

    # add column for hour of the day
    hly_df['HOUR'] = np.tile(np.arange(1,25), len(dly_df))

    # create new column for hourly precip
    hly_df['RFDT'] = hly_df['PRCP']/24
    hly_df['RFDT'] = hly_df.groupby(['YEAR','MONTH','DAY'])['RFDT'].cumsum() # cumulative

    # drop unncessary columns
    hly_df_filtered = hly_df.drop(columns=['SRAD','TMAX','TMIN','PRCP','RH','WSPD'])

    # build path to hly file
    hly_fn = os.path.splitext(dly_fn)[0] + '.hly'
    hly_path = os.path.join(dly_fp, hly_fn)

    # format and write into hly file
    with open(hly_path, 'w') as file:
            for _, row in hly_df_filtered.iterrows():
                file.write(f'{int(row['YEAR']):4d} {int(row['MONTH']):4d} {int(row['DAY']):4d} {int(row['HOUR']):10d} {float(row['RFDT']):10f}\n')





  dly_df = pd.read_csv(dly_path, header=None, sep='\s+')


### Write to DAT files


In [47]:
dly_fn = 'test1.dly'

def write2dat(fp, data_fn, dat_fn):
    '''
    writes the data to corresponding .DAT input file
    
    fp: path where data and .dat file are located
    data_fn: name of data file (i.e. .dly)
    dat_fn: name of corresponding .dat file to be written to

    '''
    # set path to .DAT file
    dat_path = os.path.join(dly_fp, dat_fn)

    dat_df = pd.read_csv(dat_path, header=None, sep='\s+')
    dat_df

    # check data file type
    if file_path.split('.')[1] == 'hly':
        # add a new row to dat_df, where column 0 is equal to the index +1 and column 1 is equal to the data file name
        new_row = {len(dat_df) + 1,data_fn}
        dat_df = pd.concat([dat_df, pd.DataFrame([new_row])], ignore_index=True)
        # write updated dat file
        dat_df.to_csv(dat_path, header=None, index=False, sep='\s+')
    
    if file_path.split('.')[1] == 'dly':
        

    dat_df




  dat_df = pd.read_csv(dat_path, header=None, sep='\s+')


Unnamed: 0,0,1
0,1,RG69B.HLY
1,2,RG70.HLY
2,3,RG75A.HLY
3,4,test1.dly


In [None]:
dat_fn = 'WDLSTCOM.DAT'
dat_path = os.path.join(dly_fp, dat_fn)

dly_dat_colspecs = [(1,6),(6,21)]
dat_df = pd.read_fwf(dat_path, colspecs=dly_dat_colspecs, sep='\s+')

dat_df

  dat_df = pd.read_csv(dat_path, header=None, sep='\s+')
  dat_df = pd.read_csv(dat_path, header=None, sep='\s+')


ParserError: Error tokenizing data. C error: Expected 9 fields in line 49, saw 10
