This notebook preprocesses raw Trisonica (TRS) data to convert its reference system to the EP one.

In [None]:
from pathlib import Path
import numpy as np
from multiprocessing import Pool
import logging as log
from functools import partial
from wind_tools import get_wind_dir, filter_by_wind_dir_single, mod
from scipy.spatial.transform import Rotation as R
log.basicConfig(level=log.INFO) # INFO level shows the "saved file" messages logged by extract
# name_re = r"*_TRS_M00506_com3.raw"
# Directory that main() globs for the raw input files
in_dir = Path("2020_data/data_20200724_final_field_install/raw")
# Directory where extract() writes the preprocessed CSV files
out_dir = Path("2020_data/data_20200724_final_field_install/preprocessed")
u, v, w = 0, 1, 2 # column indices of the wind components in the data arrays

Utility functions to rotate the wind components; a naive and slow approach, but it works.

In the TRS default reference system, the u and v axes are inverted compared to the EP one; fix this.

In [None]:
def process_m506(data):
    """Flip the sign of the u and v columns of an M506 data array.

    The Trisonica and EP coordinate systems differ, so both horizontal
    components are inverted. The array is modified in place and also returned.

    Parameters
    ----------
    data : 2-D array; the columns indexed by ``u`` and ``v`` are negated.

    Returns
    -------
    The same array object that was passed in.
    """
    for axis in (u, v):
        data[:, axis] = -1 * data[:, axis]
    return data

In [None]:
# Rotation applied to the M507 wind vectors. Angles are in degrees; the
# uppercase 'XYZ' sequence means intrinsic rotations in scipy.
rot_m507 = R.from_euler('XYZ', [-100.,    0.,   32.], degrees=True)

def process_m507(data):
    """Invert the u axis of M507 data and rotate the wind vector with ``rot_m507``.

    The input array is left untouched: all work is done on a copy, so calling
    this function twice on the same array gives the same result.

    Parameters
    ----------
    data : 2-D array whose columns ``u``, ``v`` and ``w`` hold the wind
        components. Any other column (e.g. temperature) is carried over
        unchanged.

    Returns
    -------
    A new array of the same shape with the transformed wind components.
    """
    rot_data = data.copy()  # work on a copy; also preserves the temperature column
    rot_data[:, u] = -1 * rot_data[:, u]
    rot_data[:, [u, v, w]] = rot_m507.apply(rot_data[:, [u, v, w]])
    return rot_data


In [None]:
start_angle = 230 # reference direction passed to filter_by_wind_dir_single (presumably degrees - TODO confirm)
range_angle = 30 # to allow for errors in field installation
wm_basename = '_WM_174605_com1.raw' # appended to the date prefix to build the WM file name
wm_offset = 310 # added to the WM wind direction before it is wrapped with mod()

def wm_dir_filter(f):
    """Build the wind-direction filter from the WM file recorded alongside ``f``.

    ``f`` is the path of another anemometer's file for the same period; the WM
    file is looked up in the same directory using the date prefix of ``f``.

    Returns the result of ``filter_by_wind_dir_single`` for the WM wind
    direction (offset by ``wm_offset``), ``start_angle`` and ``range_angle``.
    """
    date_prefix = f.name[:13]  # the first 13 characters are the date info
    wm_file = f.parent / (date_prefix + wm_basename)
    wm_cols = np.genfromtxt(wm_file, usecols=(2, 3), invalid_raise=False, delimiter=',')
    first_comp = wm_cols[:, 0]
    second_comp = wm_cols[:, 1]
    wind_dir = mod(get_wind_dir(first_comp, second_comp) + wm_offset)
    return filter_by_wind_dir_single(wind_dir, start_angle, range_angle)

def get_m507_filtered(filt):
    """Return a processor that applies ``process_m507`` and masks rejected rows.

    ``filt`` is a boolean sequence; rows where it is false are overwritten
    with the no-data value -9999. Only the first ``min(len(data), len(filt))``
    rows are masked; any rows beyond the filter's length are left as processed.
    """
    def filtered_m507(data):
        processed = process_m507(data)
        n_rows = min(len(data), len(filt))
        rejected = np.logical_not(filt[:n_rows])
        head = processed[:n_rows]  # a view, so the assignment below lands in `processed`
        head[rejected] = -9999.  # set to no value where the filter is not true
        return processed
    return filtered_m507
    
def extract_m507_filtered(usecols, processor, f, name_suffix=""):
    """Extract an M507 file, masking rows rejected by the WM wind-direction filter.

    Has the same signature as ``extract`` so it can be used as an 'extractor'.
    The ``processor`` argument is accepted but ignored: the processor actually
    used is built from the wind filter computed for ``f``.
    """
    wind_mask = wm_dir_filter(f)
    extract(usecols, get_m507_filtered(wind_mask), f, name_suffix)
    


## Testing M507 wind filter

In [None]:
# Interesting period for which EP data is available. The WM file itself is
# passed in: wm_dir_filter rebuilds the WM path from the 13-character date
# prefix, so it resolves back to this same file.
testf = in_dir / "20200724-2200_WM_174605_com1.raw"
tfilt = wm_dir_filter(testf)

In [None]:
tfilt[:1210]

array([False, False, False, ..., False, False, False])

In [None]:
# Level-1 raw dataset from the EddyPro output folder, used to cross-check the filter
ep_outf = in_dir / "../processed/eddypro_raw_datasets/level_1/20200724-2202_raw_dataset_2020-08-07T163138_adv.txt"

In [None]:
# Skip the first 10 lines of the file (header) and load the rest as floats
ep_out = np.genfromtxt(ep_outf, skip_header=10)

In [None]:
# Replace the -9999 no-data marker with NaN so it can be excluded from statistics
ep_out[ep_out==-9999.] = np.nan

In [None]:
ep_out[181]

array([nan, nan, nan, nan])

In [None]:
# Wind direction from the first two EP columns, with the WM offset applied
# the same way wm_dir_filter does
wd = mod(get_wind_dir(ep_out[:, 0], ep_out[:, 1]) + wm_offset)

In [None]:
# Same wind direction without the offset, for comparison with `wd`
wdr = get_wind_dir(ep_out[:, 0], ep_out[:, 1])

In [None]:
wdr[~np.isnan(wdr)].mean()

19.65297272650223

In [None]:
wd[~np.isnan(wd)].mean()

329.65297272650224

In [None]:
def extract(usecols, processor, f, name_suffix=""):
    """Read one raw file, run ``processor`` on it and save the result as CSV.

    Parameters
    ----------
    usecols : columns of the raw file to load (passed to ``np.genfromtxt``).
    processor : callable taking the loaded array and returning the array to save.
    f : path of the raw input file.
    name_suffix : text inserted between the input file name (minus its last
        four characters) and the ``.csv`` extension.

    The output is written to ``out_dir``; if the output file already exists
    the input is not read and nothing is written.
    """
    target = out_dir / f"{f.name[:-4]}{name_suffix}.csv"
    if target.exists():
        log.debug(f" exists, skipping {target}")
        return
    raw = np.genfromtxt(f, usecols=usecols, invalid_raise=False)
    processed = processor(raw)
    np.savetxt(target, processed, header="u,v,w,t", delimiter=',', fmt='%2.2f')
    log.info(f"saved file {target}")

Settings for each anemometer. The processor takes as input an array whose columns are u, v, w, t and returns a transformed array.

In [None]:
def nothing(x):
    """Identity processor: return the input unchanged."""
    return x
# Per-anemometer processing settings. Keys missing from an entry fall back to
# settings_default (see getordef).
#   usecols     - columns read from the raw file
#   name_re     - glob pattern (not a regex) matched against files in in_dir
#   processor   - callable applied to the loaded array
#   extractor   - callable that reads, processes and saves one file
#   name_suffix - text appended to the output file name
settings = {
    'm506': {
        'usecols': (10, 12, 14, 16),
        'name_re': r"*_TRS_M00506_com3.raw",
        'processor': process_m506
        },
    'm507': {
        'usecols': (10, 12, 14, 16),
        'name_re': r"*_TRS_M00507_com2.raw",
        'processor': process_m507  
    },
    'm507_raw': { # Applies no transformation, so EP can be used to aggregate the data
        'usecols': (10, 12, 14, 16),
        'name_re': r"*_TRS_M00507_com2.raw",
        'processor': nothing,
        'name_suffix': '_raw'
    },
    'm507_filtered':{ # Uses a custom extractor that builds its own processor from the WM wind filter
        'usecols': (10, 12, 14, 16),
        'name_re': r"*_TRS_M00507_com2.raw",
        'extractor': extract_m507_filtered,
        'name_suffix': '_filtered_230_30'
    }
}

# Fallback values used by getordef when an entry of `settings` omits a key.
# Note there is no default for 'name_re'.
settings_default = {
        'usecols': (10, 12, 14, 16),
        'processor': nothing,
        'extractor': extract,
        'name_suffix': ''
    }
#     'm506_v_inv': {
#         'usecols': (10, 12, 14, 16),
#         'name_re': r"*_TRS_M00506_com3.raw",
#         'processor': process_trs1_inv_v,
#         'name_suffix': '_inv_v'
#     }


# wm = {'usecols': ()}

In [None]:
def getordef(setg, item):
    """Look up ``item`` in ``setg``, falling back to ``settings_default``.

    Returns an empty string when neither mapping contains the key.
    """
    if item in setg:
        return setg[item]
    return settings_default.get(item, "")

In [None]:
def main():
    """Process every configured anemometer, spreading files over a process pool.

    For each entry in ``settings`` the extractor is bound to its column
    selection, processor and output suffix, then mapped over all files in
    ``in_dir`` matching the entry's glob pattern. The output directory is
    created first if it does not exist.
    """
    print("starting processing...")
    if not out_dir.is_dir():
        out_dir.mkdir(parents=True, exist_ok=True)
    with Pool() as pool:
        for cfg in settings.values():
            worker = partial(
                getordef(cfg, 'extractor'),
                getordef(cfg, "usecols"),
                getordef(cfg, "processor"),
                name_suffix=getordef(cfg, 'name_suffix'),
            )
            matching_files = in_dir.glob(getordef(cfg, "name_re"))
            pool.map(worker, matching_files)
    print("done")
    # for f in in_dir.glob(name_re):
    #     extract(f)

In [None]:
main()

starting processing...


  filt = (wind_dir > mod((start_ang - range_ang))) & (wind_dir < mod(start_ang + range_ang))
  filt = (wind_dir > mod((start_ang - range_ang))) & (wind_dir < mod(start_ang + range_ang))
  if both_dirs: filt = filt | ((wind_dir > mod(((start_ang-180) - range_ang))) & (wind_dir< mod((start_ang-180) + range_ang)))
  if both_dirs: filt = filt | ((wind_dir > mod(((start_ang-180) - range_ang))) & (wind_dir< mod((start_ang-180) + range_ang)))


done
