In [5]:
# Single table describing every column carried from the input file to the
# output file. Each row is:
#   (column name in the input file, column name in the output file, unit string)
# The two dictionaries below are derived from it so they cannot drift apart.
_column_spec = [
    ('TIMESTAMP_1',      'TIMESTAMP_1', 'yyyy-mm-dd'),
    ('TIMESTAMP_2',      'TIMESTAMP_2', 'HHMM'),
    ('Temp_K_Avg',       'Ta_0_0_1',    'K'),
    ('press_Avg',        'Pa_0_0_1',    'kPa'),
    ('rh_hmp1_Avg',      'RH_0_0_1',    '%'),
    ('Rn_total_Avg',     'Rn_0_0_1',    'W+1m-2'),
    ('Rad_long_Dn_Avg',  'LWin_0_0_1',  'W+1m-2'),
    ('Rad_long_Up_Avg',  'LWout_0_0_1', 'W+1m-2'),
    ('Rad_short_Dn_Avg', 'SWin_0_0_1',  'W+1m-2'),
    ('Rad_short_Up_Avg', 'SWout_0_0_1', 'W+1m-2'),
    ('par_flxdens_Avg',  'PPFD_0_0_1',  'umol+1m-2s-1'),
    ('wnd_spd',          'MWS_0_0_1',   'm+1s-1'),
    ('wnd_dir_compass',  'WD_0_0_1',    'degrees'),
    ('SoilT_Avg(1)',     'Ts_0_0_1',    'C'),
    ('SoilT_Avg(2)',     'Ts_0_0_2',    'C'),
    ('SoilT_Avg(3)',     'Ts_0_0_3',    'C'),
    ('HFT_Avg(1)',       'SHF_0_0_1',   'W+1m-2'),
    ('HFT_Avg(2)',       'SHF_0_0_2',   'W+1m-2'),
    ('HFT_Avg(3)',       'SHF_0_0_3',   'W+1m-2'),
    ('vwc_Avg',          'SWC_0_0_1',   'm+3m-3'),
]

# input column name -> output column name (insertion order = output column order)
columns_mapping = {source: target for source, target, _ in _column_spec}

# output column name -> unit string
units = {target: unit for _, target, unit in _column_spec}

In [None]:
import pandas as pd

# --- Read the input file ---------------------------------------------------
file_path = "H:\\Flux\\KBS\\2023T7\\raw\\TOA5_flux7_230808.dat"
# skiprows=[0, 2, 3] drops the first line and the two lines that follow the
# column-name line, so line 1 of the file becomes the header.
# na_values=["NAN"]: pandas does not treat the upper-case text "NAN" as missing
# by default, which would leave affected columns as strings (not caught by
# fillna, and "x" * 100 would repeat the text instead of scaling it).
# NOTE(review): assumes the logger writes missing samples as the literal NAN
# (the TOA5 file name suggests so) -- confirm against the raw file.
df = pd.read_csv(file_path, skiprows=[0, 2, 3], na_values=["NAN"], low_memory=False)

# --- Validate the input before transforming it -----------------------------
# TIMESTAMP_1 / TIMESTAMP_2 are derived below from TIMESTAMP; every other key
# of columns_mapping must already be present in the file.
derived_columns = ['TIMESTAMP_1', 'TIMESTAMP_2']
required_columns = ['TIMESTAMP'] + [c for c in columns_mapping if c not in derived_columns]
missing_columns = [c for c in required_columns if c not in df.columns]
if missing_columns:
    raise KeyError(f"Columns missing from {file_path}: {missing_columns}")

# --- Split the timestamp into a date column and a time column --------------
# A parsing failure is allowed to propagate: the derived columns are required
# by the selection below, so continuing after an error would only replace the
# real cause with a misleading KeyError on 'TIMESTAMP_1'.
df['TIMESTAMP'] = pd.to_datetime(df['TIMESTAMP'])
df['TIMESTAMP_1'] = df['TIMESTAMP'].dt.strftime('%Y-%m-%d')
df['TIMESTAMP_2'] = df['TIMESTAMP'].dt.strftime('%H%M')

# --- Select the mapped columns (in mapping order) and rename them ----------
df_selected = df[list(columns_mapping)].rename(columns=columns_mapping)

# convert RH to percentage
df_selected['RH_0_0_1'] = df_selected['RH_0_0_1'] * 100

# --- Insert the units as the first data row --------------------------------
# Every output column must have a unit; otherwise the unit cell would be NaN
# and end up written as -999 by the fill step below.
columns_without_unit = [c for c in df_selected.columns if c not in units]
if columns_without_unit:
    raise KeyError(f"No unit defined for column(s): {columns_without_unit}")
units_row = pd.DataFrame([units], columns=df_selected.columns)
df_selected = pd.concat([units_row, df_selected], ignore_index=True)

# fill NaN values with -999
df_selected = df_selected.fillna(-999)

# --- Save the result --------------------------------------------------------
output_file = "H:\\Flux\\KBS\\2023T7\\raw\\TOA5_flux7_230808.csv"
df_selected.to_csv(output_file, index=False)

print(f"Data successfully converted and saved to {output_file}")

Data successfully converted and saved to H:\Flux\KBS\2023T7\raw\TOA5_flux7_230808.csv
