In [83]:
import pandas as pd
import numpy as np
import xarray as xr
from datetime import datetime as dt
from datetime import timedelta

ham = pd.read_csv('C:/Users/michaelself10/Desktop/projects/radiosonde_tracking/20250725-183940_W2245460_RS41_401401_sonde_HAM.log', parse_dates=['timestamp'])

ham.timestamp = ham.timestamp.dt.round('1s')
# drop the rows before 10, 25, 22:52:44 and after 10, 26, 00:19:11, when the balloon burst
ham = ham[(ham.timestamp.dt.tz_localize(None) >= dt(2025, 10, 25, 22, 52, 44)) & (ham.timestamp.dt.tz_localize(None) <= dt(2025, 10, 26, 0, 19, 11))]
# drop unused columns
# ham.columns


base = pd.read_csv('C:/Users/michaelself10/Desktop/projects/radiosonde_tracking/20251022-233732_W2245460_RS41-SGP_401401_sonde_BASE.log', parse_dates=['timestamp'])

base.timestamp = base.timestamp.dt.round('1s')
# drop the rows before 10, 25, 22:52:44
base = base[(base.timestamp.dt.tz_localize(None) >= dt(2025, 10, 25, 22, 52, 44)) & (base.timestamp.dt.tz_localize(None) <= dt(2025, 10, 26, 0, 19, 11))]
# drop unused columns
# base.columns

# combine the two dataframes, so that there are as many frames as possible
combined = pd.concat([ham, base]).sort_values(by='timestamp').reset_index(drop=True)

# drop rows with missing data from critical columns (temp = -273, pressure, humidity, alt, heading, vel_h = -1)
combined = combined[(combined['temp'] != -273) & (combined['pressure'] != -1) & (combined['humidity'] != -1) & (combined['alt'] != -1) & (combined['heading'] != -1) & (combined['vel_h'] != -1)]

# drop duplicate frames, keeping the first occurrence (ham preferred over base)
combined = combined.drop_duplicates(subset=['frame'], keep='first')

### PREPARE DATA FOR RAOB FORMAT

# calculate dewpoint (in celsius) from temperature and humidity
#  TD: =243.04*(LN(RH/100)+((17.625*T)/(243.04+T)))/(17.625-LN(RH/100)-((17.625*T)/(243.04+T)))
combined['DWPT'] = 243.04 * (np.log(combined['humidity']/100) + ((17.625 * combined['temp']) / (243.04 + combined['temp']))) / (17.625 - np.log(combined['humidity']/100) - ((17.625 * combined['temp']) / (243.04 + combined['temp'])))
combined['DWPT'] = combined['DWPT'].round(2)

# calculate wind direction from heading
combined['WDIR'] = (combined['heading'] + 180) % 360
combined['WDIR'] = combined['WDIR'].round(2)

# convert wind speed from m/s to knots
combined['WSPD'] = combined['vel_h'] * 1.94384
combined['WSPD'] = combined['WSPD'].round(2)
# change pressure to LEVEL
combined['LEVEL'] = combined['pressure']
combined['LEVEL'] = combined['LEVEL'].round(2)

# change alt to HGHT
combined['HGHT'] = combined['alt']
combined['HGHT'] = combined['HGHT'].round(2)
# change temp to TEMP
combined['TEMP'] = combined['temp']
combined['TEMP'] = combined['TEMP'].round(2)

combined = combined[['LEVEL','HGHT','TEMP','DWPT','WDIR','WSPD']]
combined

Unnamed: 0,LEVEL,HGHT,TEMP,DWPT,WDIR,WSPD
0,1014.5,10.4,24.1,15.24,195.9,0.39
2,1014.6,10.4,24.1,15.24,163.9,0.19
4,1014.6,10.4,24.1,15.24,148.5,0.39
6,1014.6,10.1,23.9,14.89,135.1,2.14
8,1014.6,9.4,23.7,14.76,102.8,10.30
...,...,...,...,...,...,...
5053,11.9,29930.1,-48.9,-78.46,281.5,33.05
5054,11.9,29937.6,-48.9,-78.46,268.4,28.77
5055,11.9,29967.5,-49.0,-78.53,232.9,29.94
5056,11.8,29995.6,-49.1,-78.61,299.6,17.69


In [85]:
# save to csv
combined.to_csv('20251025_RS41_WBL_filtered_raob.txt', index=False)