In [1]:
import os
import polars as pl
import numpy as np
import time
import datetime as dt
import aacgmv2
from spacepy import pycdf
from IPython.display import clear_output
import contextlib

In [2]:
def calculate_magnetic_coords_aacgm(lat, lon, alt, timestamp):
    dtime = dt.datetime.utcfromtimestamp(timestamp)
    with open(os.devnull, 'w') as fnull:
        with contextlib.redirect_stderr(fnull): 
            geomag_lat, geomag_lon = aacgmv2.get_aacgm_coord(lat, lon, alt, dtime)[:2]
            if geomag_lat is None or geomag_lon is None:
                return np.nan, np.nan
            try:
                ut_hours = dtime.hour + dtime.minute / 60.0
                mlt = (geomag_lon / 15.0) + ut_hours
                mlt = mlt % 24  
                return geomag_lat, mlt
            except:
                return np.nan, np.nan 

In [None]:
import os
import time
import datetime as dt
import polars as pl
from IPython.display import clear_output
from spacepy import pycdf
import numpy as np

# Timing setup
start_time = time.time()

limit = 10000  # Limit to this many CDF files for testing
champ_root_dir = "/Users/patrick/Dev/perceptive_techeval/data/champ-2002-2007/"
output_dir = "/Users/patrick/Dev/perceptive_techeval/champ_l2/"

# Ensure output directory exists
os.makedirs(output_dir, exist_ok=True)

# Count total CDF files for progress indicator
cdf_count = sum(
    len([f for f in files if f.endswith(".cdf")])
    for _, _, files in os.walk(champ_root_dir)
)
print(f"Total number of CDF files: {cdf_count}")

processed = 0

for root, dirs, files in os.walk(os.path.abspath(champ_root_dir)):
    for file in [f for f in files if f.endswith(".cdf")][0:limit]:
        file_path = os.path.join(root, file)

        # Track start time for each file
        file_start_time = time.time()

        # Load CDF data
        cdf = pycdf.CDF(file_path)
        alt = cdf["altitude"][:] / 1000.0
        lat = cdf["latitude"][:]
        lon = cdf["longitude"][:]
        lst = cdf["local_solar_time"][:]
        valid = cdf["validity_flag"][:]
        density = cdf["density"][:]
        density_orbitmean = cdf["density_orbitmean"][:]
        time_vals = cdf["time"][:]

        unix_timestamps = [dt.datetime.timestamp(t) for t in time_vals]

        # Create initial DataFrame
        df = pl.DataFrame({
            "timestamp": unix_timestamps,
            "altitude": alt,
            "latitude": lat,
            "longitude": lon,
            "local_solar_time": lst,
            "validity_flag": valid,
            "density_orbitmean": density_orbitmean,
            "density": density
        })

        # Replace high values with NaN
        columns_to_check = [col for col in df.columns if col != "timestamp"]
        df = df.with_columns([
            pl.when(pl.col(col) > 1e25).then(np.nan).otherwise(pl.col(col)).alias(col)
            for col in columns_to_check
        ])

        # Calculate geomagnetic coordinates and magnetic local time in batches
        geomagnetic_latitudes, mlts = zip(*[
            calculate_magnetic_coords_aacgm(row[0], row[1], row[2], row[3])
            for row in zip(df["latitude"], df["longitude"], df["altitude"], df["timestamp"])
        ])

        df = df.with_columns([
            pl.Series("geomagnetic_latitude", geomagnetic_latitudes),
            pl.Series("mlt", mlts)
        ])

        # Clean data
        df = df.drop_nulls()

        # Save DataFrame to individual CSV
        output_file = os.path.join(output_dir, f"{os.path.basename(file_path).replace('.cdf', '')}.csv")
        df.write_csv(output_file)

        processed += 1
        file_elapsed = time.time() - file_start_time
        percent_complete = (processed / cdf_count) * 100

        # Clear and update progress display
        clear_output(wait=True)
        print(f"Processed file {processed}/{cdf_count}: {file_path} in {file_elapsed:.2f}s ({percent_complete:.2f}%)")

        # Break the loop if the limit is reached
        if processed >= limit:
            break

# Final timing
end_time = time.time()
print(f"Total processing time: {end_time - start_time:.2f}s")

Processed file 659/2048: /Users/patrick/Dev/perceptive_techeval/data/champ-2002-2007/champ-2006/CH_OPER_DNS_ACC_2__20060109T000000_20060109T235959_0001.cdf in 3.24s (32.18%)


In [15]:
pl.read_csv("/Users/patrick/Dev/perceptive_techeval/champ_l2/CH_OPER_DNS_ACC_2__20020316T000000_20020316T235959_0001.csv")

timestamp,altitude,latitude,longitude,local_solar_time,validity_flag,density_orbitmean,density,geomagnetic_latitude,mlt,delta_density
f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
1.0163e9,392.892881,41.099531,-76.268103,18.848671,0.0,4.8323e-12,8.0952e-12,52.785473,7.030145,0.043452
1.0163e9,392.911673,40.448758,-76.255717,18.852275,0.0,4.8324e-12,8.1339e-12,52.185339,7.044432,0.045213
1.0163e9,392.934103,39.797908,-76.244367,18.85581,0.0,4.8326e-12,8.1583e-12,51.585885,7.041916,0.045141
1.0163e9,392.960245,39.146982,-76.23401,18.859279,0.0,4.8328e-12,8.1702e-12,50.987159,7.039267,0.043518
1.0163e9,392.990176,38.495982,-76.224605,18.862684,0.0,4.8329e-12,8.1715e-12,50.389205,7.036488,0.040655
…,…,…,…,…,…,…,…,…,…,…
1.0163e9,444.277574,-52.482581,102.607316,6.683964,0.0,4.8336e-12,2.3879e-12,-66.770241,17.847478,0.034003
1.0163e9,443.899562,-51.843208,102.646719,6.689369,0.0,4.8337e-12,2.3979e-12,-66.143339,17.880418,0.036874
1.0163e9,443.517267,-51.203656,102.683861,6.694624,0.0,4.8338e-12,2.4104e-12,-65.513928,17.911787,0.040586
1.0163e9,443.130811,-50.563926,102.718856,6.699735,0.0,4.8340e-12,2.4196e-12,-64.88215,17.941696,0.042702
