In [43]:
import pandas as pd
import xarray as xr
import matplotlib.pyplot as plt 
from glob import glob
from pathlib import Path

In [None]:
# Load every down-cast .asc file in data_dir and stack them into one DataFrame,
# tagging each row with the cast it came from.
data_dir='./data/Alg307-CTD-Data-Processed/down_casts/'

# Sorted so the casts are concatenated in a deterministic (filename) order.
files = sorted(glob(data_dir+"*.asc"))
if not files:
    # Fail here with a clear message instead of letting pd.concat raise the
    # opaque "No objects to concatenate" error further down.
    raise FileNotFoundError(f"No .asc files found in {data_dir!r}")

dfs = []
for f in files:
    # Columns are separated by runs of whitespace. sep=r"\s+" is the documented
    # replacement for the deprecated delim_whitespace=True.
    d = pd.read_csv(f, sep=r"\s+")

    # optional: parse time if you have a time column
    # (unparseable values become NaT because of errors="coerce")
    if "time" in d.columns:
        d["time"] = pd.to_datetime(d["time"], errors="coerce", utc=True)

    # File name without extension identifies the cast.
    d["cast_id"] = Path(f).stem
    dfs.append(d)

# ignore_index gives one continuous row index across all casts; sort=False keeps
# the column order as encountered instead of sorting column names.
df = pd.concat(dfs, ignore_index=True, sort=False)

In [45]:
# Put the identifying / positional columns (cast_id, TimeS, Latitude, Longitude,
# DepSM, PrDM) first; every other column follows in its existing order.
leading_cols = ['cast_id', 'TimeS', 'Latitude', 'Longitude', 'DepSM', 'PrDM']
cols = df.columns.tolist()
sorted_cols = leading_cols + [name for name in cols if name not in leading_cols]
df = df[sorted_cols]


In [46]:
# Show the combined, column-reordered table as this cell's output.
df

Unnamed: 0,cast_id,TimeS,Latitude,Longitude,DepSM,PrDM,C0S/m,AvgsvWM,FlECO-AFL,Sbox0Mm/Kg,OxsatMm/Kg,Par,T090C,Density00,Sal00,Gsw_saA0,Gsw_ctA0,Flag
0,dctd001,180.540,-32.28501,18.22672,0.988,1.0,2.716884,0.00,13.0004,264.552,266.16084,9999.00,12.5449,1018.2042,24.2780,24.3927,12.7526,0.0
1,dctd001,188.097,-32.28501,18.22674,2.006,2.0,1.694843,0.00,19.2748,285.993,299.53320,9999.00,12.5376,1010.3472,14.0890,14.1555,12.9461,0.0
2,dctd001,197.809,-32.28503,18.22678,2.979,3.0,3.746839,0.00,21.5750,237.973,268.95725,9999.00,11.7996,1025.0076,32.8885,33.0439,11.8316,0.0
3,dctd001,334.182,-32.28529,18.22744,3.975,4.0,3.816438,0.00,21.6117,192.400,269.37626,9999.00,11.4366,1025.7345,33.7405,33.8999,11.4466,0.0
4,dctd001,396.030,-32.28540,18.22778,4.975,5.0,3.876351,0.00,16.7919,206.190,269.88828,7299.30,10.8969,1026.6732,34.8158,34.9802,10.8897,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4054,dctd024,870.144,-32.61487,16.99374,306.620,309.0,3.643946,1501.86,0.2451,153.380,285.00201,444.23,8.4750,1028.3102,34.6240,34.7879,8.4394,0.0
4055,dctd024,874.547,-32.61484,16.99374,307.589,310.0,3.643317,1500.18,0.2508,150.909,285.07029,445.36,8.4688,1028.3148,34.6229,34.7869,8.4330,0.0
4056,dctd024,875.621,-32.61484,16.99374,308.596,311.0,3.642665,1500.11,0.2530,151.060,285.07952,445.96,8.4609,1028.3209,34.6232,34.7872,8.4251,0.0
4057,dctd024,876.833,-32.61485,16.99374,309.584,312.0,3.642395,1500.11,0.2524,151.720,285.14056,446.31,8.4571,1028.3264,34.6236,34.7876,8.4212,0.0


In [None]:
# Rename columns for easier access.
# NOTE: DataFrame.rename silently ignores keys that are not present in the
# frame, so a misspelled source name leaves that column untouched without
# raising. The keys below are matched against the columns actually present.
df = df.rename(columns={
    'TimeS': 'time',
    'Latitude': 'lat',
    'Longitude': 'lon',
    'DepSM': 'depth_m',
    'PrDM': 'pressure_dbar',
    'C0S/m': 'conductivity_s/m',  # digit zero: the column is 'C0S/m', not 'COS/m'
    'T090C': 'temperature_C',
    # The frame has no 'S_A1' column; 'Sal00' is the salinity channel present.
    # NOTE(review): presumably Sal00 is practical salinity (PSU) - confirm
    # against the cast file headers.
    'Sal00': 'salinity_psu',
})

Unnamed: 0,cast_id,TimeS,TimeS.1,Latitude,Longitude,DepSM,PrDM,C0S/m,AvgsvWM,FlECO-AFL,Sbox0Mm/Kg,OxsatMm/Kg,Par,T090C,Density00,Sal00,Gsw_saA0,Gsw_ctA0,Flag
0,dctd001,180.540,180.540,-32.28501,18.22672,0.988,1.0,2.716884,0.00,13.0004,264.552,266.16084,9999.00,12.5449,1018.2042,24.2780,24.3927,12.7526,0.0
1,dctd001,188.097,188.097,-32.28501,18.22674,2.006,2.0,1.694843,0.00,19.2748,285.993,299.53320,9999.00,12.5376,1010.3472,14.0890,14.1555,12.9461,0.0
2,dctd001,197.809,197.809,-32.28503,18.22678,2.979,3.0,3.746839,0.00,21.5750,237.973,268.95725,9999.00,11.7996,1025.0076,32.8885,33.0439,11.8316,0.0
3,dctd001,334.182,334.182,-32.28529,18.22744,3.975,4.0,3.816438,0.00,21.6117,192.400,269.37626,9999.00,11.4366,1025.7345,33.7405,33.8999,11.4466,0.0
4,dctd001,396.030,396.030,-32.28540,18.22778,4.975,5.0,3.876351,0.00,16.7919,206.190,269.88828,7299.30,10.8969,1026.6732,34.8158,34.9802,10.8897,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4054,dctd024,870.144,870.144,-32.61487,16.99374,306.620,309.0,3.643946,1501.86,0.2451,153.380,285.00201,444.23,8.4750,1028.3102,34.6240,34.7879,8.4394,0.0
4055,dctd024,874.547,874.547,-32.61484,16.99374,307.589,310.0,3.643317,1500.18,0.2508,150.909,285.07029,445.36,8.4688,1028.3148,34.6229,34.7869,8.4330,0.0
4056,dctd024,875.621,875.621,-32.61484,16.99374,308.596,311.0,3.642665,1500.11,0.2530,151.060,285.07952,445.96,8.4609,1028.3209,34.6232,34.7872,8.4251,0.0
4057,dctd024,876.833,876.833,-32.61485,16.99374,309.584,312.0,3.642395,1500.11,0.2524,151.720,285.14056,446.31,8.4571,1028.3264,34.6236,34.7876,8.4212,0.0


In [None]:
# Save the combined down-cast table as one CSV in the processed-data folder.
# to_csv keeps its defaults, so the row index is written as the first column.
out_dir = './data/Alg307-CTD-Data-Processed/'
out_path = Path(out_dir) / 'Alg_dcasts.csv'
df.to_csv(out_path)