In [1]:
import os
import xarray as xr
import numpy as np
import pandas as pd

In [2]:
def load_netcdf_files(directory):
    files = [f for f in os.listdir(directory) if f.endswith('.nc')]
    datasets = [xr.open_dataset(os.path.join(directory, f)) for f in files]
    return datasets


In [43]:
directory = '/nobackup/sreich/swot/swot_obsfit_L3/cycle_010'
datasets10 = load_netcdf_files(directory)
#datasets

In [46]:
merged10 = None

for ds in datasets10:
    if merged10 is None:
        merged10 = ds
    elif len(ds.iOBS) > 0:
        merged10 = xr.concat([merged10, ds], dim='iOBS')

In [48]:
merged, merged10
ds_all = xr.concat([merged, merged10], dim='iOBS')

In [49]:
ds_all

In [53]:
obs = xr.Dataset(
    data_vars=dict(
        obs_date           =(["iOBS"], ds_all.obs_date.values),
        obs_YYYYMMDD       =(["iOBS"], ds_all.obs_YYYYMMDD.values),
        obs_HHMMSS         =(["iOBS"], ds_all.obs_HHMMSS.values), 
        sample_x           =(["iSAMPLE"], ds_all.sample_x.values),
        sample_y           =(["iSAMPLE"], ds_all.sample_y.values),
        sample_z           =(["iSAMPLE"], ds_all.sample_z.values),
        sample_type        =(["iSAMPLE"], ds_all.sample_type.values),
        obs_val            =(["iOBS"], ds_all.obs_val.values),
        obs_uncert         =(["iOBS"], np.ones(len(ds_all.sample_interp_i))*0.02),
        sample_interp_XC11 =(["iOBS"], ds_all.sample_interp_XC11.values ),
        sample_interp_YC11 =(["iOBS"], ds_all.sample_interp_YC11.values ),
        sample_interp_XCNINJ =(["iOBS"], ds_all.sample_interp_XCNINJ.values ),
        sample_interp_YCNINJ =(["iOBS"], ds_all.sample_interp_YCNINJ.values ),
        sample_interp_i =(["iOBS"], ds_all.sample_interp_i.values ),
        sample_interp_j =(["iOBS"], ds_all.sample_interp_j.values ),
        sample_interp_w =(["iOBS", "iINTERP"], np.ones((len(ds_all.sample_interp_i),8))/8  )
    ),
)

In [54]:
obs

In [55]:
data_dir = '/nobackup/sreich/swot/L3_aviso/'
fname =  'swot_cycles_009_010_obsfit.nc'
obs.to_netcdf(data_dir + fname)

In [None]:
def reindex_dimensions(ds):
    # Find the maximum iOBS and iSAMPLE values in the dataset
    max_iobs = ds['iOBS'].max().values
    max_isample = ds['iSAMPLE'].max().values

    # Create a new range of iOBS and iSAMPLE values from 1 to their respective maximum values
    new_iobs = np.arange(1, max_iobs + 1)
    new_isample = np.arange(1, max_isample + 1)

    # Reindex the dataset along the iOBS and iSAMPLE dimensions
    ds_reindexed = ds.reindex(iOBS=new_iobs, iSAMPLE=new_isample)
    return ds_reindexed

In [55]:
# Initialize an empty list to store concatenated datasets
merged = None

# Initialize a counter to track the current index of iOBS
current_iobs_index = 0

# Iterate over each dataset
for ds in datasets:
    # Find the size of the iOBS dimension in the current dataset
    iobs_size = ds['iOBS'].size

    # Create a new range of iOBS values starting from the current index
    new_iobs = np.arange(current_iobs_index + 1, current_iobs_index + iobs_size + 1)

    # Reindex the dataset along the iOBS dimension
    ds_reindexed = ds.reindex(iOBS=new_iobs)

    # Concatenate the reindexed dataset with the combined dataset
    if merged is None:
        merged = ds_reindexed
    else:
        merged = xr.concat([merged, ds_reindexed], dim='iOBS')

    # Update the current index of iOBS
    current_iobs_index += iobs_size

In [None]:
# Concatenate the datasets along the iOBS dimension
combined_dataset = xr.concat(combined_datasets, dim='iOBS')

In [4]:
ds1 = datasets[0]
ds2 = datasets[1]

ds1, ds2

(<xarray.Dataset>
 Dimensions:               (iOBS: 43623, iSAMPLE: 43623, iINTERP: 8)
 Coordinates:
     longitude             (iSAMPLE) float64 ...
     latitude              (iSAMPLE) float64 ...
 Dimensions without coordinates: iOBS, iSAMPLE, iINTERP
 Data variables: (12/16)
     obs_date              (iOBS) float64 ...
     obs_YYYYMMDD          (iOBS) int64 ...
     obs_HHMMSS            (iOBS) int64 ...
     sample_x              (iSAMPLE) float64 ...
     sample_y              (iSAMPLE) float64 ...
     sample_z              (iSAMPLE) float64 ...
     ...                    ...
     sample_interp_YC11    (iOBS) float64 ...
     sample_interp_XCNINJ  (iOBS) float64 ...
     sample_interp_YCNINJ  (iOBS) float64 ...
     sample_interp_i       (iOBS) float64 ...
     sample_interp_j       (iOBS) float64 ...
     sample_interp_w       (iOBS, iINTERP) float64 ...,
 <xarray.Dataset>
 Dimensions:               (iOBS: 72602, iSAMPLE: 72602, iINTERP: 8)
 Coordinates:
     longitude      

In [5]:
a = ds1.isel(iOBS=slice(0,10), iSAMPLE=slice(0,10))
b = ds2.isel(iOBS=slice(0,7), iSAMPLE=slice(0,7))
a,b

(<xarray.Dataset>
 Dimensions:               (iOBS: 10, iSAMPLE: 10, iINTERP: 8)
 Coordinates:
     longitude             (iSAMPLE) float64 ...
     latitude              (iSAMPLE) float64 ...
 Dimensions without coordinates: iOBS, iSAMPLE, iINTERP
 Data variables: (12/16)
     obs_date              (iOBS) float64 ...
     obs_YYYYMMDD          (iOBS) int64 ...
     obs_HHMMSS            (iOBS) int64 ...
     sample_x              (iSAMPLE) float64 ...
     sample_y              (iSAMPLE) float64 ...
     sample_z              (iSAMPLE) float64 ...
     ...                    ...
     sample_interp_YC11    (iOBS) float64 ...
     sample_interp_XCNINJ  (iOBS) float64 ...
     sample_interp_YCNINJ  (iOBS) float64 ...
     sample_interp_i       (iOBS) float64 ...
     sample_interp_j       (iOBS) float64 ...
     sample_interp_w       (iOBS, iINTERP) float64 ...,
 <xarray.Dataset>
 Dimensions:               (iOBS: 7, iSAMPLE: 7, iINTERP: 8)
 Coordinates:
     longitude             (iSAMPL

In [6]:
a = a.to_dataframe().reset_index().drop(columns = ['iOBS', 'iSAMPLE', 'iINTERP'])
a

Unnamed: 0,obs_date,obs_YYYYMMDD,obs_HHMMSS,sample_x,sample_y,sample_z,sample_type,obs_val,obs_uncert,sample_interp_XC11,sample_interp_YC11,sample_interp_XCNINJ,sample_interp_YCNINJ,sample_interp_i,sample_interp_j,sample_interp_w,longitude,latitude
0,739255.218539,20240104,51441,-74.828622,-146.149730,0.0,5.0,0.265860,1.0,-172.958328,-70.01696,-156.699478,-75.22966,24.0,1.0,0.125,-74.828622,-146.149730
1,739255.218539,20240104,51441,-74.828622,-146.149730,0.0,5.0,0.265860,1.0,-172.958328,-70.01696,-156.699478,-75.22966,24.0,1.0,0.125,-74.828622,-146.149730
2,739255.218539,20240104,51441,-74.828622,-146.149730,0.0,5.0,0.265860,1.0,-172.958328,-70.01696,-156.699478,-75.22966,24.0,1.0,0.125,-74.828622,-146.149730
3,739255.218539,20240104,51441,-74.828622,-146.149730,0.0,5.0,0.265860,1.0,-172.958328,-70.01696,-156.699478,-75.22966,24.0,1.0,0.125,-74.828622,-146.149730
4,739255.218539,20240104,51441,-74.828622,-146.149730,0.0,5.0,0.265860,1.0,-172.958328,-70.01696,-156.699478,-75.22966,24.0,1.0,0.125,-74.828622,-146.149730
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,739255.218552,20240104,51442,-74.887935,-146.020443,0.0,5.0,0.258835,1.0,-172.958328,-70.01696,-156.699478,-75.22966,27.0,4.0,0.125,-74.887935,-146.020443
796,739255.218552,20240104,51442,-74.887935,-146.020443,0.0,5.0,0.258835,1.0,-172.958328,-70.01696,-156.699478,-75.22966,27.0,4.0,0.125,-74.887935,-146.020443
797,739255.218552,20240104,51442,-74.887935,-146.020443,0.0,5.0,0.258835,1.0,-172.958328,-70.01696,-156.699478,-75.22966,27.0,4.0,0.125,-74.887935,-146.020443
798,739255.218552,20240104,51442,-74.887935,-146.020443,0.0,5.0,0.258835,1.0,-172.958328,-70.01696,-156.699478,-75.22966,27.0,4.0,0.125,-74.887935,-146.020443


In [7]:
b = b.to_dataframe().reset_index().drop(columns = ['iOBS', 'iSAMPLE', 'iINTERP'])
b

Unnamed: 0,obs_date,obs_YYYYMMDD,obs_HHMMSS,sample_x,sample_y,sample_z,sample_type,obs_val,obs_uncert,sample_interp_XC11,sample_interp_YC11,sample_interp_XCNINJ,sample_interp_YCNINJ,sample_interp_i,sample_interp_j,sample_interp_w,longitude,latitude
0,739255.224275,20240104,52257,-66.747663,-52.955415,0.0,5.0,-0.262055,1.0,-113.053261,-74.007629,0.0,0.0,3.0,75.0,0.125,-66.747663,-52.955415
1,739255.224275,20240104,52257,-66.747663,-52.955415,0.0,5.0,-0.262055,1.0,-113.053261,-74.007629,0.0,0.0,3.0,75.0,0.125,-66.747663,-52.955415
2,739255.224275,20240104,52257,-66.747663,-52.955415,0.0,5.0,-0.262055,1.0,-113.053261,-74.007629,0.0,0.0,3.0,75.0,0.125,-66.747663,-52.955415
3,739255.224275,20240104,52257,-66.747663,-52.955415,0.0,5.0,-0.262055,1.0,-113.053261,-74.007629,0.0,0.0,3.0,75.0,0.125,-66.747663,-52.955415
4,739255.224275,20240104,52257,-66.747663,-52.955415,0.0,5.0,-0.262055,1.0,-113.053261,-74.007629,0.0,0.0,3.0,75.0,0.125,-66.747663,-52.955415
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
387,739255.224243,20240104,52254,-66.941462,-52.949173,0.0,5.0,-0.138237,1.0,-113.053261,-74.007629,0.0,0.0,3.0,81.0,0.125,-66.941462,-52.949173
388,739255.224243,20240104,52254,-66.941462,-52.949173,0.0,5.0,-0.138237,1.0,-113.053261,-74.007629,0.0,0.0,3.0,81.0,0.125,-66.941462,-52.949173
389,739255.224243,20240104,52254,-66.941462,-52.949173,0.0,5.0,-0.138237,1.0,-113.053261,-74.007629,0.0,0.0,3.0,81.0,0.125,-66.941462,-52.949173
390,739255.224243,20240104,52254,-66.941462,-52.949173,0.0,5.0,-0.138237,1.0,-113.053261,-74.007629,0.0,0.0,3.0,81.0,0.125,-66.941462,-52.949173


In [8]:
type(a), type(b)

(pandas.core.frame.DataFrame, pandas.core.frame.DataFrame)

In [20]:
ds1 = ds1.drop_dims(['iSAMPLE', 'iINTERP'])
ds1

In [21]:
#ds1 = ds1.drop_dims('iINTERP')

df1 = ds1.to_dataframe().reset_index().drop(columns = ['iOBS'])
df1

Unnamed: 0,obs_date,obs_YYYYMMDD,obs_HHMMSS,obs_val,obs_uncert,sample_interp_XC11,sample_interp_YC11,sample_interp_XCNINJ,sample_interp_YCNINJ,sample_interp_i,sample_interp_j
0,739255.218539,20240104,51441,0.265860,1.0,-172.958328,-70.016960,-156.699478,-75.229660,24.0,1.0
1,739255.218543,20240104,51442,0.266401,1.0,-172.958328,-70.016960,-156.699478,-75.229660,24.0,2.0
2,739255.218546,20240104,51442,0.257025,1.0,-172.958328,-70.016960,-156.699478,-75.229660,24.0,3.0
3,739255.218550,20240104,51442,0.267529,1.0,-172.958328,-70.016960,-156.699478,-75.229660,24.0,4.0
4,739255.218557,20240104,51443,0.286413,1.0,-172.958328,-70.016960,-156.699478,-75.229660,24.0,6.0
...,...,...,...,...,...,...,...,...,...,...,...
43618,739255.192585,20240104,43719,0.174099,1.0,172.041672,56.978096,-173.041672,48.419586,178.0,176.0
43619,739255.192594,20240104,43720,0.160930,1.0,172.041672,56.978096,-173.041672,48.419586,178.0,177.0
43620,739255.192604,20240104,43721,0.147635,1.0,172.041672,56.978096,-173.041672,48.419586,178.0,178.0
43621,739255.192615,20240104,43721,0.137732,1.0,172.041672,56.978096,-173.041672,48.419586,178.0,179.0


In [22]:
ds2 = ds2.drop_dims(['iSAMPLE', 'iINTERP'])
df2 = ds2.to_dataframe().reset_index().drop(columns = ['iOBS'])
df2

Unnamed: 0,obs_date,obs_YYYYMMDD,obs_HHMMSS,obs_val,obs_uncert,sample_interp_XC11,sample_interp_YC11,sample_interp_XCNINJ,sample_interp_YCNINJ,sample_interp_i,sample_interp_j
0,739255.224275,20240104,52257,-0.262055,1.0,-113.053261,-74.007629,0.0,0.000000,3.0,75.0
1,739255.224269,20240104,52256,-0.139790,1.0,-113.053261,-74.007629,0.0,0.000000,3.0,76.0
2,739255.224265,20240104,52256,-0.114361,1.0,-113.053261,-74.007629,0.0,0.000000,3.0,77.0
3,739255.224258,20240104,52255,-0.160912,1.0,-113.053261,-74.007629,0.0,0.000000,3.0,78.0
4,739255.224254,20240104,52255,-0.192917,1.0,-113.053261,-74.007629,0.0,0.000000,3.0,79.0
...,...,...,...,...,...,...,...,...,...,...,...
72597,739255.256108,20240104,60847,0.182384,1.0,52.000000,74.055351,52.0,82.094345,122.0,68.0
72598,739255.256102,20240104,60847,0.164372,1.0,52.000000,74.055351,52.0,82.094345,122.0,69.0
72599,739255.256097,20240104,60846,0.054876,1.0,52.000000,74.055351,52.0,82.094345,122.0,70.0
72600,739255.256108,20240104,60847,0.148029,1.0,52.000000,74.055351,52.0,82.094345,123.0,69.0


In [25]:


merged = pd.concat([df1, df2])
merged



Unnamed: 0,obs_date,obs_YYYYMMDD,obs_HHMMSS,obs_val,obs_uncert,sample_interp_XC11,sample_interp_YC11,sample_interp_XCNINJ,sample_interp_YCNINJ,sample_interp_i,sample_interp_j
0,739255.218539,20240104,51441,0.265860,1.0,-172.958328,-70.016960,-156.699478,-75.229660,24.0,1.0
1,739255.218543,20240104,51442,0.266401,1.0,-172.958328,-70.016960,-156.699478,-75.229660,24.0,2.0
2,739255.218546,20240104,51442,0.257025,1.0,-172.958328,-70.016960,-156.699478,-75.229660,24.0,3.0
3,739255.218550,20240104,51442,0.267529,1.0,-172.958328,-70.016960,-156.699478,-75.229660,24.0,4.0
4,739255.218557,20240104,51443,0.286413,1.0,-172.958328,-70.016960,-156.699478,-75.229660,24.0,6.0
...,...,...,...,...,...,...,...,...,...,...,...
72597,739255.256108,20240104,60847,0.182384,1.0,52.000000,74.055351,52.000000,82.094345,122.0,68.0
72598,739255.256102,20240104,60847,0.164372,1.0,52.000000,74.055351,52.000000,82.094345,122.0,69.0
72599,739255.256097,20240104,60846,0.054876,1.0,52.000000,74.055351,52.000000,82.094345,122.0,70.0
72600,739255.256108,20240104,60847,0.148029,1.0,52.000000,74.055351,52.000000,82.094345,123.0,69.0


Unnamed: 0,obs_date,obs_YYYYMMDD,obs_HHMMSS,obs_val,obs_uncert,sample_interp_XC11,sample_interp_YC11,sample_interp_XCNINJ,sample_interp_YCNINJ,sample_interp_i,sample_interp_j
0,739255.218539,20240104,51441,0.265860,1.0,-172.958328,-70.016960,-156.699478,-75.229660,24.0,1.0
1,739255.218543,20240104,51442,0.266401,1.0,-172.958328,-70.016960,-156.699478,-75.229660,24.0,2.0
2,739255.218546,20240104,51442,0.257025,1.0,-172.958328,-70.016960,-156.699478,-75.229660,24.0,3.0
3,739255.218550,20240104,51442,0.267529,1.0,-172.958328,-70.016960,-156.699478,-75.229660,24.0,4.0
4,739255.218557,20240104,51443,0.286413,1.0,-172.958328,-70.016960,-156.699478,-75.229660,24.0,6.0
...,...,...,...,...,...,...,...,...,...,...,...
72597,739255.256108,20240104,60847,0.182384,1.0,52.000000,74.055351,52.000000,82.094345,122.0,68.0
72598,739255.256102,20240104,60847,0.164372,1.0,52.000000,74.055351,52.000000,82.094345,122.0,69.0
72599,739255.256097,20240104,60846,0.054876,1.0,52.000000,74.055351,52.000000,82.094345,122.0,70.0
72600,739255.256108,20240104,60847,0.148029,1.0,52.000000,74.055351,52.000000,82.094345,123.0,69.0


In [16]:
obs = xr.Dataset(
    data_vars=dict(
        obs_date           =(["iOBS"], merged.obs_date.values),
        obs_YYYYMMDD       =(["iOBS"], merged.obs_YYYYMMDD.values),
        obs_HHMMSS         =(["iOBS"], merged.obs_HHMMSS.values), 
        sample_x           =(["iSAMPLE"], merged.sample_x.values),
        sample_y           =(["iSAMPLE"], merged.sample_y.values),
        sample_z           =(["iSAMPLE"], merged.sample_z.values),
        sample_type        =(["iSAMPLE"], merged.sample_type.values),
        obs_val            =(["iOBS"], merged.obs_val.values),
        obs_uncert         =(["iOBS"], merged.obs_uncert.values),
        sample_interp_XC11 =(["iOBS"], merged.sample_interp_XC11.values ),
        sample_interp_YC11 =(["iOBS"], merged.sample_interp_YC11.values ),
        sample_interp_XCNINJ =(["iOBS"], merged.sample_interp_XCNINJ.values ),
        sample_interp_YCNINJ =(["iOBS"], merged.sample_interp_YCNINJ.values ),
        sample_interp_i =(["iOBS"], merged.sample_interp_i.values ),
        sample_interp_j =(["iOBS"], merged.sample_interp_j.values ),
        sample_interp_w =(["iOBS", "iINTERP"], np.ones((len(merged.sample_interp_i),8))/8  )
    ),
)

In [17]:
obs

In [8]:
ds2

In [13]:
# Matt
obs_date_vals1 = ds1.obs_date.values
obs_date_vals2 = ds2.obs_date.values

obs_date_vals_combined = np.concatenate((obs_date_vals1, obs_date_vals2))

new_ds = xr.Dataset(
    data_vars=dict(
        obs_date           =(["iOBS"], obs_date_vals_combined),    ),
)

In [17]:
for key in list(ds1.keys()):
    print(ds1[key].dims)

('iOBS',)
('iOBS',)
('iOBS',)
('iSAMPLE',)
('iSAMPLE',)
('iSAMPLE',)
('iSAMPLE',)
('iOBS',)
('iOBS',)
('iOBS',)
('iOBS',)
('iOBS',)
('iOBS',)
('iOBS',)
('iOBS',)
('iOBS', 'iINTERP')
