# Intro

The purpose of this notebook is to check that the WAOM float runs completed and that their output can be loaded here.

It is based in part on "check WAOM ran".

In [1]:
# import packages and functions
import numpy as np
import matplotlib.pyplot as plt
import xarray as xr
import netCDF4 as nc
import gsw

In [2]:
# Root directory holding the WAOM output; the input file names below are built
# by string concatenation, so the trailing slash is required.
path = "/scratch/mmurakami/WAOM/"

In [3]:
# First dataset (years 1-2): open the float file from the 2-year run.
filename = path + "waom_2years/ocean_flt_select.nc"
ds1 = xr.open_dataset(filename)

# `a` holds Xgrid for every drifter at the final time step of this file;
# it is the reference the 6-year run's floats are matched against below.
a = ds1["Xgrid"].isel(ocean_time=-1).values

In [4]:
# Second float file from the same 2-year run; Zgrid, rho, salt and temp are
# taken from this file when the 2-year dataset is assembled further down.
filename = path + "waom_2years/ocean_flt_select2.nc"
ds2 = xr.open_dataset(filename)
# ds2

In [5]:
# second dataset (years 4-6)
# NOTE(review): a later cell describes this same file as "years 2-6" and names
# the result ds_4years - confirm which year range is correct.
filename = path + "waom_6years/ocean_flt.nc"
ds3 = xr.open_dataset(filename)


In [6]:
# Xgrid of every float in the 6-year file at index 11 along the first axis,
# copied into a plain NumPy array.
# NOTE(review): presumably 11 is the first step at which the floats exist
# (the release-time cell at the bottom also uses start = 11) - confirm.
b = np.array(ds3.variables['Xgrid'][11])

In [7]:
# Sanity check: one Xgrid value per float in the 6-year file.
b.shape

(3895,)

In [8]:
# Match every float in `b` (6-year run) to a float in `a` (2-year run) by exact
# equality of its Xgrid value, keeping only the first occurrence in `a`.
#
# The later cells line the two runs up positionally (drifter k of the 2-year
# subset is assumed to be drifter k of the 6-year file), so a value of `b` that
# has no match must not be dropped silently: that would shift every following
# index by one. Unmatched positions are therefore collected and reported.
# Note that NaN never compares equal, so a NaN in `b` counts as unmatched.
indices_in_a = []
unmatched_in_b = []

for pos_in_b, val in enumerate(b):
    match_idx = np.where(a == val)[0]  # all positions in `a` holding this value
    if match_idx.size > 0:
        indices_in_a.append(match_idx[0])  # keep only the first match
    else:
        unmatched_in_b.append(pos_in_b)

if unmatched_in_b:
    raise ValueError(
        f"{len(unmatched_in_b)} of {len(b)} values in `b` have no exact match in `a` "
        f"(first few positions in b: {unmatched_in_b[:10]}); "
        "indices_in_a would no longer line up one-to-one with the floats in b"
    )

# Explicit integer dtype so the result is a valid index array even when empty
# (np.array([]) would otherwise default to float64).
indices_in_a = np.array(indices_in_a, dtype=int)

# Print results
print(f"indices_in_a: {indices_in_a}")  # Indices where `a` matches `b` (first occurrence only)

indices_in_a: [   0    1    2 ... 3892 3893 3894]


In [9]:
# Show the Xgrid values of `a` at the matched positions.
a[indices_in_a]

array([453.04388, 379.27896, 439.45917, ..., 519.93756, 572.03033,
       483.87262], dtype=float32)

# Great: now that we have the indices and the datasets, we can combine them into one big dataset

In [10]:

# Restrict both 2-year files to the matched floats; "drifter" is the float dimension.
ds1_subsel = ds1.isel(drifter=indices_in_a)
ds2_subsel = ds2.isel(drifter=indices_in_a)

# Variables kept in the trimmed datasets, grouped by the 2-year file they come from.
vars_from_ds1 = ("Xgrid", "Ygrid")
vars_from_ds2 = ("Zgrid", "rho", "salt", "temp")

# 2-year dataset: horizontal positions from the first file, depth and tracers
# from the second, with ocean_time and drifter attached as coordinates.
fields_2years = {name: ds1_subsel[name] for name in vars_from_ds1}
fields_2years.update({name: ds2_subsel[name] for name in vars_from_ds2})
ds_2years = xr.Dataset(
    fields_2years,
    coords=dict(
        ocean_time=ds2_subsel["ocean_time"],
        drifter=ds2_subsel["drifter"],
    ),
)

# Same six variables from the 6-year float file (ds3), with no drifter
# sub-selection: every float in ds3 is kept.
fields_4years = {name: ds3[name] for name in vars_from_ds1 + vars_from_ds2}
ds_4years = xr.Dataset(
    fields_4years,
    coords=dict(
        ocean_time=ds3["ocean_time"],
        drifter=ds3["drifter"],
    ),
)

# The two pieces are concatenated along ocean_time in a later cell, after
# ds_4years has been given a continuing ocean_time index.
ds_4years

In [11]:
# Inspect the assembled 2-year dataset (dimensions, coordinates, variables).
print(ds_2years)

<xarray.Dataset>
Dimensions:     (ocean_time: 70081, drifter: 3895)
Coordinates:
  * ocean_time  (ocean_time) int64 0 1 2 3 4 5 ... 70076 70077 70078 70079 70080
  * drifter     (drifter) int64 0 1 2 3 4 5 6 ... 3889 3890 3891 3892 3893 3894
Data variables:
    Xgrid       (ocean_time, drifter) float32 ...
    Ygrid       (ocean_time, drifter) float32 ...
    Zgrid       (ocean_time, drifter) float32 ...
    rho         (ocean_time, drifter) float32 ...
    salt        (ocean_time, drifter) float32 ...
    temp        (ocean_time, drifter) float32 ...


In [12]:
# Give ds_4years an ocean_time index that continues where ds_2years stops, so
# the two can be concatenated along ocean_time without overlapping labels.
# The offset is read from ds_2years instead of being hard-coded, so this cell
# stays correct if the 2-year file changes length.
time_offset = ds_2years.sizes["ocean_time"]

# Get the number of time steps in ds_4years
num_time_steps = ds_4years.sizes["ocean_time"]

# Create a new sequential index starting right after the last ds_2years step
new_ocean_time = np.arange(time_offset, time_offset + num_time_steps)

# Assign the new index to ocean_time
ds_4years = ds_4years.assign_coords(ocean_time=new_ocean_time)

# Print to verify the new ocean_time index
print(ds_4years)

<xarray.Dataset>
Dimensions:     (ocean_time: 124283, drifter: 3895)
Coordinates:
  * ocean_time  (ocean_time) int64 70081 70082 70083 ... 194361 194362 194363
  * drifter     (drifter) int64 0 1 2 3 4 5 6 ... 3889 3890 3891 3892 3893 3894
Data variables:
    Xgrid       (ocean_time, drifter) float32 ...
    Ygrid       (ocean_time, drifter) float32 ...
    Zgrid       (ocean_time, drifter) float32 ...
    rho         (ocean_time, drifter) float32 ...
    salt        (ocean_time, drifter) float32 ...
    temp        (ocean_time, drifter) float32 ...


In [13]:
# Write the two trimmed datasets next to the raw model output. The directory
# comes from `path` (defined near the top of the notebook) so it is set in
# only one place.
ds_2years.to_netcdf(path + "ds_2years.nc")
ds_4years.to_netcdf(path + "ds_4years.nc")

In [None]:
# Concatenate the datasets along ocean_time.
# join="exact" makes xarray raise if the drifter coordinates of the two pieces
# differ, instead of outer-joining them and padding the gaps with NaN.
ds_combined = xr.concat([ds_2years, ds_4years], dim="ocean_time", join="exact")

# Print to verify
print(ds_combined)

In [None]:
# Destination for the combined float dataset, built from the shared `path`
# (defined near the top of the notebook) rather than a repeated absolute path.
output_file = path + "ocean_flt_combined.nc"

# Save the stacked dataset to NetCDF
ds_combined.to_netcdf(output_file)

# We need to remake these based on the time of release, so we may as well do that here

In [19]:
# Build a per-float array of time-step values and pick out the entries for the
# matched floats.
# NOTE(review): presumably these are release time steps (see the heading
# above) - confirm.
new_locs = np.loadtxt('/scratch/mmurakami/WAOM/new_locs.txt', dtype=int)

# Define parameters
size = 40664  # Total number of elements
reset_interval = 104  # After this many indices, reset
start = 11  # Start value
increment = 672  # Step size

# arr[i] = start + (i % reset_interval) * increment, i.e. the sequence
# 11, 683, 1355, ... restarts every `reset_interval` elements.
# Vectorised; produces the same integer array as filling it in a Python loop.
arr = start + (np.arange(size) % reset_interval) * increment

# Print first few sequences to verify
#print(arr[:200])  # Print more to verify the pattern
print(f"Array shape: {arr.shape}")


# now select from new_locs and then from indices_in_a
# NOTE(review): new_locs is used directly as a 0-based index into arr - confirm
# the values in new_locs.txt are 0-based and all smaller than `size`.
times = arr[new_locs][indices_in_a]

Array shape: (40664,)
