In [7]:
%matplotlib inline
#%reset
import numpy as np
import pandas as pd
import xarray as xr
import matplotlib
from matplotlib import pyplot as plt, animation
import cartopy
import datetime as dati
import netCDF4 as ntc
from operator import attrgetter
import time as tm
import os
from os import chdir, getcwd
import math
from math import sqrt, pi, cos
import warnings as wr

# Scenario labels; indexed by `sc` when building input/output file names.
scen = np.array([
    "S0", "S1", "S2", "S3", "S4", "S5", "S6",
])
# Month labels; indexed by `mo` when building input/output file names.
mon = np.array([
    "may", "jun", "jul",
    "aug", "sep", "oct",
])

# Rechunk and save data

May, July, August, and October (31 days) should have 6912 trajectories, or 54 psets. June and September (30 days) should have 6656 trajectories, or 52 psets. The number of observations will depend on particle residence time.

In the data, 'trajectory' is particles, and 'obs' (observations) is times relative to the particle - so every particle will have an obs=0 but only a few will have the maximum number of observations. On creation, each chunk in the data is all particles at one observation. The rechunking reorganises into chunks of 24 time records (24 hours) and 128 particles (1 pset).

## All in one go

In [8]:
#%%script echo Skipped!

#traj_n = np.array([6912, 6656, 6912, 6912, 6656, 6912])

# Metres per degree of latitude: 60 nautical miles of 1852 m each.
# The previous factor of 1110 was roughly 100x too small for values that are
# labelled as metres and combined with depth (in m) in dist_3D. Files written
# with the old factor hold under-scaled distances and need regenerating.
M_PER_DEG_LAT = 1852 * 60

# Attributes for the derived distance variables, keyed by variable name.
# NOTE(review): these use the key 'unit' while z/lat/lon use 'units' — left
# unchanged here because downstream readers may rely on the existing key.
DIST_ATTRS = {
    'lat_dist': {'long_name': 'Distance travelled in latitudinal direction', 'unit': 'm', 'standard_name': 'lat_dist'},
    'lon_dist': {'long_name': 'Distance travelled in longitudinal direction', 'unit': 'm', 'standard_name': 'lon_dist'},
    'dep_dist': {'long_name': 'Distance travelled in vertical direction', 'unit': 'm', 'standard_name': 'dep_dist'},
    'dist_2D': {'long_name': 'Horizontal distance travelled', 'unit': 'm', 'standard_name': 'dist_2D'},
    'dist_3D': {'long_name': 'Horizontal and vertical distance travelled', 'unit': 'm', 'standard_name': 'dist_3D'},
}


def _step_size(values):
    """Absolute change between consecutive observations of each particle.

    `values` is a 2-D array indexed (trajectory, obs). The result has the same
    shape; the first observation of every particle gets a step of zero.
    """
    steps = np.absolute(np.diff(values, axis=1))
    first_obs = np.zeros(shape = (np.shape(values)[0], 1))
    return np.concatenate([first_obs, steps], axis = 1)


# Global xarray option. It does not depend on the loop variables, so it is set
# once up front; this also makes the first iteration run under the same
# setting as the later ones.
xr.set_options(keep_attrs = True)

path = "D:\\OceanParcels\\"

for mo in range(6):
    for sc in [0,1,2]:
        zarr2 =  path + "outputs2\\" + scen[sc] + "_" + mon[mo] + ".zarr"
        nc2 = path + "outputs2\\" + scen[sc] + "_" + mon[mo] + "_set2.nc"
        print("Starting " + zarr2)

        # Open new runfile and pull it fully into memory
        ds = xr.open_zarr(zarr2, decode_times = False).load()
        ds.close()
        ds = ds.drop_vars('trajectory', errors = 'ignore')

        # Number of particles; only needed by the (disabled) pset check below
        traj1 = ds.sizes['trajectory']

        #if traj1 != traj_n[mo]:
        #    print("Not enough psets!")
        #    break

        lats = ds.lat.values
        lons = ds.lon.values
        deps = ds.z.values

        # Per-observation displacement in each direction, in metres.
        # Longitude steps shrink with the cosine of the current latitude.
        lat_dist = _step_size(lats) * M_PER_DEG_LAT
        lon_dist = _step_size(lons) * M_PER_DEG_LAT * np.cos(lats * pi/180)
        dep_dist = _step_size(deps)
        dist_2D = np.sqrt(np.power(lat_dist, 2) + np.power(lon_dist, 2))
        dist_3D = np.sqrt(np.power(lat_dist, 2) + np.power(lon_dist, 2) + np.power(dep_dist, 2))

        ds_dist = xr.Dataset(data_vars = {'lat_dist': (['trajectory','obs'], lat_dist),
                                          'lon_dist': (['trajectory','obs'], lon_dist),
                                          'dep_dist': (['trajectory','obs'], dep_dist),
                                          'dist_2D': (['trajectory','obs'], dist_2D),
                                          'dist_3D': (['trajectory','obs'], dist_3D)})

        ds = ds.merge(ds_dist)

        ds.obs.attrs = {'long_name': 'Nth observation of particle', 'standard_name': 'observation'}
        ds.z.attrs = {'long_name': 'Position in water column', 'units': 'm', 'positive': 'down', 'standard_name': 'depth'}
        ds.lat.attrs = {'long_name': 'Degrees latitude', 'units': 'degrees_north', 'standard_name': 'latitude'}
        ds.lon.attrs = {'long_name': 'Degrees longitude', 'units': 'degrees_east', 'standard_name': 'longitude'}
        ds.time.attrs = {'long_name': 'Fieldset time', 'standard_name': 'time'}

        # Attach the distance attributes to the merged dataset, which is the
        # one written to disk. Setting them on ds_dist after the merge only
        # works if merge happens to share variable objects with its input.
        for name, attrs in DIST_ATTRS.items():
            ds[name].attrs = dict(attrs)

        ds_rechunked = ds.chunk({"trajectory": 64, "obs": 24})
        ds_rechunked.to_netcdf(nc2)

        # Free the in-memory copies before the next (large) file is loaded
        del ds, ds_dist, ds_rechunked

# ds_rechunked

Starting D:\OceanParcels\outputs2\S0_may.zarr
Starting D:\OceanParcels\outputs2\S1_may.zarr
Starting D:\OceanParcels\outputs2\S2_may.zarr
Starting D:\OceanParcels\outputs2\S0_jun.zarr
Starting D:\OceanParcels\outputs2\S1_jun.zarr
Starting D:\OceanParcels\outputs2\S2_jun.zarr
Starting D:\OceanParcels\outputs2\S0_jul.zarr
Starting D:\OceanParcels\outputs2\S1_jul.zarr
Starting D:\OceanParcels\outputs2\S2_jul.zarr
Starting D:\OceanParcels\outputs2\S0_aug.zarr
Starting D:\OceanParcels\outputs2\S1_aug.zarr
Starting D:\OceanParcels\outputs2\S2_aug.zarr
Starting D:\OceanParcels\outputs2\S0_sep.zarr
Starting D:\OceanParcels\outputs2\S1_sep.zarr
Starting D:\OceanParcels\outputs2\S2_sep.zarr
Starting D:\OceanParcels\outputs2\S0_oct.zarr
Starting D:\OceanParcels\outputs2\S1_oct.zarr
Starting D:\OceanParcels\outputs2\S2_oct.zarr


## Remove duplicates from first set

In [5]:
# Thin the first set of runs: keep every second trajectory of each original
# NetCDF file, rechunk, and write the result next to the second set.
path = "D:\\OceanParcels\\"

for mo, month in enumerate(mon):
    for sc, scenario in enumerate(scen):
        nc1 = path + "outputs\\" + scenario + "_" + month + ".nc"
        nc2 = path + "outputs2\\" + scenario + "_" + month + "_set1.nc"

        # Open original runfile, load it into memory and drop the
        # 'trajectory' variable if it is present
        ds1 = xr.open_dataset(nc1, decode_times = False).load().drop_vars('trajectory', errors = 'ignore')
        ds1.close()

        # Remove duplicates: positions 0, 2, 4, ... along 'trajectory'
        n_traj = ds1.sizes['trajectory']
        keep = np.arange(0, n_traj, step = 2)
        ds1 = ds1.sel(trajectory = keep)

        ds_rechunked = ds1.chunk({"trajectory": 64, "obs": 24})
        ds_rechunked.to_netcdf(nc2)

        del ds1, ds_rechunked
        # Merge the two together? (does this work??)
        #ds_rechunked = ds_rechunked.merge(ds1)

## Separated

In [143]:
%%script echo Skipped!

# Month / scenario to process in the step-by-step cells below
# (indices into `mon` and `scen`).
mo = 0
sc = 3

# For working on home computer
path = "D:\\OceanParcels\\outputs\\"

scen = np.array(["S0", "S1", "S2", "S3", "S4", "S5", "S6"])
mon = np.array(["may", "jun", "jul", "aug", "sep", "oct"])

# Input zarr store lives in a per-scenario backup folder; the NetCDF output
# goes directly into `path`.
zarr_dir = path + "backup zarr files (spujb only)\\" + scen[sc] + " zarrs\\"
zarrname = zarr_dir + "atten_" + mon[mo] + "_" + scen[sc] + ".zarr"
ncname = path + scen[sc] + "_" + mon[mo] + ".nc"
print(zarrname)

Skipped!


In [144]:
%%script echo Skipped!

# Open the zarr store named by `zarrname` and load it into memory, then
# close the dataset handle.
# NOTE(review): unlike the batch loop, decode_times is left at its default
# here — confirm that is intended.
ds = xr.open_zarr(zarrname).load()
ds.close()
ds # just checking it looks ok

Skipped!


In [145]:
%%script echo Skipped!

# Metres per degree of latitude: 60 nautical miles of 1852 m each.
# The previous factor of 1110 was roughly 100x too small for values that are
# labelled as metres and combined with depth (in m) in dist_3D. Files written
# with the old factor hold under-scaled distances and need regenerating.
M_PER_DEG_LAT = 1852 * 60

lats = ds.lat.values
lons = ds.lon.values
deps = ds.z.values

nparts = np.shape(lats)[0]

# Absolute step between consecutive observations of each particle; a zero
# column is prepended so the result keeps the (trajectory, obs) shape.
lat_dist = np.absolute(np.diff(lats, axis=1))
lat_dist = np.concatenate([np.zeros(shape = (nparts, 1)), lat_dist], axis = 1) * M_PER_DEG_LAT # in m
lon_dist = np.absolute(np.diff(lons, axis=1))
# Longitude steps shrink with the cosine of the current latitude
lon_dist = np.concatenate([np.zeros(shape = (nparts, 1)), lon_dist], axis = 1) * M_PER_DEG_LAT * np.cos(lats * pi/180)
dep_dist = np.absolute(np.diff(deps, axis=1))
dep_dist = np.concatenate([np.zeros(shape = (nparts, 1)), dep_dist], axis = 1)
dist_2D = np.sqrt(np.power(lat_dist, 2) + np.power(lon_dist, 2))
dist_3D = np.sqrt(np.power(lat_dist, 2) + np.power(lon_dist, 2) + np.power(dep_dist, 2))

ds_dist = xr.Dataset(data_vars = {'lat_dist': (['trajectory','obs'], lat_dist),
                                  'lon_dist': (['trajectory','obs'], lon_dist),
                                  'dep_dist': (['trajectory','obs'], dep_dist), 
                                  'dist_2D': (['trajectory','obs'], dist_2D),
                                  'dist_3D': (['trajectory','obs'], dist_3D)})

# errors='ignore' matches the batch loop and keeps this cell from raising if
# 'trajectory' has already been dropped (e.g. when the cell is re-run).
ds = ds.merge(ds_dist).drop_vars('trajectory', errors = 'ignore')
ds

Skipped!


In [146]:
%%script echo Skipped!

# Descriptive attributes for every variable that ends up in the output file.
ds.obs.attrs = {'long_name': 'Nth observation of particle', 'standard_name': 'observation'}
# ds.trajectory.attrs = {'long_name': 'Particle ID', 'standard_name': 'particle'}
ds.z.attrs = {'long_name': 'Position in water column', 'units': 'm', 'positive': 'down', 'standard_name': 'depth'}
ds.lat.attrs = {'long_name': 'Degrees latitude', 'units': 'degrees_north', 'standard_name': 'latitude'}
ds.lon.attrs = {'long_name': 'Degrees longitude', 'units': 'degrees_east', 'standard_name': 'longitude'}
ds.time.attrs = {'long_name': 'Fieldset time', 'standard_name': 'time'}
# The distance variables were merged into `ds`, and `ds` is what gets
# rechunked and written, so the attributes are set on `ds` directly.
# Setting them on `ds_dist` only reaches the file if merge shared the
# underlying variable objects.
# NOTE(review): key is 'unit' here but 'units' above — left unchanged because
# downstream readers may rely on the existing key.
ds.lat_dist.attrs = {'long_name': 'Distance travelled in latitudinal direction', 'unit': 'm', 'standard_name': 'lat_dist'}
ds.lon_dist.attrs = {'long_name': 'Distance travelled in longitudinal direction', 'unit': 'm', 'standard_name': 'lon_dist'}
ds.dep_dist.attrs = {'long_name': 'Distance travelled in vertical direction', 'unit': 'm', 'standard_name': 'dep_dist'}
ds.dist_2D.attrs = {'long_name': 'Horizontal distance travelled', 'unit': 'm', 'standard_name': 'dist_2D'}
ds.dist_3D.attrs = {'long_name': 'Horizontal and vertical distance travelled', 'unit': 'm', 'standard_name': 'dist_3D'}
xr.set_options(keep_attrs = True)

Skipped!


In [147]:
%%script echo Skipped!

# Rechunk to blocks of 128 along 'trajectory' and 24 along 'obs', then show
# the resulting chunk layout and the dataset itself.
# NOTE(review): the batch loops use 64 along 'trajectory' — confirm which
# size is intended.
ds_rechunked = ds.chunk({"trajectory": 128, "obs": 24})
display(ds_rechunked.chunks)
ds_rechunked

Skipped!


In [148]:
%%script echo Skipped!

# Write the rechunked dataset to `ncname`, then drop both references so the
# kernel no longer holds the loaded data under these names.
ds_rechunked.to_netcdf(ncname)
del ds, ds_rechunked

Skipped!
