### 1. Preliminaries

#### 1.1 Packages

In [1]:
import os
import numpy as np
import xarray as xr
import geopandas as gp
import pandas as pd

import cmocean

import cartopy
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
cartopy.config['data_dir'] = os.getenv('CARTOPY_DIR', cartopy.config.get('data_dir'))

from parcels import FieldSet, Field, ParticleSet, Variable, JITParticle
from parcels import AdvectionRK4, plotTrajectoriesFile, ErrorCode

import math
from datetime import timedelta as delta
from operator import attrgetter

from matplotlib import pyplot as plt
#%config InlineBackend.figure_format = 'retina'
plt.ion()  # To trigger the interactive inline mode

<matplotlib.pyplot._IonContext at 0x193d1cb3f10>

#### 1.2 Functions

### 2. Data

#### 2.1 Load Simulation Output

In [2]:
output_nc_dist = 'CurrentParticlesDistMultiple.zarr'
parcels_dist = xr.open_dataset(output_nc_dist)
parcels_dist



#### 2.2 Load Centroids of BOATS Grid

In [3]:
df = pd.read_csv('C:\\Users\\sandr\\Documents\\Github\\ThesisSandra\\Analysis\\Movement\\Data\\oceanCellsBOATS.csv', header=None)
df = df.rename(columns={df.columns[0]: 'lon', df.columns[1]: 'lat'}).drop(df.columns[2], axis=1)

In [4]:
gdfBOATS = gp.GeoDataFrame(
    df, geometry=gp.points_from_xy(df.lon,df.lat))
gdfBOATS = gdfBOATS.set_crs('epsg:4326')
gdfBOATS

Unnamed: 0,lon,lat,geometry
0,18.5,-84.5,POINT (18.50000 -84.50000)
1,19.5,-84.5,POINT (19.50000 -84.50000)
2,20.5,-84.5,POINT (20.50000 -84.50000)
3,21.5,-84.5,POINT (21.50000 -84.50000)
4,22.5,-84.5,POINT (22.50000 -84.50000)
...,...,...,...
41023,175.5,88.5,POINT (175.50000 88.50000)
41024,176.5,88.5,POINT (176.50000 88.50000)
41025,177.5,88.5,POINT (177.50000 88.50000)
41026,178.5,88.5,POINT (178.50000 88.50000)


### 3. Spatial Analysis

Instead of using a grid, I will use centroids of grid cells from BOATS and calculate the nearest neighbor of a tracer at a specific time step to the centroid of a grid cell. To this end, I will get the indices of the four directly adjacent cell centroids (to the centroid, at which tracer was originally released) to restrict downstream analysis to only those cells (and not diagonal ones).

Here: Only use subset at first time point to prepare code (write into time loop later).

In [5]:
# subset = parcels_dist.isel(trajectory=np.arange(0,6)) #6 drifters per time point in this example
# dfSubset = subset.to_dataframe().reset_index()
# gdfSubset = gp.GeoDataFrame(
#     dfSubset, geometry=gp.points_from_xy(dfSubset.lon,dfSubset.lat))
# gdfSubset = gdfSubset.set_crs('epsg:4326')
# gdfSubset

I will create two separate lists of lists (outer list: length of tracers (41028), inner list: length of days that the simulation is run per season)). The list for later probabilities contains the cells that the tracers move to based on the nearest neighbor (ADD: only give option of starting cell centroid and 4 directly adjacent cells centroids). Get indices for those first. The crossing time list contains the time difference of when a tracer is released and when it is first detected in a new cell (closest to a new centroid).

In [45]:
IndexList = [None]*6 #np.size(np.unique(gdfSubset.trajectory)) #make this as long as number of grid cells from which tracers are released
CrossTimeList = [None]*6 #np.size(np.unique(gdfSubset.trajectory)) #make this as long as number of grid cells from which tracers are released

counter = 0
for time in np.arange(0,4):
    subset = parcels_dist.isel(trajectory=np.arange(counter, counter + 6)) #6 drifters per time point in this example
    dfSubset = subset.to_dataframe().reset_index()
    gdfSubset = gp.GeoDataFrame(dfSubset, geometry=gp.points_from_xy(dfSubset.lon,dfSubset.lat))
    gdfSubset = gdfSubset.set_crs('epsg:4326')
    
    if time >= 0: 
            gdfSubset.trajectory = gdfSubset.trajectory - counter #to get tracer indices back to 0 (start from beginning)
    
    for i in np.arange(0, np.size(np.unique(gdfSubset.trajectory))):
        CurrentTracer = gdfSubset[gdfSubset.trajectory == i]
        NearNeigh = CurrentTracer.sjoin_nearest(gdfBOATS, distance_col="distances") #get nearest 

        df1 = NearNeigh.drop_duplicates(['trajectory','index_right']) #drops everything where in same cell
        if len(df1) == 1: #means that tracer does not reach another cell in time frame in which tracer is tracked
            MoveToCell = df1.groupby('trajectory', as_index=False).nth(0).reset_index(drop=True).index_right[0]
            crossingTime = 'NaT' #because we will sum over and divide by average crossing time later?

        else:
            MoveFrom = df1.groupby('trajectory', as_index=False).nth(0) #gets start element (to extract time)
            MoveTo = df1.groupby('trajectory', as_index=False).nth(1) #gets second element (when tracer detected in new cell (is closer to different centroid for the first time))

            crossingTime = (MoveTo.reset_index(drop=True).time[0] - MoveFrom.reset_index(drop=True).time[0]).total_seconds() 
            MoveToCell = MoveTo.reset_index(drop=True).index_right[0] #gives index that tracer moves to in gdfBOATS

        if time == 0:
            IndexList[i] = [MoveToCell]
            CrossTimeList[i] = [crossingTime]
        else:
            IndexList[i].append(MoveToCell)
            CrossTimeList[i].append(crossingTime)
    counter = counter + 6



























In [46]:
CrossTimeList

[['NaT', 'NaT', 'NaT', 'NaT'],
 ['NaT', 'NaT', 'NaT', 'NaT'],
 ['NaT', 'NaT', 'NaT', 313200.0],
 ['NaT', 313200.0, 266400.0, 234000.0],
 [234000.0, 252000.0, 244800.0, 234000.0],
 [165600.0, 169200.0, 172800.0, 176400.0]]

In [47]:
IndexList

[[24275, 24275, 24275, 24275],
 [24276, 24276, 24276, 24276],
 [24277, 24277, 24277, 24278],
 [24278, 24279, 24279, 24279],
 [24005, 24005, 24280, 24280],
 [24281, 24281, 24281, 24281]]

In [None]:
# IndexList = [None]*np.size(np.unique(gdfSubset.trajectory))
# CrossTimeList = [None]*np.size(np.unique(gdfSubset.trajectory))

# time = 0

# for i in np.arange(0, 2):#np.size(np.unique(gdfSubset.trajectory))):
#     CurrentTracer = gdfSubset[gdfSubset.trajectory == i]
#     NearNeigh = CurrentTracer.sjoin_nearest(gdfBOATS, distance_col="distances") #get nearest 
    
#     df1 = NearNeigh.drop_duplicates(['trajectory','index_right']) #drops everything where in same cell
#     if len(df1) == 1: #means that tracer does not reach another cell
#         MoveToCell = df1.groupby('trajectory', as_index=False).nth(0)
#         crossingTime = 'NaT' #because we will sum over and divide by average crossing time later?
    
#     else:
#         MoveFrom = df1.groupby('trajectory', as_index=False).nth(0) #gets start element (to extract time)
#         MoveTo = df1.groupby('trajectory', as_index=False).nth(1) #gets second element (when tracer detected in new cell (is closer to different centroid for the first time))
    
#         crossingTime = (MoveTo.reset_index(drop=True).time[0] - MoveFrom.reset_index(drop=True).time[0]).total_seconds() 
#         MoveToCell = MoveTo.reset_index(drop=True).index_right[0] #gives index that tracer moves to in gdfBOATS
        
#     if time == 0:
#         IndexList[i] = [MoveToCell]
#         CrossTimeList[i] = [crossingTime]
#     else:
#         IndexList[i].append(MoveToCell)
#         CrossTimeList[i].append(crossingTime)
    

In [None]:
CrossTimeList