# Creating the initial position file
 #### The following code read the pourpoints extracted from Arc gis in a way that.so in each watershed we have one point. 
 Then using Pylag create initial position file it will create a release zone for each pour point/groupid, in the end each group_id will contain sets of release zone 
 6/3/2024

#### Part 1-1: import required libraries

In [1]:
import os
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, box
import matplotlib.pyplot as plt
from pylag.processing.coordinate import get_epsg_code, utm_from_lonlat, lonlat_from_utm
from pylag.processing.coordinate import utm_from_lonlat, lonlat_from_utm
from pylag.processing.release_zone import create_release_zones_along_cord
from pylag.processing.plot import create_figure, colourmap
from pylag.processing.plot import FVCOMPlotter
from pylag.processing.release_zone import create_release_zone
from pylag.processing.input import create_initial_positions_file_multi_group
from pylag.processing.input import create_initial_positions_file_single_group

#### Part1-2: read the initial position file and rename the columns

In [2]:
# define the path to the input file
data_dir ='/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron/input/initial_position'
init_file = os.path.join(data_dir, 'WatershedPourPoints_LH_copy_multigroup.dat')

cwd = os.getcwd()
# Create run directory
simulation_dir = '/home/abolmaal/data/FVCOME_OUTPUT/Simulations/Huron'.format(cwd)
try:
    os.makedirs(simulation_dir)
except FileExistsError:
    pass

df = pd.read_csv(init_file, skiprows=0,sep=' ',header=None)

df.rename(columns={0:"group_id", 1:"lon",2:"lat",3:"depth"}, inplace=True)

In [3]:
df['lon'] = df['lon']-360
df

Unnamed: 0,group_id,lon,lat,depth
0,0,-84.665841,45.745061,0.0
1,1,-84.828741,45.749943,0.0
2,2,-84.486036,45.663177,0.0
3,3,-84.234358,45.637350,0.0
4,4,-84.464815,45.655921,0.0
...,...,...,...,...
127,128,-83.425968,45.061532,0.0
128,129,-84.078374,45.487312,0.0
129,130,-83.396562,43.818410,0.0
130,135,-82.406943,43.030980,0.0


#### Insure that all the points are placed inside lake boundary
update the function to check if the point is within the lake's boundary polygon rather than just near it. You can use the contains method of the polygon to determine if the point is inside the lake.

 I want to keep the FVCOME nodes that are only have overlap with Lake_huron boundary and Erase the nodes that are noT inside lake huron and save a new FVCOME node with the name of Lake_HUron_node.shp

In [4]:
# Load Lake Huron shapefile
lake_huron_boundary  = gpd.read_file('/mnt/d/Users/abolmaal/Arcgis/NASAOceanProject/GIS_layer/Basins/hydro_p_LakeHuron/hydro_p_LakeHuron.shp')
# load FVCOM nodes that extracted from the FVCOM model
fvcom_nodes = gpd.read_file('/mnt/d/Users/abolmaal/Arcgis/NASAOceanProject/GIS_layer/Basins/FVCOME/fvcomenodes.shp')


In [None]:
# Ensure both shapefiles are in the same CRS
if fvcom_nodes.crs != lake_huron_boundary.crs:
    lake_huron_boundary = lake_huron_boundary.to_crs(fvcom_nodes.crs)


In [None]:
# Perform a spatial join to keep only the nodes that intersect with Lake Huron boundary
lake_huron_nodes = lake_huron_boundary.sjoin(fvcom_nodes, how='inner', predicate='intersects')

In [None]:
# Drop any unnecessary columns added by the spatial join
lake_huron_nodes = lake_huron_nodes.drop(columns=['index_right'])

In [None]:
# remove the node that are not in the CANADA
lake_huron_nodes = lake_huron_nodes[lake_huron_nodes['COUNTRY'] != 'CAN']

In [None]:
# plot the nodes
fig, ax = plt.subplots(figsize=(10, 10))
lake_huron_boundary.plot(ax=ax, color='blue', edgecolor='black',label = 'Lake Huron Boundary')
lake_huron_nodes.plot(ax=ax, color='red', edgecolor='red', label = 'FVCOM Nodes')
plt.show()


In [None]:
# save the lake_huron_nodes to a shapefile with output name of lake_huron_nodes.shp in the same directory as the input shapefile
lake_huron_nodes.to_file('/mnt/d/Users/abolmaal/Arcgis/NASAOceanProject/GIS_layer/Basins/lake_huron_fvcomenodes.shp')

In [5]:
# read lake_huron_nodes shapefile
lake_huron_nodes_fvcome = gpd.read_file('/mnt/d/Users/abolmaal/Arcgis/NASAOceanProject/GIS_layer/Basins/lake_huron_fvcomenodes.shp')

I have this geodatabase lake_huron_nodes_fvcome that has geometry and CRS now I want to update the following code in a way add a condition to CReate_release zone for that point that have overlap or intersect with lake_huron_nodes_fvcome 

In [6]:
lake_huron_nodes_fvcome['centroid'] = lake_huron_nodes_fvcome.centroid

# Extract the longitude (x) and latitude (y) from the centroid
lake_huron_nodes_fvcome['lon'] = lake_huron_nodes_fvcome['centroid'].x
lake_huron_nodes_fvcome['lat'] = lake_huron_nodes_fvcome['centroid'].y

# Optionally, you can drop the 'centroid' column if it's no longer needed
# lake_huron_nodes_fvcome = lake_huron_nodes_fvcome.drop(columns=['centroid'])

# Display the updated GeoDataFrame
print(lake_huron_nodes_fvcome.tail())


  lake_huron_nodes_fvcome['centroid'] = lake_huron_nodes_fvcome.centroid


       HYDRO_P_  UIDENT  TYPE COUNTRY      NAMEEN                NAMESP  \
20059      3261  552202    16     USA  Lake Huron  Lake Huron/Lac Huron   
20060      3261  552202    16     USA  Lake Huron  Lake Huron/Lac Huron   
20061      3261  552202    16     USA  Lake Huron  Lake Huron/Lac Huron   
20062      3261  552202    16     USA  Lake Huron  Lake Huron/Lac Huron   
20063      3261  552202    16     USA  Lake Huron  Lake Huron/Lac Huron   

          NAMEFR  InstanceID  OID_1    x    y  \
20059  Lac Huron       51741  51740  0.0  0.0   
20060  Lac Huron       51488  51487  0.0  0.0   
20061  Lac Huron       51613  51612  0.0  0.0   
20062  Lac Huron       51617  51616  0.0  0.0   
20063  Lac Huron       51618  51617  0.0  0.0   

                                                geometry  \
20059  POLYGON ((-84.11286 46.32809, -84.10651 46.321...   
20060  POLYGON ((-84.11286 46.32809, -84.10651 46.321...   
20061  POLYGON ((-84.11286 46.32809, -84.10651 46.321...   
20062  POLYGON

In [7]:
# Convert the lat/lon points in df to a GeoDataFrame
# Assuming you have a DataFrame 'df' with 'lon' and 'lat' columns
df['geometry'] = df.apply(lambda row: Point(row['lon'], row['lat']), axis=1)
points_gdf = gpd.GeoDataFrame(df, geometry='geometry', crs='EPSG:4326')

# Ensure lake_huron_nodes_fvcome has the same CRS
if lake_huron_nodes_fvcome.crs != 'EPSG:4326':
    lake_huron_nodes_fvcome = lake_huron_nodes_fvcome.to_crs('EPSG:4326')

# Create a spatial index for lake_huron_nodes_fvcome to speed up intersection checks
lake_huron_nodes_fvcome_sindex = lake_huron_nodes_fvcome.sindex


In [9]:
# Release zone parameters
radius = 0.01
n_particles_target = 100
depth_below_surface = 0.0
release_zones = []


# Function to filter points that intersect with lake_huron_nodes_fvcome
def filter_intersecting_points(points_gdf, lake_huron_nodes_fvcome):
    # Perform the spatial query using the actual geometries (not bounds)
    possible_matches_index = points_gdf.sindex.query(lake_huron_nodes_fvcome.geometry, predicate="intersects")
    
    # Perform a spatial join to retain only intersecting points
    intersecting_points = gpd.sjoin(points_gdf.iloc[possible_matches_index[0]], lake_huron_nodes_fvcome, how='inner', predicate='intersects')
    
    return intersecting_points

# Process points in chunks if needed
chunk_size = 1000  # Adjust chunk size based on available memory
release_zones = []

for i in range(0, len(points_gdf), chunk_size):
    chunk = points_gdf.iloc[i:i + chunk_size]
    intersecting_points = filter_intersecting_points(chunk, lake_huron_nodes_fvcome)
    
    # Loop through the filtered intersecting points
    for index, row in intersecting_points.iterrows():
        group_id = row['group_id']
        lon = row['geometry'].x
        lat = row['geometry'].y
        
        # Create the release zone only for points that intersect the boundary
        surface_release_zone = create_release_zone(group_id=group_id,
                                                   radius=radius,
                                                   centre=[lon, lat],
                                                   n_particles=n_particles_target,
                                                   depth=depth_below_surface,
                                                   random=False)
        
        # Accumulate the release zones
        release_zones.append(surface_release_zone)

# Create input sub-directory
input_dir = os.path.join(simulation_dir, 'input/initial_position')
os.makedirs(input_dir, exist_ok=True)

# Output filename
file_name = os.path.join(input_dir, 'WatershedPourPoints_LH_multigroup_4.dat')

# Write data to file
create_initial_positions_file_multi_group(file_name, release_zones)

print(f"Saved release zones to {file_name}")

IndexError: positional indexers are out-of-bounds

#### Part2: Create a release zone and save it 

In [None]:
# Release zone  
radius = 0.01
# target number of particles
n_particles_target = 100

# Release depths
depth_below_surface = 0.0
# list to accumulate release zones
release_zones = []

# Loop through the rows of the DataFrame
for index, row in df.iterrows():
    group_id = row['group_id']
    lat = row['lat']
    lon = row['lon']
    
    # Create the release zone
    surface_release_zone = create_release_zone(group_id = group_id,
                                           radius = radius,
                                           centre = [lon, lat],
                                           n_particles = n_particles_target,
                                           depth = depth_below_surface,
                                           random = False)
    
    # accumulate the release zones
    release_zones.append(surface_release_zone)
    
    
 # Create input sub-directory
input_dir = os.path.join(simulation_dir, 'input/initial_position')
os.makedirs(input_dir, exist_ok=True)

    # Output filename
file_name = os.path.join(input_dir, 'WatershedPourPoints_LH_multigroup_3.dat')
# Write data to file
create_initial_positions_file_multi_group(file_name, release_zones)


# Solve an error related to PyLag

My Lat and lon number has more than 10 digits and when I run it through Pylag it gives me this error:ValueError: invalid literal for int() with base 10: '0.0' so to avoid this I want to read Lat and Lon and round them by 10


In [None]:
# read file_name using pandas
from decimal import Decimal
df = pd.read_csv(file_name, skiprows=1,sep=' ',header=None)
df.rename(columns={0:"group_id", 1:"lon",2:"lat",3:"depth"}, inplace=True)
# conver lon to lon + 360
df['lon'] = df['lon'] + 360


In [None]:
df

In [None]:
# if df[lon] and df[lat] are greater than 10 digits round them to 10 digits
df['lon'] = df['lon'].round(10)
df['lat'] = df['lat'].round(10)
df['group_id'] = df['group_id'].astype(int)
# remove the header and write the data to the file name called WatershedPourPoints_LH_multigroup_rounded_inisidelake.dat


In [None]:
# save the pandas dataframe to the file name called WatershedPourPoints_LH_multigroup_rounded.dat
file_name_new = os.path.join(input_dir, 'WatershedPourPoints_LH_multigroup_rounded_insidelake.dat')
df.to_csv(file_name_new, sep=' ', index=False, header=False)