In [1]:
%load_ext autoreload
%autoreload 2

import sys
sys.path.append("../")

In [9]:
import urllib
from pathlib import Path

import requests
from bs4 import BeautifulSoup
import numpy as np
import pandas as pd
from scipy.ndimage import binary_dilation
import rasterio
import xarray as xa
import rioxarray as rxr
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from cartopy.mpl.gridliner import LONGITUDE_FORMATTER, LATITUDE_FORMATTER
import geopandas as gpd

from coralshift.utils import file_ops, directories
from coralshift.dataloading import bathymetry

## Ensure the necessary data are present; download them if not

In [10]:
# 30m GBR bathymetry (can visualise ETOPO later if necessary)
gbr_30_dir = directories.get_gbr_bathymetry_data_dir()
# NOTE(review): the download helper apparently does not check properly for files that already
# exist, so running this cell may start a fresh download of the ~1 GB tile — TODO confirm and
# fix inside `bathymetry.download_30m_gbr_bathymetry`.
bathymetry.download_30m_gbr_bathymetry(download_dest_dir=gbr_30_dir)





0.00s - make the debugger miss breakpoints. Please pass -Xfrozen_modules=off
0.00s - to python to disable frozen modules.
0.00s - Note: Debugging will proceed. Set PYDEVD_DISABLE_FILE_VALIDATION=1 to disable this validation.
Great_Barrier_Reef_A_2020_30m_MSL_cog.tif:   2%|▏         | 24.1M/989M [00:12<08:10, 1.97MB/s]   


KeyboardInterrupt: 

In [None]:
# Read band 1 of the GBR "A" tile from the project's bathymetry data directory.
# The path is built from `gbr_30_dir` (the destination of the download cell) instead of a
# user-specific absolute path, so the notebook is not tied to one machine.
# NOTE(review): assumes the download helper writes the tile directly into `gbr_30_dir` under
# this file name — confirm against `bathymetry.download_30m_gbr_bathymetry`.
gbr_a_tif_path = Path(gbr_30_dir) / "Great_Barrier_Reef_A_2020_30m_MSL_cog.tif"

# `src` is deliberately left open: later cells read `src.crs` and build a DataArray from it.
src = rasterio.open(gbr_a_tif_path)
gbr_a_data = src.read(1)

In [None]:
# Collect the "tif" file paths in the GBR 30m folder and open them into `dict_out`
# (a later cell indexes `dict_out` by tif file name).
# NOTE(review): this hard-coded, user-specific absolute path ties the cell to one machine;
# consider deriving it from `gbr_30_dir` — TODO confirm the two point at the same folder.
# NOTE(review): `open_tifs_to_dict` is neither defined nor imported in the visible notebook —
# TODO confirm where it lives and import it explicitly.
files = file_ops.return_list_filepaths("/Users/orlandotimmerman/Library/CloudStorage/OneDrive-UniversityofCambridge/cambridge/mres/mres_project/coralshift/coralshift/datasets/bathymetry/GBR_30m", "tif")
dict_out = open_tifs_to_dict(files)

In [None]:
gbr_a = dict_out['Great_Barrier_Reef_A_2020_30m_MSL_cog.tif']

In [None]:
gbr_a

In [None]:
from coralshift.processing import data

In [None]:
type(gbr_a_coarse.coords)

In [None]:
display_xa_array(gbr_a)

In [None]:
gbr_a_coarse.plot(cmap='gist_earth', vmin=min_val, vmax=max_val)

In [None]:
gbr_a_coarse[0, -50:, -50:].plot(cmap='gist_earth', vmin=min_val, vmax=max_val)

In [None]:
gbr_a[0, -5000:, -5000:].plot(cmap='gist_earth', vmin=min_val, vmax=max_val)

In [None]:
gbr_a.plot()

In [None]:
fig = plt.figure(figsize=(12, 6), dpi=300)
ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())

ax.add_feature(cfeature.LAND.with_scale("10m"))
ax.add_feature(cfeature.OCEAN.with_scale("10m"))

gbr_a.plot()


In [None]:
out['Great_Barrier_Reef_A_2020_30m_MSL_cog.tif'].bounds

In [None]:
gbr_a_coarse.coords

In [None]:
upsample_xarray(gbr_a, {"x": 10, "y": 10})

In [None]:



def align_tifs_to_worldmap(tifs_dict: dict) -> gpd.GeoDataFrame:
    """Combine the bounding boxes of several rasters into a single GeoDataFrame.

    Parameters
    ----------
    tifs_dict (dict): Mapping of tif name to raster object. Each value is passed to tif_to_gdf(); the keys are
        not used.

    Returns
    -------
    gpd.GeoDataFrame: One row per raster holding its bounding-box geometry, in the CRS of the frames returned
        by tif_to_gdf().

    Raises
    ------
    ValueError: If tifs_dict is empty.
    """
    if not tifs_dict:
        raise ValueError("tifs_dict is empty: no rasters to align")

    # Iterate over the dict *values* (iterating the dict itself only yields its keys) and convert each
    # raster with tif_to_gdf() -- calling this function again here would recurse without end.
    gdf_list = [tif_to_gdf(tif_array) for tif_array in tifs_dict.values()]

    # Every frame comes from tif_to_gdf(), so the first frame's CRS is used for the combined result.
    return gpd.GeoDataFrame(pd.concat(gdf_list, ignore_index=True), crs=gdf_list[0].crs)


def tif_to_gdf(xa_array) -> gpd.GeoDataFrame:
    """Return the bounding box of a raster as a single-row GeoDataFrame in Web Mercator (EPSG:3857).

    Parameters
    ----------
    xa_array: Raster object exposing a ``bounds`` attribute that unpacks as (xmin, ymin, xmax, ymax) and a
        ``crs`` attribute. NOTE(review): presumably an opened rasterio dataset -- confirm against callers.

    Returns
    -------
    gpd.GeoDataFrame: A single row whose geometry is the raster's bounding rectangle, reprojected to EPSG:3857.
    """
    # Extent of the raster in its native CRS
    xmin, ymin, xmax, ymax = xa_array.bounds

    # geopandas has no `box` helper, so the rectangle is built from WKT (closed, counter-clockwise ring).
    bbox_wkt = (
        f"POLYGON (({xmin} {ymin}, {xmax} {ymin}, {xmax} {ymax}, {xmin} {ymax}, {xmin} {ymin}))"
    )
    bbox_geometry = gpd.GeoSeries.from_wkt([bbox_wkt], crs=xa_array.crs)
    bbox_gdf = gpd.GeoDataFrame(geometry=bbox_geometry)

    # Reproject the GeoDataFrame to Web Mercator
    return bbox_gdf.to_crs(epsg=3857)


# def display_gdf_on_worldmap(gdf: gpd.GeoDataFrame) -> None:
    

# function to return pixel values closest to the shoreline
def return_pixels_closest_to_value(
    array: np.ndarray,
    central_value: float,
    tolerance: float = .5,
    buffer_pixels: int = 10,
    bathymetry_only: bool = True
) -> np.ndarray:
    """Returns a 1D array of the non-zero pixel values lying within a pixel buffer zone around every pixel whose
    value is within a given tolerance of a specified central value.

    Parameters
    ----------
    array (np.ndarray): The input array of pixel values.
    central_value (float): The central value to which the pixels should be compared.
    tolerance (float, optional): The absolute tolerance within which a pixel is considered to be "close" to the
        central value. Defaults to 0.5.
    buffer_pixels (int, optional): The size of the buffer zone around the close pixels, as a number of dilation
        iterations. Whole-number floats are accepted; 0 means no buffer. Defaults to 10.
    bathymetry_only (bool, optional): Whether to only consider bathymetric data, i.e., values less than zero.
        Defaults to True.

    Returns
    -------
    np.ndarray: A 1D array of the non-zero values of all pixels inside the buffer zone (restricted to negative
        values when bathymetry_only is True).

    Raises
    ------
    ValueError: If buffer_pixels is negative.
    """
    n_iterations = int(round(buffer_pixels))
    if n_iterations < 0:
        raise ValueError(f"buffer_pixels must be non-negative, got {buffer_pixels}")

    array = np.asarray(array)
    # honour the caller's tolerance (previously a hard-coded 0.5 was used regardless of the argument)
    close_mask = np.isclose(array, central_value, atol=tolerance)

    # morphological dilation operation. scipy treats iterations < 1 as "repeat until nothing changes", which
    # would grow the mask over the whole array, so a zero-pixel buffer skips the dilation instead.
    if n_iterations > 0:
        buffer_mask = binary_dilation(close_mask, iterations=n_iterations)
    else:
        buffer_mask = close_mask

    array_vals = array[buffer_mask]
    # if specifying only bathymetric data
    if bathymetry_only:
        array_vals = array_vals[array_vals < 0]

    # return only non-zero values as 1d array
    return array_vals[np.nonzero(array_vals)]


def return_distance_closest_to_value(
    array: np.ndarray,
    central_value: float,
    tolerance: float = .5,
    buffer_distance: float = 300,
    distance_per_pixel: float = 30,
    bathymetry_only: bool = True,
) -> np.ndarray:
    """Wrapper for return_pixels_closest_to_value() allowing specification by distance from thresholded values rather
    than number of pixels.

    Returns a 1D array of the pixel values lying within a distance buffer zone around every pixel whose value is
    within a given tolerance of a specified central value.

    Parameters
    ----------
    array (np.ndarray): The input array of pixel values.
    central_value (float): The central value to which the pixels should be compared.
    tolerance (float, optional): The tolerance within which the pixels are considered to be "close" to the central
        value. Defaults to 0.5.
    buffer_distance (float, optional): The size of the buffer zone around the pixels, in the same units as
        distance_per_pixel. Defaults to 300.
    distance_per_pixel (float, optional): The distance covered by one pixel, used to convert buffer_distance
        into a pixel count. Defaults to 30.
    bathymetry_only (bool, optional): Whether to only consider bathymetric data, i.e., values less than zero.
        Defaults to True.

    Returns
    -------
    np.ndarray: The 1D array returned by return_pixels_closest_to_value() for the equivalent pixel buffer.

    Raises
    ------
    ValueError: If distance_per_pixel is not positive.
    """
    if distance_per_pixel <= 0:
        raise ValueError(f"distance_per_pixel must be positive, got {distance_per_pixel}")

    # The pixel buffer is used as an iteration count downstream, so it must be a whole number: plain
    # division yields a float (300 / 30 == 10.0), hence the rounding to the nearest pixel.
    buffer_pixels = int(round(buffer_distance / distance_per_pixel))
    return return_pixels_closest_to_value(array, central_value, tolerance, buffer_pixels, bathymetry_only)

In [None]:
# plt.imshow(gbr_a_data[0:10000, 0:10000])
plt.imshow(gbr_a_data)
plt.show()

In [None]:
binary = np.isclose(data_array[0], 0, atol=0.5)

In [None]:
sum(sum(binary))

In [None]:
from scipy.ndimage import binary_dilation

# Perform a morphological dilation operation
buffer_size = 10  # Define the buffer size
# NOTE(review): `struct_elem` is built but never passed to binary_dilation() below, so the dilation
# runs with no explicit `structure` argument, repeated `buffer_size` times — confirm which was intended.
struct_elem = np.ones((buffer_size, buffer_size))  # Define the structuring element
dilated = binary_dilation(binary, iterations=buffer_size)

In [None]:
plt.figure(figsize = (15,10))
plt.imshow(dilated)

In [None]:
plt.figure(figsize = (15,10))
plt.imshow(shoreline)

In [None]:
out = return_pixels_closest_to_value(data_array[0].values, 0, buffer_pixels=1)

In [None]:
out
shallow_out = out[out > -100]

In [None]:
plt.hist(out,100);

In [None]:
# function to plot histogram of values

fig, ax = plt.subplots()
xa.plot.hist(data_array, ax=ax, bins=100)
ax.set_xlabel("depth")
ax.set_ylabel("counts")
ax.set_title("Histogram of DEM counts for selected area")

In [None]:
src.crs

In [None]:
# xarray's own `open_rasterio` has been removed from current xarray releases; rioxarray (already
# imported as `rxr`) provides the maintained replacement and accepts the open rasterio dataset.
data_array = rxr.open_rasterio(src)

In [None]:
data_array[0]
# rename coordinate and value fields

In [None]:
new_name_dict = {'y': 'latitude', 'x': 'longitude'}

data_array = data_array.rename(new_name_dict)

In [None]:
data_array[0, 0:5000, 0:5000]

In [None]:
## module import error
# The GDAL Python bindings are packaged under `osgeo`; a bare `import gdal` raises the import
# error noted above on current GDAL releases.
from osgeo import gdal
# ds = gdal.Open('/Users/orlandotimmerman/Library/CloudStorage/OneDrive-UniversityofCambridge/cambridge/mres/mres_project/coralshift/datasets/bathymetry/GBR_30m/Great_Barrier_Reef_A_2020_30m_MSL_cog.tif')
# channel = np.array(ds.GetRasterBand(1).ReadAsArray())

In [None]:
data_array[0, :1000, :1000].plot(x='longitude', y='latitude', figsize=(6,4))

In [None]:
# df = data_array[0].to_dataframe(name='asdf').reset_index()
# gdf = gpd.GeoDataFrame(df.value_column, geometry=gpd.points_from_xy(df.y,df.x))


In [None]:
# `plot` is a method of the GeoDataFrame, not a standalone object.
# NOTE(review): `gdf` is only assigned in commented-out code in the visible notebook — confirm it
# is defined before running this cell.
gdf.plot()

In [None]:
bbox_gdf

In [None]:
# `plot` is a method of the GeoDataFrame, not a standalone object.
# NOTE(review): `bbox_gdf` is only assigned inside tif_to_gdf() in the visible notebook — confirm
# a notebook-level `bbox_gdf` exists before running this cell.
bbox_gdf.plot()

In [None]:
# Create plot – TODO: update with custom bounds
fig, ax = plt.subplots(figsize=(10, 10))

# Plot the raster on the GeoDataFrame extent
# NOTE(review): the imports cell only has `import rasterio`, so the `rasterio.plot` submodule may
# not be loaded here — confirm, and import it explicitly if needed.
# NOTE(review): `bbox_gdf` is only assigned inside tif_to_gdf() in the visible notebook, and it is
# a GeoDataFrame rather than raster data — confirm what is meant to be shown.
rasterio.plot.show(bbox_gdf, ax=ax)

In [None]:
# function to line tif files up with world map



In [None]:
nc_dir = '/Users/orlandotimmerman/Library/CloudStorage/OneDrive-UniversityofCambridge/cambridge/mres/mres_project/coralshift/datasets/bathymetry/ETOPO22'
name = 'ETOPO_2022_v1_15s_N00E000_geoid.nc'

Path(nc_dir, name)

In [None]:
def merge_nc_files(nc_dir: Path | str, file_names: list[str]) -> xa.Dataset:
    """Open several netCDF files from one directory as a single multi-file dataset.

    Parameters
    ----------
    nc_dir (Path | str): Directory containing the netCDF files.
    file_names (list[str]): Names of the files inside nc_dir to open together.

    Returns
    -------
    xa.Dataset: The dataset returned by xa.open_mfdataset() for the listed files.
    """
    nc_paths = []
    for file_name in file_names:
        # join directory and file name into a full path
        nc_paths.append(Path(nc_dir, file_name))

    return xa.open_mfdataset(nc_paths)

In [None]:
file_names = ["ETOPO_2022_v1_15s_N00E000_geoid.nc", "ETOPO_2022_v1_15s_N00E015_geoid.nc", "ETOPO_2022_v1_15s_N00E030_geoid.nc"]
# bathy_xa = xa.open_dataset('/Users/orlandotimmerman/Library/CloudStorage/OneDrive-UniversityofCambridge/cambridge/mres/mres_project/coralshift/datasets/bathymetry/ETOPO22/ETOPO_2022_v1_15s_N00E000_geoid.nc')
out = merge_nc_files(nc_dir, file_names)

In [None]:
fig = plt.figure(figsize=(12, 6), dpi=300)
ax = fig.add_subplot(1, 1, 1, projection=ccrs.PlateCarree())
# Add a global map background
ax.stock_img()

out['z'].plot(ax=ax, x='lon', y='lat')

## Webscraping data: really not a priority

In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from bs4 import BeautifulSoup
import time

# Set up the Selenium driver with Chrome.
# Selenium 4 takes the chromedriver location through a Service object; passing the path as a
# positional argument is no longer supported.
driver = webdriver.Chrome(service=Service('/path/to/chromedriver'))

try:
    # Navigate to the webpage with the download button
    driver.get('https://example.com/download-page')

    # Wait for the page to fully load
    time.sleep(5)

    # Find the download button using its text or other identifying feature.
    # The `find_element_by_*` helpers were removed in Selenium 4; use find_element(By.<strategy>, ...).
    download_button = driver.find_element(By.XPATH, '//button[text()="Download"]')

    # Click the button to trigger the download link generation
    download_button.click()

    # Wait for the download link to be generated
    time.sleep(5)

    # Get the page source with the download link
    page_source = driver.page_source
finally:
    # Always shut the browser down, even if a step above fails, so no Chrome process is leaked
    driver.quit()

# Parse the page source with BeautifulSoup to extract the download link
soup = BeautifulSoup(page_source, 'html.parser')
download_link_tag = soup.find('a', {'class': 'download-link'})
if download_link_tag is None:
    # soup.find() returns None when nothing matches; fail with a clear message instead of a TypeError
    raise RuntimeError("No <a class='download-link'> element found in the page source")
download_link = download_link_tag['href']

# Download the file using the extracted download link
# ... (your code to download the file)

In [None]:
# rich progress bar I couldn't get working
# import urllib.request
# from rich.progress import Progress, BarColumn, DownloadColumn, TransferSpeedColumn, TimeRemainingColumn


# class DownloadProgressBar:
#     def __init__(self, unit='B'):
#         self.progress = Progress(
#             "{task.description}",
#             BarColumn(),
#             DownloadColumn(),
#             TransferSpeedColumn(),
#             TimeRemainingColumn(),
#         )
#         self.unit = unit

#     def __enter__(self):
#         self.task_id = self.progress.add_task("", start=False)
#         self.progress.start()
#         return self

#     def __exit__(self, *exc_info):
#         self.progress.stop()

#     def update_to(self, b=1, bsize=1, tsize=None):
#         if tsize is not None:
#             # Convert the total size to the specified unit
#             total_size = tsize / self.unit_size
#             self.progress.update(self.task_id, total=total_size)
#         self.progress.update(self.task_id, advance=b * bsize / self.unit_size)

#     @property
#     def unit_size(self):
#         # Return the size of one unit in bytes
#         if self.unit == 'B':
#             return 1
#         elif self.unit == 'KB':
#             return 1024
#         elif self.unit == 'MB':
#             return 1024 * 1024
#         elif self.unit == 'GB':
#             return 1024 * 1024 * 1024
#         else:
#             raise ValueError(f"Invalid unit: {self.unit}")

# def download_url(url, output_path, progress_units: str = 'MB'):
#     print("\n")
#     with DownloadProgressBar(progress_units) as t:
#         urllib.request.urlretrieve(url, filename=output_path, reporthook=t.update_to)
