# Create the BernCrop dataset



In [None]:
# load required modules
import cv2
import eodal
import os
import numpy as np
import h5py

from pathlib import Path
from eodal.core.sensors import Sentinel2
import geopandas as gpd
from shapely.geometry import Polygon
from eodal.config import get_settings

# Enlarge the default matplotlib figure size for the plots in this notebook.
import matplotlib.pyplot as plt
plt.rcParams['figure.figsize'] = [15, 15]

print(f'eodal version: {eodal.__version__}')

# Tell EOdal to work with a local data source instead of STAC.
settings = get_settings()
settings.USE_STAC = False


# Shapefile locations used by the cells below.
# Bounding-box polygon written by this notebook:
output_shapefile_path_BERN = "../raw_data/LANDKULT/data/BERN_big_bbox.shp"
# Input LANDKULT layer:
shapefile_path_landkult = '../raw_data/LANDKULT/data/LANDKULT_NUTZFL.shp'
# LANDKULT layer clipped to the bounding box (full result and 100-feature subset):
output_shapefile_path_landkult = '../raw_data/LANDKULT/data/LANDKULT_NUTZFL_bern_bbox.shp'
output_shapefile_path_landkult_short = '../raw_data/LANDKULT/data/LANDKULT_NUTZFL_short_bern_bbox.shp'
# Absolute path handed to EOdal as the area of interest:
output_shapefile_path_landkult_short_eodal = 'D:/Temp/AgroLuege/raw_data/LANDKULT/data/LANDKULT_NUTZFL_short_bern_bbox.shp'

In [None]:
# Bounding-box coordinates in EPSG:32632.
# The four literals are ordered (x_min, x_max, y_min, y_max): the first pair
# differs by exactly 55200 and the second pair by exactly 98400. They must
# therefore be unpacked as x1, x2, y1, y2 -- unpacking them as x1, y1, x2, y2
# mixes eastings with northings and yields a far larger, wrong rectangle.
x1, x2, y1, y2 = 361630.678100406, 416830.678100406, 5140066.039024595, 5238466.039024595

# Create a GeoDataFrame holding the bounding box as a single rectangular Polygon
geometry = Polygon([(x1, y1), (x2, y1), (x2, y2), (x1, y2)])
gdf = gpd.GeoDataFrame(geometry=[geometry], crs="EPSG:32632")

# Save the GeoDataFrame to a shapefile
gdf.to_file(output_shapefile_path_BERN)

# Read both shapefiles and make sure they share the same CRS
gdf1 = gpd.read_file(shapefile_path_landkult).to_crs('EPSG:32632')
gdf2 = gpd.read_file(output_shapefile_path_BERN).to_crs('EPSG:32632')

# Keep only the features with BEITRAG == 1 and clip them to the bounding box
gdf1_only_beitrag = gdf1[gdf1['BEITRAG'] == 1]
intersection_gdf_onlybeitrag = gpd.overlay(gdf1_only_beitrag, gdf2, how='intersection')

# Write the full clipped layer and a subset made of its first 100 features
intersection_gdf_onlybeitrag.to_file(output_shapefile_path_landkult, driver='ESRI Shapefile')
intersection_gdf_onlybeitrag[0:100].to_file(output_shapefile_path_landkult_short, driver='ESRI Shapefile')

In [None]:
# Sort the layer once per direction instead of re-sorting it on every loop
# iteration; the slices taken below are the same as before.
by_area_desc = intersection_gdf_onlybeitrag.sort_values('Shape_Area', ascending=False)
by_area_asc = intersection_gdf_onlybeitrag.sort_values('Shape_Area', ascending=True)

# Largest features: map and area histogram of the top-i entries
for i in [100,150,200,300,500,1000]:
    by_area_desc[0:i].plot()
    plt.show()
    by_area_desc['Shape_Area'][0:i].hist()
    plt.show()

# Smallest features: area histogram of the bottom-i entries
for i in [100,150,200,300,500,1000]:
    by_area_asc['Shape_Area'][0:i].hist()
    plt.show()

# The sorted copies are only needed for the plots above
del by_area_desc, by_area_asc



In [None]:
# Drop the GeoDataFrames created above; the cells shown below do not use them.
del gdf, gdf1, gdf2, gdf1_only_beitrag, intersection_gdf_onlybeitrag

In [None]:
def get_tile_folder_path(data_dir):
    """Return the ``.SAFE`` directories located directly inside *data_dir*.

    Only the immediate children of *data_dir* are inspected (no recursion).
    Regular files are ignored even if their name ends with ``.SAFE``.

    Args:
        data_dir: Directory to scan (``str`` or path-like).

    Returns:
        list[str]: ``data_dir`` joined with the name of every matching
        sub-directory, sorted by name so that repeated runs process the
        scenes in the same order.
    """
    # os.listdir yields entries in arbitrary order, so sort for reproducibility.
    candidates = (
        os.path.join(data_dir, entry) for entry in sorted(os.listdir(data_dir))
    )
    return [
        candidate
        for candidate in candidates
        if candidate.endswith('.SAFE') and os.path.isdir(candidate)
    ]

def read_tile_data_from_safe(tile_paths,tile,band_selection=None):
    """Read every ``.SAFE`` scene in *tile_paths* and append it to the tile's HDF5 file.

    Each scene is read with ``Sentinel2.from_safe`` for the area of interest
    given by ``output_shapefile_path_landkult_short_eodal``, resampled to a
    target resolution of 10, converted to a NumPy array and passed to
    ``save_tile_data``. Scenes reported as black-filled are skipped.

    Args:
        tile_paths: Iterable of paths to ``.SAFE`` directories.
        tile: Tile name; forwarded to ``save_tile_data``.
        band_selection: Band names to read. Defaults to
            ``['B02', 'B03', 'B04', 'B08']`` when ``None``.

    Returns:
        None. The data is written to disk as a side effect.
    """
    # Build the default per call instead of sharing one mutable list
    # between calls.
    if band_selection is None:
        band_selection = ['B02', 'B03', 'B04', 'B08']

    # NOTE(review): "__file__" is a string literal, so base_dir is derived from
    # the current working directory, not from this file's location. When the
    # AOI path is absolute (as the 'D:/...' value is on Windows), joinpath
    # discards base_dir and returns the AOI path unchanged.
    base_dir = Path(os.path.dirname(os.path.realpath("__file__"))).parent.parent
    in_file_aoi = base_dir.joinpath(output_shapefile_path_landkult_short_eodal)

    for path_SAFE in tile_paths:
        print(path_SAFE)
        # Read data from the .SAFE dataset for the selected AOI and spectral bands
        handler = Sentinel2.from_safe(
            in_dir=Path(path_SAFE),
            vector_features=in_file_aoi,
            band_selection=band_selection,
            apply_scaling=False  # if True scales the reflectance values between 0 and 1
        )
        # Skip scenes that are black-filled.
        # `== True` is kept as-is because the type of `is_blackfilled` is not
        # visible here -- TODO confirm it is a plain bool before simplifying.
        if handler.is_blackfilled == True:
            print(f"Skip is blackfilled: {path_SAFE}")
            continue

        # Resample the bands to the target resolution using nearest-neighbour
        # interpolation (cv2.INTER_NEAREST_EXACT), not bicubic.
        handler.resample(
            target_resolution=10,
            interpolation_method=cv2.INTER_NEAREST_EXACT,
            inplace=True
        )

        # Keep the first four layers of the array (this is meant to drop a
        # trailing extra band -- presumably one added by EOdal; TODO confirm).
        # Wrapping in a list gives the saved array a leading axis of length 1.
        timestamp_tile_data = [handler.to_xarray().to_numpy()[0:4]]
        save_tile_data(timestamp_tile_data, tile)

def save_tile_data(temp_results_tensor, tile,dataset_data_name="data"):
    """Append a batch of arrays to a resizable dataset in the tile's HDF5 file.

    The file ``../raw_data/BernCrop/tiles/<tile>.hdf5`` is opened in append
    mode. If the dataset does not exist yet it is created as ``float32`` with
    an unlimited first axis; the batch is then appended along that axis.

    Args:
        temp_results_tensor: Array-like whose first axis indexes the samples
            to append; converted with ``np.array``.
        tile: Tile name used to build the HDF5 file name.
        dataset_data_name: Name of the dataset inside the HDF5 file.

    Returns:
        None.
    """
    file_name_tile = f'../raw_data/BernCrop/tiles/{tile}.hdf5'
    # Opening the file fails when the target folder is missing, so create it.
    Path(file_name_tile).parent.mkdir(parents=True, exist_ok=True)

    # Use a dedicated name for the data instead of rebinding the `tile` argument.
    batch = np.array(temp_results_tensor)
    sample_shape = batch.shape[1:]

    with h5py.File(file_name_tile, 'a') as hf:
        if dataset_data_name in hf:
            dataset = hf[dataset_data_name]
        else:
            # Start empty; maxshape=None on axis 0 keeps the dataset resizable.
            dataset = hf.create_dataset(
                dataset_data_name,
                shape=(0,) + sample_shape,
                dtype="float32",
                maxshape=(None,) + sample_shape,
            )

        current_size = dataset.shape[0]
        new_size = current_size + batch.shape[0]
        # Grow the dataset along axis 0, then write the batch into the new rows
        dataset.resize(new_size, axis=0)
        dataset[current_size:new_size, :] = batch


def read_tile_data(tile):
    """Load the complete ``"data"`` dataset of a tile's HDF5 file into memory.

    Args:
        tile: Tile name; the file read is
            ``../raw_data/BernCrop/tiles/<tile>.hdf5``.

    Returns:
        The contents of the ``"data"`` dataset as a NumPy array.

    Raises:
        KeyError: If the file contains no ``"data"`` dataset. Previously this
            case printed a message and then failed with ``UnboundLocalError``
            on the return statement.
    """
    filename_tile = f'../raw_data/BernCrop/tiles/{tile}.hdf5'
    with h5py.File(filename_tile, "r") as file:
        if "data" not in file:
            raise KeyError(
                f"Dataset 'data' not found in the HDF5 file: {filename_tile}"
            )
        # [:] copies the dataset into a NumPy array before the file is closed
        return file["data"][:]


In [None]:
# Read the scenes of each tile and save them to HDF5 files.
tiles = ['T32TLS','T32TLT','T32TMS','T32TMT']
# Only the first tile is processed here; loop over `tiles` to process all of them.
for tile in tiles[:1]:
    data_dir = Path('E:/S2_Data_CH22/') / tile
    tile_paths = get_tile_folder_path(data_dir)
    read_tile_data_from_safe(tile_paths, tile)

In [None]:
# Load the stored array of the first tile back from its HDF5 file
tile1 = read_tile_data(tiles[0])

In [None]:
# Display the dimensions of the loaded array
tile1.shape

In [None]:
# import numpy as np

# # Assuming your original array has shape (3, 4, 3888, 4626)
# original_array = np.random.random((3, 4, 3888, 4626))

# # Choose the value of x
# x =np.floor((3888 * 4626) / (24*24)).astype(int)  # Replace with your desired value
# new_shape = (x, 3, 4, 24, 24)

# resized_array = original_array.reshape(new_shape)
# print(resized_array.shape)

In [None]:
# #TODO: labels
# dataset_label_name = "gt"
# #TODO: define label_tensor
# label_shape = result_tensor[:,-1,:,:,-1].shape
# label_tensor = result_tensor[:,-1,:,:,-1]

# with h5py.File(file_name_bern, 'a') as hf:    
#     # Check if the dataset already exists
#     if dataset_label_name in hf:
#         dataset = hf[dataset_label_name]
#     else:
#         dtype = "float32"  # Use the appropriate data type for your data
#         dataset = hf.create_dataset(dataset_label_name, shape=(0,) + label_shape[1:], dtype=dtype, maxshape=(None,) + label_shape[1:])
        
#     current_size = dataset.shape[0]
#     new_size = current_size + label_tensor.shape[0]
#     # Resize the dataset to accommodate the new batch
#     dataset.resize(new_size, axis=0)
#     # Append the new batch to the dataset
#     dataset[current_size:new_size, :] = label_tensor

In [None]:
# Load the complete "data" dataset of the HDF5 file named by `file_name_bern`.
# NOTE(review): `file_name_bern` is not assigned in any of the cells shown
# here -- it has to be defined before this cell runs (TODO confirm).
with h5py.File(file_name_bern, "r") as file:
    if "data" not in file:
        # Nothing is loaded in this case, so `dataset_b` is not assigned.
        print("Dataset 'data' not found in the HDF5 file.")
    else:
        # [:] reads the dataset contents into a NumPy array
        dataset_b = file["data"][:]
