# Creating Sentinel Dataset

In [None]:
%pip install sentinelhub

### Sentinel Hub Authentication

In [3]:
from sentinelhub import SHConfig

import os
# Sentinel Hub OAuth credentials and instance id are taken from the environment
# so that no secret is stored in the notebook itself.
CLIENT_ID_SENTINEL = os.environ.get("CLIENT_ID_SENTINEL")
CLIENT_SECRET_SENTINEL = os.environ.get("CLIENT_SECRET_SENTINEL")
INSTANCE_ID_SENTINEL = os.environ.get("INSTANCE_ID_SENTINEL")

# Build the Sentinel Hub configuration used by every request in this notebook.
config = SHConfig(sh_client_id=CLIENT_ID_SENTINEL, sh_client_secret=CLIENT_SECRET_SENTINEL)
config.instance_id = INSTANCE_ID_SENTINEL

# Warn (without aborting) when either half of the OAuth credential pair is missing.
if not (config.sh_client_id and config.sh_client_secret):
    print("Warning! To use Process API, please provide the credentials (OAuth client ID and client secret).")

### Imports

In [5]:
import datetime

import matplotlib.pyplot as plt
import numpy as np

from sentinelhub import (
    CRS,
    BBox,
    DataCollection,
    DownloadRequest,
    MimeType,
    MosaickingOrder,
    SentinelHubDownloadClient,
    SentinelHubRequest,
    bbox_to_dimensions,
)
import json
from shapely.geometry import shape
from shapely.geometry import Polygon, Point, box
import random


from sentinelhub import WebFeatureService, BBox, CRS, DataCollection, SHConfig
from datetime import datetime
from tqdm import tqdm
import sys


In [3]:
# Enable the autoreload extension in mode 2 and render matplotlib figures inline.
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [None]:
# Mount the drive at /content/drive unless that path already exists.
# NOTE(review): `drive` is not imported anywhere in the visible notebook, so this
# raises NameError on a fresh kernel. Presumably `from google.colab import drive`
# is intended -- confirm and add it to the imports cell.
if not os.path.exists("/content/drive"):
    drive.mount("/content/drive")

In [None]:
# Project locations and GeoJSON inputs are read from environment variables;
# each value is None when the corresponding variable is not set.
project_root = os.getenv('PROJECT_ROOT_PATH')
data_save_path = os.getenv('DATA_PATH')
geojson_file_general = os.getenv('GEOJSON_GENERAL')
geojson_file_waterbodies = os.getenv('GEOJSON_WATERBODIES')
geojson_file_boris24 = os.getenv('GEOJSON_BORIS24')
geojson_file_germany21 = os.getenv('GEOJSON_GERMANY21')
geojson_file_germany24 = os.getenv('GEOJSON_GERMANY24')

# Add the project root path to the system path.
# Guarded so that re-running this cell does not add duplicate entries and an
# unset PROJECT_ROOT_PATH does not put None on sys.path.
if project_root and project_root not in sys.path:
    sys.path.append(project_root)

#necessary imports
from scripts.data_preparation.dataset_creation_sentinel import *

### Downloading Image

In [5]:

def get_sentinel_data(bbox, size, data_folder, time_interval=("2023-9-7", "2023-9-7"), data_collection=DataCollection.SENTINEL2_L2A, config=config):
    """
    Build a Sentinel Hub request for four optical bands plus cloud/mask layers.

    The request is only constructed here; nothing is downloaded or written
    until the caller invokes a method such as ``save_data()`` on the result.

    Args:
        bbox: Bounding box passed through to ``SentinelHubRequest``.
        size: Output image size passed through to ``SentinelHubRequest``.
        data_folder: Folder passed as ``data_folder`` to ``SentinelHubRequest``.
        time_interval: ``(from, to)`` pair passed to the request's input data.
        data_collection: Data collection to query (Sentinel-2 L2A by default).
        config: Sentinel Hub configuration. The default is the module-level
            ``config`` object as it existed when this function was defined.

    Returns:
        A ``SentinelHubRequest`` with four TIFF responses: ``optical_bands``
        (B02, B03, B04, B08), ``cld_band``, ``data_mask`` and ``scl_band``.
    """

    # The evalscript declares one output per response identifier used below.
    evalscript_all_bands = """
        //VERSION=3
        function setup() {
            return {
                input: [{
                    bands: ["B02","B03","B04","B08", "CLD", "dataMask", "SCL"],
                    units: ["DN", "DN", "DN", "DN", "DN", "DN", "DN"]
                }],
                output: [
                    {
                        id: "optical_bands",
                        bands: 4,
                        sampleType: "INT16"  // For reflectance bands (B02, B03, B04, B08)
                    },
                    {
                        id: "cld_band",
                        bands: 1,
                        sampleType: "UINT8"    // Cloud probability (0-100%)
                    },
                    {
                        id: "data_mask",
                        bands: 1,
                        sampleType: "UINT8"   // dataMask (0 or 1)
                    },
                    {
                        id: "scl_band",
                        bands: 1,
                        sampleType: "INT8"    // Scene Classification Layer (SCL)
                    }
                ]
            };
        }

        function evaluatePixel(sample) {
            return {
                optical_bands: [sample.B02, sample.B03, sample.B04, sample.B08],
                cld_band: [sample.CLD],
                data_mask: [sample.dataMask], // Will be 1 (valid data) or 0 (no data)
                scl_band: [sample.SCL]
            };
        }
    """

    request_all_bands = SentinelHubRequest(
        data_folder=data_folder,
        evalscript=evalscript_all_bands,
        input_data=[
            SentinelHubRequest.input_data(
                data_collection=data_collection,
                time_interval=time_interval,
            )
        ],
        # One TIFF response per output id declared in the evalscript.
        responses=[
            SentinelHubRequest.output_response("optical_bands", MimeType.TIFF),
            SentinelHubRequest.output_response("cld_band", MimeType.TIFF),
            SentinelHubRequest.output_response("data_mask", MimeType.TIFF),
            SentinelHubRequest.output_response("scl_band", MimeType.TIFF)
        ],
        bbox=bbox,
        size=size,
        config=config,
    )

    return request_all_bands

## Main Sentinel Download Routine

In [None]:
def run(quarter, num_images_per_q, data_collection=DataCollection.SENTINEL2_L2A):
    """
    Download randomly located Sentinel tiles for one period of ``time_dict``.

    Each iteration picks a random AOI polygon read from the general GeoJSON
    file, draws a random bounding box for it with ``generate_random_bbox`` and
    asks ``tile_at_min_cc`` for usable dates inside the period. When no date is
    returned the bounding box is re-drawn up to ``max_retries`` times; an
    iteration that still has no date is skipped. Otherwise one date is chosen
    at random and the request built by ``get_sentinel_data`` is saved.

    Relies on the module-level names ``data_save_path``,
    ``geojson_file_general`` and ``config``.

    Args:
        quarter: Key into ``time_dict`` ('Q1', 'Q2', 'Q3' or 'Q4').
        num_images_per_q: Number of download iterations; skipped iterations
            count towards this number.
        data_collection: Data collection used for both the date search and the
            download request.

    Raises:
        KeyError: If ``quarter`` is not a key of ``time_dict``.
    """
    # Alternative periods used for earlier runs (swap in as needed):
    #
    # time_dict = {'Q1': ('2024-01-01', '2024-03-31'),
    #              'Q2': ('2024-04-01', '2024-06-30'),
    #              'Q3': ('2024-07-01', '2024-09-30'),
    #              'Q4': ('2024-10-01', '2024-12-31')}
    #
    # time_dict = {'Q1': ('2023-01-01', '2023-03-31'),
    #              'Q2': ('2023-04-01', '2023-06-30'),
    #              'Q3': ('2023-07-01', '2023-09-30'),
    #              'Q4': ('2023-10-01', '2023-12-31')}
    #
    # time_dict = {'germany24': ('2024-05-30', '2024-06-08'),
    #              'germany21': ('2021-07-14', '2021-08-05'),
    #              'boris24': ('2024-09-09', '2024-09-30'),
    #              'saarland24': ('2024-05-17', '2024-06-17')}
    time_dict = {
        'Q1': ('2022-01-01', '2022-03-31'),
        'Q2': ('2022-04-01', '2022-06-30'),
        'Q3': ('2022-07-01', '2022-09-30'),
        'Q4': ('2022-10-01', '2022-12-31'),
    }

    if quarter not in time_dict:
        raise KeyError(f"Unknown quarter {quarter!r}; expected one of {sorted(time_dict)}")
    from_date, to_date = time_dict[quarter]

    data_path = os.path.join(data_save_path, 'SENTINEL2')

    # Select the necessary GeoJSON file.
    geojson_file = geojson_file_general
    AOI_Polygons = read_get_aoi_polygons(geojson_file)

    SIZE = (2048, 2048)
    maxcc = 0.5
    max_retries = 20

    # NOTE(review): the folder name hard-codes "2024" although the active
    # time_dict covers 2022 -- confirm which year label is intended.
    data_folder = f'data_{quarter}_2024_raw/'
    # Join with a separator; plain string concatenation would fuse 'SENTINEL2'
    # and the folder name into a single directory name.
    output_folder = os.path.join(data_path, data_folder)

    for _ in tqdm(range(num_images_per_q), desc=f"Downloading images in {data_folder}", unit="iter"):
        # Randomly select one of the polygons.
        AOI_Polygon = random.choice(AOI_Polygons)
        print(AOI_Polygon)

        # One initial attempt plus up to max_retries re-draws of the bounding box.
        random_bbox = None
        list_of_available_times = []
        for _attempt in range(max_retries + 1):
            random_bbox = generate_random_bbox(AOI_Polygon, bbox_size=0.1)
            list_of_available_times = tile_at_min_cc(
                random_bbox,
                from_date,
                to_date,
                data_collection=data_collection,
                cloud_coverage_max=maxcc,
                config=config,
            )
            if len(list_of_available_times) > 0:
                break
            print("searching....", len(list_of_available_times))

        if len(list_of_available_times) == 0:
            print("Warning: No image found after max retries, skipping.")
            continue

        date_of_image = random.choice(list_of_available_times)
        image_to_save = get_sentinel_data(
            random_bbox,
            SIZE,
            output_folder,
            time_interval=(date_of_image, date_of_image),
            data_collection=data_collection,
            config=config,
        )
        image_to_save.save_data()


In [None]:
# random.seed(202401)
# run('Q1',50)

Downloading images in Q1: 100%|██████████| 50/50 [16:04<00:00, 19.29s/iter]


In [None]:
# random.seed(202402)
# run('Q2',50)

Downloading images in Q2: 100%|██████████| 50/50 [13:45<00:00, 16.52s/iter]


In [None]:
# random.seed(202403)
# run('Q3',50)

Downloading images in Q3: 100%|██████████| 50/50 [17:18<00:00, 20.78s/iter]


In [None]:
# random.seed(202404)
# run('Q4',50)

Downloading images in Q4: 100%|██████████| 50/50 [15:19<00:00, 18.39s/iter]


In [None]:
# random.seed(202301)
# run('Q1',50)

Downloading images in Q1: 100%|██████████| 50/50 [13:12<00:00, 15.85s/iter]


In [None]:
# random.seed(202302)
# run('Q2',50)

Downloading images in Q2: 100%|██████████| 50/50 [13:20<00:00, 16.01s/iter]


In [None]:
# random.seed(202303)
# run('Q3',50)

Downloading images in Q3: 100%|██████████| 50/50 [17:55<00:00, 21.52s/iter]


In [None]:
# random.seed(202304)
# run('Q4',50)

Downloading images in Q4: 100%|██████████| 50/50 [13:37<00:00, 16.35s/iter]
