In [1]:
import os
import datetime
import geopandas as gpd
import pandas as pd
import json
import shapely.ops

import sentinelhub
import boto3

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import sys
# Put the parent directory on the import path.
# NOTE(review): presumably needed so the project modules imported in the next cell resolve — confirm.
sys.path.append('..')

In [3]:
import mysecrets
import cdseutils.utils as utils
import cdseutils.mydataclasses as mydataclasses
import cdseutils.constants as constants

In [4]:
# Select the credential profile used for all CDSE calls from the local `mysecrets` module.
# NOTE(review): the profile key is a personal e-mail address hardcoded in the notebook —
# consider reading it from configuration or an environment variable.
cdse_creds = mysecrets.PROFILES['nikhilsasirajan@gmail.com']

In [5]:
# Acquisition window: 1 March of `year` up to 31 January of the following year.
year = 2020

startdate = datetime.datetime(year=year, month=3, day=1)
enddate = datetime.datetime(year=year + 1, month=1, day=31)

In [6]:
# Length of the acquisition window in whole days.
(enddate - startdate).days

336

In [7]:
import s2l1c_via_s3

In [8]:
# Run the Sentinel-2 L1C download for the ROI shapefile over the date window.
# With upper_limit_for_number_of_tiles = 100 the recorded run stopped with a ValueError:
# the query matched 265 image sets (roughly 185.5 GB), which exceeds the limit.
s2l1c_via_s3.download_sentinel2_l1c_tiles(
    cdse_creds = cdse_creds,
    catalog_save_folderpath = '../data/fetch_catalogue_cache',
    root_download_folderpath = '../data/testing_download',
    roi_filepath = '../../ethiopia/data/Ethiopia_Bounding_Boxs/Ethiopia_2021_East_Bounding_Box.shp',
    startdate = startdate,
    enddate = enddate,
    upper_limit_for_number_of_tiles = 100,
)



ValueError: Are you sure you wish to download 265 image sets?
This is roughly 185.5 GB of download. This exceeds upper_limit_for_number_of_tiles=100.
Kindly discuss with your team before you go ahead.

In [None]:
# Load the region-of-interest shapefile (same path as passed to the download call above).
eth_bb_east_gdf = gpd.read_file('../../ethiopia/data/Ethiopia_Bounding_Boxs/Ethiopia_2021_East_Bounding_Box.shp')

In [None]:
# Derive the bounding boxes used for the catalogue queries from the ROI shapes.
bboxes = utils.get_bboxes(shapes_gdf=eth_bb_east_gdf)

In [None]:
# Query the Sentinel-2 L1C catalogue for the ROI bounding boxes over the date window,
# passing a cache folder to the helper. The call yields a geo table (`catalogue_gdf`)
# and the raw result records (`results`), both used by the cells that follow.
catalogue_gdf, results = utils.fetch_catalog(
    sh_creds=cdse_creds.sh_creds,
    collection=sentinelhub.DataCollection.SENTINEL2_L1C,
    startdate=startdate,
    enddate=enddate,
    bboxes=bboxes,
    cache_folderpath = '../data/fetch_catalogue_cache'
)

In [None]:
# File-backed cache for the catalogue query: the geo table is stored as GeoJSON and
# the raw result records as JSON. The cache is used only when BOTH files exist.
catalogue_filepath = '../data/eth_east_bb_catalogue.geojson'
results_filepath = '../data/eth_east_bb_catalogue_all.json'

if os.path.exists(catalogue_filepath) and os.path.exists(results_filepath):
    # Cache hit: reload both artefacts from disk.
    catalogue_gdf = gpd.read_file(catalogue_filepath)
    with open(results_filepath) as h:
        results = json.load(h)
else:
    # Cache miss: query the catalogue, then persist both artefacts for the next run.
    catalogue_gdf, results = utils.fetch_catalog(
        sh_creds=cdse_creds.sh_creds,
        collection=sentinelhub.DataCollection.SENTINEL2_L1C,
        startdate=startdate,
        enddate=enddate,
        bboxes=bboxes,
    )
    catalogue_gdf.to_file(catalogue_filepath)
    with open(results_filepath, 'w') as h:
        json.dump(results, h)

In [None]:
# Echo the query inputs (ROI bounding boxes and the date window) for a quick visual check.
# Uses `bboxes`, the name actually defined above; a bare `bbox` is never assigned in this notebook.
bboxes, startdate, enddate

In [None]:
# Attach the cloud cover reported in each raw catalogue record as a new column.
# NOTE(review): assumes `results` is in the same row order as `catalogue_gdf` — confirm.
catalogue_gdf['cloud_cover'] = [
    record['properties']['eo:cloud_cover']
    for record in results
]

In [None]:
# Inspect the available columns after adding `cloud_cover`.
catalogue_gdf.columns

In [None]:
# Scratch check: a conditional expression inside an f-string; this evaluates to '2'.
f'{1 if False else 2}'

In [None]:
# Display the ROI GeoDataFrame.
eth_bb_east_gdf

In [None]:
# Spatially join the catalogue with the ROI geometry (reprojected to the catalogue CRS,
# sjoin defaults) and list the unique catalogue ids that matched.
gpd.sjoin(catalogue_gdf, eth_bb_east_gdf[['geometry']].to_crs(catalogue_gdf.crs))['id'].unique()

In [None]:
# Number of catalogue entries whose cloud cover exceeds 80.
# NOTE(review): presumably a percentage — confirm the unit of `eo:cloud_cover`.
(catalogue_gdf['cloud_cover'] > 80).sum()

In [None]:
# Distribution of cloud cover across the catalogue, in 100 bins.
catalogue_gdf['cloud_cover'].hist(bins=100)

In [None]:
# Resolve, for every catalogue S3 URL, the S3 paths of all Sentinel-2 bands together
# with the local file paths they map to under the download root.
# NOTE(review): the figures below look like "<worker count> -> wall-clock time" from
# manual timing runs; no such parameter is passed in this call — confirm what they refer to.
# 4 -> 59.9 secs
# 8 -> 30.0 secs
# 16 -> 18.4 secs
# 32 -> 23 secs
# 64 -> 27.7 secs
s3_paths, download_filepaths = utils.get_sentinel2_s3_paths(
    s3_urls = catalogue_gdf['s3url'],
    s3_creds = cdse_creds.s3_creds,
    root_folderpath = '../data/testing_download',
    bands = constants.Bands.Sentinel2.ALL,
)

In [None]:
# Compare the number of S3 paths with the number of local download paths.
len(s3_paths), len(download_filepaths)

In [None]:
import pandas as pd

In [None]:
# Tabulate the resolved S3 objects: bucket and key prefix of each S3 path next to
# the local file path listed at the same position.
s3_paths_df = pd.DataFrame({
    's3_bucket': [path.bucket for path in s3_paths],
    's3_prefix': [path.prefix for path in s3_paths],
    'download_filepath': download_filepaths,
})

In [None]:
# Sanity check: the last six characters of every S3 prefix equal the last six
# characters of the local download path in the same row.
all(s3_paths_df['s3_prefix'].str[-6:] == s3_paths_df['download_filepath'].str[-6:])

In [None]:
# Pick the catalogue row labelled 0 as a working example, keeping only its
# timestamp and S3 URL as a plain dict, then display both values.
selected_row = catalogue_gdf[['timestamp', 's3url']].loc[0].to_dict()
selected_row['timestamp'], selected_row['s3url']

In [None]:
# Number of rows and columns in the catalogue.
catalogue_gdf.shape

In [None]:
# Keep one catalogue row per distinct footprint geometry and export the result as GeoJSON.
# `drop_duplicates().index` yields index *labels*, so the rows are selected with the
# label-based `.loc`; positional `.iloc` only agrees while the index is a default 0..n-1 range.
unique_geometry_labels = catalogue_gdf['geometry'].drop_duplicates().index
catalogue_gdf.loc[unique_geometry_labels].to_file('../data/eth_east_bb_catalogue_s2l1c.geojson')

In [None]:
# Inspect the first raw catalogue record.
results[0]

In [None]:
# Product id of the catalogue row labelled 0.
catalogue_gdf.loc[0, 'id']

In [None]:
# Try the project's id parser on a single product id to see which fields it returns.
utils.sentinel2_id_parser(sentinel2_id = catalogue_gdf.loc[0, 'id'])

In [None]:
def add_sentinel2_id_parsed_cols(row: pd.Series, id_col: str = 'id') -> pd.Series:
    """Return a copy of ``row`` extended with the fields parsed from its Sentinel-2 id.

    Intended for row-wise use with ``DataFrame.apply(..., axis=1)``.

    Parameters
    ----------
    row : pd.Series
        A single catalogue row; must contain the entry named by ``id_col``.
    id_col : str, default 'id'
        Name of the entry holding the Sentinel-2 product id.

    Returns
    -------
    pd.Series
        A new row carrying every original entry plus one entry per key returned by
        ``utils.sentinel2_id_parser``; parsed keys overwrite same-named entries.
    """
    # Work on a copy: pandas does not support functions that mutate the object
    # handed to them by ``apply``, and the caller's row stays untouched.
    row = row.copy()
    parsed_out = utils.sentinel2_id_parser(sentinel2_id=row[id_col])
    for key, val in parsed_out.items():
        row[key] = val
    return row

In [None]:
# Expand every catalogue row with the fields parsed from its Sentinel-2 id.
# NOTE: this rebinds `catalogue_gdf`, so re-running the cell parses the already expanded frame again.
catalogue_gdf = catalogue_gdf.apply(add_sentinel2_id_parsed_cols, axis=1)

In [None]:
# Inspect the columns again, now including the parsed id fields.
catalogue_gdf.columns

In [None]:
# Count catalogue rows per value of `tile_number_field`.
# NOTE(review): this column is presumably one of the fields added by the id-parsing step — confirm.
catalogue_gdf[['tile_number_field']].value_counts()

In [None]:
# S3 URL of the catalogue row labelled 0.
catalogue_gdf.loc[0, 's3url']

In [None]:
# Build a boto3 S3 resource using the endpoint, keys and region stored in the
# S3 credentials of the selected profile.
# session = boto3.session.Session()
s3 = boto3.resource(
    's3',
    endpoint_url = cdse_creds.s3_creds.endpoint_url,
    aws_access_key_id = cdse_creds.s3_creds.s3_access_key,
    aws_secret_access_key = cdse_creds.s3_creds.s3_secret_key,
    region_name = cdse_creds.s3_creds.region_name,
)  # generated secrets

In [None]:
# Scratch: remove the '.SAFE/' suffix text from the URL and take its last '/'-separated component.
selected_row['s3url'].replace('.SAFE/', '').split('/')[-1]

In [None]:
# Break the selected product's name into its underscore-separated fields and pick out
# field 2 (kept as `date_str`) and field 5 (kept as `tile_id`).
s3_key = selected_row['s3url'].replace('s3://EODATA/', '')
product_name = s3_key.split('/')[-2].replace('.SAFE', '')
splits = product_name.split('_')
date_str = splits[2]
tile_id = splits[5]
tile_id, date_str

In [None]:
# Expected JP2 file names for the selected product: one per Sentinel-2 band.
# The band list is read through the `constants` alias imported at the top of the notebook
# (`cdseutils.constants`); the name `cdse_utils` is never imported here.
files_to_download = [
    f'{tile_id}_{date_str}_{band}.jp2' for band in constants.Bands.Sentinel2.ALL
]
files_to_download

In [None]:
# Scratch check of str.endswith; this evaluates to True.
'4123'.endswith('123')

In [None]:
# Display the S3 URL of the selected product.
selected_row['s3url']

In [None]:
# Select the objects in the "eodata" bucket whose key starts with the selected
# product's path (the S3 URL with its 's3://EODATA/' prefix removed).
files = s3.Bucket("eodata").objects.filter(
    Prefix=selected_row['s3url'].replace('s3://EODATA/', '')
)

In [None]:
# Materialise the object listing into a list.
list_of_files = list(files)

In [None]:
# list_of_files

In [None]:
# Resolve the S3 paths and local download paths for the single selected product.
# This rebinds `s3_paths` and `download_filepaths`, replacing the full-catalogue lists built earlier.
# NOTE(review): `get_sentinel2_s3_paths_single_url` is not defined or imported in the visible
# cells, so this raises NameError on a fresh kernel; presumably it now lives in `utils` — confirm.
s3_paths, download_filepaths = get_sentinel2_s3_paths_single_url(
    s3_creds = cdse_creds.s3_creds,
    s3_url = selected_row['s3url'],
    root_folderpath = '../data/testing_download',
    bands = constants.Bands.Sentinel2.ALL,
)

In [None]:
# Display the local paths the selected product's files will be written to.
download_filepaths

In [None]:
# 28 secs, 13 files, 2.23s / it
# Download each listed S3 object to the local path at the same position.
# The helper is called through the `utils` alias imported at the top of the notebook
# (`cdseutils.utils`); the name `cdse_utils` is never imported here.
download_successes = utils.download_s3_files(
    s3_creds = cdse_creds.s3_creds,
    s3_paths = s3_paths,
    download_filepaths = download_filepaths,
)