#  From here

In [1]:
import os
import sys
import pandas as pd
import requests
import json
import datetime
from tqdm import tqdm
from dotenv import load_dotenv

In [5]:
# Write the area of interest to map.geojson as a GeoJSON FeatureCollection
# holding one rectangular Polygon feature. The single ring has five vertices
# and ends on the vertex it starts from.
with open('map.geojson', 'w') as geojson_file:
    json.dump(
        {
            "type": "FeatureCollection",
            "features": [
                {
                    "type": "Feature",
                    "properties": {},
                    "geometry": {
                        "type": "Polygon",
                        "coordinates": [
                            [
                                [12.449250846912747, 45.07385221453862],
                                [12.449250846912747, 44.803084676583666],
                                [12.888595060560533, 44.803084676583666],
                                [12.888595060560533, 45.07385221453862],
                                [12.449250846912747, 45.07385221453862],
                            ]
                        ],
                    },
                }
            ],
        },
        geojson_file,
    )

In [None]:

# 1 Required satellite data (catalogue collection name)
query_satellite = 'SENTINEL-2'

# 2 String to be included in filename for retrieval, either product type,
# e.g. SLC, product level, e.g. L2A, L1C, or block code, e.g. RVQ
query_contains = 'L1C'

# 3 Enter a start and end date (YYYY-MM-DD). The query matches products whose
# ContentDate/Start lies strictly between start 00:00Z and end 00:00Z, so
# products sensed on the end date itself are not returned.
query_startDate = '2019-07-01'
query_endDate = '2019-07-07'

# 4 Load geo.json polygon of area of interest:
map_geojson = './map.geojson'

# 5 Optional value for the OData $expand option (the query builder mentions
# Attributes, Assets and Locations). Leave empty to omit the option.
# This name is read by the search cell, so it must be defined here.
query_expand = ''

# 6 Load your credentials from .env
# The .env file must define the two keys read below, for example:
#   CDSE_email=<your account e-mail>
#   CDSE_password=<your account password>
load_dotenv()
username = os.getenv("CDSE_email")
password = os.getenv("CDSE_password")
# The download loop passes the credentials under these names.
CDSE_email = username
CDSE_password = password
if not CDSE_email or not CDSE_password:
    # Searching works without credentials; only the download step needs them,
    # so warn instead of stopping the notebook here.
    print("Warning: CDSE_email / CDSE_password not found in the environment or .env file; "
          "downloads will fail until they are set.")

# 7 Set output directory for the downloaded archives:
output_dir = '../data/SAFE'


In [3]:
def get_access_token(username: str, password: str) -> str:
    """Request an access token from the CDSE identity service.

    Posts a password-grant request for the ``cdse-public`` client and returns
    the ``access_token`` field of the JSON reply.

    Args:
        username: Account user name (e-mail) sent as ``username``.
        password: Account password sent as ``password``.

    Returns:
        The access token string.

    Raises:
        Exception: If the request cannot be sent or the server answers with
            an error status. The original error is chained as ``__cause__``.
    """
    data = {
        "client_id": "cdse-public",
        "username": username,
        "password": password,
        "grant_type": "password",
    }
    # Stays None when the request never produced a response (DNS failure,
    # refused connection, timeout), so the handler below can tell the cases apart.
    response = None
    try:
        response = requests.post(
            "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
            data=data,
            timeout=60,  # do not hang forever on an unresponsive server
        )
        response.raise_for_status()
    except Exception as e:
        if response is None:
            raise Exception(
                f"Access token creation failed. No response from the server: {e}"
            ) from e
        try:
            detail = response.json()
        except ValueError:
            # Error pages are not always JSON; fall back to the raw body.
            detail = response.text
        raise Exception(
            f"Access token creation failed. Response from the server was: {detail}"
        ) from e
    print("Access token created successfully!")
    return response.json()["access_token"]

In [None]:

# Coordinates must be given in EPSG 4326, starting and ending at the same point

def get_coordinates(geojson):
    """Return the outer ring of the first feature as a WKT-style coordinate list.

    Args:
        geojson: Path to a GeoJSON file whose first feature has a geometry
            with nested ``coordinates`` (as a Polygon does).

    Returns:
        A string of the form ``"x1 y1, x2 y2, ..."`` built from the first two
        values of every vertex in ``coordinates[0]``; empty if the ring is empty.
    """
    # Read as UTF-8 rather than with the platform's locale encoding.
    with open(geojson, 'r', encoding='utf-8') as f:
        geojson_data = json.load(f)
    ring = geojson_data['features'][0]['geometry']['coordinates'][0]
    # Only the first two values of each vertex are used; any third value is dropped.
    return ', '.join(str(vertex[0]) + ' ' + str(vertex[1]) for vertex in ring)


def get_https_request(satellite, contains, start_date, end_date, geojson, expand, top=100):
    """Build the OData catalogue query URL for a product search.

    The filter combines the collection name, a substring of the product name,
    intersection with the polygon read from ``geojson``, and a
    ``ContentDate/Start`` window.

    Args:
        satellite: Collection name, e.g. ``'SENTINEL-2'``.
        contains: Substring the product name must contain, e.g. ``'L1C'``.
        start_date: ``YYYY-MM-DD``; products must start after 00:00Z of this day.
        end_date: ``YYYY-MM-DD``; products must start before 00:00Z of this
            day, so the end date itself is excluded.
        geojson: Path to the GeoJSON file passed to ``get_coordinates``.
        expand: Value for the ``$expand`` option (Attributes, Assets or
            Locations); a falsy value omits the option.
        top: Maximum number of results to return. The service default is 20
            and the maximum allowed is 1000. Defaults to 100.

    Returns:
        The complete request URL as a string (not percent-encoded).

    Raises:
        ValueError: If ``top`` is outside 1..1000.
    """
    top = int(top)
    if not 1 <= top <= 1000:
        raise ValueError(f"top must be between 1 and 1000, got {top}")

    base_prefix = "https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter="
    roi_coordinates = get_coordinates(geojson)
    filter_clauses = [
        "Collection/Name eq '" + satellite + "' and contains(Name,'" + contains + "')",
        # The trailing space is kept so the URL stays byte-identical to
        # the one this function has always produced.
        "OData.CSC.Intersects(area=geography'SRID=4326;POLYGON((" + roi_coordinates + "))') ",
        "ContentDate/Start gt " + start_date + "T00:00:00.000Z and "
        + "ContentDate/Start lt " + end_date + "T00:00:00.000Z",
    ]
    # Final search URL: filter clauses first, then the query options.
    https_request = base_prefix + " and ".join(filter_clauses) + "&$top=" + str(top)
    if expand:
        # $expand returns the full metadata of each result; Assets lists
        # additional assets such as quicklooks, Locations lists the available
        # product forms and download locations.
        https_request += "&$expand=" + expand
    return https_request


# Download one product
def download_data(token, id, name, length, output):
    """Stream one product from the CDSE download service into a local file.

    Args:
        token: Access token sent as a ``Bearer`` authorization header.
        id: Product id inserted into the ``Products(<id>)/$value`` URL.
        name: Display name used in log lines and as the progress-bar label.
        length: Expected size in bytes, used as the progress-bar total.
            ``None`` is allowed; the file is still downloaded and the bar
            simply has no known total.
        output: Path of the file to write.

    Returns:
        ``True`` if the download completed, ``False`` if any exception was
        caught. Failures are logged, not raised, so that a caller looping
        over several products can continue with the next one.
    """
    def _log(message):
        # Timestamped log line in the format "[ HH:MM:SS ] message".
        print('[', datetime.datetime.strftime(datetime.datetime.now(), '%H:%M:%S'), '] ' + message)

    url = f"https://download.dataspace.copernicus.eu/odata/v1/Products({id})/$value"
    headers = {"Authorization": f"Bearer {token}"}
    try:
        _log('开始下载: ' + name)
        # Context managers release the session, the streamed response, the
        # file and the progress bar even when the transfer is interrupted.
        with requests.Session() as session:
            session.headers.update(headers)
            with session.get(url, headers=headers, stream=True) as response:
                # Fail before touching the output file, so an HTTP error body
                # is never saved as if it were the product.
                response.raise_for_status()
                with open(output, "wb") as file:
                    # total sets the overall length of the progress bar
                    with tqdm(total=length, unit="B", unit_scale=True, desc=name) as pbar:
                        for chunk in response.iter_content(chunk_size=8192):
                            if chunk:
                                file.write(chunk)
                                # advance the progress bar
                                pbar.update(len(chunk))
        _log('下载成功: ' + name)
        return True
    except Exception as e:
        _log('下载失败: ' + name)
        print(f"发生了一个异常: {e}")
        return False


# Derive the local .zip archive name for a product
def get_file_name(name):
    """Return the ``.zip`` file name under which a product is saved.

    The product-name suffix to replace depends on the module-level
    ``query_satellite`` setting.

    Args:
        name: Product name as returned by the catalogue,
            e.g. ``'S2A_..._.SAFE'``.

    Returns:
        ``name`` with the collection's suffix replaced by ``.zip``. For a
        collection that is not in the table, the last extension of ``name``
        is replaced instead, so the result is never empty for a non-empty
        name (an empty name would make the output path the directory itself).
    """
    product_suffixes = {
        'SENTINEL-1': ".SAFE",
        'SENTINEL-2': ".SAFE",
        'SENTINEL-3': ".SEN3",
        'SENTINEL-5P': ".nc",
        'SENTINEL-6': ".SEN6",
    }
    suffix = product_suffixes.get(query_satellite)
    if suffix is None:
        # Unknown collection: fall back to swapping whatever extension is there.
        return os.path.splitext(name)[0] + ".zip"
    return name.replace(suffix, ".zip")


# Run the catalogue search
request_url = get_https_request(
    query_satellite, query_contains, query_startDate, query_endDate, map_geojson, query_expand
)
JSON = requests.get(request_url).json()
if 'detail' in JSON:
    # A 'detail' key in the reply is treated as a query error: show its
    # message and stop the cell.
    print(JSON['detail']['message'])
    sys.exit()
elif 'value' in JSON:
    df = pd.DataFrame.from_dict(JSON['value'])
    # print(df.columns)
    if len(df) == 0:
        print('未查询到数据')
        sys.exit()
    # Product ids of the search results
    data_id_list = df.Id
    # Product names of the search results
    data_name_list = df.Name
    # Product sizes in bytes
    date_content_length = df.ContentLength
else:
    print('存在未知查询错误')
    sys.exit()

# Create the output directory once, before the loop; exist_ok avoids the
# check-then-create race of testing for the directory first.
os.makedirs(output_dir, exist_ok=True)

# Walk the three result columns in lockstep instead of indexing by position.
for data_id, product_name, data_length in zip(data_id_list, data_name_list, date_content_length):
    print(product_name)
    data_name = get_file_name(product_name)
    output_file = os.path.join(output_dir, data_name)
    # Skip products that are already on disk with the expected size; a
    # partial file from an interrupted run has a different size and is
    # downloaded again.
    if os.path.exists(output_file) and os.path.getsize(output_file) == data_length:
        print(output_file + ' 已经存在，跳过下载')
    else:
        # A new token is requested for every file
        # (presumably because tokens are short-lived — TODO confirm).
        access_token = get_access_token(CDSE_email, CDSE_password)
        download_data(access_token, data_id, data_name, data_length, output_file)

S2A_MSIL1C_20190703T101031_N0500_R022_T32TQR_20230717T022229.SAFE
Access token created successfully!
[ 19:07:43 ] 开始下载: S2A_MSIL1C_20190703T101031_N0500_R022_T32TQR_20230717T022229.zip


S2A_MSIL1C_20190703T101031_N0500_R022_T32TQR_20230717T022229.zip:   7%|▋         | 60.7M/825M [00:08<01:46, 7.21MB/s]

KeyboardInterrupt: 

S2A_MSIL1C_20190703T101031_N0500_R022_T32TQR_20230717T022229.zip:   7%|▋         | 60.7M/825M [00:20<01:46, 7.21MB/s]

In [5]:
# Show the catalogue query URL returned by get_https_request, for inspection.
print(request_url)

https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Collection/Name eq 'SENTINEL-2' and contains(Name,'L1C') and OData.CSC.Intersects(area=geography'SRID=4326;POLYGON((12.449250846912747 45.07385221453862, 12.449250846912747 44.803084676583666, 12.888595060560533 44.803084676583666, 12.888595060560533 45.07385221453862, 12.449250846912747 45.07385221453862))')  and ContentDate/Start gt 2019-07-01T00:00:00.000Z and ContentDate/Start lt 2019-07-07T00:00:00.000Z&$top=1000
