#  Download full SAFE archive as .zip and upload to kaggle

 __To use the features in this notebook you need to visit https://dataspace.copernicus.eu and create an account with Copernicus, the official governing body of Sentinel Missions for the European Space Agency (ESA). This takes about 5 minutes to do.__

In [7]:
import os
import sys
import pandas as pd
import requests
import json
import datetime
from tqdm import tqdm
from dotenv import load_dotenv
import subprocess
from pathlib import Path

# Batch setup
Steps:
- make sure your credentials you use to log into are appropriately stored in a .env file in this format with quotation marks: <br>
CDSE_email = 'youremail' <br>
CDSE_password = 'yourpassword'<br><br>
- change satellite, if S2B batch <br><br>
- change startDate and endDate to reflect the Quarter for your batch
<br>i.e. all days inclusive | Q1: Jan-Mar | Q2: Apr-Jun | Q3: Jul-Sep | Q4: Oct-Dec<br><br>
- output_dir to reflect the REgion & time for your batch in folder name<br> <br>__!!KEEP it strictly in RE_MMYY format!!__  <br><br>
- leave query_satellite and query_tile unchanged


### 🛰️ Sentinel-2 Data Summary

| 🛰️ Satellite Type | 📅 From       | 📅 To         | 📦 Number of SAFE Files | 💾 Estimated Size     |
|-------------------|--------------|--------------|--------------------------|------------------------|
| S2A_MSIL1C         | 2015-07-04   | 2016-10-16   | 30                       | Maximum 27 GB 💽       |
| S2A_MSIL1C         | 2017-02-20   | 2017-10-08   | 17                       | Maximum 17 GB 💽       |
| S2A_MSIL1C         | 2018-03-27   | 2018-11-12   | 27                       | Maximum 24 GB 💽       |
| <s>S2A_MSIL1C         | <s>2019-02-13   | <s>2019-10-11   | <s>17</s> 19                       | Maximum 15 GB 💽     |
| S2A_MSIL1C         | 2020-03-16   | 2020-11-04   | 18                       | Maximum 16 GB 💽       |
| S2A_MSIL1C         | 2021-03-01   | 2021-08-18   | 18                       | Maximum 16 GB 💽       |
| S2B_MSIL1C         | 2017-07-05   | 2018-11-17   | 37                       | Maximum 32 GB 💽       |
| S2B_MSIL1C         | 2019-02-08   | 2019-10-16   | 14                       | Maximum 13 GB 💽       |
| S2B_MSIL1C         | 2020-02-20   | 2020-11-19   | 21                       | Maximum 19 GB 💽       |
| S2B_MSIL1C         | 2021-03-29   | 2021-06-17   | 10                       | Maximum 8 GB 💽        |

In [8]:
# 1 Required satellite category
query_satellite = 'SENTINEL-2'

# 2 String to be included in filename for retrieval of specific product by name, 
# i.e S2A vs S2B, and code for tile name
query_product = 'S2A_MSIL1C_' # change to S2B_MSIL1C_
query_tile = 'T33TUL'   # stays the same

# 3 Enter a start and end date
query_startDate = '2019-01-01'
query_endDate = '2019-12-31'

# 4 Load your credentials from .env
load_dotenv()
username=os.getenv("CDSE_email")
password=os.getenv("CDSE_password")
# if not already in .env config, insert them as 'string' 
# values in the following format to the .env file:
CDSE_email = username
CDSE_password = password

# 5 Set output file:
output_dir = './SAFE/PO_2A_19' #edit folder name within SAFE/ as appropriate to add batch folders
# i.e. keep format like 
# ./SAFE/PO_2A_19 for S2A 2019 (Apr-Jun) 
# ./SAFE/PO_2B_21 for S2B 2021 (Jan-Mar) S2B etc. 
# ! DO NOT CHANGE THE LENGTH OF THE FOLDER NAME! 


In [3]:
def get_access_token(username: str, password: str) -> str:
    data = {
        "client_id": "cdse-public",
        "username":username,
        "password":password,
        "grant_type": "password",
        }
    try:
        r = requests.post(
            "https://identity.dataspace.copernicus.eu/auth/realms/CDSE/protocol/openid-connect/token",
            data=data,
        )
        r.raise_for_status()
    except Exception as e:
        raise Exception(
            f"Access token creation failed. Reponse from the server was: {r.json()}"
            )
    print("Access token created successfully!")
    return r.json()["access_token"]

In [4]:

def get_https_request(satellite, product, tile, start_date, end_date): #, geojson
    
    base_prefix = "https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter="
    collection = "Collection/Name eq '" + satellite + "' and startswith(Name,'" + product + "') and contains(Name,'" + tile + "')"
    #roi_coordinates = get_coordinates(geojson)
    #geographic_criteria = "OData.CSC.Intersects(area=geography'SRID=4326;POLYGON((" + roi_coordinates + "))') "
    content_date = (
            "ContentDate/Start gt " + start_date + "T00:00:00.000Z and " +
            "ContentDate/Start lt " + end_date + "T00:00:00.000Z"
    )
    https_request = ( base_prefix + collection +  " and " #Attributes/OData.CSC.DoubleAttribute/any(att:att/Name eq 'cloudCover' and att/OData.CSC.DoubleAttribute/Value le 20.00) and " 
                     + content_date) # geographic_criteria + " and " +
    print("Query URL:", https_request)
    return https_request


def download_data(token, id, name, length, output):
    url = f"https://download.dataspace.copernicus.eu/odata/v1/Products({id})/$value"
    headers = {"Authorization": f"Bearer {token}"}
    session = requests.Session()
    session.headers.update(headers)
    response = session.get(url, headers=headers, stream=True)
    try:
        print('[', datetime.datetime.strftime(datetime.datetime.now(), '%H:%M:%S'), '] '+'Downloading: '+name)
        with open(output, "wb") as file:
            if length is not None:
                # set the total length of the progress bar for tracking downloads
                pbar = tqdm(total=length, unit="B", unit_scale=True, desc=name)
                for chunk in response.iter_content(chunk_size=8192):
                    if chunk:
                        file.write(chunk)
                        # update progress bar
                        pbar.update(len(chunk))
                pbar.close()
        print('[', datetime.datetime.strftime(datetime.datetime.now(), '%H:%M:%S'), '] '+'Download complete: '+name)
        response.close()
    except Exception as e:
        print('[', datetime.datetime.strftime(datetime.datetime.now(), '%H:%M:%S'), '] '+'Download failed: '+name)
        print(f"An exception occured: {e}")


# zip the Safe files for download
def get_file_name(name):
    file_name = ''
    if query_satellite == 'SENTINEL-1':
        file_name = name.replace(".SAFE", ".zip")
    elif query_satellite == 'SENTINEL-2':
        file_name = name.replace(".SAFE", ".zip")
    elif query_satellite == 'SENTINEL-3':
        file_name = name.replace(".SEN3", ".zip")
    elif query_satellite == 'SENTINEL-5P':
        file_name = name.replace(".nc", ".zip")
    elif query_satellite == 'SENTINEL-6':
        file_name = name.replace(".SEN6", ".zip")
    return file_name


# Download non-duplicate tiles matched to litter row data. 
Make sure the path  for ```litterrows = pd.read_excel('../files/LM_centroids.xlsx')```is reflected in your folder structure or 
if using colab, changed to './LM_centroids.xlsx' and the file added to content folder (current workign dir)

In [5]:

request_url = get_https_request(
    query_satellite, query_product, query_tile, query_startDate, query_endDate #, map_geojson, 
)
JSON = requests.get(request_url).json()
if 'detail' in JSON:
    print(JSON['detail']['message'])
    sys.exit()
elif 'value' in JSON:
    df = pd.DataFrame.from_dict(JSON['value'])
    # print(df.columns)
    if len(df) == 0:
        print('No data found')
        sys.exit()
    
    data_id_list = df.Id
    data_name_list = df.Name
    date_content_length = df.ContentLength
else:
    print('Unknown query error')
    sys.exit()

for i in range(len(data_id_list)):
    print(data_name_list[i])
    data_id = data_id_list[i]
    data_name = get_file_name(data_name_list[i])
    data_length = date_content_length[i]
    # Check if the data storage path exists. If not, create the data storage path.
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    output_file = os.path.join(output_dir, data_name)
    # # Check if the file has been downloaded before of it has no recorded windrows, in either case, skip it and do not download it (again).
    litterrows = pd.read_excel('../files/LM_centroids.xlsx') # if colab : ('./LM_centroids.xlsx')
    samples_set = set(litterrows['Str_time'])
    if os.path.exists(output_file) and os.path.getsize(output_file) == data_length:
        print(output_file + ' File already exists')   
    elif output_file[27:42] not in samples_set:
        print(output_file + ' has no recorded litter rows' )
    else:
        access_token = get_access_token(CDSE_email, CDSE_password)
        download_data(access_token, data_id, data_name, data_length, output_file)

Query URL: https://catalogue.dataspace.copernicus.eu/odata/v1/Products?$filter=Collection/Name eq 'SENTINEL-2' and startswith(Name,'S2A_MSIL1C_') and contains(Name,'T33TUL') and ContentDate/Start gt 2019-01-01T00:00:00.000Z and ContentDate/Start lt 2019-12-31T00:00:00.000Z
S2A_MSIL1C_20190312T100021_N0500_R122_T33TUL_20221129T200337.SAFE
Access token created successfully!
[ 02:56:14 ] Downloading: S2A_MSIL1C_20190312T100021_N0500_R122_T33TUL_20221129T200337.zip


S2A_MSIL1C_20190312T100021_N0500_R122_T33TUL_20221129T200337.zip: 100%|██████████| 18.2M/18.2M [00:02<00:00, 6.67MB/s]


[ 02:56:17 ] Download complete: S2A_MSIL1C_20190312T100021_N0500_R122_T33TUL_20221129T200337.zip
S2A_MSIL1C_20190325T101021_N0500_R022_T33TUL_20221117T131900.SAFE
Access token created successfully!
[ 02:56:19 ] Downloading: S2A_MSIL1C_20190325T101021_N0500_R022_T33TUL_20221117T131900.zip


S2A_MSIL1C_20190325T101021_N0500_R022_T33TUL_20221117T131900.zip: 100%|██████████| 670M/670M [01:50<00:00, 6.06MB/s] 


[ 02:58:09 ] Download complete: S2A_MSIL1C_20190325T101021_N0500_R022_T33TUL_20221117T131900.zip
S2A_MSIL1C_20190315T101021_N0500_R022_T33TUL_20221116T052920.SAFE
Access token created successfully!
[ 02:58:11 ] Downloading: S2A_MSIL1C_20190315T101021_N0500_R022_T33TUL_20221116T052920.zip


S2A_MSIL1C_20190315T101021_N0500_R022_T33TUL_20221116T052920.zip: 100%|██████████| 627M/627M [02:37<00:00, 3.98MB/s] 


[ 03:00:49 ] Download complete: S2A_MSIL1C_20190315T101021_N0500_R022_T33TUL_20221116T052920.zip
S2A_MSIL1C_20190322T100031_N0500_R122_T33TUL_20221119T025646.SAFE
Access token created successfully!
[ 03:00:51 ] Downloading: S2A_MSIL1C_20190322T100031_N0500_R122_T33TUL_20221119T025646.zip


S2A_MSIL1C_20190322T100031_N0500_R122_T33TUL_20221119T025646.zip: 100%|██████████| 704M/704M [01:43<00:00, 6.82MB/s] 


[ 03:02:34 ] Download complete: S2A_MSIL1C_20190322T100031_N0500_R122_T33TUL_20221119T025646.zip
S2A_MSIL1C_20190111T100401_N0500_R122_T33TUL_20221208T040313.SAFE
./SAFE/PO_2A_19/S2A_MSIL1C_20190111T100401_N0500_R122_T33TUL_20221208T040313.zip has no recorded litter rows
S2A_MSIL1C_20190312T100021_N0500_R122_T33TUL_20221115T172347.SAFE
Access token created successfully!
[ 03:02:36 ] Downloading: S2A_MSIL1C_20190312T100021_N0500_R122_T33TUL_20221115T172347.zip


S2A_MSIL1C_20190312T100021_N0500_R122_T33TUL_20221115T172347.zip: 100%|██████████| 710M/710M [01:47<00:00, 6.60MB/s] 


[ 03:04:24 ] Download complete: S2A_MSIL1C_20190312T100021_N0500_R122_T33TUL_20221115T172347.zip
S2A_MSIL1C_20190121T100321_N0500_R122_T33TUL_20221202T130949.SAFE
./SAFE/PO_2A_19/S2A_MSIL1C_20190121T100321_N0500_R122_T33TUL_20221202T130949.zip has no recorded litter rows
S2A_MSIL1C_20190124T101311_N0500_R022_T33TUL_20221214T111444.SAFE
./SAFE/PO_2A_19/S2A_MSIL1C_20190124T101311_N0500_R022_T33TUL_20221214T111444.zip has no recorded litter rows
S2A_MSIL1C_20190131T100241_N0500_R122_T33TUL_20221209T001656.SAFE
Access token created successfully!
[ 03:04:26 ] Downloading: S2A_MSIL1C_20190131T100241_N0500_R122_T33TUL_20221209T001656.zip


S2A_MSIL1C_20190131T100241_N0500_R122_T33TUL_20221209T001656.zip: 100%|██████████| 783M/783M [01:57<00:00, 6.65MB/s] 


[ 03:06:24 ] Download complete: S2A_MSIL1C_20190131T100241_N0500_R122_T33TUL_20221209T001656.zip
S2A_MSIL1C_20190203T101221_N0500_R022_T33TUL_20221206T131136.SAFE
./SAFE/PO_2A_19/S2A_MSIL1C_20190203T101221_N0500_R022_T33TUL_20221206T131136.zip has no recorded litter rows
S2A_MSIL1C_20190210T100141_N0500_R122_T33TUL_20221127T074340.SAFE
Access token created successfully!
[ 03:06:26 ] Downloading: S2A_MSIL1C_20190210T100141_N0500_R122_T33TUL_20221127T074340.zip


S2A_MSIL1C_20190210T100141_N0500_R122_T33TUL_20221127T074340.zip: 100%|██████████| 645M/645M [02:34<00:00, 4.19MB/s] 


[ 03:09:01 ] Download complete: S2A_MSIL1C_20190210T100141_N0500_R122_T33TUL_20221127T074340.zip
S2A_MSIL1C_20190213T101131_N0500_R022_T33TUL_20221128T153324.SAFE
Access token created successfully!
[ 03:09:02 ] Downloading: S2A_MSIL1C_20190213T101131_N0500_R022_T33TUL_20221128T153324.zip


S2A_MSIL1C_20190213T101131_N0500_R022_T33TUL_20221128T153324.zip: 100%|██████████| 642M/642M [01:32<00:00, 6.95MB/s] 


[ 03:10:35 ] Download complete: S2A_MSIL1C_20190213T101131_N0500_R022_T33TUL_20221128T153324.zip
S2A_MSIL1C_20190114T101351_N0500_R022_T33TUL_20230912T101753.SAFE
./SAFE/PO_2A_19/S2A_MSIL1C_20190114T101351_N0500_R022_T33TUL_20230912T101753.zip has no recorded litter rows
S2A_MSIL1C_20190220T100031_N0500_R122_T33TUL_20221125T152706.SAFE
Access token created successfully!
[ 03:10:37 ] Downloading: S2A_MSIL1C_20190220T100031_N0500_R122_T33TUL_20221125T152706.zip


S2A_MSIL1C_20190220T100031_N0500_R122_T33TUL_20221125T152706.zip: 100%|██████████| 786M/786M [02:10<00:00, 6.00MB/s] 


[ 03:12:48 ] Download complete: S2A_MSIL1C_20190220T100031_N0500_R122_T33TUL_20221125T152706.zip
S2A_MSIL1C_20190223T101021_N0500_R022_T33TUL_20221127T011157.SAFE
./SAFE/PO_2A_19/S2A_MSIL1C_20190223T101021_N0500_R022_T33TUL_20221127T011157.zip has no recorded litter rows
S2A_MSIL1C_20190302T100021_N0500_R122_T33TUL_20221112T030944.SAFE
Access token created successfully!
[ 03:12:50 ] Downloading: S2A_MSIL1C_20190302T100021_N0500_R122_T33TUL_20221112T030944.zip


S2A_MSIL1C_20190302T100021_N0500_R122_T33TUL_20221112T030944.zip: 100%|██████████| 712M/712M [02:26<00:00, 4.87MB/s] 


[ 03:15:16 ] Download complete: S2A_MSIL1C_20190302T100021_N0500_R122_T33TUL_20221112T030944.zip
S2A_MSIL1C_20190104T101411_N0500_R022_T33TUL_20221130T134310.SAFE
./SAFE/PO_2A_19/S2A_MSIL1C_20190104T101411_N0500_R022_T33TUL_20221130T134310.zip has no recorded litter rows
S2A_MSIL1C_20190305T101021_N0500_R022_T33TUL_20221117T172715.SAFE
./SAFE/PO_2A_19/S2A_MSIL1C_20190305T101021_N0500_R022_T33TUL_20221117T172715.zip has no recorded litter rows
S2A_MSIL1C_20190101T100411_N0500_R122_T33TUL_20221216T215437.SAFE
./SAFE/PO_2A_19/S2A_MSIL1C_20190101T100411_N0500_R122_T33TUL_20221216T215437.zip has no recorded litter rows
S2A_MSIL1C_20190521T100031_N0500_R122_T33TUL_20221215T032340.SAFE
./SAFE/PO_2A_19/S2A_MSIL1C_20190521T100031_N0500_R122_T33TUL_20221215T032340.zip has no recorded litter rows


# kaggle upload additional batch

In [None]:
#project_root = "/Users/sara_mac/Desktop/projects/plastic_detection/Sentinel2PlasticDetectionProject/task2-data-collection/notebooks/SAFE"
project_root = "./SAFE"
if project_root not in sys.path:
    sys.path.append(project_root)
print(project_root)

# Path to data directory for current batch
data_dir = project_root + output_dir[6:]
if data_dir not in sys.path:
    sys.path.append(data_dir)
print(data_dir)

# Set Kaggle config directory
KAGGLE_CONFIG_DIR = os.path.expanduser(project_root + "/.kaggle")
print(KAGGLE_CONFIG_DIR)

./SAFE
./SAFE/PO_2A_19
./SAFE/.kaggle


In [None]:
# def update_kaggle_dataset_with_zip(
#     folder_path,
#     title,
#     dataset_id,
#     description="Zipped Sentinel-2 L1C SAFE folders",
#     license_name="CC-BY-SA-4.0"
# ):
#     folder_path = Path(folder_path)
#     assert folder_path.exists(), "Folder does not exist!"

#     one_back = Path('../')
#     metadata_path = Path( one_back / "dataset-metadata.json")
#     zip_files = [f.name for f in folder_path.glob("*.zip")]

#     # If metadata already exists - loads it
    
#     if metadata_path.exists():
#         with open(metadata_path, 'r') as f:
#             existing_metadata = json.load(f)
#             existing_paths = {res["path"] for res in existing_metadata.get("resources", [])}
#     else:
#         existing_metadata = {
#             "title": title,
#             "id": dataset_id,
#             "licenses": [{
#                 "name": license_name,
#                 "title": "Creative Commons Attribution Share-Alike 4.0",
#                 "path": "https://creativecommons.org/licenses/by-sa/4.0/"
#             }],
#             "resources": []
#         }
#         existing_paths = set()

#     # Add only new zip files to the resources list 
#     for zipf in zip_files:
#         if zipf not in existing_paths:
#             existing_metadata["resources"].append({
#                 "name": Path(zipf).stem,
#                 "path": zipf,
#                 "description": f"Zipped .SAFE Sentinel-2: {zipf}",
#                 "type": "other",
#                 "format": "zip"
#             })

#     with open(metadata_path, 'w') as f:
#         json.dump(existing_metadata, f, indent=2)

#     # Update the dataset on Kaggle
#     subprocess.run([
#         "kaggle", "datasets", "version",
#         "-p", str(folder_path),
#         "-m", "Add new zip files",
#         "--dir-mode", "zip"
#     ], check=True)

# update_kaggle_dataset_with_zip(
#     folder_path = data_dir,
#     title="Litter Rows Italy",
#     dataset_id="sarahajbane/litter-windrows",
    
# )

Metadata file not found: dataset-metadata.json


CalledProcessError: Command '['kaggle', 'datasets', 'version', '-p', 'SAFE/PA_Q1_19', '-m', 'Add new zip files', '--dir-mode', 'zip']' returned non-zero exit status 1.

In [None]:
# one_back = Path('../')
# metadata_path = Path(project_root) / "dataset-metadata.json"

# def update_kaggle_dataset_with_new_files(folder_path, metadata_path, update_msg="Added new data"):
#     folder_path = Path(folder_path)
#     assert folder_path.exists(), "Folder does not exist!"
#     assert metadata_path.exists(), "Metadata does not exist!"
#     try:
#         subprocess.run([
#             "kaggle", "datasets", "version",
#             "-p", str(folder_path),
#             "-m", update_msg,
#             "--dir-mode", "zip"
#         ], check=True)
#     except subprocess.CalledProcessError as e:
#         print(f"Error occurred while updating the Kaggle dataset: {e}")
#         print(f"Command output: {e.output}")
#         raise
#     print("Dataset updated with new files!")

# update_kaggle_dataset_with_new_files(
#     folder_path= data_dir,
#     metadata_path = metadata_path,
#     update_msg="Add new zipped .SAFE Sentinel-2 scenes")

Metadata file not found: dataset-metadata.json
Error occurred while updating the Kaggle dataset: Command '['kaggle', 'datasets', 'version', '-p', 'SAFE/PA_Q1_19', '-m', 'Add new zipped .SAFE Sentinel-2 scenes', '--dir-mode', 'zip']' returned non-zero exit status 1.
Command output: None


CalledProcessError: Command '['kaggle', 'datasets', 'version', '-p', 'SAFE/PA_Q1_19', '-m', 'Add new zipped .SAFE Sentinel-2 scenes', '--dir-mode', 'zip']' returned non-zero exit status 1.

# create a new kaggle dataset from scratch

In [None]:
# !kaggle datasets init -p {project_root}

Data package template written to: ./SAFE/dataset-metadata.json


In [None]:
## update metadata (template won't work) 

# metadata = {
#     "title": "Litter Rows Italy - Dataset For Plastic Detection Algorithms", 
#     "id": "www.kaggle.com/datasets/sarahajbane/litter-rows", 
#       "resources": [
#     {
#       "name": "Litter Rows Italy - Dataset For Plastic Detection Algorithms",
#       #"path": "https://www.kaggle.com/datasets/sarahajbane/litter-rows",
#       "description": "Italy Subset of Sentinel2 L1C images for litter row dataset ",
#       "type": "archive",
#       "format": "zip",
#     }
#       ],
#     "licenses": [
#         {
#       "name": "CC-BY-SA-4.0",
#       "title": "Creative Commons Attribution Share-Alike 4.0",
#       "path": "https://creativecommons.org/licenses/by-sa/4.0/"
#     } # Creative Commons license with proper attribution 
#     ] # to original authors of the litter_row dataset
#     }
# with open(project_root + '/' + 'dataset-metadata.json', 'w') as file:
#     json.dump(metadata, file)

In [None]:
# # Check for kaggle.json credentials
# kaggle_json_path = os.path.expanduser(project_root + "/.kaggle/kaggle.json")
# if not os.path.exists(kaggle_json_path):
#     raise FileNotFoundError(f"The Kaggle API credentials file is missing. Please place your kaggle.json file at {kaggle_json_path}.")


In [None]:
# # Function to create or update a Kaggle dataset from a local folder
# def create_kaggle_dataset_from_folder(
#     folder_path,
#     title,
#     dataset_id,
#     description="Sentinel-2 L1C subset",
#     license_name="CC-BY-SA-4.0"
# ):
#     folder_path = Path(folder_path)
#     assert folder_path.exists(), "Folder does not exist!"

#     metadata_path = folder_path / "dataset-metadata.json"
#     image_files = [f.name for f in folder_path.glob("*.tif*")]

#     resources = [
#         {
#             "name": Path(img).stem,
#             "path": img,
#             "description": f"Image: {img}",
#             "type": "image",
#             "format": "tiff"
#         } for img in image_files
#     ]

#     metadata = {
#         "title": title,
#         "id": dataset_id,
#         "licenses": [{
#             "name": license_name,
#             "title": "Creative Commons Attribution Share-Alike 4.0",
#             "path": "https://creativecommons.org/licenses/by-sa/4.0/"
#         }],
#         "resources": resources
#     }

#     with open(metadata_path, 'w') as f:
#         json.dump(metadata, f, indent=2)

#     # Initialize if necessary
#     if not (folder_path / "dataset-metadata.json").exists():
#         subprocess.run(["kaggle", "datasets", "init", "-p", str(folder_path)])

#     # Create or version the dataset
#     if not any((folder_path / f).exists() for f in ["dataset-metadata.json", "dataset-metadata.yml"]):
#         print("No metadata found, initializing dataset.")
#         subprocess.run(["kaggle", "datasets", "init", "-u", str(folder_path)])

#     try:
#         subprocess.run([
#             "kaggle", "datasets", "create",
#             "-p", str(folder_path),
#             "--dir-mode", "zip"
#         ], check=True)
#     except subprocess.CalledProcessError:
#         subprocess.run([
#             "kaggle", "datasets", "version",
#             "-p", str(folder_path),
#             "-m", "Update data",
#             "--dir-mode", "zip"
#         ])

# # Example call to the above function
# create_kaggle_dataset_from_folder(
#     folder_path = project_root,
#     title="Litter Rows Italy",
#     dataset_id="sarahajbane/litter-windrows",
)

Starting upload for file .kaggle.zip


100%|██████████| 186/186 [00:00<00:00, 468B/s]


Upload successful: .kaggle.zip (186B)
Starting upload for file .DS_Store


100%|██████████| 6.00k/6.00k [00:00<00:00, 15.9kB/s]


Upload successful: .DS_Store (6KB)
Starting upload for file PA_Q1_19.zip


100%|██████████| 588M/588M [04:02<00:00, 2.54MB/s] 


Upload successful: PA_Q1_19.zip (588MB)
Your private Dataset is being created. Please check progress at https://www.kaggle.com/datasets/sarahajbane/litter-windrows
