# Download Sentinel 2 L2A Data

Import necessary libraries

In [45]:
# imports
# Supress Warnings
import warnings
warnings.filterwarnings('ignore')

# Visualization
import ipyleaflet
import matplotlib.pyplot as plt
from IPython.display import Image
import seaborn as sns

# Data Science
import numpy as np
import pandas as pd
import xarray as xr

# Planetary Computer Tools
import pystac
import pystac_client
import odc
from pystac_client import Client
from pystac.extensions.eo import EOExtension as eo
from odc.stac import stac_load
import planetary_computer as pc
pc.settings.set_subscription_key('cf5657d28bb2408ba8fd775642c2e1cb')

# Others
import requests
import rich.table
from itertools import cycle
from tqdm import tqdm
from datetime import datetime
import os
from glob import glob

## Downloading Annual Data and Variety of Pixel Areas

<p align="justify">For this experiment the author chose to download data between <b>1st January 2022</b> until <b>31th December 2022</b>. The author also experiment using various of pixel area start from:
    <ol>
        <li><b>1x1</b>,</li>
        <li><b>3x3</b>,</li>
        <li><b>5x5</b>,</li>
        <li><b>7x7</b>, and</li>
        <li><b>10x10</b></li>
    </ol>
</p>

## 1. Preparing Train and Test Coordinates

In [46]:
# reading train data
train_data = pd.read_csv("../../data/Crop_Location_Data.csv")
display(train_data)

# reading test data
test_data = pd.read_csv('../../data/challenge_1_submission_template.csv')
test_data = test_data.rename(columns={"id": "Latitude and Longitude"})
display(test_data)

Unnamed: 0,Latitude and Longitude,Class of Land
0,"(10.323727047081501, 105.2516346045924)",Rice
1,"(10.322364360592521, 105.27843410554115)",Rice
2,"(10.321455902933202, 105.25254306225168)",Rice
3,"(10.324181275911162, 105.25118037576274)",Rice
4,"(10.324635504740822, 105.27389181724476)",Rice
...,...,...
595,"(10.013942985253381, 105.67361318732796)",Non Rice
596,"(10.01348875642372, 105.67361318732796)",Non Rice
597,"(10.013034527594062, 105.67361318732796)",Non Rice
598,"(10.012580298764401, 105.67361318732796)",Non Rice


Unnamed: 0,Latitude and Longitude,target
0,"(10.18019073690894, 105.32022315786804)",
1,"(10.561107033461816, 105.12772097986661)",
2,"(10.623790611954897, 105.13771401411867)",
3,"(10.583364246115156, 105.23946127195805)",
4,"(10.20744446668854, 105.26844107128906)",
...,...,...
245,"(10.308283266873062, 105.50872812216863)",
246,"(10.582910017285496, 105.23991550078767)",
247,"(10.581547330796518, 105.23991550078767)",
248,"(10.629241357910818, 105.15315779432643)",


## 2. Prepare Box Size Area

In [47]:
# prepare bounding box
box_size_deg_3x3 = 0.0002 # Surrounding box in degrees, yields approximately 3x3 pixel region
box_size_deg_5x5 = 0.0004 # Surrounding box in degrees, yields approximately 5x5 pixel region
box_size_deg_7x7 = 0.0006 # Surrounding box in degrees, yields approximately 7x7 pixel region
box_size_deg_10x10 = 0.0009 # Surrounding box in degrees, yields approximately 10x10 pixel region

def create_bbox(lat_long, box_size_deg):
    lat, lon = float(lat_long[0]), float(lat_long[1])
    min_lon = lon-box_size_deg/2
    min_lat = lat-box_size_deg/2
    max_lon = lon+box_size_deg/2
    max_lat = lat+box_size_deg/2
    return (min_lon, min_lat, max_lon, max_lat)

## 3. Prepare Function for Getting the Sentinel 2 L2A Data

In [48]:
def get_sentinel_data(latlong, time_slice, assets, box_size_deg=None):
    '''
    Returns VV and VH values for a given latitude and longitude 
    Attributes:
    latlong - A tuple with 2 elements - latitude and longitude
    time_slice - Timeframe for which the VV and VH values have to be extracted
    assets - A list of bands to be extracted
    '''
    # 1. extract the longitude and latitude from string
    latlong=latlong.replace('(','').replace(')','').replace(' ','').split(',')
    
    # 2. if the box_size_deg is none then the pixel area will be set to 1x1
    if box_size_deg is None:
        bbox_of_interest = (float(latlong[1]) , float(latlong[0]), float(latlong[1]) , float(latlong[0]))
    else:
        bbox_of_interest = create_bbox(latlong, box_size_deg)
    time_of_interest = time_slice
    
    # 3. access the Azure Blob Storage that holding the data
    catalog = pystac_client.Client.open(
        "https://planetarycomputer.microsoft.com/api/stac/v1"
    )
    
    # 4. search the data
    search = catalog.search(
        collections=["sentinel-2-l2a"], bbox=bbox_of_interest, datetime=time_of_interest
    )
    items = list(search.get_all_items())
    
    # 5. load the data bands
    bands_of_interest = assets
    resolution = 10  # meters per pixel 
    scale = resolution / 111320.0 # degrees per pixel for CRS:4326 
    data = stac_load(
        items,
        bands=bands_of_interest,
        crs="EPSG:4326", # Latitude-Longitude
        resolution=scale, # Degrees
        dtype="uint16",
        patch_url=pc.sign,
        bbox=bbox_of_interest,
    )
    return data

## 4. Download the Data

In [49]:
# set up time slice and assets
time_slice = "2022-01-01/2022-12-31"
assets = ['red', 'green', 'blue', 'nir', 'swir16', 'swir22', 'SCL', 'rededge']

### 1x1

#### Train Data

In [50]:
# Prepare the Folder
directory="../../data/train/sentinel-2-l2a/1-year-202201-202212/1x1"
if not os.path.exists(directory):
    print("Creating %s" % (directory))
    os.makedirs(directory)
    print("Done")
else:
    print("%s already existed" % (directory))

# Download the Data
for coordinates in tqdm(train_data['Latitude and Longitude']):
    path = os.path.join(directory, coordinates+".nc")
    if not os.path.exists(path):
        data = get_sentinel_data(coordinates,time_slice,assets)
        data.to_netcdf(path)

../../data/train/sentinel-2-l2a/1-year-202201-202212/1x1 already existed


100%|██████████| 600/600 [00:00<00:00, 113033.70it/s]


#### Test Data

In [51]:
# Prepare the Folder
directory="../../data/test/sentinel-2-l2a/1-year-202201-202212/1x1"
if not os.path.exists(directory):
    print("Creating %s" % (directory))
    os.makedirs(directory)
    print("Done")
else:
    print("%s already existed" % (directory))

# Download the Data
for coordinates in tqdm(test_data['Latitude and Longitude']):
    path = os.path.join(directory, coordinates+".nc")
    if not os.path.exists(path):
        data = get_sentinel_data(coordinates,time_slice,assets)
        data.to_netcdf(path)

../../data/test/sentinel-2-l2a/1-year-202201-202212/1x1 already existed


100%|██████████| 250/250 [00:00<00:00, 99864.38it/s]


### 3x3

#### Train Data

In [52]:
# Prepare the Folder
directory="../../data/train/sentinel-2-l2a/1-year-202201-202212/3x3"
if not os.path.exists(directory):
    print("Creating %s" % (directory))
    os.makedirs(directory)
    print("Done")
else:
    print("%s already existed" % (directory))

# Download the Data
for coordinates in tqdm(train_data['Latitude and Longitude']):
    path = os.path.join(directory, coordinates+".nc")
    if not os.path.exists(path):
        data = get_sentinel_data(coordinates,time_slice,assets, box_size_deg_3x3)
        data.to_netcdf(path)

../../data/train/sentinel-2-l2a/1-year-202201-202212/3x3 already existed


100%|██████████| 600/600 [00:00<00:00, 114797.12it/s]


#### Test Data

In [53]:
# Prepare the Folder
directory="../../data/test/sentinel-2-l2a/1-year-202201-202212/3x3"
if not os.path.exists(directory):
    print("Creating %s" % (directory))
    os.makedirs(directory)
    print("Done")
else:
    print("%s already existed" % (directory))

# Download the Data
for coordinates in tqdm(test_data['Latitude and Longitude']):
    path = os.path.join(directory, coordinates+".nc")
    if not os.path.exists(path):
        data = get_sentinel_data(coordinates,time_slice,assets, box_size_deg_3x3)
        data.to_netcdf(path)

../../data/test/sentinel-2-l2a/1-year-202201-202212/3x3 already existed


100%|██████████| 250/250 [00:00<00:00, 94059.56it/s]


### 5x5

#### Train Data

In [54]:
# Prepare the Folder
directory="../../data/train/sentinel-2-l2a/1-year-202201-202212/5x5"
if not os.path.exists(directory):
    print("Creating %s" % (directory))
    os.makedirs(directory)
    print("Done")
else:
    print("%s already existed" % (directory))

# Download the Data
for coordinates in tqdm(train_data['Latitude and Longitude']):
    path = os.path.join(directory, coordinates+".nc")
    if not os.path.exists(path):
        data = get_sentinel_data(coordinates,time_slice,assets, box_size_deg_5x5)
        data.to_netcdf(path)

../../data/train/sentinel-2-l2a/1-year-202201-202212/5x5 already existed


100%|██████████| 600/600 [00:00<00:00, 116298.46it/s]


#### Test Data

In [55]:
# Prepare the Folder
directory="../../data/test/sentinel-2-l2a/1-year-202201-202212/5x5"
if not os.path.exists(directory):
    print("Creating %s" % (directory))
    os.makedirs(directory)
    print("Done")
else:
    print("%s already existed" % (directory))

# Download the Data
for coordinates in tqdm(test_data['Latitude and Longitude']):
    path = os.path.join(directory, coordinates+".nc")
    if not os.path.exists(path):
        data = get_sentinel_data(coordinates,time_slice,assets, box_size_deg_5x5)
        data.to_netcdf(path)

../../data/test/sentinel-2-l2a/1-year-202201-202212/5x5 already existed


100%|██████████| 250/250 [00:00<00:00, 89081.30it/s]


### 7x7

#### Train Data

In [56]:
# Prepare the Folder
directory="../../data/train/sentinel-2-l2a/1-year-202201-202212/5x5"
if not os.path.exists(directory):
    print("Creating %s" % (directory))
    os.makedirs(directory)
    print("Done")
else:
    print("%s already existed" % (directory))

# Download the Data
for coordinates in tqdm(train_data['Latitude and Longitude']):
    path = os.path.join(directory, coordinates+".nc")
    if not os.path.exists(path):
        data = get_sentinel_data(coordinates,time_slice,assets,box_size_deg_5x5)
        data.to_netcdf(path)

../../data/train/sentinel-2-l2a/1-year-202201-202212/5x5 already existed


100%|██████████| 600/600 [00:00<00:00, 113170.95it/s]


#### Test Data

In [57]:
# Prepare the Folder
directory="../../data/test/sentinel-2-l2a/1-year-202201-202212/7x7"
if not os.path.exists(directory):
    print("Creating %s" % (directory))
    os.makedirs(directory)
    print("Done")
else:
    print("%s already existed" % (directory))

# Download the Data
for coordinates in tqdm(test_data['Latitude and Longitude']):
    path = os.path.join(directory, coordinates+".nc")
    if not os.path.exists(path):
        data = get_sentinel_data(coordinates,time_slice,assets, box_size_deg_7x7)
        data.to_netcdf(path)

../../data/test/sentinel-2-l2a/1-year-202201-202212/7x7 already existed


100%|██████████| 250/250 [00:00<00:00, 90449.06it/s]


### 10x10

#### Train Data

In [58]:
# Prepare the Folder
directory="../../data/train/sentinel-2-l2a/1-year-202201-202212/10x10"
if not os.path.exists(directory):
    print("Creating %s" % (directory))
    os.makedirs(directory)
    print("Done")
else:
    print("%s already existed" % (directory))

# Download the Data
for coordinates in tqdm(train_data['Latitude and Longitude']):
    path = os.path.join(directory, coordinates+".nc")
    if not os.path.exists(path):
        data = get_sentinel_data(coordinates,time_slice,assets,box_size_deg_10x10)
        data.to_netcdf(path)

../../data/train/sentinel-2-l2a/1-year-202201-202212/10x10 already existed


100%|██████████| 600/600 [00:00<00:00, 109236.15it/s]


#### Test Data

In [59]:
# Prepare the Folder
directory="../../data/test/sentinel-2-l2a/1-year-202201-202212/10x10"
if not os.path.exists(directory):
    print("Creating %s" % (directory))
    os.makedirs(directory)
    print("Done")
else:
    print("%s already existed" % (directory))

# Download the Data
for coordinates in tqdm(test_data['Latitude and Longitude']):
    path = os.path.join(directory, coordinates+".nc")
    if not os.path.exists(path):
        data = get_sentinel_data(coordinates,time_slice,assets, box_size_deg_10x10)
        data.to_netcdf(path)

../../data/test/sentinel-2-l2a/1-year-202201-202212/10x10 already existed


100%|██████████| 250/250 [00:00<00:00, 100169.66it/s]
