In [1]:
import glob
import os
import zipfile

import rasterio
import yt
import yt.extensions.geotiff
from sentinelsat import SentinelAPI, read_geojson, geojson_to_wkt
from yt import derived_field

### Download the data

In [2]:
# Copernicus scihub login details.
# Read from the environment so that real credentials are never saved in the
# notebook. DHUS_USER / DHUS_PASSWORD are the variable names used by the
# sentinelsat command-line tool. When a variable is unset the value is None;
# sentinelsat documents a ~/.netrc fallback for that case -- verify this
# against the installed version.
user = os.environ.get('DHUS_USER')
password = os.environ.get('DHUS_PASSWORD')

In [3]:
# Describe the search first: area of interest (converted to WKT), date range
# and product type.
geojson = read_geojson('footprint.geojson')
footprint = geojson_to_wkt(geojson)
date = ('20200527', '20200603')
producttype = 'S2MSI2A'

# Connect with the login details defined above, then run the query.
# NOTE(review): confirm this hub endpoint is still the one in service.
api = SentinelAPI(user, password, api_url="https://scihub.copernicus.eu/dhus/")
products = api.query(footprint, date=date, producttype=producttype)

In [4]:
# Rank the query hits by cloud cover (ties broken by ingestion date, both
# ascending) and keep only the first row.
product_df = (
    api.to_dataframe(products)
    .sort_values(by=['cloudcoverpercentage', 'ingestiondate'], ascending=True)
    .head(1)
)

In [5]:
# Show the cloud cover and ingestion date of the selected product;
# hopefully we have an image with little cloud.
product_df[['cloudcoverpercentage', 'ingestiondate']]

Unnamed: 0,cloudcoverpercentage,ingestiondate
b79201de-eb46-49d1-b084-a5b6f974aa55,5.706001,2020-06-01 19:19:50.305


In [6]:
# Download every product listed in product_df's index. The frame was cut to a
# single row above, so this fetches one archive.
api.download_all(product_df.index)

({'b79201de-eb46-49d1-b084-a5b6f974aa55': {'id': 'b79201de-eb46-49d1-b084-a5b6f974aa55',
   'title': 'S2A_MSIL2A_20200601T113331_N0214_R080_T30UVG_20200601T123416',
   'size': 1161457671,
   'md5': 'F32ED0101E07B737A048A877D64541BA',
   'date': datetime.datetime(2020, 6, 1, 11, 33, 31, 24000),
   'footprint': 'POLYGON((-4.6014404 55.93532706141405,-2.8437195 55.945635321533295,-2.8475952 54.9590033589594,-4.561981 54.94906599057673,-4.6014404 55.93532706141405))',
   'url': "https://scihub.copernicus.eu/dhus/odata/v1/Products('b79201de-eb46-49d1-b084-a5b6f974aa55')/$value",
   'Online': True,
   'Creation Date': datetime.datetime(2020, 6, 1, 19, 20, 8, 102000),
   'Ingestion Date': datetime.datetime(2020, 6, 1, 19, 19, 50, 305000),
   'path': './S2A_MSIL2A_20200601T113331_N0214_R080_T30UVG_20200601T123416.zip',
   'downloaded_bytes': 0}},
 {},
 {})

### Unzip and export 20m resolution bands to GeoTIFF

In [7]:
# Both the downloaded archive and the folder it unpacks to are named after the
# title of the selected product, so look the title up once.
product_title = product_df.title.tolist()[0]
s2zipfile = product_title + '.zip'
s2ds = product_title + '.SAFE'

# Unpack the archive into the current directory.
with zipfile.ZipFile(s2zipfile, 'r') as zip_ref:
    zip_ref.extractall('.')

In [8]:
# Collect the 20 m JPEG 2000 files of the extracted product, sorted by path.
# The recursive '**' matches whatever granule sub-folder sits under GRANULE/.
s2r20m_files = sorted(glob.glob(s2ds + '/GRANULE/**/IMG_DATA/R20m/*_20m.jp2', recursive=True))

In [9]:
# Open one rasterio dataset per band, keyed by band label. Insertion order
# follows the sorted file list and later fixes the band order of the GeoTIFF.
# NOTE(review): the opened datasets are not closed in the cells shown.
bands = {}
ignored_bands = ['AOT', 'TCI', 'WVP'] # we just want the numbered bands and the scene classification
for f in s2r20m_files:
    # The glob pattern guarantees names ending in "_20m.jp2", so the band
    # label is the second-to-last "_"-separated field.
    label = f.split('_')[-2]
    if label in ignored_bands:
        continue
    bands[label] = rasterio.open(f, driver='JP2OpenJPEG')

In [10]:
# Export a multiband GeoTIFF image: one output band per entry of `bands`, in
# dict order, using the size, CRS, transform and dtype of band B04.
filename = product_df.title.tolist()[0] + '_20m.tif'
reference = bands['B04']
out_dtype = reference.dtypes[0]
# The context manager closes the output file even if a read or write fails.
with rasterio.open(
    filename, 'w', driver='Gtiff',
    width=reference.width, height=reference.height,
    count=len(bands),
    crs=reference.crs,
    transform=reference.transform,
    dtype=out_dtype,
) as geotiff:
    # Output band numbers start at 1, hence start=1.
    for band_index, label in enumerate(bands, start=1):
        geotiff.write(bands[label].read(1).astype(out_dtype), band_index)

In [11]:
# The GeoTIFF bands were written in this order (first label -> band 1, ...).
list(bands.keys()) # this order

['B02', 'B03', 'B04', 'B05', 'B06', 'B07', 'B11', 'B12', 'B8A', 'SCL']

We have produced a GeoTIFF from the Sentinel-2 20m resolution data we downloaded. The next step is to load it into yt and see what we get!

### Read GeoTIFF with yt

In [12]:
# `filename` still holds the GeoTIFF path set in the export cell, e.g.
# 'S2A_MSIL2A_20200601T113331_N0214_R080_T30UVG_20200601T123416_20m.tif'
ds = yt.load(filename)

yt : [INFO     ] 2020-06-30 15:36:20,812 Parameters: current_time              = 0.0
yt : [INFO     ] 2020-06-30 15:36:20,813 Parameters: domain_dimensions         = [5490 5490    1]
yt : [INFO     ] 2020-06-30 15:36:20,814 Parameters: domain_left_edge          = [0. 0. 0.] m
yt : [INFO     ] 2020-06-30 15:36:20,815 Parameters: domain_right_edge         = [1.0981e+05 1.0981e+05 1.0000e+00] m
yt : [INFO     ] 2020-06-30 15:36:20,817 Parameters: cosmological_simulation   = False


In [13]:
# List the fields yt found in the file. Per the output below there is one
# ('bands', '<n>') field per GeoTIFF band, with names sorted as strings
# (so '10' follows '1').
ds.field_list

[('bands', '1'),
 ('bands', '10'),
 ('bands', '2'),
 ('bands', '3'),
 ('bands', '4'),
 ('bands', '5'),
 ('bands', '6'),
 ('bands', '7'),
 ('bands', '8'),
 ('bands', '9')]

In [14]:
# Show the raster metadata attached to the dataset (driver, dtype, size,
# band count, CRS and affine transform, per the output below).
ds.parameters

{'driver': 'GTiff',
 'dtype': 'uint16',
 'nodata': None,
 'width': 5490,
 'height': 5490,
 'count': 10,
 'crs': CRS.from_epsg(32630),
 'transform': Affine(20.0, 0.0, 399960.0,
        0.0, -20.0, 6200040.0)}

### Normalised Difference Vegetation Index (NDVI)

Here is an example of one thing we can easily calculate once the data is loaded into yt. The [Normalised Difference Vegetation Index](https://en.wikipedia.org/wiki/Normalized_difference_vegetation_index)! This is used to monitor vegetation density and health by comparing the reflectance of red (R) and near infrared (NIR) bands.
It is defined as $\mathrm{NDVI} = (\mathrm{NIR} - \mathrm{R}) / (\mathrm{NIR} + \mathrm{R})$. In the GeoTIFF we created, the NIR band (B8A) is band 9 and the R band (B04) is band 3.

In [15]:
@derived_field(name="ndvi", units="", force_override=True, display_name='NDVI', take_log=False)
def _ndvi(field, data):
    return (data[('bands', '9')] - data[('bands', '3')]) /(data[('bands', '9')] + data[('bands', '3')])



In [16]:
# Register the NDVI field on this dataset. The values are computed per grid
# cell, so the sampling type is stated explicitly instead of relying on yt's
# implicit default.
ds.add_field("ndvi", function=_ndvi, sampling_type="cell", units="")

  ds.add_field("ndvi", function=_ndvi, units="")


In [17]:
# Data object covering the whole dataset.
# NOTE(review): `ad` is not referenced by any of the cells shown -- confirm
# whether it is still needed.
ad = ds.all_data()

In [18]:
# Sphere centred on the domain with a radius of a quarter of the domain width;
# it is passed to the slice plot as its data source.
sp = ds.sphere(ds.domain_center, ds.domain_width[0]/4)
# Slice along z, showing the NDVI field.
p = yt.SlicePlot(ds, 'z', 'ndvi', data_source=sp)
p.set_cmap(field="ndvi", cmap='RdYlGn')
# NDVI can be negative, so use a linear colour scale.
p.set_log("ndvi", False)
# Zoom in by a factor of 4; as the last expression this also displays the plot.
p.zoom(4)

  out_arr = func(np.asarray(inps[0]), np.asarray(inps[1]),
yt : [INFO     ] 2020-06-30 15:36:22,687 xlim = 0.000000 109810.000000
yt : [INFO     ] 2020-06-30 15:36:22,688 ylim = 0.000000 109810.000000
yt : [INFO     ] 2020-06-30 15:36:22,689 xlim = 0.000000 109810.000000
yt : [INFO     ] 2020-06-30 15:36:22,691 ylim = 0.000000 109810.000000
yt : [INFO     ] 2020-06-30 15:36:22,707 Making a fixed resolution buffer of (('bands', 'ndvi')) 800 by 800
  norm = matplotlib.colors.SymLogNorm(
yt : [INFO     ] 2020-06-30 15:36:26,492 Making a fixed resolution buffer of (('bands', 'ndvi')) 800 by 800


### Normalised Difference Water Index (NDWI)
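Similarly to the NDVI, we can calculate the NDWI to estimate the water content of vegetation in our satellite image (see the Gao 1996 formula for [NDWI](https://en.wikipedia.org/wiki/Normalized_difference_water_index)). It compares the NIR band with a shortwave infrared (SWIR) band: $\mathrm{NDWI} = (\mathrm{NIR} - \mathrm{SWIR}) / (\mathrm{NIR} + \mathrm{SWIR})$. In the GeoTIFF we created, the NIR band (B8A) is band 9 and the SWIR band (B11) is band 7.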

In [19]:
@derived_field(name="ndwi", units="", force_override=True, display_name='NDWI', take_log=False)
def _ndwi(field, data):
    return (data[('bands', '9')] - data[('bands', '7')]) /(data[('bands', '9')] + data[('bands', '7')])



In [20]:
# Register the NDWI field on this dataset. The values are computed per grid
# cell, so the sampling type is stated explicitly instead of relying on yt's
# implicit default.
ds.add_field("ndwi", function=_ndwi, sampling_type="cell", units="")

  ds.add_field("ndwi", function=_ndwi, units="")


In [21]:
# Data object covering the whole dataset (same assignment as in the NDVI
# section).
# NOTE(review): `ad` is not referenced by any of the cells shown -- confirm
# whether it is still needed.
ad = ds.all_data()

In [22]:
# Sphere centred on the domain with a radius of a quarter of the domain width;
# it is passed to the slice plot as its data source.
sp = ds.sphere(ds.domain_center, ds.domain_width[0]/4)
# Slice along z, showing the NDWI field.
p = yt.SlicePlot(ds, 'z', 'ndwi', data_source=sp)
p.set_cmap(field="ndwi", cmap='RdYlBu')
# NDWI can be negative, so use a linear colour scale.
p.set_log("ndwi", False)
# Zoom in by a factor of 4; as the last expression this also displays the plot.
p.zoom(4)

yt : [INFO     ] 2020-06-30 15:36:28,670 xlim = 0.000000 109810.000000
yt : [INFO     ] 2020-06-30 15:36:28,670 ylim = 0.000000 109810.000000
yt : [INFO     ] 2020-06-30 15:36:28,671 xlim = 0.000000 109810.000000
yt : [INFO     ] 2020-06-30 15:36:28,673 ylim = 0.000000 109810.000000
yt : [INFO     ] 2020-06-30 15:36:28,676 Making a fixed resolution buffer of (('bands', 'ndwi')) 800 by 800
  norm = matplotlib.colors.SymLogNorm(
yt : [INFO     ] 2020-06-30 15:36:31,535 Making a fixed resolution buffer of (('bands', 'ndwi')) 800 by 800


### Scene Classification
When we are dealing with optical Earth observation data, clouds can obscure our field of view. Fortunately, Level-2A Sentinel-2 data comes with a 20m scene classification (SCL) mask, which labels each pixel as belonging to one of 12 classes (see [here](https://earth.esa.int/web/sentinel/technical-guides/sentinel-2-msi/level-2a/algorithm) for more information on the model used to classify the pixels). Of course, this classification model isn't perfect, but it can be used to get a decent cloud mask. In the GeoTIFF we created, the SCL mask is band 10. The classes are:
0: NODATA  
1: SATURATED_OR_DEFECTIVE  
2: DARK_AREA_PIXELS  
3: CLOUD_SHADOWS  
4: VEGETATION  
5: NOT_VEGETATED  
6: WATER  
7: UNCLASSIFIED  
8: CLOUD_MEDIUM_PROBABILITY  
9: CLOUD_HIGH_PROBABILITY  
10: THIN_CIRRUS  
11: SNOW  

In [23]:
# Sphere centred on the domain with a radius of a quarter of the domain width;
# it is passed to the slice plot as its data source.
sp = ds.sphere(ds.domain_center, ds.domain_width[0]/4)
# Band 10 is the scene classification (SCL) layer, the last band written to
# the GeoTIFF.
p = yt.SlicePlot(ds, 'z', '10', data_source=sp)
# Qualitative colour map for the class codes.
p.set_cmap(field="10", cmap='tab10')
# Class codes are category labels, so use a linear scale.
p.set_log("10", False)
# Zoom in by a factor of 4; as the last expression this also displays the plot.
p.zoom(4)

yt : [INFO     ] 2020-06-30 15:36:32,774 xlim = 0.000000 109810.000000
yt : [INFO     ] 2020-06-30 15:36:32,775 ylim = 0.000000 109810.000000
yt : [INFO     ] 2020-06-30 15:36:32,776 xlim = 0.000000 109810.000000
yt : [INFO     ] 2020-06-30 15:36:32,778 ylim = 0.000000 109810.000000
yt : [INFO     ] 2020-06-30 15:36:32,780 Making a fixed resolution buffer of (('bands', '10')) 800 by 800
yt : [INFO     ] 2020-06-30 15:36:35,606 Making a fixed resolution buffer of (('bands', '10')) 800 by 800


Let's look at how the two indices compare in vegetation...

In [24]:
# Keep only the pixels of the sphere whose scene-classification band ("10")
# equals 4, which is VEGETATION in the class table above.
veg = sp.cut_region(['obj["10"] == 4'])  # single out vegetation
# Profile NDVI against NDWI over those pixels.
# NOTE(review): "ndvi" is both the profiled field and its own weight here --
# confirm that this weighting is intended rather than an unweighted mean.
plot = yt.ProfilePlot(veg, "ndwi", "ndvi",
                      weight_field="ndvi")
# Both indices can be negative, so use linear axes.
plot.set_log("ndvi", False)
plot.set_log("ndwi", False)
# Bare last expression so the notebook displays the plot.
plot

  out_arr = func(np.asarray(inps[0]), np.asarray(inps[1]),
