### Parallel extraction of training features
This script combines ground-truth information with Sentinel-2 data to extract training features.


In [None]:
#
import dask.distributed
import folium
import folium.plugins
import geopandas as gpd
import shapely.geometry
from IPython.display import HTML, display
from pystac_client import Client
from datacube.utils.cog import write_cog
import gc
from odc.stac import configure_rio, stac_load
import planetary_computer
import planetary_computer as pc
from dea_tools.classification import collect_training_data
import xarray as xr
import numpy as np

# STAC catalog selection: `catalog` is the module-level client used by the
# search cell and by every `feature_*_median` function.
#
# MS catalog variable
# Microsoft Planetary Computer endpoint; `planetary_computer.sign_inplace` is
# passed as the client's `modifier`.
catalog = Client.open(
    "https://planetarycomputer.microsoft.com/api/stac/v1/",
    modifier=planetary_computer.sign_inplace)
# ESA catalog
#catalog = Client.open(
#    "https://catalogue.dataspace.copernicus.eu/stac/")
# E84 catalog see https://element84.com/geospatial/introducing-earth-search-v1-new-datasets-now-available/
# NOTE(review): this assignment rebinds `catalog`, so the Planetary Computer
# client opened above is discarded and Earth Search is the active catalog.
# The feature functions in this notebook reference 'B01'...'B12', 'SCL',
# 'AOT', 'WVP' and 'visual', and the outputs are named '*_mspc' -- confirm
# that these names exist in the catalog that is actually active.
catalog = Client.open("https://earth-search.aws.element84.com/v1/")
# DLR geoservices, currently no daily Sentinel-2 data Level 3 WASP
#catalog = Client.open(
#    "https://geoservice.dlr.de/eoc/ogc/stac/v1/")

In [None]:
def calculate_indices(ds,
                      index=None,
                      collection=None,
                      custom_varname=None,
                      normalise=True,
                      drop=False,
                      inplace=False):
    """
    Takes an xarray dataset containing spectral bands, calculates one or
    more remote sensing indices, and adds each resulting array as a
    new variable in the dataset.

    Note: by default, this function will create a new copy of the data
    in memory. This can be a memory-expensive operation, so to avoid
    this, set `inplace=True`.

    Parameters
    ----------
    ds : xarray Dataset
        A two-dimensional or multi-dimensional array containing the
        spectral bands required to calculate the index.
    index : str or list of strs
        A string giving the name of the index to calculate or a list of
        strings giving the names of the indices to calculate:
        'AWEI_ns' (Automated Water Extraction Index,
                   no shadows, Feyisa 2014)
        'AWEI_sh' (Automated Water Extraction Index,
                   shadows, Feyisa 2014)
        'BAEI' (Built-Up Area Extraction Index, Bouzekri et al. 2015)
        'BAI' (Burn Area Index, Martin 1998)
        'BSI' (Bare Soil Index, Rikimaru et al. 2002)
        'BUI' (Built-Up Index, He et al. 2010)
        'CMR' (Clay Minerals Ratio, Drury 1987)
        'EVI' (Enhanced Vegetation Index, Huete 2002)
        'FMR' (Ferrous Minerals Ratio, Segal 1982)
        'IOR' (Iron Oxide Ratio, Segal 1982)
        'LAI' (Leaf Area Index, Boegh 2002)
        'MNDWI' (Modified Normalised Difference Water Index, Xu 2006)
        'MSAVI' (Modified Soil Adjusted Vegetation Index,
                 Qi et al. 1994)
        'NBI' (New Built-Up Index, Jieli et al. 2010)
        'NBR' (Normalised Burn Ratio, Lopez Garcia 1991)
        'NDBI' (Normalised Difference Built-Up Index, Zha 2003)
        'NDCI' (Normalised Difference Chlorophyll Index,
                Mishra & Mishra, 2012)
        'NDMI' (Normalised Difference Moisture Index, Gao 1996)
        'NDSI' (Normalised Difference Snow Index, Hall 1995)
        'NDTI' (Normalise Difference Tillage Index,
                Van Deventer et al. 1997)
        'NDVI' (Normalised Difference Vegetation Index, Rouse 1973)
        'NDVI8a' (NDVI computed from the `narrow_nir` band)
        'NDWI' (Normalised Difference Water Index, McFeeters 1996)
        'SAVI' (Soil Adjusted Vegetation Index, Huete 1988)
        'TCB' (Tasseled Cap Brightness, Crist 1985)
        'TCG' (Tasseled Cap Greeness, Crist 1985)
        'TCW' (Tasseled Cap Wetness, Crist 1985)
        'TCB_GSO', 'TCG_GSO', 'TCW_GSO' (Tasseled Cap brightness,
                greenness and wetness with the Sentinel-2 GSO
                coefficients of Nedkov 2017)
        'WI' (Water Index, Fisher 2016)
        'kNDVI' (Non-linear Normalised Difference Vegation Index,
                 Camps-Valls et al. 2021)
    collection : str
        A string that tells the function what data collection is
        being used to calculate the index. This is necessary because
        different collections use different names for bands covering
        a similar spectra. Valid options are 'ga_ls_2' (no renaming),
        'ga_ls_3' and 'ga_s2_1' (`nbart_*` / `nbar_*` bands are mapped
        to the short names used by the index recipes; bands that
        already carry the short names are used as they are).
    custom_varname : str, optional
        By default, the original dataset will be returned with
        a new index variable named after `index` (e.g. 'NDVI'). To
        specify a custom name instead, you can supply e.g.
        `custom_varname='custom_name'`. Defaults to None, which uses
        `index` to name the variable. When several indices are
        requested together with `custom_varname`, every result is
        written to that one variable, so only the last index remains.
    normalise : bool, optional
        Some coefficient-based indices (e.g. 'WI', 'BAEI', 'AWEI_ns',
        'AWEI_sh', 'TCW', 'TCG', 'TCB', 'EVI', 'LAI', 'SAVI', 'MSAVI')
        produce different results if surface reflectance values are not
        scaled between 0.0 and 1.0 prior to calculating the index.
        Setting `normalise=True` first scales values to a 0.0-1.0 range
        by dividing by 10000.0. Defaults to True.
    drop : bool, optional
        Provides the option to drop the original input data, thus saving
        space. if drop = True, returns only the index and its values.
    inplace: bool, optional
        If `inplace=True`, calculate_indices will modify the original
        array in-place, adding bands to the input dataset. The default
        is `inplace=False`, which will instead make a new copy of the
        original data (and use twice the memory).

    Returns
    -------
    ds : xarray Dataset
        The dataset (a copy unless `inplace=True`) with one new variable
        per requested index. If drop = True, the variables that were
        present on input are removed and only the new ones remain.

    Raises
    ------
    ValueError
        If `index` is None or not a known index name, if `collection`
        is None or not a valid option, or if a band required by the
        selected index is missing from `ds`.
    """

    # Imported locally: the warning branch below needs `warnings`, which
    # the notebook's import cell does not provide.
    import warnings

    # Set ds equal to a copy of itself in order to prevent the function
    # from editing the input dataset. This can prevent unexpected
    # behaviour though it uses twice as much memory.
    if not inplace:
        ds = ds.copy(deep=True)

    # Capture input band names in order to drop these if drop=True
    if drop:
        bands_to_drop = list(ds.data_vars)
        print(f'Dropping bands {bands_to_drop}')

    # Dictionary containing remote sensing index band recipes
    index_dict = {
        # Normalised Difference Vegation Index, Rouse 1973
        'NDVI': lambda ds: (ds.nir - ds.red) /
                           (ds.nir + ds.red),

        # NDVI, after Rouse 1973 using Sentinel-2 8A band
        'NDVI8a': lambda ds: (ds.narrow_nir - ds.red) /
                             (ds.narrow_nir + ds.red),

        # Non-linear Normalised Difference Vegation Index,
        # Camps-Valls et al. 2021
        'kNDVI': lambda ds: np.tanh(((ds.nir - ds.red) /
                                     (ds.nir + ds.red)) ** 2),

        # Enhanced Vegetation Index, Huete 2002
        'EVI': lambda ds: ((2.5 * (ds.nir - ds.red)) /
                           (ds.nir + 6 * ds.red -
                            7.5 * ds.blue + 1)),

        # Leaf Area Index, Boegh 2002
        'LAI': lambda ds: (3.618 * ((2.5 * (ds.nir - ds.red)) /
                                    (ds.nir + 6 * ds.red -
                                     7.5 * ds.blue + 1)) - 0.118),

        # Soil Adjusted Vegetation Index, Huete 1988
        'SAVI': lambda ds: ((1.5 * (ds.nir - ds.red)) /
                            (ds.nir + ds.red + 0.5)),

        # Mod. Soil Adjusted Vegetation Index, Qi et al. 1994
        'MSAVI': lambda ds: ((2 * ds.nir + 1 -
                              ((2 * ds.nir + 1)**2 -
                               8 * (ds.nir - ds.red))**0.5) / 2),

        # Normalised Difference Moisture Index, Gao 1996
        'NDMI': lambda ds: (ds.nir - ds.swir1) /
                           (ds.nir + ds.swir1),

        # Normalised Burn Ratio, Lopez Garcia 1991
        'NBR': lambda ds: (ds.nir - ds.swir2) /
                          (ds.nir + ds.swir2),

        # Burn Area Index, Martin 1998
        'BAI': lambda ds: (1.0 / ((0.10 - ds.red) ** 2 +
                                  (0.06 - ds.nir) ** 2)),

        # Normalised Difference Chlorophyll Index,
        # (Mishra & Mishra, 2012)
        'NDCI': lambda ds: (ds.red_edge1 - ds.red) /
                           (ds.red_edge1 + ds.red),

        # Normalised Difference Snow Index, Hall 1995
        'NDSI': lambda ds: (ds.green - ds.swir1) /
                           (ds.green + ds.swir1),

        # Normalised Difference Tillage Index,
        # Van Deventer et al. 1997
        'NDTI': lambda ds: (ds.swir1 - ds.swir2) /
                           (ds.swir1 + ds.swir2),

        # Normalised Difference Water Index, McFeeters 1996
        'NDWI': lambda ds: (ds.green - ds.nir) /
                           (ds.green + ds.nir),

        # Modified Normalised Difference Water Index, Xu 2006
        'MNDWI': lambda ds: (ds.green - ds.swir1) /
                            (ds.green + ds.swir1),

        # Normalised Difference Built-Up Index, Zha 2003
        'NDBI': lambda ds: (ds.swir1 - ds.nir) /
                           (ds.swir1 + ds.nir),

        # Built-Up Index, He et al. 2010
        'BUI': lambda ds: ((ds.swir1 - ds.nir) /
                           (ds.swir1 + ds.nir)) -
                          ((ds.nir - ds.red) /
                           (ds.nir + ds.red)),

        # Built-up Area Extraction Index, Bouzekri et al. 2015
        'BAEI': lambda ds: (ds.red + 0.3) /
                           (ds.green + ds.swir1),

        # New Built-up Index, Jieli et al. 2010
        'NBI': lambda ds: (ds.swir1 + ds.red) / ds.nir,

        # Bare Soil Index, Rikimaru et al. 2002
        'BSI': lambda ds: ((ds.swir1 + ds.red) -
                           (ds.nir + ds.blue)) /
                          ((ds.swir1 + ds.red) +
                           (ds.nir + ds.blue)),

        # Automated Water Extraction Index (no shadows), Feyisa 2014.
        # The NIR and SWIR2 terms are summed; a stray `*` previously
        # turned the sum into a product.
        'AWEI_ns': lambda ds: (4 * (ds.green - ds.swir1) -
                               (0.25 * ds.nir + 2.75 * ds.swir2)),

        # Automated Water Extraction Index (shadows), Feyisa 2014
        'AWEI_sh': lambda ds: (ds.blue + 2.5 * ds.green -
                               1.5 * (ds.nir + ds.swir1) -
                               0.25 * ds.swir2),

        # Water Index, Fisher 2016
        'WI': lambda ds: (1.7204 + 171 * ds.green + 3 * ds.red -
                          70 * ds.nir - 45 * ds.swir1 -
                          71 * ds.swir2),

        ## Tasseled Cap
        # Tasseled Cap Transformations are influenced by the sensor
        # I.e. Crist (1985) converted MSS (after Kauth and Thoma, 1976)
        # values to TM values which are used here:

        # Tasseled Cap Wetness, Crist 1985
        'TCW': lambda ds: (0.0315 * ds.blue + 0.2021 * ds.green +
                           0.3102 * ds.red + 0.1594 * ds.nir +
                           -0.6806 * ds.swir1 + -0.6109 * ds.swir2),

        # Tasseled Cap Greeness, Crist 1985
        'TCG': lambda ds: (-0.1603 * ds.blue + -0.2819 * ds.green +
                           -0.4934 * ds.red + 0.7940 * ds.nir +
                           -0.0002 * ds.swir1 + -0.1446 * ds.swir2),

        # Tasseled Cap Brightness, Crist 1985
        'TCB': lambda ds: (0.2043 * ds.blue + 0.4158 * ds.green +
                           0.5524 * ds.red + 0.5741 * ds.nir +
                           0.3124 * ds.swir1 + -0.2303 * ds.swir2),

        # Tasseled Cap coefficients for Sentinel-2 are calculated using
        # Gram-Schmidt orthogonalization (GSO) by Nedkov, 2017 and using
        # a Procrustes Analysis (PCP) by Shi, 2019. The table is kept as
        # comments: as a string literal inside this dict it would be
        # concatenated with the next key and hide that index.
        #
        # Band          GSO_B    GSO_G    GSO_W    PCP_B    PCP_G    PCP_W
        # B1-Coastal    0.0356  -0.0635   0.0649   0.2381  -0.2266   0.1825
        # B2-Blue       0.0822  -0.1128   0.1363   0.2569  -0.2818   0.1763
        # B3-Green      0.136   -0.168    0.2802   0.2934  -0.302    0.1615
        # B4-Red        0.2611  -0.348    0.3072   0.302   -0.4283   0.0486
        # B5-RE-1       0.2964  -0.3303   0.5288   0.3099  -0.2959   0.017
        # B6-RE-2       0.3338   0.0852   0.1379   0.374    0.1602   0.0223
        # B7-RE-3       0.3877   0.3302  -0.0001   0.418    0.3127   0.0219
        # B8-NIR-1      0.3895   0.3165  -0.0807   0.358    0.3138  -0.0755
        # B8A-NIR-2     0.475    0.3625  -0.1389   0.3834   0.4261  -0.091
        # B9-WV         0.0949   0.0467  -0.0302   0.0103   0.1454  -0.1369
        # B10-Cirrus    0.0009  -0.0009   0.0003   0.002   -0.0017   0.0003
        # B11-SWIR-1    0.3882  -0.4587  -0.4064   0.0896  -0.1341  -0.771
        # B12-SWIR-2    0.1366  -0.4064  -0.5602   0.078   -0.2538  -0.5293
        #
        # The three GSO recipes below take their coefficients from the
        # B2, B3, B4, B8, B11 and B12 rows of the table above.

        # Tasseled Cap Wetness, GSO after Nedkov
        'TCW_GSO': lambda ds: (0.1363 * ds.blue + 0.2802 * ds.green +
                               0.3072 * ds.red + -0.0807 * ds.nir +
                               -0.4064 * ds.swir1 + -0.5602 * ds.swir2),

        # Tasseled Cap Greeness, GSO
        'TCG_GSO': lambda ds: (-0.1128 * ds.blue + -0.168 * ds.green +
                               -0.348 * ds.red + 0.3165 * ds.nir +
                               -0.4587 * ds.swir1 + -0.4064 * ds.swir2),

        # Tasseled Cap Brightness, GSO
        'TCB_GSO': lambda ds: (0.0822 * ds.blue + 0.136 * ds.green +
                               0.2611 * ds.red + 0.3895 * ds.nir +
                               0.3882 * ds.swir1 + 0.1366 * ds.swir2),

        # Clay Minerals Ratio, Drury 1987
        'CMR': lambda ds: (ds.swir1 / ds.swir2),

        # Ferrous Minerals Ratio, Segal 1982
        'FMR': lambda ds: (ds.swir1 / ds.nir),

        # Iron Oxide Ratio, Segal 1982
        'IOR': lambda ds: (ds.red / ds.blue)
    }

    # If index supplied is not a list, convert to list. This allows us to
    # iterate through either multiple or single indices in the loop below
    indices = index if isinstance(index, list) else [index]

    # Calculate each index in the list of indices supplied
    for index in indices:

        # Select an index function from the dictionary
        index_func = index_dict.get(str(index))

        # If no index is provided or if no function is returned due to an
        # invalid option being provided, raise an exception informing user to
        # choose from the list of valid options
        if index is None:

            raise ValueError(f"No remote sensing `index` was provided. Please "
                              "refer to the function \ndocumentation for a full "
                              "list of valid options for `index` (e.g. 'NDVI')")

        elif (index in ['WI', 'BAEI', 'AWEI_ns', 'AWEI_sh', 'TCW',
                        'TCG', 'TCB', 'EVI', 'LAI', 'SAVI', 'MSAVI']
              and not normalise):

            warnings.warn(f"\nA coefficient-based index ('{index}') normally "
                           "applied to surface reflectance values in the \n"
                           "0.0-1.0 range was applied to values in the 0-10000 "
                           "range. This can produce unexpected results; \nif "
                           "required, resolve this by setting `normalise=True`")

        elif index_func is None:

            raise ValueError(f"The selected index '{index}' is not one of the "
                              "valid remote sensing index options. \nPlease "
                              "refer to the function documentation for a full "
                              "list of valid options for `index`")

        # Rename bands to a consistent format if depending on what collection
        # is specified in `collection`. This allows the same index calculations
        # to be applied to all collections. If no collection was provided,
        # raise an exception.
        if collection is None:

            raise ValueError("'No `collection` was provided. Please specify "
                             "either 'ga_ls_2', 'ga_ls_3' or 'ga_s2_1' \nto "
                             "ensure the function calculates indices using the "
                             "correct spectral bands")

        elif collection == 'ga_ls_3':

            # Dictionary mapping full data names to simpler 'red' alias names
            bandnames_dict = {
                'nbart_nir': 'nir',
                'nbart_red': 'red',
                'nbart_green': 'green',
                'nbart_blue': 'blue',
                'nbart_swir_1': 'swir1',
                'nbart_swir_2': 'swir2',
                'nbar_red': 'red',
                'nbar_green': 'green',
                'nbar_blue': 'blue',
                'nbar_nir': 'nir',
                'nbar_swir_1': 'swir1',
                'nbar_swir_2': 'swir2'
            }

            # Rename bands in dataset to use simple names (e.g. 'red')
            bands_to_rename = {
                a: b for a, b in bandnames_dict.items() if a in ds.variables
            }

        elif collection == 'ga_s2_1':

            # Dictionary mapping full data names to simpler 'red' alias names.
            # Red-edge aliases are spelled 'red_edge1' / 'red_edge2' so they
            # match the attribute the 'NDCI' recipe reads.
            bandnames_dict = {
                'nbart_red': 'red',
                'nbart_green': 'green',
                'nbart_blue': 'blue',
                'nbart_nir_1': 'nir',
                'nbart_red_edge_1': 'red_edge1',
                'nbart_red_edge_2': 'red_edge2',
                'nbart_swir_2': 'swir1',
                'nbart_swir_3': 'swir2',
                'nbar_red': 'red',
                'nbar_green': 'green',
                'nbar_blue': 'blue',
                'nbar_nir_1': 'nir',
                'nbar_red_edge_1': 'red_edge1',
                'nbar_red_edge_2': 'red_edge2',
                'nbar_swir_2': 'swir1',
                'nbar_swir_3': 'swir2'
            }

            # Rename bands in dataset to use simple names (e.g. 'red')
            bands_to_rename = {
                a: b for a, b in bandnames_dict.items() if a in ds.variables
            }

        elif collection == 'ga_ls_2':

            # Pass an empty dict as no bands need renaming
            bands_to_rename = {}

        # Raise error if no valid collection name is provided:
        else:
            raise ValueError(f"'{collection}' is not a valid option for "
                              "`collection`. Please specify either \n"
                              "'ga_ls_2', 'ga_ls_3' or 'ga_s2_1'")

        # Apply index function. The renamed/scaled dataset is temporary:
        # only the resulting index array is stored back in `ds`.
        try:
            # If normalised=True, divide data by 10,000 before applying func
            mult = 10000.0 if normalise else 1.0
            index_array = index_func(ds.rename(bands_to_rename) / mult)
        except AttributeError as err:
            # A recipe accessed a band attribute that does not exist
            raise ValueError(f'Please verify that all bands required to '
                             f'compute {index} are present in `ds`. \n'
                             f'These bands may vary depending on the `collection` '
                             f'(e.g. the Landsat `nbart_nir` band \n'
                             f'is equivelent to `nbart_nir_1` for Sentinel 2)') from err

        # Add as a new variable in dataset
        output_band_name = custom_varname if custom_varname else index
        ds[output_band_name] = index_array

    # Once all indexes are calculated, drop input bands if inplace=False.
    # `drop_vars` replaces the deprecated `Dataset.drop`.
    if drop and not inplace:
        ds = ds.drop_vars(bands_to_drop)

    # If inplace == True, delete bands in-place instead of using drop
    if drop and inplace:
        for band_to_drop in bands_to_drop:
            del ds[band_to_drop]

    # Return dataset with the added index variable(s)
    return ds

In [None]:
# Extent of the study area; the same four numbers feed both the polygon and
# the bounding box below.
x_min, x_max = 9, 11
y_min, y_max = 50, 51

# GeoJSON-style polygon: a single closed ring that starts and ends at the
# (x_max, y_min) corner.
area_of_interest = dict(
    type="Polygon",
    coordinates=[
        [
            [x, y]
            for x, y in (
                (x_max, y_min),
                (x_min, y_min),
                (x_min, y_max),
                (x_max, y_max),
                (x_max, y_min),
            )
        ]
    ],
)

# Bounding box in [x_min, y_min, x_max, y_max] order
bbox = [x_min, y_min, x_max, y_max]


In [None]:
# Exploratory search: count the Sentinel-2 L2A items the active `catalog`
# holds for the bounding box in one month.
# NOTE(review): in the visible part of the notebook the feature functions run
# their own searches, so `items` from this cell is only used for the printed
# count.

# time variable (month string passed as the search `datetime`)
time_of_interest = "2019-04"

search = catalog.search(
    #collections=["sentinel-2-l2a"], # MS setting
    collections=["sentinel-2-l2a"], # E84 setting
    #collections=["SENTINEL-2"], # ESA setting
    #collections=["S2_L3A_WASP"], # DLR geoservices
    #intersects=area_of_interest, # MS setting
    bbox=bbox, # global
    limit=1000,
    datetime=time_of_interest,
    
    #query={"eo:cloud_cover": {"lt": 100}},
)

# Check how many items were returned
items = search.item_collection()
print(f"Returned {len(items)} Items")

In [None]:
# Settings shared by the training-data extraction cells.

# Ground-truth vector file read with geopandas (alternative kept for reference)
#path = 'reinbestaende_deci_conf.shp'
path = 'rein_laub_nadel.zip'

# Attribute column handed to `collect_training_data` as `field`
field = 'class'

products = 's2_l2a'

# Statistic handed to `collect_training_data` as `zonal_stats`
zonal_stats = 'median' # if extension

resolution = (-10, 10)

# Passed to `collect_training_data` as `dc_query`
query = dict(output_crs='EPSG:25832')

### Feature extraction for monthly data
The functions need to be adapted to the different STAC catalogs.

In [None]:
def feature_jan_median(query):
    """
    Build the January 2019 feature dataset (used as a `feature_func`).

    Searches the module-level `catalog` for "sentinel-2-l2a" items that
    intersect `area_of_interest` in "2019-01", loads them with `stac_load`
    grouped by solar day, masks pixels by their SCL value, reduces over
    `time` with a median, computes spectral indices with
    `calculate_indices` and stores every band and index under a `jan_` name.

    Parameters
    ----------
    query : dict
        Extra keyword arguments forwarded unchanged to `stac_load`.

    Returns
    -------
    Dataset as produced by `stac_load` and reduced over time, holding the
    `jan_`-prefixed band and index variables. The raw bands, their aliases,
    the un-prefixed indices and 'AOT', 'visual' and 'WVP' are dropped.
    """
    month = "2019-01"
    prefix = "jan"

    # (descriptive alias, raw band name, short suffix used in the output)
    band_table = [
        ('coastal_aerosol', 'B01', 'ca'),
        ('blue', 'B02', 'b'),
        ('green', 'B03', 'g'),
        ('red', 'B04', 'r'),
        ('red_edge1', 'B05', 'r1'),
        ('red_edge2', 'B06', 'r2'),
        ('red_edge3', 'B07', 'r3'),
        ('nir', 'B08', 'n'),
        ('narrow_nir', 'B8A', 'nn'),
        ('water_vapour', 'B09', 'wv'),
        ('swir1', 'B11', 's1'),
        ('swir2', 'B12', 's2'),
    ]
    index_names = ['NDVI', 'kNDVI', 'NDVI8a', 'EVI', 'TCG_GSO', 'TCG', 'LAI',
                   'SAVI', 'MSAVI', 'BUI', 'NDBI', 'NDMI', 'BAEI', 'BSI']

    # Find the month's items (optional filter kept for reference:
    # query={"eo:cloud_cover": {"lt": 100}})
    stac_items = catalog.search(
        collections=["sentinel-2-l2a"],
        intersects=area_of_interest,
        datetime=month,
    ).item_collection()

    # Load one slice per solar day (chunks={} intentionally not passed)
    cube = stac_load(items=stac_items, groupby="solar_day", **query)

    # Keep only pixels whose SCL value is 4, 5, 6, 7 or 11, then discard the
    # SCL layer and take the per-pixel median over time.
    keep = cube.SCL.isin([4, 5, 6, 7, 11])
    cube = cube.where(keep).drop_vars('SCL').median('time')

    # Expose the raw bands under the names the index recipes expect
    for alias, raw_band, _ in band_table:
        cube[alias] = cube[raw_band]

    cube = calculate_indices(cube, index=index_names, collection='ga_s2_1')

    # Month-prefixed copies: bands first, then indices
    for alias, _, suffix in band_table:
        cube[f'{prefix}_{suffix}'] = cube[alias]
    for name in index_names:
        cube[f'{prefix}_{name}'] = cube[name]

    # Remove everything that is not month-prefixed
    obsolete = (
        [raw_band for _, raw_band, _ in band_table]
        + [alias for alias, _, _ in band_table]
        + ['AOT', 'visual', 'WVP']
        + index_names
    )
    cube = cube.drop_vars(obsolete)

    return cube  # .compute() intentionally not called here

In [None]:
#input_data = gpd.read_file(path).to_crs('epsg:4326')
# Read the ground-truth vector file at `path` and reproject it to EPSG:25832,
# the CRS that `query['output_crs']` also requests.
input_data = gpd.read_file(path).to_crs('EPSG:25832')

# Run dea_tools' `collect_training_data` on the ground-truth geometries with
# `feature_jan_median` as the feature function. It returns the column names
# and the extracted data array that the next cell writes to disk.
# NOTE(review): `ncpus=40` presumably sets the number of parallel workers --
# confirm it suits the machine this runs on.
column_names, model_input = collect_training_data(
                                    gdf=input_data,
                                    dc_query=query,
                                    ncpus=40,
                                    field=field,
                                    return_coords=True,
                                    zonal_stats=zonal_stats,
                                    feature_func=feature_jan_median,
                                    max_retries=5
                                    )

In [None]:
import numpy as np
import pandas as pd

# Write the January training data as a space-separated text file whose
# header line lists the column names.
output_file = "2019-01_mspc.txt"
# Position of each name's first occurrence in `column_names`
model_col_indices = list(map(column_names.index, column_names))
np.savetxt(
    output_file,
    model_input[:, model_col_indices],
    fmt="%4f",
    header=" ".join(column_names),
)

# Same data as CSV with named columns and no index column
pd.DataFrame(model_input, columns=column_names).to_csv(
    "2019-01_mspc.csv",
    sep=",",
    index=False,
)

In [None]:
def feature_feb_median(query):
    """
    Build the February 2019 feature dataset (used as a `feature_func`).

    Searches the module-level `catalog` for "sentinel-2-l2a" items that
    intersect `area_of_interest` in "2019-02", loads them with `stac_load`
    grouped by solar day, masks pixels by their SCL value, reduces over
    `time` with a median, computes spectral indices with
    `calculate_indices` and stores every band and index under a `feb_` name.

    Parameters
    ----------
    query : dict
        Extra keyword arguments forwarded unchanged to `stac_load`.

    Returns
    -------
    Dataset as produced by `stac_load` and reduced over time, holding the
    `feb_`-prefixed band and index variables. The raw bands, their aliases,
    the un-prefixed indices and 'AOT', 'visual' and 'WVP' are dropped.
    """
    month = "2019-02"
    prefix = "feb"

    # (descriptive alias, raw band name, short suffix used in the output)
    band_table = [
        ('coastal_aerosol', 'B01', 'ca'),
        ('blue', 'B02', 'b'),
        ('green', 'B03', 'g'),
        ('red', 'B04', 'r'),
        ('red_edge1', 'B05', 'r1'),
        ('red_edge2', 'B06', 'r2'),
        ('red_edge3', 'B07', 'r3'),
        ('nir', 'B08', 'n'),
        ('narrow_nir', 'B8A', 'nn'),
        ('water_vapour', 'B09', 'wv'),
        ('swir1', 'B11', 's1'),
        ('swir2', 'B12', 's2'),
    ]
    index_names = ['NDVI', 'kNDVI', 'NDVI8a', 'EVI', 'TCG_GSO', 'TCG', 'LAI',
                   'SAVI', 'MSAVI', 'BUI', 'NDBI', 'NDMI', 'BAEI', 'BSI']

    # Find the month's items (optional filter kept for reference:
    # query={"eo:cloud_cover": {"lt": 100}})
    stac_items = catalog.search(
        collections=["sentinel-2-l2a"],
        intersects=area_of_interest,
        datetime=month,
    ).item_collection()

    # Load one slice per solar day (chunks={} intentionally not passed)
    cube = stac_load(items=stac_items, groupby="solar_day", **query)

    # Keep only pixels whose SCL value is 4, 5, 6, 7 or 11, then discard the
    # SCL layer and take the per-pixel median over time.
    keep = cube.SCL.isin([4, 5, 6, 7, 11])
    cube = cube.where(keep).drop_vars('SCL').median('time')

    # Expose the raw bands under the names the index recipes expect
    for alias, raw_band, _ in band_table:
        cube[alias] = cube[raw_band]

    cube = calculate_indices(cube, index=index_names, collection='ga_s2_1')

    # Month-prefixed copies: bands first, then indices
    for alias, _, suffix in band_table:
        cube[f'{prefix}_{suffix}'] = cube[alias]
    for name in index_names:
        cube[f'{prefix}_{name}'] = cube[name]

    # Remove everything that is not month-prefixed
    obsolete = (
        [raw_band for _, raw_band, _ in band_table]
        + [alias for alias, _, _ in band_table]
        + ['AOT', 'visual', 'WVP']
        + index_names
    )
    cube = cube.drop_vars(obsolete)

    return cube  # .compute() intentionally not called here

In [None]:
#input_data = gpd.read_file(path).to_crs('epsg:4326')
# Read the ground-truth vector file at `path` and reproject it to EPSG:25832,
# the CRS that `query['output_crs']` also requests.
input_data = gpd.read_file(path).to_crs('EPSG:25832')

# Run dea_tools' `collect_training_data` on the ground-truth geometries with
# `feature_feb_median` as the feature function. It returns the column names
# and the extracted data array that the next cell writes to disk.
# NOTE(review): this cell uses `ncpus=30` while the January and March cells
# use 40 -- confirm whether the difference is intentional.
column_names, model_input = collect_training_data(
                                    gdf=input_data,
                                    dc_query=query,
                                    ncpus=30,
                                    field=field,
                                    return_coords=True,
                                    zonal_stats=zonal_stats,
                                    feature_func=feature_feb_median,
                                    max_retries=5
                                    )

In [None]:
import numpy as np
import pandas as pd

# Write the February training data as a space-separated text file whose
# header line lists the column names.
output_file = "2019-02_mspc.txt"
# Position of each name's first occurrence in `column_names`
model_col_indices = list(map(column_names.index, column_names))
np.savetxt(
    output_file,
    model_input[:, model_col_indices],
    fmt="%4f",
    header=" ".join(column_names),
)

# Same data as CSV with named columns and no index column
pd.DataFrame(model_input, columns=column_names).to_csv(
    "2019-02_mspc.csv",
    sep=",",
    index=False,
)

In [None]:
def feature_mar_median(query):
    """
    Build the March 2019 feature dataset (used as a `feature_func`).

    Searches the module-level `catalog` for "sentinel-2-l2a" items that
    intersect `area_of_interest` in "2019-03", loads them with `stac_load`
    grouped by solar day, masks pixels by their SCL value, reduces over
    `time` with a median, computes spectral indices with
    `calculate_indices` and stores every band and index under a `mar_` name.

    Parameters
    ----------
    query : dict
        Extra keyword arguments forwarded unchanged to `stac_load`.

    Returns
    -------
    Dataset as produced by `stac_load` and reduced over time, holding the
    `mar_`-prefixed band and index variables. The raw bands, their aliases,
    the un-prefixed indices and 'AOT', 'visual' and 'WVP' are dropped.
    """
    month = "2019-03"
    prefix = "mar"

    # (descriptive alias, raw band name, short suffix used in the output)
    band_table = [
        ('coastal_aerosol', 'B01', 'ca'),
        ('blue', 'B02', 'b'),
        ('green', 'B03', 'g'),
        ('red', 'B04', 'r'),
        ('red_edge1', 'B05', 'r1'),
        ('red_edge2', 'B06', 'r2'),
        ('red_edge3', 'B07', 'r3'),
        ('nir', 'B08', 'n'),
        ('narrow_nir', 'B8A', 'nn'),
        ('water_vapour', 'B09', 'wv'),
        ('swir1', 'B11', 's1'),
        ('swir2', 'B12', 's2'),
    ]
    index_names = ['NDVI', 'kNDVI', 'NDVI8a', 'EVI', 'TCG_GSO', 'TCG', 'LAI',
                   'SAVI', 'MSAVI', 'BUI', 'NDBI', 'NDMI', 'BAEI', 'BSI']

    # Find the month's items (optional filter kept for reference:
    # query={"eo:cloud_cover": {"lt": 100}})
    stac_items = catalog.search(
        collections=["sentinel-2-l2a"],
        intersects=area_of_interest,
        datetime=month,
    ).item_collection()

    # Load one slice per solar day (chunks={} intentionally not passed)
    cube = stac_load(items=stac_items, groupby="solar_day", **query)

    # Keep only pixels whose SCL value is 4, 5, 6, 7 or 11, then discard the
    # SCL layer and take the per-pixel median over time.
    keep = cube.SCL.isin([4, 5, 6, 7, 11])
    cube = cube.where(keep).drop_vars('SCL').median('time')

    # Expose the raw bands under the names the index recipes expect
    for alias, raw_band, _ in band_table:
        cube[alias] = cube[raw_band]

    cube = calculate_indices(cube, index=index_names, collection='ga_s2_1')

    # Month-prefixed copies: bands first, then indices
    for alias, _, suffix in band_table:
        cube[f'{prefix}_{suffix}'] = cube[alias]
    for name in index_names:
        cube[f'{prefix}_{name}'] = cube[name]

    # Remove everything that is not month-prefixed
    obsolete = (
        [raw_band for _, raw_band, _ in band_table]
        + [alias for alias, _, _ in band_table]
        + ['AOT', 'visual', 'WVP']
        + index_names
    )
    cube = cube.drop_vars(obsolete)

    return cube  # .compute() intentionally not called here

In [None]:
# Read the training geometries and reproject them to EPSG:25832
# (EPSG:4326 was the previously used alternative).
input_data = (
    gpd.read_file(path)
    .to_crs('EPSG:25832')
)

# Extract the March 2019 features for every training geometry.
column_names, model_input = collect_training_data(
    gdf=input_data,
    dc_query=query,
    ncpus=40,
    field=field,
    return_coords=True,
    zonal_stats=zonal_stats,
    feature_func=feature_mar_median,
    max_retries=5,
)

In [None]:
import numpy as np

# Plain-text export: the column names go into the header comment line.
output_file = "2019-03_mspc.txt"
# Index of the first occurrence of every column name; with unique names this
# is simply every column in its existing order.
model_col_indices = list(map(column_names.index, column_names))
np.savetxt(
    output_file,
    model_input[:, model_col_indices],
    fmt="%4f",
    header=" ".join(column_names),
)

# CSV export of the same array with the column names as header row.
import pandas as pd

pd.DataFrame(model_input, columns=column_names).to_csv(
    "2019-03_mspc.csv",
    sep=",",
    index=False,
)

In [None]:
def feature_apr_median(query):
    """
    Build the April 2019 median feature layers.

    Searches the module-level ``catalog`` for ``sentinel-2-l2a`` items that
    intersect the module-level ``area_of_interest`` in ``2019-04``, loads
    them with ``stac_load`` (grouped by solar day), masks every pixel whose
    ``SCL`` value is not 4, 5, 6, 7 or 11, takes the median over ``time``
    and adds the spectral indices computed by ``calculate_indices``.

    Parameters
    ----------
    query : dict
        Keyword arguments forwarded unchanged to ``stac_load``.

    Returns
    -------
    xarray.Dataset
        Only the ``apr_``-prefixed band and index variables; all source
        bands, aliases and un-prefixed indices are dropped.
    """
    prefix = 'apr_'
    # (Sentinel-2 band, alias handed to calculate_indices, output suffix)
    bands = (
        ('B01', 'coastal_aerosol', 'ca'),
        ('B02', 'blue', 'b'),
        ('B03', 'green', 'g'),
        ('B04', 'red', 'r'),
        ('B05', 'red_edge1', 'r1'),
        ('B06', 'red_edge2', 'r2'),
        ('B07', 'red_edge3', 'r3'),
        ('B08', 'nir', 'n'),
        ('B8A', 'narrow_nir', 'nn'),
        ('B09', 'water_vapour', 'wv'),
        ('B11', 'swir1', 's1'),
        ('B12', 'swir2', 's2'),
    )
    indices = ('NDVI', 'kNDVI', 'NDVI8a', 'EVI', 'TCG_GSO', 'TCG', 'LAI',
               'SAVI', 'MSAVI', 'BUI', 'NDBI', 'NDMI', 'BAEI', 'BSI')

    scenes = catalog.search(
        collections=["sentinel-2-l2a"],
        intersects=area_of_interest,
        datetime="2019-04",
    ).item_collection()

    cube = stac_load(groupby="solar_day", items=scenes, **query)
    # Keep only the accepted scene-classification values, then composite.
    cube = cube.where(cube.SCL.isin([4, 5, 6, 7, 11]))
    cube = cube.drop_vars('SCL')
    cube = cube.median('time')

    # Expose the bands under the descriptive aliases before the index step.
    for band, alias, _ in bands:
        cube[alias] = cube[band]

    cube = calculate_indices(cube, index=list(indices), collection='ga_s2_1')

    # Month-prefixed copies: bands first, then indices (this fixes the
    # order of the variables in the returned dataset).
    for _, alias, suffix in bands:
        cube[prefix + suffix] = cube[alias]
    for name in indices:
        cube[prefix + name] = cube[name]

    # Remove everything that is not month-prefixed.
    discard = [band for band, _, _ in bands]
    discard += [alias for _, alias, _ in bands]
    discard += ['AOT', 'visual', 'WVP']
    discard += list(indices)
    return cube.drop_vars(discard)

In [None]:
# Read the training geometries and reproject them to EPSG:25832
# (EPSG:4326 was the previously used alternative).
input_data = (
    gpd.read_file(path)
    .to_crs('EPSG:25832')
)

# Extract the April 2019 features for every training geometry.
column_names, model_input = collect_training_data(
    gdf=input_data,
    dc_query=query,
    ncpus=30,
    field=field,
    return_coords=True,
    zonal_stats=zonal_stats,
    feature_func=feature_apr_median,
    max_retries=5,
)

In [None]:
import numpy as np

# Plain-text export: the column names go into the header comment line.
output_file = "2019-04_mspc.txt"
# Index of the first occurrence of every column name; with unique names this
# is simply every column in its existing order.
model_col_indices = list(map(column_names.index, column_names))
np.savetxt(
    output_file,
    model_input[:, model_col_indices],
    fmt="%4f",
    header=" ".join(column_names),
)

# CSV export of the same array with the column names as header row.
import pandas as pd

pd.DataFrame(model_input, columns=column_names).to_csv(
    "2019-04_mspc.csv",
    sep=",",
    index=False,
)

In [None]:
def feature_may_median(query):
    """
    Build the May 2019 median feature layers.

    Searches the module-level ``catalog`` for ``sentinel-2-l2a`` items that
    intersect the module-level ``area_of_interest`` in ``2019-05``, loads
    them with ``stac_load`` (grouped by solar day), masks every pixel whose
    ``SCL`` value is not 4, 5, 6, 7 or 11, takes the median over ``time``
    and adds the spectral indices computed by ``calculate_indices``.

    Parameters
    ----------
    query : dict
        Keyword arguments forwarded unchanged to ``stac_load``.

    Returns
    -------
    xarray.Dataset
        Only the ``may_``-prefixed band and index variables; all source
        bands, aliases and un-prefixed indices are dropped.
    """
    prefix = 'may_'
    # (Sentinel-2 band, alias handed to calculate_indices, output suffix)
    bands = (
        ('B01', 'coastal_aerosol', 'ca'),
        ('B02', 'blue', 'b'),
        ('B03', 'green', 'g'),
        ('B04', 'red', 'r'),
        ('B05', 'red_edge1', 'r1'),
        ('B06', 'red_edge2', 'r2'),
        ('B07', 'red_edge3', 'r3'),
        ('B08', 'nir', 'n'),
        ('B8A', 'narrow_nir', 'nn'),
        ('B09', 'water_vapour', 'wv'),
        ('B11', 'swir1', 's1'),
        ('B12', 'swir2', 's2'),
    )
    indices = ('NDVI', 'kNDVI', 'NDVI8a', 'EVI', 'TCG_GSO', 'TCG', 'LAI',
               'SAVI', 'MSAVI', 'BUI', 'NDBI', 'NDMI', 'BAEI', 'BSI')

    scenes = catalog.search(
        collections=["sentinel-2-l2a"],
        intersects=area_of_interest,
        datetime="2019-05",
    ).item_collection()

    cube = stac_load(groupby="solar_day", items=scenes, **query)
    # Keep only the accepted scene-classification values, then composite.
    cube = cube.where(cube.SCL.isin([4, 5, 6, 7, 11]))
    cube = cube.drop_vars('SCL')
    cube = cube.median('time')

    # Expose the bands under the descriptive aliases before the index step.
    for band, alias, _ in bands:
        cube[alias] = cube[band]

    cube = calculate_indices(cube, index=list(indices), collection='ga_s2_1')

    # Month-prefixed copies: bands first, then indices (this fixes the
    # order of the variables in the returned dataset).
    for _, alias, suffix in bands:
        cube[prefix + suffix] = cube[alias]
    for name in indices:
        cube[prefix + name] = cube[name]

    # Remove everything that is not month-prefixed.
    discard = [band for band, _, _ in bands]
    discard += [alias for _, alias, _ in bands]
    discard += ['AOT', 'visual', 'WVP']
    discard += list(indices)
    return cube.drop_vars(discard)

In [None]:
# Read the training geometries and reproject them to EPSG:25832
# (EPSG:4326 was the previously used alternative).
input_data = (
    gpd.read_file(path)
    .to_crs('EPSG:25832')
)

# Extract the May 2019 features for every training geometry.
column_names, model_input = collect_training_data(
    gdf=input_data,
    dc_query=query,
    ncpus=30,
    field=field,
    return_coords=True,
    zonal_stats=zonal_stats,
    feature_func=feature_may_median,
    max_retries=5,
)

In [None]:
import numpy as np

# Plain-text export: the column names go into the header comment line.
output_file = "2019-05_mspc.txt"
# Index of the first occurrence of every column name; with unique names this
# is simply every column in its existing order.
model_col_indices = list(map(column_names.index, column_names))
np.savetxt(
    output_file,
    model_input[:, model_col_indices],
    fmt="%4f",
    header=" ".join(column_names),
)

# CSV export of the same array with the column names as header row.
import pandas as pd

pd.DataFrame(model_input, columns=column_names).to_csv(
    "2019-05_mspc.csv",
    sep=",",
    index=False,
)

In [None]:
def feature_may_median(query):
    """
    Build the June 2019 median feature layers.

    Searches the module-level ``catalog`` for ``sentinel-2-l2a`` items that
    intersect the module-level ``area_of_interest`` in ``2019-06``, loads
    them with ``stac_load`` (grouped by solar day), masks every pixel whose
    ``SCL`` value is not 4, 5, 6, 7 or 11, takes the median over ``time``
    and adds the spectral indices computed by ``calculate_indices``.

    The name ``feature_may_median`` is a copy-paste leftover; it is kept so
    that the following cell's ``feature_func=feature_may_median`` keeps
    working. The output variables carry the ``jun_`` prefix so that the
    June columns are not mislabelled as May.

    Parameters
    ----------
    query : dict
        Keyword arguments forwarded unchanged to ``stac_load``.

    Returns
    -------
    xarray.Dataset
        Only the ``jun_``-prefixed band and index variables; all source
        bands, aliases and un-prefixed indices are dropped.
    """
    prefix = 'jun_'
    # (Sentinel-2 band, alias handed to calculate_indices, output suffix)
    bands = (
        ('B01', 'coastal_aerosol', 'ca'),
        ('B02', 'blue', 'b'),
        ('B03', 'green', 'g'),
        ('B04', 'red', 'r'),
        ('B05', 'red_edge1', 'r1'),
        ('B06', 'red_edge2', 'r2'),
        ('B07', 'red_edge3', 'r3'),
        ('B08', 'nir', 'n'),
        ('B8A', 'narrow_nir', 'nn'),
        ('B09', 'water_vapour', 'wv'),
        ('B11', 'swir1', 's1'),
        ('B12', 'swir2', 's2'),
    )
    indices = ('NDVI', 'kNDVI', 'NDVI8a', 'EVI', 'TCG_GSO', 'TCG', 'LAI',
               'SAVI', 'MSAVI', 'BUI', 'NDBI', 'NDMI', 'BAEI', 'BSI')

    scenes = catalog.search(
        collections=["sentinel-2-l2a"],
        intersects=area_of_interest,
        datetime="2019-06",
    ).item_collection()

    cube = stac_load(groupby="solar_day", items=scenes, **query)
    # Keep only the accepted scene-classification values, then composite.
    cube = cube.where(cube.SCL.isin([4, 5, 6, 7, 11]))
    cube = cube.drop_vars('SCL')
    cube = cube.median('time')

    # Expose the bands under the descriptive aliases before the index step.
    for band, alias, _ in bands:
        cube[alias] = cube[band]

    cube = calculate_indices(cube, index=list(indices), collection='ga_s2_1')

    # Month-prefixed copies: bands first, then indices (this fixes the
    # order of the variables in the returned dataset).
    for _, alias, suffix in bands:
        cube[prefix + suffix] = cube[alias]
    for name in indices:
        cube[prefix + name] = cube[name]

    # Remove everything that is not month-prefixed.
    discard = [band for band, _, _ in bands]
    discard += [alias for _, alias, _ in bands]
    discard += ['AOT', 'visual', 'WVP']
    discard += list(indices)
    return cube.drop_vars(discard)

In [None]:
# Read the training geometries and reproject them to EPSG:25832
# (EPSG:4326 was the previously used alternative).
input_data = (
    gpd.read_file(path)
    .to_crs('EPSG:25832')
)

# Extract the features for every training geometry with the most recently
# defined feature_may_median (the cell above, which loads 2019-06).
column_names, model_input = collect_training_data(
    gdf=input_data,
    dc_query=query,
    ncpus=40,
    field=field,
    return_coords=True,
    zonal_stats=zonal_stats,
    feature_func=feature_may_median,
    max_retries=5,
)

In [None]:
import numpy as np

# Plain-text export: the column names go into the header comment line.
output_file = "2019-06_mspc.txt"
# Index of the first occurrence of every column name; with unique names this
# is simply every column in its existing order.
model_col_indices = list(map(column_names.index, column_names))
np.savetxt(
    output_file,
    model_input[:, model_col_indices],
    fmt="%4f",
    header=" ".join(column_names),
)

# CSV export of the same array with the column names as header row.
import pandas as pd

pd.DataFrame(model_input, columns=column_names).to_csv(
    "2019-06_mspc.csv",
    sep=",",
    index=False,
)

In [None]:
def feature_may_median(query):
    """
    Build the July 2019 median feature layers.

    Searches the module-level ``catalog`` for ``sentinel-2-l2a`` items that
    intersect the module-level ``area_of_interest`` in ``2019-07``, loads
    them with ``stac_load`` (grouped by solar day), masks every pixel whose
    ``SCL`` value is not 4, 5, 6, 7 or 11, takes the median over ``time``
    and adds the spectral indices computed by ``calculate_indices``.

    The name ``feature_may_median`` is a copy-paste leftover; it is kept so
    that the following cell's ``feature_func=feature_may_median`` keeps
    working. The output variables carry the ``jul_`` prefix so that the
    July columns are not mislabelled as May.

    Parameters
    ----------
    query : dict
        Keyword arguments forwarded unchanged to ``stac_load``.

    Returns
    -------
    xarray.Dataset
        Only the ``jul_``-prefixed band and index variables; all source
        bands, aliases and un-prefixed indices are dropped.
    """
    prefix = 'jul_'
    # (Sentinel-2 band, alias handed to calculate_indices, output suffix)
    bands = (
        ('B01', 'coastal_aerosol', 'ca'),
        ('B02', 'blue', 'b'),
        ('B03', 'green', 'g'),
        ('B04', 'red', 'r'),
        ('B05', 'red_edge1', 'r1'),
        ('B06', 'red_edge2', 'r2'),
        ('B07', 'red_edge3', 'r3'),
        ('B08', 'nir', 'n'),
        ('B8A', 'narrow_nir', 'nn'),
        ('B09', 'water_vapour', 'wv'),
        ('B11', 'swir1', 's1'),
        ('B12', 'swir2', 's2'),
    )
    indices = ('NDVI', 'kNDVI', 'NDVI8a', 'EVI', 'TCG_GSO', 'TCG', 'LAI',
               'SAVI', 'MSAVI', 'BUI', 'NDBI', 'NDMI', 'BAEI', 'BSI')

    scenes = catalog.search(
        collections=["sentinel-2-l2a"],
        intersects=area_of_interest,
        datetime="2019-07",
    ).item_collection()

    cube = stac_load(groupby="solar_day", items=scenes, **query)
    # Keep only the accepted scene-classification values, then composite.
    cube = cube.where(cube.SCL.isin([4, 5, 6, 7, 11]))
    cube = cube.drop_vars('SCL')
    cube = cube.median('time')

    # Expose the bands under the descriptive aliases before the index step.
    for band, alias, _ in bands:
        cube[alias] = cube[band]

    cube = calculate_indices(cube, index=list(indices), collection='ga_s2_1')

    # Month-prefixed copies: bands first, then indices (this fixes the
    # order of the variables in the returned dataset).
    for _, alias, suffix in bands:
        cube[prefix + suffix] = cube[alias]
    for name in indices:
        cube[prefix + name] = cube[name]

    # Remove everything that is not month-prefixed.
    discard = [band for band, _, _ in bands]
    discard += [alias for _, alias, _ in bands]
    discard += ['AOT', 'visual', 'WVP']
    discard += list(indices)
    return cube.drop_vars(discard)

In [None]:
# Read the training geometries and reproject them to EPSG:25832
# (EPSG:4326 was the previously used alternative).
input_data = (
    gpd.read_file(path)
    .to_crs('EPSG:25832')
)

# Extract the features for every training geometry with the most recently
# defined feature_may_median (the cell above, which loads 2019-07).
column_names, model_input = collect_training_data(
    gdf=input_data,
    dc_query=query,
    ncpus=40,
    field=field,
    return_coords=True,
    zonal_stats=zonal_stats,
    feature_func=feature_may_median,
    max_retries=5,
)

In [None]:
import numpy as np

# Plain-text export: the column names go into the header comment line.
output_file = "2019-07_mspc.txt"
# Index of the first occurrence of every column name; with unique names this
# is simply every column in its existing order.
model_col_indices = list(map(column_names.index, column_names))
np.savetxt(
    output_file,
    model_input[:, model_col_indices],
    fmt="%4f",
    header=" ".join(column_names),
)

# CSV export of the same array with the column names as header row.
import pandas as pd

pd.DataFrame(model_input, columns=column_names).to_csv(
    "2019-07_mspc.csv",
    sep=",",
    index=False,
)

In [None]:
def feature_may_median(query):
    """
    Build the August 2019 median feature layers.

    Searches the module-level ``catalog`` for ``sentinel-2-l2a`` items that
    intersect the module-level ``area_of_interest`` in ``2019-08``, loads
    them with ``stac_load`` (grouped by solar day), masks every pixel whose
    ``SCL`` value is not 4, 5, 6, 7 or 11, takes the median over ``time``
    and adds the spectral indices computed by ``calculate_indices``.

    The name ``feature_may_median`` is a copy-paste leftover; it is kept so
    that the following cell's ``feature_func=feature_may_median`` keeps
    working. The output variables carry the ``aug_`` prefix so that the
    August columns are not mislabelled as May.

    Parameters
    ----------
    query : dict
        Keyword arguments forwarded unchanged to ``stac_load``.

    Returns
    -------
    xarray.Dataset
        Only the ``aug_``-prefixed band and index variables; all source
        bands, aliases and un-prefixed indices are dropped.
    """
    prefix = 'aug_'
    # (Sentinel-2 band, alias handed to calculate_indices, output suffix)
    bands = (
        ('B01', 'coastal_aerosol', 'ca'),
        ('B02', 'blue', 'b'),
        ('B03', 'green', 'g'),
        ('B04', 'red', 'r'),
        ('B05', 'red_edge1', 'r1'),
        ('B06', 'red_edge2', 'r2'),
        ('B07', 'red_edge3', 'r3'),
        ('B08', 'nir', 'n'),
        ('B8A', 'narrow_nir', 'nn'),
        ('B09', 'water_vapour', 'wv'),
        ('B11', 'swir1', 's1'),
        ('B12', 'swir2', 's2'),
    )
    indices = ('NDVI', 'kNDVI', 'NDVI8a', 'EVI', 'TCG_GSO', 'TCG', 'LAI',
               'SAVI', 'MSAVI', 'BUI', 'NDBI', 'NDMI', 'BAEI', 'BSI')

    scenes = catalog.search(
        collections=["sentinel-2-l2a"],
        intersects=area_of_interest,
        datetime="2019-08",
    ).item_collection()

    cube = stac_load(groupby="solar_day", items=scenes, **query)
    # Keep only the accepted scene-classification values, then composite.
    cube = cube.where(cube.SCL.isin([4, 5, 6, 7, 11]))
    cube = cube.drop_vars('SCL')
    cube = cube.median('time')

    # Expose the bands under the descriptive aliases before the index step.
    for band, alias, _ in bands:
        cube[alias] = cube[band]

    cube = calculate_indices(cube, index=list(indices), collection='ga_s2_1')

    # Month-prefixed copies: bands first, then indices (this fixes the
    # order of the variables in the returned dataset).
    for _, alias, suffix in bands:
        cube[prefix + suffix] = cube[alias]
    for name in indices:
        cube[prefix + name] = cube[name]

    # Remove everything that is not month-prefixed.
    discard = [band for band, _, _ in bands]
    discard += [alias for _, alias, _ in bands]
    discard += ['AOT', 'visual', 'WVP']
    discard += list(indices)
    return cube.drop_vars(discard)

In [None]:
# Read the training geometries and reproject them to EPSG:25832
# (EPSG:4326 was the previously used alternative).
input_data = (
    gpd.read_file(path)
    .to_crs('EPSG:25832')
)

# Extract the features for every training geometry with the most recently
# defined feature_may_median (the cell above, which loads 2019-08).
column_names, model_input = collect_training_data(
    gdf=input_data,
    dc_query=query,
    ncpus=40,
    field=field,
    return_coords=True,
    zonal_stats=zonal_stats,
    feature_func=feature_may_median,
    max_retries=5,
)

In [None]:
import numpy as np

# Plain-text export: the column names go into the header comment line.
output_file = "2019-08_mspc.txt"
# Index of the first occurrence of every column name; with unique names this
# is simply every column in its existing order.
model_col_indices = list(map(column_names.index, column_names))
np.savetxt(
    output_file,
    model_input[:, model_col_indices],
    fmt="%4f",
    header=" ".join(column_names),
)

# CSV export of the same array with the column names as header row.
import pandas as pd

pd.DataFrame(model_input, columns=column_names).to_csv(
    "2019-08_mspc.csv",
    sep=",",
    index=False,
)

In [None]:
def feature_may_median(query):
    """
    Build the September 2019 median feature layers.

    Searches the module-level ``catalog`` for ``sentinel-2-l2a`` items that
    intersect the module-level ``area_of_interest`` in ``2019-09``, loads
    them with ``stac_load`` (grouped by solar day), masks every pixel whose
    ``SCL`` value is not 4, 5, 6, 7 or 11, takes the median over ``time``
    and adds the spectral indices computed by ``calculate_indices``.

    The name ``feature_may_median`` is a copy-paste leftover; it is kept so
    that the following cell's ``feature_func=feature_may_median`` keeps
    working. The output variables carry the ``sep_`` prefix so that the
    September columns are not mislabelled as May.

    Parameters
    ----------
    query : dict
        Keyword arguments forwarded unchanged to ``stac_load``.

    Returns
    -------
    xarray.Dataset
        Only the ``sep_``-prefixed band and index variables; all source
        bands, aliases and un-prefixed indices are dropped.
    """
    prefix = 'sep_'
    # (Sentinel-2 band, alias handed to calculate_indices, output suffix)
    bands = (
        ('B01', 'coastal_aerosol', 'ca'),
        ('B02', 'blue', 'b'),
        ('B03', 'green', 'g'),
        ('B04', 'red', 'r'),
        ('B05', 'red_edge1', 'r1'),
        ('B06', 'red_edge2', 'r2'),
        ('B07', 'red_edge3', 'r3'),
        ('B08', 'nir', 'n'),
        ('B8A', 'narrow_nir', 'nn'),
        ('B09', 'water_vapour', 'wv'),
        ('B11', 'swir1', 's1'),
        ('B12', 'swir2', 's2'),
    )
    indices = ('NDVI', 'kNDVI', 'NDVI8a', 'EVI', 'TCG_GSO', 'TCG', 'LAI',
               'SAVI', 'MSAVI', 'BUI', 'NDBI', 'NDMI', 'BAEI', 'BSI')

    scenes = catalog.search(
        collections=["sentinel-2-l2a"],
        intersects=area_of_interest,
        datetime="2019-09",
    ).item_collection()

    cube = stac_load(groupby="solar_day", items=scenes, **query)
    # Keep only the accepted scene-classification values, then composite.
    cube = cube.where(cube.SCL.isin([4, 5, 6, 7, 11]))
    cube = cube.drop_vars('SCL')
    cube = cube.median('time')

    # Expose the bands under the descriptive aliases before the index step.
    for band, alias, _ in bands:
        cube[alias] = cube[band]

    cube = calculate_indices(cube, index=list(indices), collection='ga_s2_1')

    # Month-prefixed copies: bands first, then indices (this fixes the
    # order of the variables in the returned dataset).
    for _, alias, suffix in bands:
        cube[prefix + suffix] = cube[alias]
    for name in indices:
        cube[prefix + name] = cube[name]

    # Remove everything that is not month-prefixed.
    discard = [band for band, _, _ in bands]
    discard += [alias for _, alias, _ in bands]
    discard += ['AOT', 'visual', 'WVP']
    discard += list(indices)
    return cube.drop_vars(discard)

In [None]:
# Read the training geometries and reproject them to EPSG:25832
# (EPSG:4326 was the previously used alternative).
input_data = (
    gpd.read_file(path)
    .to_crs('EPSG:25832')
)

# Extract the features for every training geometry with the most recently
# defined feature_may_median (the cell above, which loads 2019-09).
column_names, model_input = collect_training_data(
    gdf=input_data,
    dc_query=query,
    ncpus=40,
    field=field,
    return_coords=True,
    zonal_stats=zonal_stats,
    feature_func=feature_may_median,
    max_retries=5,
)

In [None]:
import numpy as np

# Plain-text export: the column names go into the header comment line.
output_file = "2019-09_mspc.txt"
# Index of the first occurrence of every column name; with unique names this
# is simply every column in its existing order.
model_col_indices = list(map(column_names.index, column_names))
np.savetxt(
    output_file,
    model_input[:, model_col_indices],
    fmt="%4f",
    header=" ".join(column_names),
)

# CSV export of the same array with the column names as header row.
import pandas as pd

pd.DataFrame(model_input, columns=column_names).to_csv(
    "2019-09_mspc.csv",
    sep=",",
    index=False,
)

In [None]:
def feature_may_median(query):
    """
    Build the October 2019 median feature layers.

    Searches the module-level ``catalog`` for ``sentinel-2-l2a`` items that
    intersect the module-level ``area_of_interest`` in ``2019-10``, loads
    them with ``stac_load`` (grouped by solar day), masks every pixel whose
    ``SCL`` value is not 4, 5, 6, 7 or 11, takes the median over ``time``
    and adds the spectral indices computed by ``calculate_indices``.

    The name ``feature_may_median`` is a copy-paste leftover; it is kept so
    that the following cell's ``feature_func=feature_may_median`` keeps
    working. The output variables carry the ``oct_`` prefix so that the
    October columns are not mislabelled as May.

    Parameters
    ----------
    query : dict
        Keyword arguments forwarded unchanged to ``stac_load``.

    Returns
    -------
    xarray.Dataset
        Only the ``oct_``-prefixed band and index variables; all source
        bands, aliases and un-prefixed indices are dropped.
    """
    prefix = 'oct_'
    # (Sentinel-2 band, alias handed to calculate_indices, output suffix)
    bands = (
        ('B01', 'coastal_aerosol', 'ca'),
        ('B02', 'blue', 'b'),
        ('B03', 'green', 'g'),
        ('B04', 'red', 'r'),
        ('B05', 'red_edge1', 'r1'),
        ('B06', 'red_edge2', 'r2'),
        ('B07', 'red_edge3', 'r3'),
        ('B08', 'nir', 'n'),
        ('B8A', 'narrow_nir', 'nn'),
        ('B09', 'water_vapour', 'wv'),
        ('B11', 'swir1', 's1'),
        ('B12', 'swir2', 's2'),
    )
    indices = ('NDVI', 'kNDVI', 'NDVI8a', 'EVI', 'TCG_GSO', 'TCG', 'LAI',
               'SAVI', 'MSAVI', 'BUI', 'NDBI', 'NDMI', 'BAEI', 'BSI')

    scenes = catalog.search(
        collections=["sentinel-2-l2a"],
        intersects=area_of_interest,
        datetime="2019-10",
    ).item_collection()

    cube = stac_load(groupby="solar_day", items=scenes, **query)
    # Keep only the accepted scene-classification values, then composite.
    cube = cube.where(cube.SCL.isin([4, 5, 6, 7, 11]))
    cube = cube.drop_vars('SCL')
    cube = cube.median('time')

    # Expose the bands under the descriptive aliases before the index step.
    for band, alias, _ in bands:
        cube[alias] = cube[band]

    cube = calculate_indices(cube, index=list(indices), collection='ga_s2_1')

    # Month-prefixed copies: bands first, then indices (this fixes the
    # order of the variables in the returned dataset).
    for _, alias, suffix in bands:
        cube[prefix + suffix] = cube[alias]
    for name in indices:
        cube[prefix + name] = cube[name]

    # Remove everything that is not month-prefixed.
    discard = [band for band, _, _ in bands]
    discard += [alias for _, alias, _ in bands]
    discard += ['AOT', 'visual', 'WVP']
    discard += list(indices)
    return cube.drop_vars(discard)

In [None]:
# Read the training geometries and reproject them to EPSG:25832
# (EPSG:4326 was the previously used alternative).
input_data = (
    gpd.read_file(path)
    .to_crs('EPSG:25832')
)

# Extract the features for every training geometry with the most recently
# defined feature_may_median (the cell above, which loads 2019-10).
column_names, model_input = collect_training_data(
    gdf=input_data,
    dc_query=query,
    ncpus=40,
    field=field,
    return_coords=True,
    zonal_stats=zonal_stats,
    feature_func=feature_may_median,
    max_retries=5,
)

In [None]:
import numpy as np

# Plain-text export: the column names go into the header comment line.
output_file = "2019-10_mspc.txt"
# Index of the first occurrence of every column name; with unique names this
# is simply every column in its existing order.
model_col_indices = list(map(column_names.index, column_names))
np.savetxt(
    output_file,
    model_input[:, model_col_indices],
    fmt="%4f",
    header=" ".join(column_names),
)

# CSV export of the same array with the column names as header row.
import pandas as pd

pd.DataFrame(model_input, columns=column_names).to_csv(
    "2019-10_mspc.csv",
    sep=",",
    index=False,
)

In [None]:
def feature_may_median(query):
    """
    Build the November 2019 median feature layers.

    Searches the module-level ``catalog`` for ``sentinel-2-l2a`` items that
    intersect the module-level ``area_of_interest`` in ``2019-11``, loads
    them with ``stac_load`` (grouped by solar day), masks every pixel whose
    ``SCL`` value is not 4, 5, 6, 7 or 11, takes the median over ``time``
    and adds the spectral indices computed by ``calculate_indices``.

    The name ``feature_may_median`` is a copy-paste leftover; it is kept so
    that the following cell's ``feature_func=feature_may_median`` keeps
    working. The output variables carry the ``nov_`` prefix so that the
    November columns are not mislabelled as May.

    Parameters
    ----------
    query : dict
        Keyword arguments forwarded unchanged to ``stac_load``.

    Returns
    -------
    xarray.Dataset
        Only the ``nov_``-prefixed band and index variables; all source
        bands, aliases and un-prefixed indices are dropped.
    """
    prefix = 'nov_'
    # (Sentinel-2 band, alias handed to calculate_indices, output suffix)
    bands = (
        ('B01', 'coastal_aerosol', 'ca'),
        ('B02', 'blue', 'b'),
        ('B03', 'green', 'g'),
        ('B04', 'red', 'r'),
        ('B05', 'red_edge1', 'r1'),
        ('B06', 'red_edge2', 'r2'),
        ('B07', 'red_edge3', 'r3'),
        ('B08', 'nir', 'n'),
        ('B8A', 'narrow_nir', 'nn'),
        ('B09', 'water_vapour', 'wv'),
        ('B11', 'swir1', 's1'),
        ('B12', 'swir2', 's2'),
    )
    indices = ('NDVI', 'kNDVI', 'NDVI8a', 'EVI', 'TCG_GSO', 'TCG', 'LAI',
               'SAVI', 'MSAVI', 'BUI', 'NDBI', 'NDMI', 'BAEI', 'BSI')

    scenes = catalog.search(
        collections=["sentinel-2-l2a"],
        intersects=area_of_interest,
        datetime="2019-11",
    ).item_collection()

    cube = stac_load(groupby="solar_day", items=scenes, **query)
    # Keep only the accepted scene-classification values, then composite.
    cube = cube.where(cube.SCL.isin([4, 5, 6, 7, 11]))
    cube = cube.drop_vars('SCL')
    cube = cube.median('time')

    # Expose the bands under the descriptive aliases before the index step.
    for band, alias, _ in bands:
        cube[alias] = cube[band]

    cube = calculate_indices(cube, index=list(indices), collection='ga_s2_1')

    # Month-prefixed copies: bands first, then indices (this fixes the
    # order of the variables in the returned dataset).
    for _, alias, suffix in bands:
        cube[prefix + suffix] = cube[alias]
    for name in indices:
        cube[prefix + name] = cube[name]

    # Remove everything that is not month-prefixed.
    discard = [band for band, _, _ in bands]
    discard += [alias for _, alias, _ in bands]
    discard += ['AOT', 'visual', 'WVP']
    discard += list(indices)
    return cube.drop_vars(discard)

In [None]:
# Read the training geometries and reproject them to EPSG:25832
# (EPSG:4326 was the previously used alternative).
input_data = (
    gpd.read_file(path)
    .to_crs('EPSG:25832')
)

# Extract the features for every training geometry with the most recently
# defined feature_may_median (the cell above, which loads 2019-11).
column_names, model_input = collect_training_data(
    gdf=input_data,
    dc_query=query,
    ncpus=40,
    field=field,
    return_coords=True,
    zonal_stats=zonal_stats,
    feature_func=feature_may_median,
    max_retries=5,
)

In [None]:
import numpy as np

# Plain-text export: the column names go into the header comment line.
output_file = "2019-11_mspc.txt"
# Index of the first occurrence of every column name; with unique names this
# is simply every column in its existing order.
model_col_indices = list(map(column_names.index, column_names))
np.savetxt(
    output_file,
    model_input[:, model_col_indices],
    fmt="%4f",
    header=" ".join(column_names),
)

# CSV export of the same array with the column names as header row.
import pandas as pd

pd.DataFrame(model_input, columns=column_names).to_csv(
    "2019-11_mspc.csv",
    sep=",",
    index=False,
)

In [None]:
def feature_may_median(query, time_of_interest="2019-12"):
    """
    Build the median feature dataset for one period of Sentinel-2 L2A data.

    Searches the module-level `catalog` for "sentinel-2-l2a" items that
    intersect the module-level `area_of_interest` within
    `time_of_interest`, loads them with `stac_load`, keeps only pixels
    whose SCL value is 4, 5, 6, 7 or 11, and takes the median over the
    'time' dimension. The bands are then copied to common names, the
    spectral indices are computed with `calculate_indices`, and every
    band and index is copied to a 'may_'-prefixed output variable. The
    raw bands, the common-name copies, 'AOT', 'visual', 'WVP' and the
    unprefixed indices are dropped before returning.

    Parameters
    ----------
    query : dict
        Keyword arguments forwarded unchanged to `stac_load`.
    time_of_interest : str, optional
        Value passed as `datetime` to the catalog search. Defaults to
        "2019-12", the value that was previously hard-coded.

    Returns
    -------
    ds05 : xarray Dataset
        The time-median dataset holding the 'may_'-prefixed band and
        index variables (plus any loaded variable not listed for
        removal).

    Notes
    -----
    The 'may_' prefix is kept regardless of `time_of_interest` because
    the output variable names become the exported column names.
    NOTE(review): `drop_vars` is called with its default error handling,
    so presumably every listed variable (including 'AOT', 'visual' and
    'WVP') must be present in the loaded data - confirm.
    """
    # (raw band, common name expected by calculate_indices, output name)
    band_table = (
        ('B01', 'coastal_aerosol', 'may_ca'),
        ('B02', 'blue', 'may_b'),
        ('B03', 'green', 'may_g'),
        ('B04', 'red', 'may_r'),
        ('B05', 'red_edge1', 'may_r1'),
        ('B06', 'red_edge2', 'may_r2'),
        ('B07', 'red_edge3', 'may_r3'),
        ('B08', 'nir', 'may_n'),
        ('B8A', 'narrow_nir', 'may_nn'),
        ('B09', 'water_vapour', 'may_wv'),
        ('B11', 'swir1', 'may_s1'),
        ('B12', 'swir2', 'may_s2'),
    )
    index_names = ['NDVI', 'kNDVI', 'NDVI8a', 'EVI', 'TCG_GSO', 'TCG', 'LAI',
                   'SAVI', 'MSAVI', 'BUI', 'NDBI', 'NDMI', 'BAEI', 'BSI']
    auxiliary_vars = ['AOT', 'visual', 'WVP']

    # No scene-level cloud-cover filter is applied to the search; pixels
    # are filtered by their SCL class after loading instead.
    search = catalog.search(collections=["sentinel-2-l2a"],
                            intersects=area_of_interest,
                            datetime=time_of_interest)
    items = search.item_collection()

    ds05 = stac_load(items=items, groupby="solar_day", **query)
    ds05 = ds05.where(ds05.SCL.isin([4, 5, 6, 7, 11]))
    ds05 = ds05.drop_vars('SCL')
    ds05 = ds05.median('time')

    # Copy each raw band to its common name.
    for raw_band, common_name, _ in band_table:
        ds05[common_name] = ds05[raw_band]

    ds05 = calculate_indices(ds05, index=index_names, collection='ga_s2_1')

    # Copy bands first, then indices, so the output variables keep the
    # same order as before (this order defines the exported columns).
    for _, common_name, feature_name in band_table:
        ds05[feature_name] = ds05[common_name]
    for index_name in index_names:
        ds05['may_' + index_name] = ds05[index_name]

    # Remove everything that only served as an intermediate.
    obsolete_vars = (
        [raw_band for raw_band, _, _ in band_table]
        + [common_name for _, common_name, _ in band_table]
        + auxiliary_vars
        + index_names
    )
    ds05 = ds05.drop_vars(obsolete_vars)

    return ds05

In [None]:
# Read the ground-truth vector file and reproject it to EPSG:25832
# (an EPSG:4326 reprojection was used previously and is no longer active).
input_data = gpd.read_file(path)
input_data = input_data.to_crs('EPSG:25832')

# Extract the training data for the geometries in `input_data`.
# `feature_may_median` is handed over as the feature function; the call
# returns the column names together with the extracted array.
column_names, model_input = collect_training_data(
    gdf=input_data,
    dc_query=query,
    ncpus=40,
    field=field,
    return_coords=True,
    zonal_stats=zonal_stats,
    feature_func=feature_may_median,
    max_retries=5,
)

In [None]:
import numpy as np
import pandas as pd

# Write the extracted training data twice: as a space-separated text file
# with a header line, and as a comma-separated CSV.
output_file = "2019-12_mspc.txt"

# Every column is exported, so select them by position. Looking positions up
# with `column_names.index(name)` is quadratic and returns the first match
# for a repeated name, which would write the wrong data for a duplicate column.
model_col_indices = list(range(len(column_names)))

# NOTE(review): "%4f" is a minimum field width of 4 with the default
# precision; if four decimals were intended the format would be "%.4f".
# Left unchanged because altering it changes the written files - confirm.
np.savetxt(
    output_file,
    model_input[:, model_col_indices],
    header=" ".join(column_names),
    fmt="%4f",
)

pd.DataFrame(model_input, columns=column_names).to_csv(
    "2019-12_mspc.csv", sep=",", index=False
)