# Tutorial 1: Landcover Classification using Landsat 8

In [1]:
import sys  
sys.path.insert(0, '/Users/sbhadra/raster4ml')

In [2]:
import os
import glob
from raster4ml.preprocessing import stack_bands
from raster4ml.plotting import plot_raster
from raster4ml.features import VegetationIndices
from raster4ml.extraction import batch_extract_values_by_points

## 1. Stack the Bands

First we need to stack all the bands together and make a multispectral image file. The mutispectral image will contain several channels/bands representing reflectance information from different wavelengths. Since the test dataset is downloaded from a Landsat 8 satellite, there are total 11 bands. However, we will only use the first 7 bands as they can accurately define most of the surface objects in terms of reflectance.

To stack the seperate bands into one image, we need to define the paths of all the bands in chronological order (actually any order you want, but remember the orders for future reference).

In [3]:
# Filter all the files that ends with .TIF
image_dir = '/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1'

# Empty list to hold the first 7 bands' paths
bands_to_stack = []
# Loop through 7 times
for i in range(7):
    bands_to_stack.append(os.path.join(image_dir,
                                       f'LC08_L1TP_137045_20210317_20210328_01_T1_B{i+1}.TIF'))
bands_to_stack

['/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/LC08_L1TP_137045_20210317_20210328_01_T1_B1.TIF',
 '/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/LC08_L1TP_137045_20210317_20210328_01_T1_B2.TIF',
 '/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/LC08_L1TP_137045_20210317_20210328_01_T1_B3.TIF',
 '/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/LC08_L1TP_137045_20210317_20210328_01_T1_B4.TIF',
 '/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/LC08_L1TP_137045_20210317_20210328_01_T1_B5.TIF',
 '/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/LC08_L1TP_137045_20210317_20210328_01_T1_B6.TIF',
 '/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/LC08_L1TP_137045_20210317_20210328_01_T1_B7.TIF']

In [4]:
# Use the stack_bands function from raster4ml to do the stacking
stack_bands(image_paths=bands_to_stack,
            out_file='/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/stack.tif')

## 2. Calculate Vegetation Indices

In next step, we need to calculate the vegetation indices from the stacked image. We can do this using `raster4ml.features.VegetationIndices` object. You can provide a list of vegetation index we need to calculate in the object, but the tool can automatically calcualte all the possible vegetation index rasters.  

To do this, we need to provide the path of the stacked image, the corresponding wavelength values and an output directory to save all the indices as rasters. Since this is a Landsat 8 OLI image, we know the band wavelengths. The wavelengths can be inserted as either the `center_wavelengths` as list or the range of wavelengths per band in a list of list. The wavelengths has to be specified in nanometers (nm). The Landsat 8 OLI wavelengths can be seen [here](https://www.usgs.gov/faqs/what-are-band-designations-landsat-satellites).  

*Optionally we can provide the `bit_depth` as a parameter. Since we know Landsat 8 data is a 12-bit data, we can provide this information to normalize the image values from 0 to 1.

In [3]:
# Define the VegetationIndices object
VI = VegetationIndices(image_path='/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/stack.tif',
                       wavelengths=[[430, 450], [450, 510], [530, 590], [640, 670], [850, 880], [1570, 1650], [2110, 2290]],
                       bit_depth=12)

In [4]:
# Run the process while providing the output directory
VI.calculate(out_dir='/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/VI')

Calculating all features


  return 2.5*((self.R850-self.R660)/(self.R850+(6.*self.R660)-(7.5*self.R475)+1.))
 25%|██▌       | 14/56 [00:30<01:12,  1.73s/it]

'tuple' object has no attribute 'reshape'


 34%|███▍      | 19/56 [00:41<01:10,  1.91s/it]

unsupported operand type(s) for -: 'float' and 'NoneType'


  return (self.R750-self.R705)/(self.R750+self.R705-2.*self.R445)
  return (self.R750-self.R445)/(self.R705-self.R445)
 54%|█████▎    | 30/56 [01:06<00:56,  2.16s/it]

unsupported operand type(s) for /: 'NoneType' and 'float'


 62%|██████▎   | 35/56 [01:09<00:21,  1.01s/it]

unsupported operand type(s) for /: 'float' and 'NoneType'
unsupported operand type(s) for -: 'float' and 'NoneType'
unsupported operand type(s) for /: 'float' and 'NoneType'


 70%|██████▉   | 39/56 [01:14<00:16,  1.02it/s]

unsupported operand type(s) for -: 'float' and 'NoneType'


  return (self.R800-self.R445)/(self.R800-self.R680)
  return (self.R734-self.R747)/(self.R715-self.R726)
 98%|█████████▊| 55/56 [01:45<00:01,  1.26s/it]

unsupported operand type(s) for /: 'NoneType' and 'float'


100%|██████████| 56/56 [01:47<00:00,  1.92s/it]

8 features could not be calculated.





## 3. Extract Values based on Sample Points

Locate the sample point shapefile in the `data/shapes` folder. The name of the shapefile is `points.shp`. We need to extract the vegetation index values underneath each point in the shapefile and store those index values for Machine Learning training. The shapefile also contains label information. For simplicity, it only has two distinct classes, i.e., `Vegetation` and `Water`.  

For extraction by points, we can use the `raster4ml.extraction.batch_extract_values_by_points` function. This will enable extraction of multiple raster data at once. The function takes `image_paths` as a list, `shape_path` as a string, and a `unique_id` in the shapefile which uniquely represent each point. The function returns a pandas dataframe.

In [3]:
# Find the paths of all the vegetation indices
vi_paths = glob.glob('/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/VI/*.tif')
vi_paths

['/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/VI/OSAVI.tif',
 '/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/VI/CRI_1.tif',
 '/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/VI/VARI.tif',
 '/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/VI/GDVI.tif',
 '/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/VI/NMDI.tif',
 '/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/VI/RENDVI.tif',
 '/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/VI/TVI.tif',
 '/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/VI/CRI_2.tif',
 '/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/VI/SR_1.tif',
 '/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/VI/GSAVI.tif',
 '/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/VI/GLI.tif',
 '/Users/sbhadra/raster4ml/data/LC08_L1

In [4]:
# Batch extract values by points
values = batch_extract_values_by_points(image_paths=vi_paths,
                                        shape_path='/Users/sbhadra/raster4ml/data/LC08_L1TP_137045_20210317_20210328_01_T1/shapes/points.shp',
                                        unique_id='UID')

  0%|          | 0/48 [00:00<?, ?it/s]ERROR 1: PROJ: proj_identify: /Users/sbhadra/opt/anaconda3/envs/raster4ml/share/proj/proj.db lacks DATABASE.LAYOUT.VERSION.MAJOR / DATABASE.LAYOUT.VERSION.MINOR metadata. It comes from another PROJ installation.
  0%|          | 0/48 [00:00<?, ?it/s]


ValueError: Must pass either crs or epsg.