# Retrieving Landsat 8 Images
This script retrieves Landsat 8 Level-1 images.  The images will still need to be corrected for atmospheric effects before analysis.  

The general tutorial that this script derives from can be found at: http://geologyandpython.com/get-landsat-8.html

In [1]:
# Import packages
import os
from glob import glob
import requests
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import folium
import geopandas as gpd
import rasterio as rio
from bs4 import BeautifulSoup
import shutil
import earthpy as et

In [2]:
# WILL DELETE WHEN FUNCTIONS ARE SEPARATED OUT
def NEON_site_extent(path_to_NEON_boundaries, site):
    '''Return the sampling boundary of a single NEON site.

    The shapefile is read with geopandas, indexed on its 'siteID'
    column, and the row(s) labelled ``site`` are selected.

    Parameters
    ----------
    path_to_NEON_boundaries : str
        Path to a shapefile of NEON site extents (field sampling
        boundaries). It must contain a column named 'siteID'.

    site : str
        The siteID to select: 4 capital letters,
        e.g. CPER, HARV, ONAQ or SJER.

    Returns
    -------
    site_boundary : geopandas.geodataframe.GeoDataFrame
        The row(s) whose 'siteID' equals ``site``, with 'siteID'
        restored as an ordinary column and a fresh integer index.
    '''
    all_sites = gpd.read_file(path_to_NEON_boundaries)

    # Index on siteID, pick the requested label (the list keeps the result
    # a frame rather than a single row), then turn siteID back into a column
    site_boundary = (
        all_sites
        .set_index(['siteID'])
        .loc[[site]]
        .reset_index()
    )

    return site_boundary

In [3]:
# Set working directory, creating it first so that a machine where the
# 'earth-analytics' folder does not exist yet does not fail on chdir
working_dir = os.path.join(et.io.HOME, 'earth-analytics')
os.makedirs(working_dir, exist_ok=True)
os.chdir(working_dir)

## Find Landsat scene paths and rows of aoi
The AOI boundary must be stored in the 'geometry' column of a geopandas GeoDataFrame.

In [4]:
# Fetch the zipped shapefile holding all NEON site boundaries
url = 'https://www.neonscience.org/sites/default/files/Field_Sampling_Boundaries_2020.zip'
et.data.get_data(url=url, replace=True)

# Relative path to the terrestrial boundaries shapefile inside the download
shapefile_parts = (
    'data',
    'earthpy-downloads',
    'Field_Sampling_Boundaries_2020',
    'terrestrialSamplingBoundaries.shp',
)
terrestrial_sites = os.path.join(*shapefile_parts)

# Boundary of the study site - choose CPER or ONAQ
bounds = NEON_site_extent(terrestrial_sites, site='ONAQ')

Downloading from https://www.neonscience.org/sites/default/files/Field_Sampling_Boundaries_2020.zip
Extracted output to C:\Users\Smells\earth-analytics\data\earthpy-downloads\Field_Sampling_Boundaries_2020


In [5]:
# Fetch the WRS2 descending path/row shapefile from USGS
# (get_data unzips the archive automatically)
USGS_url = 'https://landsat.usgs.gov/sites/default/files/documents/WRS2_descending.zip'
et.data.get_data(url=USGS_url, replace=True)

# Read the extracted shapefile into a GeoDataFrame
wrs_shapefile = os.path.join(
    'data', 'earthpy-downloads', 'WRS2_descending', 'WRS2_descending.shp')
wrs = gpd.GeoDataFrame.from_file(wrs_shapefile)

# Preview the first rows
wrs.head()

Downloading from https://landsat.usgs.gov/sites/default/files/documents/WRS2_descending.zip
Extracted output to C:\Users\Smells\earth-analytics\data\earthpy-downloads\WRS2_descending


Unnamed: 0,AREA,PERIMETER,PR_,PR_ID,RINGS_OK,RINGS_NOK,PATH,ROW,MODE,SEQUENCE,WRSPR,PR,ACQDayL7,ACQDayL8,geometry
0,15.74326,26.98611,1.0,1.0,1,0,13,1,D,2233,13001,13001,1,9,"POLYGON ((-10.80341 80.98880, -8.97407 80.3420..."
1,14.55366,25.84254,2.0,2.0,1,0,13,2,D,2234,13002,13002,1,9,"POLYGON ((-29.24250 80.18681, -29.29593 80.198..."
2,13.37247,24.20303,3.0,3.0,1,0,13,3,D,2235,13003,13003,1,9,"POLYGON ((-24.04206 79.12261, -23.78294 79.063..."
3,12.26691,22.40265,4.0,4.0,1,0,13,4,D,2236,13004,13004,1,9,"POLYGON ((-36.66813 77.46094, -40.05219 78.098..."
4,11.26511,20.64284,5.0,5.0,1,0,13,5,D,2237,13005,13005,1,9,"POLYGON ((-44.11210 76.93656, -44.12470 76.938..."


In [6]:
# Keep the Landsat catalog polygons that intersect the AOI polygon
aoi_geometry = bounds.geometry[0]
wrs_intersection = wrs.loc[wrs.intersects(aoi_geometry)]

In [7]:
# # Plot for sanity check
# ################################# CRS, OR LACK THEREOF, ISSUE?? ##############################
# ########### OR BECAUSE THE TWO SCENES OVERLAP THE AOI, BOTH SCENE BOUNDARIES PLOTTED? ######
# wrs_intersection.plot()

In [8]:
# Path and row numbers of the intersecting scenes, as arrays
paths = wrs_intersection['PATH'].values
rows = wrs_intersection['ROW'].values

In [9]:
# Get the center of the map. The centroid comes back as (x, y); the order is
# reversed for folium (presumably x/y are lon/lat, i.e. geographic
# coordinates - confirm the CRS of `bounds`).
xy = np.asarray(bounds.centroid[0].xy).squeeze()
center = list(xy[::-1])

# Select a zoom
zoom = 6

# Create the most basic OSM folium map
m = folium.Map(location=center, zoom_start=zoom, control_scale=True)


def aoi_style(feature):
    """Return the style for the AOI layer: red outline, transparent fill.

    'fillOpacity' is the Leaflet path option that controls fill
    transparency; an 'alpha' key is not a path option and is ignored.
    """
    return {'color': 'red', 'fillOpacity': 0}


# Add the bounds GeoDataFrame in red
m.add_child(folium.GeoJson(bounds.__geo_interface__, name='Area of Study',
                           style_function=aoi_style))

# Iterate through each Polygon of paths and rows intersecting the area.
# The loop names `i` and `row` stay bound after the loop finishes.
for i, row in wrs_intersection.iterrows():
    # Create a string for the name containing the path and row of this Polygon
    name = 'path: %03d, row: %03d' % (row.PATH, row.ROW)
    # Create the folium geometry of this Polygon
    g = folium.GeoJson(row.geometry.__geo_interface__, name=name)
    # Add a folium Popup object with the name string
    g.add_child(folium.Popup(name))
    # Add the object to the map
    g.add_to(m)

folium.LayerControl().add_to(m)
# m.save('./images/10/wrs.html')
m

In [10]:
# Removing scenes with small amounts of overlap using threshold of intersection area
# NOTE(review): the bounds below are hard-coded WRS path numbers (only paths
# strictly between 23 and 26 pass), not an area threshold. For an AOI whose
# scenes lie on other paths, both arrays end up empty - confirm these values
# suit the selected site.
b = (23 < paths) & (paths < 26)
paths, rows = paths[b], rows[b]

In [11]:
# Path(s) and row(s) covering the intersection
# The loop targets are deliberately not named `row`: that name still holds the
# last scene record from the map-building loop and must not be overwritten.
# `paths` is a NumPy array, so emptiness is tested with len() rather than
# truthiness.
if len(paths) == 0:
    # Explain the otherwise silent case where the filter removed every scene
    print('No path/row pairs remain after filtering - nothing to list')
for i, (scene_p, scene_r) in enumerate(zip(paths, rows), start=1):
    print('Image', i, ' - path:', scene_p, 'row:', scene_r)

In [12]:
# Printing scene path and row
# Take the last intersecting scene explicitly rather than relying on a loop
# variable left over from an earlier cell, which breaks as soon as any later
# loop reuses the same name.
selected_scene = wrs_intersection.iloc[-1]
scene_path = selected_scene.PATH
scene_row = selected_scene.ROW

print(scene_path, scene_row)

38 32


## Section 2
Looking at landsat catalog in Amazon S3 bucket

In [13]:
# Load the list of Landsat scenes published in the Amazon S3 bucket,
# indexed by acquisition date
scene_list_url = 'http://landsat-pds.s3.amazonaws.com/c1/L8/scene_list.gz'
s3_scenes = pd.read_csv(
    scene_list_url,
    compression='gzip',
    parse_dates=['acquisitionDate'],
    index_col=['acquisitionDate'],
)

# Preview the first three rows
s3_scenes.head(3)

Unnamed: 0_level_0,productId,entityId,cloudCover,processingLevel,path,row,min_lat,min_lon,max_lat,max_lon,download_url
acquisitionDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017-04-11 05:36:29.349932,LC08_L1TP_149039_20170411_20170415_01_T1,LC81490392017101LGN00,0.0,L1TP,149,39,29.22165,72.41205,31.34742,74.84666,https://s3-us-west-2.amazonaws.com/landsat-pds...
2017-04-11 15:14:40.001201,LC08_L1TP_012001_20170411_20170415_01_T1,LC80120012017101LGN00,0.15,L1TP,12,1,79.51504,-22.06995,81.90314,-7.44339,https://s3-us-west-2.amazonaws.com/landsat-pds...
2017-04-11 15:15:03.871058,LC08_L1TP_012002_20170411_20170415_01_T1,LC80120022017101LGN00,0.38,L1TP,12,2,78.74882,-29.24387,81.14549,-15.0433,https://s3-us-west-2.amazonaws.com/landsat-pds...


In [14]:
# Select only those scenes acquired during October 2017.
# A bare date string compares as midnight of that day, so an upper bound of
# `<= '2017-10-31'` would drop every scene acquired later on the 31st.
# A half-open interval ending at the first instant of November keeps the
# whole month, including a scene stamped exactly at midnight on the 1st.
scene_mask = (s3_scenes.index >= '2017-10-01') & (s3_scenes.index < '2017-11-01')
scene_dates = s3_scenes.loc[scene_mask]
scene_dates

Unnamed: 0_level_0,productId,entityId,cloudCover,processingLevel,path,row,min_lat,min_lon,max_lat,max_lon,download_url
acquisitionDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017-10-01 00:00:07.194652,LC08_L1GT_096015_20171001_20171001_01_RT,LC80960152017274LGN00,100.00,L1GT,96,15,63.05806,167.69796,65.31868,172.91439,https://s3-us-west-2.amazonaws.com/landsat-pds...
2017-10-01 00:00:31.077219,LC08_L1GT_096016_20171001_20171001_01_RT,LC80960162017274LGN00,99.99,L1GT,96,16,61.66909,166.65556,63.93028,171.71150,https://s3-us-west-2.amazonaws.com/landsat-pds...
2017-10-01 00:00:54.955550,LC08_L1GT_096017_20171001_20171001_01_RT,LC80960172017274LGN00,68.37,L1GT,96,17,60.27359,165.69214,62.53492,170.60480,https://s3-us-west-2.amazonaws.com/landsat-pds...
2017-10-01 00:01:18.833879,LC08_L1GT_096018_20171001_20171001_01_RT,LC80960182017274LGN00,34.02,L1GT,96,18,58.94747,165.11146,61.19757,169.28412,https://s3-us-west-2.amazonaws.com/landsat-pds...
2017-10-01 00:11:15.982764,LC08_L1GT_096043_20171001_20171001_01_RT,LC80960432017274LGN00,8.05,L1GT,96,43,23.48754,152.96216,25.60011,155.19237,https://s3-us-west-2.amazonaws.com/landsat-pds...
...,...,...,...,...,...,...,...,...,...,...,...
2017-10-04 14:14:00.017176,LC08_L1TP_229095_20171004_20180528_01_T1,LC82290952017277LGN01,55.87,L1TP,229,95,-51.35615,-72.02516,-49.18634,-68.61942,https://s3-us-west-2.amazonaws.com/landsat-pds...
2017-10-04 14:14:23.984462,LC08_L1TP_229096_20171004_20180528_01_T1,LC82290962017277LGN01,97.15,L1TP,229,96,-52.77535,-72.71214,-50.59069,-69.20309,https://s3-us-west-2.amazonaws.com/landsat-pds...
2017-10-18 13:51:20.062067,LC08_L1TP_231007_20171018_20180426_01_T1,LC82310072017291LGN01,19.55,L1TP,231,7,73.40659,-28.13328,75.82123,-18.60456,https://s3-us-west-2.amazonaws.com/landsat-pds...
2017-10-18 13:51:43.953105,LC08_L1TP_231008_20171018_20180426_01_T1,LC82310082017291LGN01,23.12,L1TP,231,8,72.21779,-30.24486,74.63946,-22.03704,https://s3-us-west-2.amazonaws.com/landsat-pds...


In [15]:
# Keep only the scenes whose productId contains the "_T1" marker
is_tier1 = scene_dates['productId'].str.contains("_T1")
scene_product = scene_dates[is_tier1]
scene_product

Unnamed: 0_level_0,productId,entityId,cloudCover,processingLevel,path,row,min_lat,min_lon,max_lat,max_lon,download_url
acquisitionDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017-10-06 06:52:14.114666,LC08_L1TP_163010_20171006_20171006_01_T1,LC81630102017279LGN00,83.82,L1TP,163,10,69.69576,70.78004,72.04376,78.00355,https://s3-us-west-2.amazonaws.com/landsat-pds...
2017-10-06 06:52:37.997233,LC08_L1TP_163011_20171006_20171006_01_T1,LC81630112017279LGN00,79.75,L1TP,163,11,68.35313,69.02533,70.69838,76.01181,https://s3-us-west-2.amazonaws.com/landsat-pds...
2017-10-06 06:53:01.884035,LC08_L1TP_163012_20171006_20171006_01_T1,LC81630122017279LGN00,76.70,L1TP,163,12,67.07727,67.90116,69.42285,73.81713,https://s3-us-west-2.amazonaws.com/landsat-pds...
2017-10-06 06:53:25.766602,LC08_L1TP_163013_20171006_20171006_01_T1,LC81630132017279LGN00,51.83,L1TP,163,13,65.76209,66.49119,68.05661,72.23194,https://s3-us-west-2.amazonaws.com/landsat-pds...
2017-10-06 06:53:49.649169,LC08_L1TP_163014_20171006_20171006_01_T1,LC81630142017279LGN00,46.00,L1TP,163,14,64.40154,65.22415,66.68220,70.79017,https://s3-us-west-2.amazonaws.com/landsat-pds...
...,...,...,...,...,...,...,...,...,...,...,...
2017-10-04 14:14:00.017176,LC08_L1TP_229095_20171004_20180528_01_T1,LC82290952017277LGN01,55.87,L1TP,229,95,-51.35615,-72.02516,-49.18634,-68.61942,https://s3-us-west-2.amazonaws.com/landsat-pds...
2017-10-04 14:14:23.984462,LC08_L1TP_229096_20171004_20180528_01_T1,LC82290962017277LGN01,97.15,L1TP,229,96,-52.77535,-72.71214,-50.59069,-69.20309,https://s3-us-west-2.amazonaws.com/landsat-pds...
2017-10-18 13:51:20.062067,LC08_L1TP_231007_20171018_20180426_01_T1,LC82310072017291LGN01,19.55,L1TP,231,7,73.40659,-28.13328,75.82123,-18.60456,https://s3-us-west-2.amazonaws.com/landsat-pds...
2017-10-18 13:51:43.953105,LC08_L1TP_231008_20171018_20180426_01_T1,LC82310082017291LGN01,23.12,L1TP,231,8,72.21779,-30.24486,74.63946,-22.03704,https://s3-us-west-2.amazonaws.com/landsat-pds...


In [16]:
# Keep only scenes on the selected path/row with at most 5 cloud cover
matches_path = scene_product.path == scene_path
matches_row = scene_product.row == scene_row
low_cloud = scene_product.cloudCover <= 5

scenes = scene_product[matches_path & matches_row & low_cloud]
scenes

Unnamed: 0_level_0,productId,entityId,cloudCover,processingLevel,path,row,min_lat,min_lon,max_lat,max_lon,download_url
acquisitionDate,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
2017-10-10 18:08:28.785931,LC08_L1TP_038032_20171010_20171024_01_T1,LC80380322017283LGN00,1.13,L1TP,38,32,39.23854,-113.38809,41.37735,-110.6059,https://s3-us-west-2.amazonaws.com/landsat-pds...


## Downloading the image of interest
The tutorial walks through downloading all images that meet your specifications, but I am proceeding with only the one image I want.  Later, I will need to create a search that includes only the months/dates of interest (e.g. Sept 2017 and T1).

This step includes using package Beautiful Soup (added to environment):
A library that makes it easy to scrape information from web pages. It sits atop an HTML or XML parser, providing Pythonic idioms for iterating, searching, and modifying the parse tree.

In [17]:
# Download every file listed on the index page of the first selected scene
# into data/Landsat/<productId>/.
scene = scenes.iloc[0]
index_url = scene['download_url']
response = requests.get(index_url)

# If the response status code is fine (200)
if response.status_code == 200:

    # Import the html to beautiful soup
    html = BeautifulSoup(response.content, 'html.parser')

    # Create the dir where we will put this image files.
    entity_dir = os.path.join('data', 'Landsat', scene.productId)
    os.makedirs(entity_dir, exist_ok=True)

    # For each file of this image that we find using the html <li> tag
    for li in html.find_all('li'):

        # Get the href of the next anchor; skip entries without a usable link
        link = li.find_next('a')
        file = link.get('href') if link is not None else None
        if not file:
            continue

        print('  Downloading: {}'.format(file))

        # The file sits next to index.html, so swap the file name into the URL
        file_url = index_url.replace('index.html', file)

        # The context manager releases the streamed connection even on error
        with requests.get(file_url, stream=True) as file_response:

            # Do not write an HTTP error body to disk as if it were the file
            if file_response.status_code != 200:
                print('  Skipped {} (HTTP {})'.format(
                    file, file_response.status_code))
                continue

            # iter_content applies any transfer/content decoding, which
            # reading response.raw directly does not
            with open(os.path.join(entity_dir, file), 'wb') as output:
                for chunk in file_response.iter_content(chunk_size=1024 * 1024):
                    output.write(chunk)
else:
    # Report the failure instead of silently downloading nothing
    print('Could not read scene index {} (HTTP {})'.format(
        index_url, response.status_code))

  Downloading: LC08_L1TP_038032_20171010_20171024_01_T1_B8.TIF.ovr
  Downloading: LC08_L1TP_038032_20171010_20171024_01_T1_B4.TIF
  Downloading: LC08_L1TP_038032_20171010_20171024_01_T1_B9_wrk.IMD
  Downloading: LC08_L1TP_038032_20171010_20171024_01_T1_B8_wrk.IMD
  Downloading: LC08_L1TP_038032_20171010_20171024_01_T1_B3.TIF.ovr
  Downloading: LC08_L1TP_038032_20171010_20171024_01_T1_B10.TIF.ovr
  Downloading: LC08_L1TP_038032_20171010_20171024_01_T1_BQA_wrk.IMD
  Downloading: LC08_L1TP_038032_20171010_20171024_01_T1_B2_wrk.IMD
  Downloading: LC08_L1TP_038032_20171010_20171024_01_T1_B7_wrk.IMD
  Downloading: LC08_L1TP_038032_20171010_20171024_01_T1_B7.TIF.ovr
  Downloading: LC08_L1TP_038032_20171010_20171024_01_T1_B6.TIF
  Downloading: LC08_L1TP_038032_20171010_20171024_01_T1_MTL.txt
  Downloading: LC08_L1TP_038032_20171010_20171024_01_T1_B11.TIF.ovr
  Downloading: LC08_L1TP_038032_20171010_20171024_01_T1_BQA.TIF.ovr
  Downloading: LC08_L1TP_038032_20171010_20171024_01_T1_B1.TIF
  Down

### NOT HAPPY WITH CURRENT WORKFLOW - NEED TO BRAINSTORM THIS OUT!
Maybe it is worth keeping the tutorial's original workflow of downloading all relevant scenes for this portion of the workflow, if the data are going to be retrieved from figshare to save time when running the blog?

Next, I will look into classification with this Landsat scene.