In [1]:
import pandas as pd
import geopandas as gpd
import os, shutil

In [2]:
state = 'ID'
productionyear = '2020' # as string

# Specify the date range as a string. 
# We want peak leaf-on imagery, but also need to account for the last date of any large fires in that state
lowmonth,lowday = '06','01'
highmonth,highday = '10','15'

In [4]:
# Get the state boundary and landsat path/row file to select scenes for download
# get the county shapefile to join fires to state name for later use
bounds = gpd.read_file(os.path.join(r'D:\data\imagery\state_shp',state + '.shp'))
countiesfile = gpd.GeoDataFrame.from_file(r'D:\data\imagery\counties.gpkg')
wrs = gpd.GeoDataFrame.from_file(r'D:\data\imagery\wrs2_descending.shp')

# State files need to be the same projection as the WRS file, else this will not work correctly
wrs_intersection = wrs[wrs.intersects(bounds.geometry[0])]

In [5]:
# Load the fire perimeter data
# old code used US GEOMAC perimeter files

# New fire file from NIFC
fire_file = gpd.GeoDataFrame.from_file(r'https://opendata.arcgis.com/datasets/5da472c6d27b4b67970acc7b5044c862_0.geojson')

# Write the perimeter file with the state/county name for later use
fire_file_state =  gpd.sjoin(fire_file, countiesfile, how='inner', op='within')###
fire_file_state['STATE'] = fire_file_state.order01
fire_file_state['YEAR'] = fire_file_state.DateCurrent.str[:4]

fire_file.to_file(r"D:\data\imagery\Public_NIFS_Perimeters.shp")
fire_file_state.to_file(r"D:\data\imagery\Public_NIFS_Perimeters_county.shp")

# Clip the fires to the state boundary
fire = clip(fire_file,bounds)

  outputs = ufunc(*inputs)
Given a GeoSeries 's', you can use '~s.is_empty & s.notna()' to get back the old behaviour.

  return self.notna()


In [6]:
# Select the fires that intersect to later determine the needed imagery date
fires =  gpd.sjoin(fire, wrs, how='inner', op='within')###

# sort dataframe by most recent date
fires['enddate'] = fires['DateCurrent']+' 00:00:00.000000'

# empty gdf for most recent fire perimeter data 
recent_fire = gpd.GeoDataFrame()

# select just fires in the state, make lowercase for consistent matching
fires.IncidentName = fires.IncidentName.str.lower()
fires = fires[['IncidentName','PATH','ROW','enddate']]
fires['PR'] = fires['PATH'].astype(str)+' ' +fires['ROW'].astype(str)

# for each fire, pick the latest date
for firename in fires['IncidentName']:
    rec_fire = fires.loc[(fires.IncidentName == firename)]
    rec_fire['enddate'].sort_values()
    rec_fire = rec_fire.tail(1)
    recent_fire = recent_fire.append(rec_fire)
    
# then find the latest fire date for the path/row - select just the most recent for each scene
pr_date = gpd.GeoDataFrame()
for pr in recent_fire['PR'].unique():
    prdate = recent_fire.loc[(recent_fire.PR == pr)]
    prdate['enddate'].sort_values()
    prdate = prdate.tail(1)
    pr_date = pr_date.append(prdate)


In [7]:
paths, rows = wrs_intersection['PATH'].values, wrs_intersection['ROW'].values

In [8]:
for i, (path, row) in enumerate(zip(paths, rows)):
    print('Image', i+1, ' - path:', path, 'row:', row)

('Image', 1, ' - path:', 43, 'row:', 26)
('Image', 2, ' - path:', 43, 'row:', 27)
('Image', 3, ' - path:', 43, 'row:', 28)
('Image', 4, ' - path:', 43, 'row:', 29)
('Image', 5, ' - path:', 41, 'row:', 27)
('Image', 6, ' - path:', 41, 'row:', 28)
('Image', 7, ' - path:', 41, 'row:', 29)
('Image', 8, ' - path:', 41, 'row:', 30)
('Image', 9, ' - path:', 41, 'row:', 31)
('Image', 10, ' - path:', 39, 'row:', 29)
('Image', 11, ' - path:', 39, 'row:', 30)
('Image', 12, ' - path:', 39, 'row:', 31)
('Image', 13, ' - path:', 44, 'row:', 26)
('Image', 14, ' - path:', 42, 'row:', 26)
('Image', 15, ' - path:', 42, 'row:', 27)
('Image', 16, ' - path:', 42, 'row:', 28)
('Image', 17, ' - path:', 42, 'row:', 29)
('Image', 18, ' - path:', 42, 'row:', 30)
('Image', 19, ' - path:', 42, 'row:', 31)
('Image', 20, ' - path:', 40, 'row:', 28)
('Image', 21, ' - path:', 40, 'row:', 29)
('Image', 22, ' - path:', 40, 'row:', 30)
('Image', 23, ' - path:', 40, 'row:', 31)
('Image', 24, ' - path:', 38, 'row:', 29)
(

In [9]:
# Load the Landsat metadata file - use this to select scenes for download
s3_scenes = pd.read_csv('http://landsat-pds.s3.amazonaws.com/c1/L8/scene_list.gz', compression='gzip')

In [None]:
# Un-comment to re-run for a path/scene below with new dates
# Use previous year - certain states like WA have some scenes where there is high cloudcover most of the time - unavoidable
#paths,rows = [48],[28]
#productionyear = '2019'

In [10]:
def fire_scene():
    # Check if the Path/Row has a recent fire, use the fire's end date for the datelow L8 scene search
    pr = str(path) +' '+ str(row)
    prloc = pr_date.loc[(pr_date.PR == pr)]
    if prloc.shape[0] < 1:
        print 'No fire - daterange unchanged'
    else:
        datelow = prloc['enddate'].values[0]
        print "Fire occured - new scene daterange " + datelow[:-15] +'to '+ datehigh[:-16]

# bulk download list
bulk_list = []
not_found = []
n = 0

# Find scenes for each path/row
for path, row in zip(paths, rows):
    n = n + 1
    print n

    ## Define the thresholds for date range and cloud cover:
    datelow = productionyear+'-'+lowmonth+'-'+lowday+' 00:00:00.000000'
    datehigh = productionyear+'-'+highmonth+'-'+highday+' 00:00:00.000000'
    cloudcover = 10

    print('Path:', path, 'Row:', row)
    
    # Change the daterange if there's a fire perimeter in that scene
    fire_scene()

    # Filter the Landsat Amazon S3 table for images matching path/row and cloudcover parameters.
    #while productionyear
    tries = 10
    while tries >= 10 and tries <= 90:
        if tries > 10:
            ntries = tries/10
            cloudcover = tries
            print 'Try #' + str(ntries) +': '+ str(cloudcover) + '% cloudcover threshold'
        scenes = s3_scenes[(s3_scenes.path == path) & (s3_scenes.row == row) & 
                           (s3_scenes.cloudCover <= cloudcover) & 
                           (s3_scenes.acquisitionDate >= datelow) & 
                           (s3_scenes.acquisitionDate <= datehigh) &
                           # We don't want any tier2/uncorrected data
                           (~s3_scenes.productId.str.contains('_T2')) &
                           (~s3_scenes.productId.str.contains('_RT'))]
        print(' Found {} images\n'.format(len(scenes)))
        if len(scenes) == 0:
            tries = tries + 10
            print 'Retry with higher cloudcover threshold:'
        else: tries = 100
            
    # Select the scenes that meet the date and cloud cover criteria
    if len(scenes)>0:
        # select a scene in the middle of the date ranges if possible- similar to LandSat Retriever
        sc =  len(scenes)
        sd = sc / 2
        sl = sc - sd
        if sd > 2 and sl < 2:
            sl = -1
        else:
            sl = sl * -1
        
        # pick the middle date scene
        scene = scenes.sort_values('acquisitionDate').iloc[sl]
        
        # Add the selected scene to the bulk download list.
        bulk_list.append(scene)
    else:
        # if there are no scenes found even after altering the cloudcover threshold, create a list (find manually)
        print 'No scenes were selected for this path/row'
        nf = str(path) + ',' + str(row)
        not_found.append(nf)


1
('Path:', 43, 'Row:', 26)
Fire occured - new scene daterange 2020-09-25T23:33:53 to 2020-10-15
 Found 3 images

2
('Path:', 43, 'Row:', 27)
Fire occured - new scene daterange 2020-09-22T13:41:49 to 2020-10-15
 Found 2 images

3
('Path:', 43, 'Row:', 28)
No fire - daterange unchanged
 Found 4 images

4
('Path:', 43, 'Row:', 29)
No fire - daterange unchanged
 Found 4 images

5
('Path:', 41, 'Row:', 27)
No fire - daterange unchanged
 Found 5 images

6
('Path:', 41, 'Row:', 28)
Fire occured - new scene daterange 2020-09-24T19:42:26 to 2020-10-15
 Found 5 images

7
('Path:', 41, 'Row:', 29)
Fire occured - new scene daterange 2020-10-14T17:17:39 to 2020-10-15
 Found 6 images

8
('Path:', 41, 'Row:', 30)
Fire occured - new scene daterange 2020-10-13T22:19:35 to 2020-10-15
 Found 5 images

9
('Path:', 41, 'Row:', 31)
Fire occured - new scene daterange 2020-09-15T15:35:45 to 2020-10-15
 Found 7 images

10
('Path:', 39, 'Row:', 29)
Fire occured - new scene daterange 2020-10-19T17:46:17 to 2020

In [11]:
bulk_frame = pd.concat(bulk_list, 1).T
nf_frame = pd.DataFrame(not_found)
nf_frame.to_csv((os.path.join(r'D:\data\imagery\state_shp', state + 'scenes_missing.txt')),sep='\t', index=False, header=False)
bulk_frame.head(100)

Unnamed: 0,productId,entityId,acquisitionDate,cloudCover,processingLevel,path,row,min_lat,min_lon,max_lat,max_lon,download_url
2062594,LC08_L1TP_043026_20200818_20200823_01_T1,LC80430262020231LGN00,2020-08-18 18:36:45.447299,9.2,L1TP,43,26,47.7727,-118.232,49.9284,-115.041,https://s3-us-west-2.amazonaws.com/landsat-pds...
2125844,LC08_L1TP_043027_20201005_20201015_01_T1,LC80430272020279LGN00,2020-10-05 18:37:26.170171,1.04,L1TP,43,27,46.3566,-118.79,48.5043,-115.657,https://s3-us-west-2.amazonaws.com/landsat-pds...
2102270,LC08_L1TP_043028_20200903_20200917_01_T1,LC80430282020247LGN00,2020-09-03 18:37:41.028538,0.02,L1TP,43,28,44.9306,-119.287,47.081,-116.208,https://s3-us-west-2.amazonaws.com/landsat-pds...
2102613,LC08_L1TP_043029_20200903_20200917_01_T1,LC80430292020247LGN00,2020-09-03 18:38:04.915342,0.01,L1TP,43,29,43.4999,-119.775,45.6553,-116.75,https://s3-us-west-2.amazonaws.com/landsat-pds...
2071298,LC08_L1TP_041027_20200804_20200821_01_T1,LC80410272020217LGN00,2020-08-04 18:24:42.635326,0.13,L1TP,41,27,46.3484,-115.628,48.5229,-112.64,https://s3-us-west-2.amazonaws.com/landsat-pds...
2072477,LC08_L1TP_041028_20200804_20200821_01_T1,LC80410282020217LGN00,2020-08-04 18:25:06.522128,0.1,L1TP,41,28,44.9345,-116.12,47.0975,-113.201,https://s3-us-west-2.amazonaws.com/landsat-pds...
2101097,LC08_L1TP_041029_20200905_20200918_01_T1,LC80410292020249LGN00,2020-09-05 18:25:44.101542,0.33,L1TP,41,29,43.5174,-116.609,45.6721,-113.751,https://s3-us-west-2.amazonaws.com/landsat-pds...
2072506,LC08_L1TP_041030_20200804_20200821_01_T1,LC80410302020217LGN00,2020-08-04 18:25:54.291500,0.02,L1TP,41,30,42.0989,-117.051,44.2442,-114.256,https://s3-us-west-2.amazonaws.com/landsat-pds...
2072453,LC08_L1TP_041031_20200804_20200821_01_T1,LC80410312020217LGN00,2020-08-04 18:26:18.174067,0.0,L1TP,41,31,40.6777,-117.509,42.8168,-114.755,https://s3-us-west-2.amazonaws.com/landsat-pds...
2087307,LC08_L1TP_039029_20200822_20200905_01_T1,LC80390292020235LGN00,2020-08-22 18:13:15.980041,1.64,L1TP,39,29,43.5041,-113.579,45.6545,-110.568,https://s3-us-west-2.amazonaws.com/landsat-pds...


In [None]:
# Option 1 - get the scene list to upload to earthexplorer.usgs.gov/filelist
# Also prints a list of scenes where no matching imagery was found
bulklist = bulk_frame[['entityId']]
bulklist.to_csv((os.path.join(r'Z:\tiger\wrs2_descending', state + 'pathrowlist.txt')),sep='\t', index=False, header=False)
bulk_frame.to_csv((os.path.join(r'Z:\tiger\wrs2_descending', state + 'frame.txt')),sep='\t', index=False)

In [12]:
# Option 2 - download the data directly
import requests
from bs4 import BeautifulSoup

LANDSAT_PATH = os.path.join(r'Z:\tiger', state, 'l8imagery')

# For each row
for i, row in bulk_frame.iterrows():
    if os.path.isdir(os.path.join(LANDSAT_PATH, row.productId)): 
        print'Skipping' + entity_dir + ' as it already exists'
    else:
        # Print the product ID
        print('\n', 'EntityId:', row.productId, '\n')
        print(' Checking content: ', '\n')

        # Request the html text of the download_url from the amazon server. 
        response = requests.get(row.download_url)

        # If the response status code is fine (200)
        if response.status_code == 200:

            # Import the html to beautiful soup
            html = BeautifulSoup(response.content, 'html.parser')

            # Create the dir where we will put this image files.
            entity_dir = os.path.join(LANDSAT_PATH, row.productId)
            if not os.path.exists(entity_dir):
                os.makedirs(entity_dir)


            # Second loop: for each band of this image that we find using the html <li> tag
            for li in html.find_all('li'):

                # Get the href tag
                file = li.find_next('a').get('href')

                filestring = str(file)
                filen = os.path.join(LANDSAT_PATH,entity_dir,filestring)

                # only download the .tif and metadata files
                if filestring[-4:] == '.TIF' or filestring[-8:] == '_MTL.txt' or filestring[-8:] == '_ANG.txt':
                    if not os.path.isfile(os.path.join(filen)):
                        print('  Downloading: {}'.format(file))

                        # Download the files
                        response = requests.get(row.download_url.replace('index.html', file), stream=True)

                        with open(os.path.join(entity_dir, file), 'wb') as output:
                            shutil.copyfileobj(response.raw, output)
                        del response
                    else: print filestring + ' exists'

('\n', 'EntityId:', 'LC08_L1TP_043026_20200818_20200823_01_T1', '\n')
(' Checking content: ', '\n')
  Downloading: LC08_L1TP_043026_20200818_20200823_01_T1_B10.TIF
  Downloading: LC08_L1TP_043026_20200818_20200823_01_T1_B3.TIF
  Downloading: LC08_L1TP_043026_20200818_20200823_01_T1_B4.TIF
  Downloading: LC08_L1TP_043026_20200818_20200823_01_T1_B7.TIF
  Downloading: LC08_L1TP_043026_20200818_20200823_01_T1_MTL.txt
  Downloading: LC08_L1TP_043026_20200818_20200823_01_T1_B1.TIF
  Downloading: LC08_L1TP_043026_20200818_20200823_01_T1_B2.TIF
  Downloading: LC08_L1TP_043026_20200818_20200823_01_T1_BQA.TIF
  Downloading: LC08_L1TP_043026_20200818_20200823_01_T1_ANG.txt
  Downloading: LC08_L1TP_043026_20200818_20200823_01_T1_B8.TIF
  Downloading: LC08_L1TP_043026_20200818_20200823_01_T1_B9.TIF
  Downloading: LC08_L1TP_043026_20200818_20200823_01_T1_B5.TIF
  Downloading: LC08_L1TP_043026_20200818_20200823_01_T1_B6.TIF
  Downloading: LC08_L1TP_043026_20200818_20200823_01_T1_B11.TIF
('\n', 'Entit

  Downloading: LC08_L1TP_041031_20200804_20200821_01_T1_B10.TIF
  Downloading: LC08_L1TP_041031_20200804_20200821_01_T1_B1.TIF
  Downloading: LC08_L1TP_041031_20200804_20200821_01_T1_B5.TIF
  Downloading: LC08_L1TP_041031_20200804_20200821_01_T1_B3.TIF
  Downloading: LC08_L1TP_041031_20200804_20200821_01_T1_B11.TIF
  Downloading: LC08_L1TP_041031_20200804_20200821_01_T1_B9.TIF
  Downloading: LC08_L1TP_041031_20200804_20200821_01_T1_BQA.TIF
  Downloading: LC08_L1TP_041031_20200804_20200821_01_T1_B8.TIF
  Downloading: LC08_L1TP_041031_20200804_20200821_01_T1_MTL.txt
  Downloading: LC08_L1TP_041031_20200804_20200821_01_T1_B4.TIF
('\n', 'EntityId:', 'LC08_L1TP_039029_20200822_20200905_01_T1', '\n')
(' Checking content: ', '\n')
  Downloading: LC08_L1TP_039029_20200822_20200905_01_T1_B11.TIF
  Downloading: LC08_L1TP_039029_20200822_20200905_01_T1_B3.TIF
  Downloading: LC08_L1TP_039029_20200822_20200905_01_T1_B2.TIF
  Downloading: LC08_L1TP_039029_20200822_20200905_01_T1_B7.TIF
  Downloading

  Downloading: LC08_L1TP_042029_20200811_20200822_01_T1_B4.TIF
  Downloading: LC08_L1TP_042029_20200811_20200822_01_T1_ANG.txt
  Downloading: LC08_L1TP_042029_20200811_20200822_01_T1_B2.TIF
  Downloading: LC08_L1TP_042029_20200811_20200822_01_T1_B6.TIF
  Downloading: LC08_L1TP_042029_20200811_20200822_01_T1_B10.TIF
('\n', 'EntityId:', 'LC08_L1TP_042030_20200811_20200822_01_T1', '\n')
(' Checking content: ', '\n')
  Downloading: LC08_L1TP_042030_20200811_20200822_01_T1_BQA.TIF
  Downloading: LC08_L1TP_042030_20200811_20200822_01_T1_B8.TIF
  Downloading: LC08_L1TP_042030_20200811_20200822_01_T1_B10.TIF
  Downloading: LC08_L1TP_042030_20200811_20200822_01_T1_B2.TIF
  Downloading: LC08_L1TP_042030_20200811_20200822_01_T1_B11.TIF
  Downloading: LC08_L1TP_042030_20200811_20200822_01_T1_B6.TIF
  Downloading: LC08_L1TP_042030_20200811_20200822_01_T1_MTL.txt
  Downloading: LC08_L1TP_042030_20200811_20200822_01_T1_B1.TIF
  Downloading: LC08_L1TP_042030_20200811_20200822_01_T1_B5.TIF
  Downloadin

('\n', 'EntityId:', 'LC08_L1TP_038031_20200815_20200822_01_T1', '\n')
(' Checking content: ', '\n')
  Downloading: LC08_L1TP_038031_20200815_20200822_01_T1_B1.TIF
  Downloading: LC08_L1TP_038031_20200815_20200822_01_T1_B2.TIF
  Downloading: LC08_L1TP_038031_20200815_20200822_01_T1_B11.TIF
  Downloading: LC08_L1TP_038031_20200815_20200822_01_T1_B4.TIF
  Downloading: LC08_L1TP_038031_20200815_20200822_01_T1_B5.TIF
  Downloading: LC08_L1TP_038031_20200815_20200822_01_T1_B9.TIF
  Downloading: LC08_L1TP_038031_20200815_20200822_01_T1_ANG.txt
  Downloading: LC08_L1TP_038031_20200815_20200822_01_T1_B10.TIF
  Downloading: LC08_L1TP_038031_20200815_20200822_01_T1_MTL.txt
  Downloading: LC08_L1TP_038031_20200815_20200822_01_T1_B7.TIF
  Downloading: LC08_L1TP_038031_20200815_20200822_01_T1_B8.TIF
  Downloading: LC08_L1TP_038031_20200815_20200822_01_T1_B6.TIF
  Downloading: LC08_L1TP_038031_20200815_20200822_01_T1_B3.TIF
  Downloading: LC08_L1TP_038031_20200815_20200822_01_T1_BQA.TIF
