# Landsat Dataset Downloader using landsatxplore

**landsatxplore** is a Python library for searching and downloading Landsat imagery from the USGS EarthExplorer platform. It lets users query Landsat datasets by parameters such as location, date, and cloud cover, and download the matching scenes. This simplifies accessing and working with satellite imagery for analysis and visualization in Python.

**Information on Library:**
* Author:
* Notebook Version: 1.3 (Updated: December 03, 2024)

Although the library is open-source, it has several limitations:
* Downloads satellite images as **.tar** files, with each band stored as a separate GeoTIFF.
* Lacks functionality to filter or clip images by bounding box, requiring post-processing.
* Outdated (last updated 3 years ago); installing it may disrupt other libraries. If issues arise, upgrade the affected library with `pip install --upgrade <libraryName>`.

## 1. Specifying the paths and working directories

In [1]:
import os

# ---- Specify the directories here ----
# The paths are assembled with os.path.join so the notebook also works on
# Linux/macOS, where "\" is not a path separator. On Windows the resulting
# strings are identical to the former r".\data\landsatxplorer\..." literals.
base_folder = os.path.join(".", "data", "landsatxplorer")
download_folder = os.path.join(base_folder, "download")
working_folder = os.path.join(base_folder, "working")
geotiff_folder = os.path.join(base_folder, "geotiff")
csv_folder = os.path.join(base_folder, "csv")
output_folder = os.path.join(base_folder, "output")
# ---- End of user inputs ----

# Create every directory up front; exist_ok=True keeps re-running the cell harmless.
for folder in (
    download_folder,
    working_folder,
    geotiff_folder,
    csv_folder,
    output_folder,
):
    os.makedirs(folder, exist_ok=True)

## 2. Download and Extract Dataset

### 2.1 Authentication

To authenticate with the USGS API, follow these steps:

1. **Register**: Create an account at the [USGS registration page](https://ers.cr.usgs.gov/register/).
2. **Obtain Credentials**: After registering, SAVE your credentials as a `.json` file.

#### Important Security Note:
- **Keep the credentials file private**: Ensure that the `.json` file containing your username and password is stored securely and cannot be accessed by unauthorized users.
- **Permissions**: Do not upload the credentials file to public repositories or share it with others.

#### Example Format of the Credentials File:
The `.json` file should have the following format:

```json
{
    "username": "your_username",
    "password": "your_password"
}
```


In [2]:
import json
from landsatxplore.api import API

# Load the USGS credentials from a local JSON file. The file must provide the
# keys "username" and "password".
# The encoding is given explicitly: JSON files are UTF-8, whereas open()
# otherwise falls back to the platform default (e.g. cp1252 on Windows), which
# can corrupt non-ASCII characters in the credentials.
with open("G:/usgs.json", "r", encoding="utf-8") as file:
    credentials = json.load(file)

username = credentials["username"]
password = credentials["password"]

### 2.2 Search for the available scenes

In [3]:
# Open a session with the USGS API using the credentials loaded earlier
api = API(username=username, password=password)

# Search criteria: dataset id, a point of interest (latitude/longitude),
# an acquisition date window and an upper bound on the cloud cover
search_criteria = {
    "dataset": 'landsat_ot_c2_l2',
    "latitude": 47.6780,
    "longitude": 9.1737,
    "start_date": '2020-01-01',
    "end_date": '2020-10-01',
    "max_cloud_cover": 10,
}
scenes = api.search(**search_criteria)

scene_count = len(scenes)
print(f"{scene_count} scenes found.")

8 scenes found.


In [4]:
import pandas as pd

# Table column -> key in the scene metadata returned by the search.
# Further keys ("image_quality", "day-night_indicator") were left disabled
# in the original cell and are therefore not included here.
column_sources = {
    "entity_id": "entity_id",
    "display_id": "display_id",
    "acq_date": "acquisition_date",
    "scene_cloud_cover": "scene_cloud_cover",
    "land_cloud_cover": "land_cloud_cover",
}

# One record per scene, restricted to the columns of interest
rows = [
    {column: item[source] for column, source in column_sources.items()}
    for item in scenes
]

df = pd.DataFrame(rows)
df

Unnamed: 0,entity_id,display_id,acq_date,scene_cloud_cover,land_cloud_cover
0,LC81950272020256LGN00,LC08_L2SP_195027_20200912_20200919_02_T1,2020-09-12,9.18,9.18
1,LC81940272020233LGN00,LC08_L2SP_194027_20200820_20200905_02_T1,2020-08-20,5.51,5.51
2,LC81950272020224LGN00,LC08_L2SP_195027_20200811_20200918_02_T1,2020-08-11,8.02,8.02
3,LC81950272020176LGN00,LC08_L2SP_195027_20200624_20200824_02_T1,2020-06-24,3.12,3.12
4,LC81940272020153LGN00,LC08_L2SP_194027_20200601_20200824_02_T1,2020-06-01,5.08,5.08
5,LC81950272020128LGN00,LC08_L2SP_195027_20200507_20200820_02_T1,2020-05-07,1.8,1.8
6,LC81950272020096LGN00,LC08_L2SP_195027_20200405_20200822_02_T1,2020-04-05,2.01,2.01
7,LC81950272020016LGN00,LC08_L2SP_195027_20200116_20200823_02_T1,2020-01-16,1.56,1.56


In [5]:
import folium
from shapely import wkt
from matplotlib import pyplot as plt

# Discrete "turbo" colormap with 20 entries; every scene footprint is drawn
# with its own entry.
cmap = plt.get_cmap("turbo", 20)
m = folium.Map(
    location=[47.5, 8.0],
    zoom_start=7,
    height=450,
    scrollWheelZoom=False,
)

for i, scene in enumerate(scenes):
    entity_id = scene["entity_id"]

    # The footprint is accepted either as a WKT string or as an
    # already-parsed geometry object.
    polygon_wkt = scene["spatial_coverage"]
    polygon = wkt.loads(polygon_wkt) if isinstance(polygon_wkt, str) else polygon_wkt

    # Wrap around the colormap so that scenes beyond its last entry still get
    # distinct colours instead of all being clipped to the final one.
    color = [int(c * 255) for c in cmap(i % cmap.N)[:3]]

    # One toggleable layer per scene, labelled with the plain entity id
    # (the former f"str{entity_id}" prefixed every label with a literal "str").
    polygon_fg = folium.FeatureGroup(name=str(entity_id))
    folium.Polygon(
        # folium expects (lat, lon) pairs; the geometry stores (lon, lat)
        locations=[(lat, lon) for lon, lat in polygon.exterior.coords],
        color=f"rgb({color[0]}, {color[1]}, {color[2]})",
        fill=True,
        fill_opacity=0.2,
    ).add_to(polygon_fg)

    polygon_fg.add_to(m)

# Layer switcher to show/hide the individual footprints
folium.LayerControl().add_to(m)
m


### 2.3 Download the scenes

In [6]:
from landsatxplore.earthexplorer import EarthExplorer

# Log in to EarthExplorer with the USGS credentials
ee = EarthExplorer(username, password)

# Scene to fetch and the location of its archive inside the download folder
identifier = 'LC08_L2SP_204025_20200927_20201006_02_T1'
download_filename = f"{identifier}.tar"
download_filepath = os.path.join(download_folder, download_filename)

try:
    # Only download when the archive is not present yet
    if not os.path.isfile(download_filepath):
        ee.download(
            identifier=identifier,
            output_dir=download_folder,
        )
    else:
        print(f"The dataset, {identifier}, is already downloaded. Skipping Download.")
finally:
    # Always close both sessions, even when the download raises; the nested
    # try/finally makes sure a failing api.logout() cannot skip ee.logout().
    try:
        api.logout()
    finally:
        ee.logout()

INFO:landsatxplore.earthexplorer:Successfully logged in to EarthExplorer.


The dataset, LC08_L2SP_204025_20200927_20201006_02_T1, is already downloaded. Skipping Download.


INFO:landsatxplore.earthexplorer:Logged out from EarthExplorer.


### 2.4 Extract the tar file

In [7]:
import tarfile

# Each scene is unpacked into its own sub-folder of the working directory
extract_folder = os.path.join(working_folder, identifier)
os.makedirs(extract_folder, exist_ok=True)

with tarfile.open(download_filepath) as tar:
    # The archive comes from the network, so use the "data" extraction filter
    # (rejects members that would land outside extract_folder) whenever the
    # running Python provides it; older interpreters keep the previous call.
    if hasattr(tarfile, "data_filter"):
        tar.extractall(path=extract_folder, filter="data")
    else:
        tar.extractall(path=extract_folder)

## 3. Merge RGB bands into a single GeoTIFF file

In [8]:
# List the extracted files. os.listdir returns entries in arbitrary order,
# so sort them to get the same listing on every platform and run.
file_list = sorted(os.listdir(extract_folder))
file_list

['LC08_L2SP_204025_20200927_20201006_02_T1_ANG.txt',
 'LC08_L2SP_204025_20200927_20201006_02_T1_MTL.json',
 'LC08_L2SP_204025_20200927_20201006_02_T1_MTL.txt',
 'LC08_L2SP_204025_20200927_20201006_02_T1_MTL.xml',
 'LC08_L2SP_204025_20200927_20201006_02_T1_QA_PIXEL.TIF',
 'LC08_L2SP_204025_20200927_20201006_02_T1_QA_RADSAT.TIF',
 'LC08_L2SP_204025_20200927_20201006_02_T1_SR_B1.TIF',
 'LC08_L2SP_204025_20200927_20201006_02_T1_SR_B2.TIF',
 'LC08_L2SP_204025_20200927_20201006_02_T1_SR_B3.TIF',
 'LC08_L2SP_204025_20200927_20201006_02_T1_SR_B4.TIF',
 'LC08_L2SP_204025_20200927_20201006_02_T1_SR_B5.TIF',
 'LC08_L2SP_204025_20200927_20201006_02_T1_SR_B6.TIF',
 'LC08_L2SP_204025_20200927_20201006_02_T1_SR_B7.TIF',
 'LC08_L2SP_204025_20200927_20201006_02_T1_SR_QA_AEROSOL.TIF',
 'LC08_L2SP_204025_20200927_20201006_02_T1_SR_stac.json',
 'LC08_L2SP_204025_20200927_20201006_02_T1_ST_ATRAN.TIF',
 'LC08_L2SP_204025_20200927_20201006_02_T1_ST_B10.TIF',
 'LC08_L2SP_204025_20200927_20201006_02_T1_ST_CDIS

In [9]:
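# NOTE(review): this cell is disabled — every line below is commented out.
# Before re-enabling it, confirm that rasterio.merge.merge is the right tool:
# it appears to mosaic datasets spatially rather than stack them as bands,
# and the hard-coded paths below point to a different scene than the one
# extracted above.
# import rasterio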
# import os
# from rasterio.merge import merge

# files = [
#     './data/landsatxplorer/working/LC08_L2SP_195027_20200507_20200820_02_T1_SR_B2.TIF',
#     './data/landsatxplorer/working/LC08_L2SP_195027_20200507_20200820_02_T1_SR_B3.TIF',
#     './data/landsatxplorer/working/LC08_L2SP_195027_20200507_20200820_02_T1_SR_B4.TIF'
# ]

# datasets = [rasterio.open(f) for f in files]

# merged_data, merged_transform = merge(datasets)

# metadata = datasets[0].meta
# metadata.update({
#     'count': len(datasets),  # Number of bands in the merged file
#     'transform': merged_transform
# })

# output_file = r'G:/merged.tif'

# with rasterio.open(output_file, 'w', **metadata) as dst:
#     # Loop through each band in the merged data
#     for i in range(len(datasets)):
#         dst.write(merged_data[i], i+1)  # i+1 because band indexes in GeoTIFF start from 1

# print(f"Merged GeoTIFF file saved as {output_file}")