Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SSL4EO Landsat Downstream Dataset/module CDL, NLCD #1338

Merged
merged 28 commits into from
May 25, 2023
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
194 changes: 194 additions & 0 deletions tests/data/ssl4eo_downstream_landsat/data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,194 @@
#!/usr/bin/env python3

# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

import hashlib
import os
import shutil
from typing import Union

import numpy as np
import rasterio
from rasterio import Affine
from rasterio.crs import CRS

# Directory-name constants.
# NOTE(review): IMG_DIR, MASK_DIR, MASKS, SUBDIRS and NUM_BANDS are not
# referenced anywhere in the visible part of this script (the per-product
# ``num_bands`` mapping is used instead) -- confirm before removing them.
IMG_DIR = "ssl4eo-l5-l1-conus"
MASK_DIR = "l5-*-2011"
MASKS = ["cdl", "nlcd"]

SUBDIRS = [("0000000", "LT05_045030_20110723"), ("0000001", "LT05_040032_20110805")]

NUM_BANDS = 7
# Width and height, in pixels, of every generated raster.
SIZE = 36

# Seed NumPy's global RNG so the generated rasters are reproducible.
np.random.seed(0)

# Recursive description of a directory tree: a dict maps a directory name to
# its subtree, a list holds the names of the files to create in a directory.
FILENAME_HIERARCHY = Union[dict[str, "FILENAME_HIERARCHY"], list[str]]

# Layout of the fake image data: product -> sample id -> scene id -> files.
# NOTE(review): the "tm_toa" scenes use an "LE07" prefix like the ETM+
# products -- confirm whether that is intended.
filenames: FILENAME_HIERARCHY = {
    "tm_toa": {
        "0000002": {"LE07_172034_20010526": ["all_bands.tif"]},
        "0000005": {"LE07_223084_20010413": ["all_bands.tif"]},
    },
    "etm_sr": {
        "0000002": {"LE07_172034_20010526": ["all_bands.tif"]},
        "0000005": {"LE07_223084_20010413": ["all_bands.tif"]},
    },
    "etm_toa": {
        "0000002": {"LE07_172034_20010526": ["all_bands.tif"]},
        "0000005": {"LE07_223084_20010413": ["all_bands.tif"]},
    },
    "oli_tirs_toa": {
        "0000002": {"LC08_172034_20210306": ["all_bands.tif"]},
        "0000005": {"LC08_223084_20210412": ["all_bands.tif"]},
    },
    "oli_sr": {
        "0000002": {"LC08_172034_20210306": ["all_bands.tif"]},
        "0000005": {"LC08_223084_20210412": ["all_bands.tif"]},
    },
}

# Number of bands written into each product's image files.
num_bands = {"tm_toa": 7, "etm_sr": 6, "etm_toa": 9, "oli_tirs_toa": 11, "oli_sr": 7}
# Year used in the mask file name for each sensor prefix.
years = {"tm": 2011, "etm": 2019, "oli": 2019}

def create_image(path: str) -> None:
    """Write a random multi-band ``uint8`` GeoTIFF to ``path``.

    The band count is looked up in ``num_bands`` using the product key embedded
    in the second ``os.sep``-separated component of ``path`` (for example
    ``ssl4eo_l_oli_tirs_toa_benchmark`` yields ``oli_tirs_toa``). One random
    ``SIZE`` x ``SIZE`` array is drawn and written to every band.

    Args:
        path: Destination file path whose second component has the form
            ``<a>_<b>_<product>_<suffix>``.

    Raises:
        KeyError: If the product key derived from ``path`` is not in
            ``num_bands``.
    """
    # Drop the two leading tokens and the trailing one to recover the product.
    product_tokens = path.split(os.sep)[1].split("_")[2:-1]
    band_count = num_bands["_".join(product_tokens)]

    geotransform = Affine(
        0.00037672803497508636,
        0.0,
        -109.07063613660262,
        0.0,
        -0.0002554026278261721,
        47.49838726154881,
    )
    profile = dict(
        driver="GTiff",
        dtype="uint8",
        nodata=None,
        width=SIZE,
        height=SIZE,
        count=band_count,
        crs=CRS.from_epsg(4326),
        transform=geotransform,
        blockysize=1,
        tiled=False,
        compress="lzw",
        interleave="pixel",
    )

    pixels = np.random.randint(low=0, high=255, size=(SIZE, SIZE))

    with rasterio.open(path, "w", **profile) as dst:
        # The same array is reused for every band.
        for band_index in dst.indexes:
            dst.write(pixels, band_index)


def create_mask(path: str) -> None:
    """Write a random single-band ``uint8`` GeoTIFF mask to ``path``.

    Pixel values are drawn from ``np.random.randint(low=0, high=10)`` and the
    raster is ``SIZE`` x ``SIZE`` pixels.

    Args:
        path: Destination file path of the mask.
    """
    geotransform = Affine(
        0.00037672803497508636,
        0.0,
        -109.07063613660262,
        0.0,
        -0.0002554026278261721,
        47.49838726154881,
    )
    profile = dict(
        driver="GTiff",
        dtype="uint8",
        nodata=None,
        width=SIZE,
        height=SIZE,
        count=1,
        crs=CRS.from_epsg(4326),
        transform=geotransform,
        blockysize=1,
        tiled=False,
        compress="lzw",
        interleave="band",
    )

    # Leading axis of length 1 is the single band, so the array is written whole.
    labels = np.random.randint(low=0, high=10, size=(1, SIZE, SIZE))

    with rasterio.open(path, "w", **profile) as dst:
        dst.write(labels)

def create_img_directory(directory: str, hierarchy: FILENAME_HIERARCHY) -> None:
    """Recursively create the image directory tree and its image files.

    Args:
        directory: Directory under which ``hierarchy`` is created.
        hierarchy: Either a dict mapping sub-directory names to nested
            hierarchies, or a list of image file names to create in
            ``directory``.
    """
    if not isinstance(hierarchy, dict):
        # Base case: a list of file names to generate in this directory
        for filename in hierarchy:
            create_image(os.path.join(directory, filename))
        return

    # Recursive case: one sub-directory per key
    for name, subtree in hierarchy.items():
        # Keys containing a product name become "ssl4eo_l_<key>_benchmark"
        is_product = any(product in name for product in filenames)
        dirname = f"ssl4eo_l_{name}_benchmark" if is_product else name
        subdir = os.path.join(directory, dirname)
        os.makedirs(subdir, exist_ok=True)
        create_img_directory(subdir, subtree)

def create_mask_directory(
    directory: str, hierarchy: FILENAME_HIERARCHY, mask_product: str
) -> None:
    """Recursively create the mask directory tree and its mask files.

    Each ``all_bands.tif`` entry in ``hierarchy`` is written as
    ``<mask_product>_<year>.tif``, where the year is looked up in ``years``
    using the sensor token of the top-level directory name (for example
    ``ssl4eo_l_tm_cdl`` yields ``tm``).

    Args:
        directory: Directory under which ``hierarchy`` is created.
        hierarchy: Either a dict mapping sub-directory names to nested
            hierarchies, or a list of file names to create in ``directory``.
        mask_product: Mask product name used as the file name prefix.

    Raises:
        KeyError: If the sensor token of the top-level directory is not in
            ``years``.
    """
    if isinstance(hierarchy, dict):
        # Recursive case
        for key, value in hierarchy.items():
            path = os.path.join(directory, key)
            os.makedirs(path, exist_ok=True)
            create_mask_directory(path, value, mask_product)
    else:
        # Base case
        for value in hierarchy:
            path = os.path.join(directory, value)
            sensor = path.split(os.sep)[1].split("_")[2]
            year = years[sensor]
            # Only the stem is replaced: the original ".tif" suffix is kept,
            # so the replacement must not add a second extension.
            create_mask(path.replace("all_bands", f"{mask_product}_{year}"))
adamjstewart marked this conversation as resolved.
Show resolved Hide resolved

def create_tarballs(directories) -> None:
    """Archive each directory as ``<directory>.tar.gz`` and print its MD5.

    Archives are created relative to the current working directory, and one
    line containing the directory name and the hex digest is printed per
    archive.

    Args:
        directories: Iterable of directory names, relative to the current
            working directory.
    """
    for name in directories:
        # Pack ./<name> into <name>.tar.gz
        shutil.make_archive(name, "gztar", ".", name)

        # Checksum of the archive that was just written
        archive_path = f"{name}.tar.gz"
        with open(archive_path, "rb") as archive:
            digest = hashlib.md5(archive.read()).hexdigest()
        print(name, digest)


if __name__ == "__main__":
    # Image directories: one "ssl4eo_l_<product>_benchmark" tree per product
    create_img_directory(".", filenames)
    create_tarballs([f"ssl4eo_l_{key}_benchmark" for key in filenames])

    # Mask directories: one tree per sensor, built from a single product of
    # each sensor. The names must match keys of ``filenames`` exactly, otherwise
    # that sensor is silently skipped.
    mask_keep = ["tm_toa", "etm_sr", "oli_sr"]
    for mask_product in ("cdl", "nlcd"):
        mask_filenames = {
            f"ssl4eo_l_{key.split('_')[0]}_{mask_product}": val
            for key, val in filenames.items()
            if key in mask_keep
        }
        create_mask_directory(".", mask_filenames, mask_product)
        create_tarballs(mask_filenames.keys())

116 changes: 0 additions & 116 deletions tests/data/ssl4eo_downstream_landsat/l5-l1/data.py

This file was deleted.

Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Empty file.
Empty file.
Empty file.
Empty file.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
11 changes: 4 additions & 7 deletions tests/datasets/test_ssl4eo_downstream_landsat.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,24 @@ def download_url(url: str, root: str, *args: str, **kwargs: str) -> None:


class TestSSL4EODownstream:
@pytest.fixture(params=product([("l5-l1", 2011)], ["cdl", "nlcd"]))
@pytest.fixture(params=product(["tm_toa", "etm_toa", "etm_sr", "oli_tirs_toa", "oli_sr"], ["cdl", "nlcd"]))
def dataset(
self, monkeypatch: MonkeyPatch, tmp_path: Path, request: SubRequest
) -> SSL4EODownstream:
root = str(tmp_path)
sensor_year, mask_product = request.param
input_sensor, year = sensor_year
input_sensor, mask_product = request.param

img_dir = os.path.join(
"tests",
"data",
"ssl4eo_downstream_landsat",
input_sensor,
f"ssl4eo-{input_sensor}-conus",
f"ssl4eo_l_{input_sensor}_benchmark",
)
mask_dir = os.path.join(
"tests",
"data",
"ssl4eo_downstream_landsat",
input_sensor,
f"{input_sensor.split('-')[0]}-{mask_product}-{year}",
f"ssl4eo_l_{input_sensor.split('_')[0]}_{mask_product}",
)

shutil.copy(img_dir, root)
Expand Down
Loading
Loading