In [4]:
# Sentinel Hub Config

from env_vars import sentinel_hub_instance_id
from sentinelhub import SHConfig

# Import Area of Interest List

import pandas as pd
import json
from scripts.mgrs import encode,LLtoUTM


# Sentinel Hub Tile Look Up / Download

from sentinelhub import WebFeatureService, BBox, CRS, DataSource, AwsTileRequest


# Cloud Masking

import rasterio as rio
import numpy as np
import earthpy.mask as em

# Generate Product Detail DataFrame

import os
from glob import glob
import xml.etree.ElementTree as ET


# Sort / Organize Tiles by Individual Folders

from shutil import copyfile


# Extract Polygon crops from products

import pandas as pd
from shapely.geometry import Polygon
import geopandas as gpd
from geopandas import GeoDataFrame
import earthpy.spatial as es



In [15]:
def shub_connect(sentinel_hub_instance_id):
    """Build a Sentinel Hub configuration for the given instance id.

    Parameters
    ----------
    sentinel_hub_instance_id : str or None
        Instance id to store on the configuration object.

    Returns
    -------
    SHConfig or None
        A fresh ``SHConfig`` with ``instance_id`` set, or ``None`` when the
        supplied id is empty / falsy.
    """
    # Without an instance id there is nothing to configure.
    if not sentinel_hub_instance_id:
        return None

    sh_config = SHConfig()
    sh_config.instance_id = sentinel_hub_instance_id
    return sh_config

In [16]:
# Module-level Sentinel Hub config; get_shub_tiles reads this `config` global
# when it builds its WebFeatureService query.
config = shub_connect(sentinel_hub_instance_id)

In [20]:
def import_aois(csv_loc, max_rows=33):
    """Read the labelled areas of interest and derive their search extent.

    Parameters
    ----------
    csv_loc : str
        Path to a CSV with ``center-lat``, ``center-long`` and ``polygon``
        columns. Each ``polygon`` cell is a JSON string whose
        ``"coordinates"`` entry is a list of rings of ``[lon, lat]`` points.
    max_rows : int or None, optional
        Only the first ``max_rows`` rows are used (default 33, the value that
        used to be hard-coded). Pass ``None`` to use every row.

    Returns
    -------
    tuple
        ``(bounding_box, tiles)`` where ``bounding_box`` is
        ``(min_lon, min_lat, max_lon, max_lat)`` over all polygon vertices and
        ``tiles`` is a sorted list of the unique tile identifiers produced by
        ``encode(LLtoUTM(lat, lon), 1)[:-2]`` for each vertex.

    Raises
    ------
    ValueError
        If the selected rows contain no coordinates at all.
    """
    df_labels = pd.read_csv(csv_loc)
    df_labels = df_labels[["center-lat","center-long","polygon"]]
    if max_rows is not None:
        df_labels = df_labels[0:max_rows]

    # Flatten every ring of every polygon into one list of [lon, lat] points.
    coordinates = [
        lon_lat
        for polygon in df_labels["polygon"]
        for ring in json.loads(polygon)["coordinates"]
        for lon_lat in ring
    ]
    if not coordinates:
        raise ValueError(f"No polygon coordinates found in {csv_loc!r}")

    # Bounding box over all vertices.
    lons = [point[0] for point in coordinates]
    lats = [point[1] for point in coordinates]
    bounding_box = min(lons),min(lats),max(lons),max(lats)

    # One identifier per vertex, de-duplicated. The helpers come from
    # scripts.mgrs; presumably the trailing two characters are the 1-digit
    # easting/northing, leaving the tile name -- TODO confirm.
    # Sorted so the result is deterministic between runs.
    tiles = sorted({encode(LLtoUTM(point[1],point[0]),1)[:-2] for point in coordinates})
    return bounding_box,tiles

In [25]:
# Search extent and tile names for the labelled AOIs. get_shub_tiles uses
# these two names as default argument values, so they are captured when that
# function is defined.
bounding_box,tile_list = import_aois("./data/labelled/labels_Misha_v2.csv")

In [26]:
def get_shub_tiles(output_dir,bands="R10m/TCI",search_time_interval = ('2019-01-01T00:00:00', '2020-12-31T23:59:59'),
                   product_type = "SENTINEL2_L2A",bounding_box = bounding_box,tile_list = tile_list):
    """Look up Sentinel tiles over the AOI bounding box and download them.

    Parameters
    ----------
    output_dir : str
        Folder passed to ``AwsTileRequest`` as ``data_folder``.
    bands : str or list of str, optional
        Band(s) to download. A single string is wrapped into a list.
    search_time_interval : tuple of str, optional
        ``(start, end)`` interval passed to ``WebFeatureService``.
    product_type : str or DataSource, optional
        Name of a ``DataSource`` member (e.g. ``"SENTINEL2_L2A"``) or the
        member itself; used for both the search and the download.
    bounding_box : tuple, optional
        ``(min_lon, min_lat, max_lon, max_lat)`` in WGS84.
    tile_list : list of str, optional
        Tile names to keep from the search results.

    Notes
    -----
    * The ``bounding_box`` / ``tile_list`` defaults are the module-level
      values at the time this function is defined; re-run this cell after
      recomputing them.
    * The search uses the module-level ``config`` and ``maxcc=.05``.
    * At most the first 10 search results per tile name are downloaded, and
      ``redownload=True`` fetches them again even if already present.
    * Previously ``bands``, ``search_time_interval`` and ``product_type``
      were silently overwritten inside the body; they are now honoured.
    """
    # AwsTileRequest is given a list of band names; accept a bare string too.
    band_list = [bands] if isinstance(bands, str) else list(bands)

    # Resolve a member name such as "SENTINEL2_L2A" to the DataSource member.
    data_source = getattr(DataSource, product_type) if isinstance(product_type, str) else product_type

    search_bbox = BBox(bbox=bounding_box, crs=CRS.WGS84)

    wfs_iterator = WebFeatureService(
        search_bbox,
        search_time_interval,
        data_source=data_source,
        maxcc=.05,
        config=config
    )
    results = wfs_iterator.get_tiles()

    # Keep only the tiles of interest, then the first 10 results per tile.
    df_results = pd.DataFrame(results, columns=['Tilename','Date','AmazonID'])
    df_tiles_of_interest = df_results[df_results["Tilename"].isin(tile_list)]
    df_selected = df_tiles_of_interest.groupby('Tilename').head(10)

    for tile_name, time, aws_index in df_selected.itertuples(index=False,name=None):

        #Download SAFE Files
        request = AwsTileRequest(
            tile=tile_name,
            time=time,
            bands = band_list,
            aws_index=aws_index,
            data_folder=output_dir,
            data_source=data_source,
            safe_format = True
        )

        request.save_data(redownload=True)

In [None]:
# Download the matching products into this folder (network call; every other
# argument is left at its default).
get_shub_tiles("/Volumes/Lacie/zhenyadata/Project_Canopy_Data/PC_Data/Sentinel_Data/Labelled/Tiles_v2_Misha/test")

In [188]:
def cloud_mask_tci(src_dir):
    """Blank out cloudy pixels of a product's 10 m TCI image.

    The cloud-probability raster ``MSK_CLDPRB_20m.jp2`` is upsampled 2x along
    both axes, every pixel with a probability above 0 is treated as cloud,
    and those pixels are set to 0 in the TCI image. The result is written
    next to the source image as ``processed_<original name>``.

    Parameters
    ----------
    src_dir : str
        Product folder (without trailing slash). Files are located with the
        glob patterns ``src_dir + "*/*/MSK_CLDPRB_20m.jp2"`` and
        ``src_dir + "*/IMG_DATA/R10m/*.jp2"``.

    Raises
    ------
    FileNotFoundError
        If the cloud-probability mask or the TCI image cannot be found.
    """
    msk_matches = glob(src_dir + "*/*/MSK_CLDPRB_20m.jp2")
    # Ignore outputs of an earlier run so re-running never picks up (and
    # re-masks) an already processed image.
    tci_matches = [
        path for path in glob(src_dir + "*/IMG_DATA/R10m/*.jp2")
        if not os.path.basename(path).startswith("processed_")
    ]
    if not msk_matches:
        raise FileNotFoundError(f"No MSK_CLDPRB_20m.jp2 found for {src_dir!r}")
    if not tci_matches:
        raise FileNotFoundError(f"No R10m TCI .jp2 found for {src_dir!r}")

    msk_file_path = msk_matches[0]
    tci_file_path = tci_matches[0]
    tci_filename = os.path.basename(tci_file_path)
    # Write beside the source image so the output always lands in the folder
    # the input was actually read from.
    output_tci_file_path = os.path.join(os.path.dirname(tci_file_path), "processed_" + tci_filename)

    nodatavalue = int(0)

    with rio.open(tci_file_path) as sen_TCI_src:
        sen_TCI = sen_TCI_src.read(masked=True)
        sen_TCI_meta = sen_TCI_src.meta

    with rio.open(msk_file_path) as sen_mask_src:
        sen_mask_pre = sen_mask_src.read(1)
        # Duplicate every mask pixel 2x2 to match the 10 m image grid.
        sen_mask = np.repeat(np.repeat(sen_mask_pre,2,axis=0),2,axis=1)

    # All pixels above 0 probability will be classified as True
    sen_mask_qa = sen_mask > 0

    # Apply mask to source TCI file
    if np.count_nonzero(sen_mask_qa) > 0:
        sen_TCI_cl_free_nan = em.mask_pixels(sen_TCI, sen_mask_qa)
        sen_TCI_cl_free_processed = np.ma.filled(sen_TCI_cl_free_nan, fill_value=nodatavalue)
    else:
        # No cloudy pixels: export the image itself. (The old code assigned the
        # boolean mask to a misspelled name, so this path raised NameError.)
        sen_TCI_cl_free_processed = np.ma.filled(sen_TCI, fill_value=nodatavalue)

    # Export cloud-masked TCI file
    with rio.open(output_tci_file_path, 'w', **sen_TCI_meta) as outf:
        outf.write(sen_TCI_cl_free_processed)

In [189]:
def apply_mask_tci_safe_list(tci_folder_list):
    """Run ``cloud_mask_tci`` on every entry directly under a folder.

    Parameters
    ----------
    tci_folder_list : str
        Parent folder; each path matching ``tci_folder_list + "/*"`` is
        passed to ``cloud_mask_tci``.

    Prints the number of entries processed once all of them are done.
    """
    processed_count = 0
    for product_path in glob(tci_folder_list + "/*"):
        cloud_mask_tci(product_path)
        processed_count += 1

    print(f"Applied masks to {processed_count} products")

In [190]:
# Cloud-mask every product folder found directly under this directory.
tci_folder_list = "/Volumes/Lacie/zhenyadata/Project_Canopy_Data/PC_Data/Sentinel_Data/Labelled/Tiles_v2_Misha/test_raw"
apply_mask_tci_safe_list(tci_folder_list)

Applied masks to 4 products


In [5]:
def _first_text_with_tag(tree, tag_fragment):
    """Return the text of the first element whose tag contains ``tag_fragment``."""
    for elem in tree.iter():
        if tag_fragment in elem.tag:
            return elem.text
    raise ValueError(f"Metadata has no element matching {tag_fragment!r}")


def generate_product_detail_df(input_dir):
    """Collect per-product metadata into a table sorted by tile and cloud cover.

    For every sub-folder of ``input_dir`` the first ``*.xml`` file is parsed.
    The product directory name is taken from the second path component of the
    ``MASK_FILENAME`` entry, the tile id from the second ``_``-separated part
    of that name, and the cloud / no-data / unclassified percentages from the
    correspondingly named elements. The cloud-masked image is the first
    ``processed*.jp2`` under ``<input_dir>/<directory>/IMG_DATA/R10m``.

    Parameters
    ----------
    input_dir : str
        Folder containing one sub-folder per downloaded product.

    Returns
    -------
    pandas.DataFrame
        Columns ``Directory``, ``Tile_Id``, ``Cloud Cover``,
        ``No Data Percentage``, ``Unclassified Percentage``, ``Filename``,
        ``Filepath``; sorted by tile id, then cloud cover, then unclassified
        percentage, with a fresh 0..n-1 index. The percentage columns are
        numeric so the ordering is numeric rather than lexicographic
        (as strings, "10.5" would sort before "9.2").

    Raises
    ------
    FileNotFoundError
        If a product folder has no ``*.xml`` or no ``processed*.jp2`` file.
    ValueError
        If an expected metadata element is missing.
    """
    columns = ["Directory","Tile_Id","Cloud Cover","No Data Percentage","Unclassified Percentage","Filename","Filepath"]
    percentage_columns = ["Cloud Cover","No Data Percentage","Unclassified Percentage"]

    meta_data = []
    for folder in sorted(os.listdir(input_dir)):
        product_dir = os.path.join(input_dir, folder)
        # os.listdir also returns plain files (e.g. .DS_Store); skip them.
        if not os.path.isdir(product_dir):
            continue

        xml_matches = glob(os.path.join(product_dir, "*.xml"))
        if not xml_matches:
            raise FileNotFoundError(f"No metadata XML found in {product_dir!r}")
        tree = ET.parse(xml_matches[0])

        directory = _first_text_with_tag(tree, "MASK_FILENAME").split("/")[1]
        tile_id = directory.split("_")[1]

        image_matches = glob(os.path.join(input_dir, directory, "IMG_DATA", "R10m", "processed*.jp2"))
        if not image_matches:
            raise FileNotFoundError(f"No processed*.jp2 found for product {directory!r}")
        filepath = image_matches[0]
        filename = os.path.basename(filepath)

        # Look each value up by name instead of relying on element order.
        cloud_cover = _first_text_with_tag(tree, "CLOUDY_PIXEL_PERCENTAGE")
        no_data = _first_text_with_tag(tree, "NODATA_PIXEL_PERCENTAGE")
        unclassified = _first_text_with_tag(tree, "UNCLASSIFIED_PERCENTAGE")

        meta_data.append([directory,tile_id,cloud_cover,no_data,unclassified,filename,filepath])

    df = pd.DataFrame(meta_data,columns=columns)
    df[percentage_columns] = df[percentage_columns].apply(pd.to_numeric)
    return df.sort_values(by=["Tile_Id","Cloud Cover","Unclassified Percentage"],ignore_index=True)

In [6]:
# Build the product metadata table for the products under test_raw.
input_dir = "/Volumes/Lacie/zhenyadata/Project_Canopy_Data/PC_Data/Sentinel_Data/Labelled/Tiles_v2_Misha/test_raw"

df = generate_product_detail_df(input_dir)


In [11]:
def order_masked_tiles(df,output_dir):
    """Copy processed tiles into numbered "layer" folders.

    Rows are visited in their current order. The first row of a run of equal
    ``Tile_Id`` values goes to ``<output_dir>/1``, the next row of the same
    run to ``<output_dir>/2`` and so on; the counter restarts at 1 whenever
    the tile id changes. Sort ``df`` so that equal tile ids are adjacent.

    Parameters
    ----------
    df : pandas.DataFrame
        Needs ``Tile_Id``, ``Filename`` and ``Filepath`` columns. Any index
        is accepted (rows are walked positionally).
    output_dir : str
        Parent of the layer folders; created if missing. A trailing slash is
        optional.
    """
    layer = 0
    previous_tile_id = object()  # sentinel that equals no real tile id

    for tile_id, filename, filepath in zip(df["Tile_Id"], df["Filename"], df["Filepath"]):
        # Same tile as the previous row -> next layer; new tile -> back to 1.
        layer = layer + 1 if tile_id == previous_tile_id else 1
        previous_tile_id = tile_id

        destination_dir = os.path.join(output_dir, str(layer))
        # Creates missing parents too and tolerates an existing folder.
        os.makedirs(destination_dir, exist_ok=True)

        # Copy file to existing or new directory
        copyfile(filepath, os.path.join(destination_dir, filename))

In [12]:
# Copy the processed tiles into numbered layer folders under test_ordered/.
order_masked_tiles(df,"/Volumes/Lacie/zhenyadata/Project_Canopy_Data/PC_Data/Sentinel_Data/Labelled/Tiles_v2_Misha/test_ordered/")

In [22]:
# Crop to Polygon per Tile


# Load the labels CSV and keep the first 33 rows of the three AOI columns.
# Note: this rebinds `df`, which earlier held the product detail table.
df = pd.read_csv("/Users/purgatorid/Documents/GitHub/canopy-gis/data_collection/data/labelled/labels_Misha_v2.csv")
df_labels = df[["center-lat","center-long","polygon"]][0:33]

In [28]:
# Build one shapely Polygon per labelled row. The first ring of the JSON
# "coordinates" is the outer boundary and any further rings are holes.
# (Appending inside the ring loop used to emit an extra, corrupted polygon
# for every additional ring.)
polygons = []
for polygon in df_labels["polygon"]:
    rings = [
        [tuple(coordinate) for coordinate in coordinates]
        for coordinates in json.loads(polygon)["coordinates"]
    ]
    # A row without any ring contributes no polygon.
    if rings:
        polygons.append(Polygon(rings[0], rings[1:]))

In [31]:
# Wrap the shapely polygons in a GeoSeries.
gdf_series = gpd.GeoSeries(polygons)

In [32]:
# Build a GeoDataFrame from the series; the geometry column is labelled 0.
gdf = gpd.GeoDataFrame(gdf_series,geometry=0)

In [35]:
# Copy the geometries into a column named "geometry".
gdf["geometry"] = gdf[0]

In [36]:
# Drop the original column 0 so only "geometry" remains.
gdf = gdf.drop(columns=[0])

In [38]:
# Master raster to crop, and the polygons to crop it with
# (polygon_list is just another name for gdf).
master_raster_v2 = "/Volumes/Lacie/zhenyadata/Project_Canopy_Data/PC_Data/Sentinel_Data/Labelled/Tiles_v2_Misha/Master_Rasters/msk_geotiff_full.tif"
polygon_list = gdf

In [None]:
# Open the master raster for reading.
# NOTE(review): this handle is never closed in this cell.
src_raster_file = rio.open(master_raster_v2)

In [6]:
# End-to-end scratch version of the AOI crop workflow: read the labelled
# polygons, then write one GeoTIFF crop of the master raster per polygon.
df = pd.read_csv("/Users/purgatorid/Documents/GitHub/canopy-gis/data_collection/data/labelled/labels_Misha_v2.csv")
df_labels = df[["center-lat","center-long","polygon"]][0:33]


# One shapely Polygon per row: the first ring is the outer boundary, further
# rings are holes. (Appending inside the ring loop used to emit an extra,
# corrupted polygon for every additional ring.)
polygons = []
for polygon in df_labels["polygon"]:
    rings = [
        [tuple(coordinate) for coordinate in coordinates]
        for coordinates in json.loads(polygon)["coordinates"]
    ]
    if rings:
        polygons.append(Polygon(rings[0], rings[1:]))

gdf_series = gpd.GeoSeries(polygons)
# Single "geometry" column, registered as the active geometry.
gdf = gpd.GeoDataFrame(geometry=gdf_series)


master_raster_path = "/Volumes/Lacie/zhenyadata/Project_Canopy_Data/PC_Data/Sentinel_Data/Labelled/Tiles_v2_Misha/Master_Rasters/msk_geotiff_full.tif"
out_base_path = "/Volumes/Lacie/zhenyadata/Project_Canopy_Data/PC_Data/Sentinel_Data/Labelled/Tiles_v2_Misha/Polygon_Crops_Test/"

# Create the output folder once, before the loop.
os.makedirs(out_base_path, exist_ok=True)

# The context manager closes the master raster when the loop finishes or fails.
with rio.open(master_raster_path) as src_raster_file:
    for index in range(gdf.shape[0]):
        crop_extent = gdf.loc[[index],"geometry"]

        raster_crop, raster_meta = es.crop_image(src_raster_file, crop_extent)

        # Update the metadata to have the new shape (x and y)
        raster_meta.update({"driver": "GTiff",
                            "height": raster_crop.shape[1],
                            "width": raster_crop.shape[2]})

        # mask the nodata
        raster_crop_ma = np.ma.masked_equal(raster_crop, 0)

        # output_path (files are numbered from 1)
        outpath = out_base_path + str(index+1) + '.tif'

        # Export the cropped raster
        with rio.open(outpath, 'w', **raster_meta) as outf:
            outf.write(raster_crop_ma)

In [13]:
def csv_to_gdf(csv_loc, max_rows=33):
    '''
    Import the manually created areas-of-interest CSV as a GeoDataFrame.

    Parameters
    ----------
    csv_loc : str
        Path to a CSV with "center-lat", "center-long" and "polygon" columns.
        Each "polygon" cell is a JSON string whose "coordinates" entry is a
        list of rings of coordinate pairs.
    max_rows : int or None, optional
        Only the first max_rows rows are used (default 33, the value that
        used to be hard-coded). Pass None to use every row.

    Returns
    -------
    geopandas.GeoDataFrame
        In-memory frame with a single "geometry" column holding one polygon
        AOI per row, to be used for cropping the master raster. Rows whose
        "coordinates" list is empty are skipped. No CRS is set.
    '''
    df = pd.read_csv(csv_loc)
    df_labels = df[["center-lat","center-long","polygon"]]
    if max_rows is not None:
        df_labels = df_labels[0:max_rows]

    polygons = []
    for polygon in df_labels["polygon"]:
        rings = [
            [tuple(coordinate) for coordinate in coordinates]
            for coordinates in json.loads(polygon)["coordinates"]
        ]
        if not rings:
            continue
        # First ring is the outer boundary, any further rings are holes.
        # (Appending inside the ring loop used to emit an extra, corrupted
        # polygon for every additional ring.)
        polygons.append(Polygon(rings[0], rings[1:]))

    # Build the frame directly so "geometry" is the active geometry column.
    return gpd.GeoDataFrame(geometry=polygons)

In [15]:
# Load the labelled AOI polygons into a GeoDataFrame.
csv_loc = "/Users/purgatorid/Documents/GitHub/canopy-gis/data_collection/data/labelled/labels_Misha_v2.csv"

gdf = csv_to_gdf(csv_loc)

In [16]:
def export_aoi_polygon_rasters(gdf,master_raster_path,output_dir):
    """Crop the master raster to each AOI polygon and save the crops.

    Parameters
    ----------
    gdf : geopandas.GeoDataFrame
        Frame with a "geometry" column; one crop is produced per row.
    master_raster_path : str
        Path of the raster to crop.
    output_dir : str
        Folder for the crops; created if missing. Files are named
        ``1.tif``, ``2.tif``, ... following the row order of ``gdf``.

    Notes
    -----
    Previously the function ignored ``output_dir`` and wrote to the
    module-level ``out_base_path``; it now uses the argument. The master
    raster is also closed when the function returns or fails.
    """
    os.makedirs(output_dir, exist_ok=True)

    with rio.open(master_raster_path) as src_raster_file:
        for position in range(gdf.shape[0]):
            # One-row GeoSeries, selected positionally so any index works.
            crop_extent = gdf["geometry"].iloc[[position]]

            raster_crop, raster_meta = es.crop_image(src_raster_file, crop_extent)

            # Update the metadata to have the new shape (x and y)
            raster_meta.update({"driver": "GTiff",
                                "height": raster_crop.shape[1],
                                "width": raster_crop.shape[2]})

            # mask the nodata
            raster_crop_ma = np.ma.masked_equal(raster_crop, 0)

            # output_path (files are numbered from 1)
            outpath = os.path.join(output_dir, str(position+1) + '.tif')

            # Export the cropped raster
            with rio.open(outpath, 'w', **raster_meta) as outf:
                outf.write(raster_crop_ma)

In [17]:
# Crop the master raster to every AOI polygon.
# The recorded run of this cell ended with a KeyboardInterrupt.
master_raster_path = "/Volumes/Lacie/zhenyadata/Project_Canopy_Data/PC_Data/Sentinel_Data/Labelled/Tiles_v2_Misha/Master_Rasters/msk_geotiff_full.tif"
output_dir = "/Volumes/Lacie/zhenyadata/Project_Canopy_Data/PC_Data/Sentinel_Data/Labelled/Tiles_v2_Misha/Polygon_Crops_Test/"

export_aoi_polygon_rasters(gdf,master_raster_path,output_dir)

KeyboardInterrupt: 