# Get sen2 DF

In [None]:
import rasterio
import numpy as np
import pandas as pd
import geopandas as gpd
import os
import glob
import time
from rasterio.merge import merge
from rasterio.plot import show
import rasterio.features
import rasterio.warp
from geojson import Point, Feature, FeatureCollection, dump
import shapely.wkt

In [None]:
# Resolve the working directory with the standard library instead of the
# IPython-only `!pwd` shell escape, so the cell is valid Python and does not
# depend on a `pwd` executable being available.
root_dir = os.getcwd()
# Sub-folder (relative to the working directory) that is searched for the
# Sentinel-2 rasters.
folder = "/sen2/merged_reprojected"
files_folder = root_dir + folder


In [None]:

# Glob pattern that matches every GeoTIFF inside files_folder.
search_criteria = "*.tif"
search_term = os.path.join(files_folder, search_criteria)
# The pattern is only built here; glob.glob(search_term) is called where the file table is created.


In [None]:
# Build the Sentinel-2 file table and collect its unique date strings.
files = glob.glob(search_term)

# NOTE(review): both fields are cut out at fixed character offsets of the
# FULL path, so they are only correct for one specific directory length and
# file-name layout - confirm against the actual file names.
dates = [raster_path[78:86] for raster_path in files]
tiles = [raster_path[102:108] for raster_path in files]

df_sen2 = pd.DataFrame(
    np.column_stack([files, dates, tiles]),
    columns=["files", "dates", "tiles"],
)
unique_dates = pd.unique(df_sen2["dates"])

In [None]:
df_sen2

In [None]:
names = []
geoms = []
# Join with os.path so the folder is correct even when root_dir carries no
# trailing separator (a working-directory path does not); plain string
# concatenation glued "footprints" onto the last directory name.
out_fp = os.path.join(root_dir, "footprints")


# Extract footprints: collect every polygon that rasterio derives from each
# raster's valid-data mask, together with a label for the source file.
n_files = len(df_sen2["files"])
for counter, i in enumerate(df_sen2["files"], start=1):
    # Progress indicator, rewritten in place on one console line.
    perc = 100 * float(counter) / float(n_files)
    print(str(int(perc))+"%","       ",end="\r")

    # File name without directory and without extension.
    file_name = os.path.splitext(os.path.basename(i))[0]
    out_name = os.path.join(out_fp, file_name + ".GeoJSON")
    # Label stored next to each geometry. It is built straight from the file
    # name, so neither the parent folder nor a "footprints" substring inside
    # the name can leak into / be stripped from it.
    # NOTE(review): the ".jp2" suffix is kept from the original code even
    # though this loop reads the Sentinel-2 rasters - confirm it is intended.
    n = file_name + ".jp2"

    with rasterio.open(i) as dataset:

        # Read the dataset's valid data mask as a ndarray.
        mask = dataset.dataset_mask()

        # Extract feature shapes and values from the array. The shapes stay
        # in the dataset's own coordinate reference system (no reprojection).
        for geom, val in rasterio.features.shapes(
                mask, transform=dataset.transform):
            names.append(n)
            geoms.append(geom)
print(str(100)+"%","       ",end="\r")

In [None]:
# Turn the GeoJSON-like footprint mappings into shapely geometries and build
# the Sentinel-2 GeoDataFrame.
from shapely.geometry import shape

# shape() converts each mapping directly and keeps every ring of a polygon.
# The previous hand-assembled WKT string concatenated all rings into a single
# ring without a separator, which corrupts polygons that contain holes.
wkt_geoms = [shape(geom_mapping) for geom_mapping in geoms]

# NOTE(review): geoms holds one entry per extracted shape while df_sen2 holds
# one row per file; this assignment raises if a raster produced more than one
# shape - confirm every Sentinel-2 mask yields exactly one polygon.
df_sen2["geom"] = wkt_geoms
gdf_sen2 = gpd.GeoDataFrame(df_sen2, geometry=df_sen2.geom)
# set_crs returns a new frame unless inplace=True; without it the CRS was
# never stored on gdf_sen2.
gdf_sen2.set_crs(epsg=2154, inplace=True)
# Keep the table as the cell's displayed output.
gdf_sen2

In [None]:
gdf_sen2.plot()

# Get Spot6 DF
## Get Spot6 Footprints

In [None]:
# define inputs
sensor = "spot6"
root_dir = "/home/simon/CDE_UBS/thesis/data_collection/"
out_fp = root_dir + "footprints"
path = root_dir + sensor

# File pattern per supported sensor. An unknown sensor is rejected explicitly
# instead of silently reusing whatever search_criteria an earlier cell left
# behind.
patterns_by_sensor = {"spot6": "*.jp2", "sen2": "*.tif"}
if sensor not in patterns_by_sensor:
    raise ValueError(
        "unsupported sensor %r, expected one of %s"
        % (sensor, sorted(patterns_by_sensor))
    )
search_criteria = patterns_by_sensor[sensor]
search_term = os.path.join(path, search_criteria)
# perform search
files = glob.glob(search_term)


In [None]:
names, geoms = [], []

# Walk over every file found by the search, report progress and collect the
# polygons rasterio derives from each raster's valid-data mask.
total_files = float(len(files))
counter = 0
for raster_path in files:
    counter += 1
    # Progress is refreshed for every file, on a single console line.
    perc = 100 * float(counter) / total_files
    print(str(int(perc))+"%","       ",end="\r")

    # Text between the last "/" and the last "." of the path.
    file_name = raster_path[raster_path.rfind("/")+1:raster_path.rfind(".")]
    out_name = out_fp + file_name + ".GeoJSON"

    # Label for this file: last path component of out_name with the extension
    # swapped to ".jp2" and every "footprints" occurrence removed.
    label_end = out_name.rfind(".")
    label_start = out_name.rfind("/") + 1
    n = (out_name[label_start:label_end] + ".jp2").replace("footprints","")

    with rasterio.open(raster_path) as dataset:
        # Valid-data mask of the dataset as an array.
        mask = dataset.dataset_mask()
        # Shapes stay in the dataset's own coordinate system (no reprojection).
        shape_iter = rasterio.features.shapes(mask, transform=dataset.transform)
        for geom, val in shape_iter:
            names.append(n)
            geoms.append(geom)
print(str(100)+"%","       ",end="\r")

In [None]:
# Turn the GeoJSON-like footprint mappings into shapely geometries and build
# the SPOT-6 GeoDataFrame.
from shapely.geometry import shape

# shape() converts each mapping directly and keeps every ring of a polygon.
# The previous hand-assembled WKT string concatenated all rings into a single
# ring without a separator, which corrupts polygons that contain holes.
wkt_geoms = [shape(geom_mapping) for geom_mapping in geoms]

# names and geoms were filled in lockstep, so both columns have equal length.
df_spot6 = pd.DataFrame()
df_spot6["name"] = names
df_spot6["geom"] = wkt_geoms
gdf_spot6 = gpd.GeoDataFrame(df_spot6, geometry=df_spot6.geom)
# set_crs returns a new frame unless inplace=True; without it the CRS was
# never stored on gdf_spot6.
gdf_spot6.set_crs(epsg=2154, inplace=True)
# Keep the table as the cell's displayed output.
gdf_spot6

In [None]:
gdf_spot6.plot()

## Get Centroids

In [None]:
def get_centroid(gdf):
    """Return the centroids of *gdf* as a plain Python list.

    Args:
        gdf: Object exposing a ``centroid`` attribute that can be iterated,
            e.g. a GeoDataFrame or GeoSeries.

    Returns:
        list: One centroid per element of ``gdf.centroid``, in the same order.
    """
    # ``centroid`` already yields the point geometries; wrapping it in another
    # GeoSeries before listing it only made an extra copy.
    return list(gdf.centroid)

In [None]:
# Store each frame's centroids in a "centroid" column (SPOT-6 first, then Sentinel-2).
for frame in (gdf_spot6, gdf_sen2):
    frame["centroid"] = get_centroid(frame)

## Get Spot6 positions in sen2 tiles

In [None]:
# Make the "centroid" column the active geometry of the SPOT-6 frame, then plot the points.
gdf_spot6.set_geometry("centroid",inplace=True)
gdf_spot6.plot(markersize=0.1)

In [None]:
# Inner spatial join of the SPOT-6 frame (active geometry: centroid points) with the Sentinel-2 frame;
# only SPOT-6 rows that match at least one Sentinel-2 geometry are kept.
join_spot6_sen2 = gdf_spot6.sjoin(gdf_sen2, how="inner")

In [None]:
join_spot6_sen2.head()

In [None]:
# Re-wrap the join result with "centroid_left" as active geometry
# (presumably the SPOT-6 centroids, since gdf_spot6 was the left frame - confirm)
# and declare its CRS as EPSG:2154 in place.
join_spot6_sen2 = gpd.GeoDataFrame(join_spot6_sen2, geometry="centroid_left")
join_spot6_sen2.set_crs(epsg=2154,inplace=True)

In [None]:
join_spot6_sen2.plot(markersize=0.1)

## Get Spot6 Dates

In [None]:
# Load gdf of date info

In [None]:
# Read the shapefile that carries the DATE attribute used below.
# NOTE: this overwrites the `path` variable that earlier held the sensor folder.
path = "/home/simon/CDE_UBS/thesis/data_collection/footprints/other/"
dates_spot6 = gpd.read_file(path+"FRANCE_2018_LA93_INFO.shp")

In [None]:
# Keep only the date and geometry columns.
dates_spot6 = dates_spot6[["DATE","geometry"]]
# set_crs returns a new frame; assign it back so the CRS declaration is
# actually kept (the result was previously discarded).
dates_spot6 = dates_spot6.set_crs(epsg=2154)
dates_spot6.plot()

In [None]:
dates_spot6.head(1)

## Join Sen2/Spot6 Dates & Tiles with Spot6 acquisition dates

In [None]:
# Drop the leftover "index_right" column of the previous spatial join before joining again.
join_spot6_sen2.drop('index_right', inplace=True, axis=1)

In [None]:
# Attach the DATE attribute through an inner spatial join with the date
# polygons, then discard the helper columns left over from both joins.
join_spot6_sen2_DATES = join_spot6_sen2.sjoin(dates_spot6, how="inner")
join_spot6_sen2_DATES = join_spot6_sen2_DATES.drop(
    columns=["geom_right", "centroid_right", "index_right"]
)

In [None]:
join_spot6_sen2_DATES.plot(markersize=0.1)

In [None]:
# Switch the active geometry back to the "geometry" column
# (presumably the SPOT-6 footprint polygons - confirm) and declare its CRS as EPSG:2154 in place.
join_spot6_sen2_DATES = gpd.GeoDataFrame(join_spot6_sen2_DATES, geometry="geometry")
join_spot6_sen2_DATES.set_crs(epsg=2154,inplace=True)

In [None]:
join_spot6_sen2_DATES.plot()

# Find closest date

## Transform dates to proper format

In [None]:
import time
from datetime import datetime

In [None]:
# Parse the "YYYYMMDD" date strings of both sensors into datetime objects
# and store them as new columns.
date_format = "%Y%m%d"
ls_sen2_date = [
    datetime.strptime(raw_date, date_format)
    for raw_date in join_spot6_sen2_DATES["dates"]
]
ls_spot6_date = [
    datetime.strptime(raw_date, date_format)
    for raw_date in join_spot6_sen2_DATES["DATE"]
]
join_spot6_sen2_DATES["date_sen2"] = ls_sen2_date
join_spot6_sen2_DATES["date_spot62"] = ls_spot6_date
    

In [None]:
join_spot6_sen2_DATES.head(2)

## Perform check for closest date

In [None]:
# Distinct values of the "name" and "tiles" columns in the joined table.
spot6_unique = join_spot6_sen2_DATES["name"].unique()
sen2_tiles_unique = join_spot6_sen2_DATES["tiles"].unique()

In [None]:
for spot6_image in spot6_unique:
    tmp = join_spot6_sen2_DATES[join_spot6_sen2_DATES["name"] == spot6_image]