In [None]:
import os
import pickle
import numpy as np
import glob
import pandas as pd
from tqdm.auto import tqdm

import plotly.express as px
import ipywidgets as widgets
from ipywidgets import interact, HBox

from osgeo import gdal
from stackcomposed.stack_composed import parse as pc

In [None]:
# Load the point coordinates; the CSV is semicolon-separated.
data = pd.read_csv(
    'data/Extracted_coordinates_sipalaga_site_phu_number.csv',
    sep=";",
)

In [None]:
# Keep only the points whose ID (PHU number) is 1109.
# .copy() turns the subset into an independent frame, so the columns that are
# added to it further down do not trigger pandas' SettingWithCopyWarning.
data = data.loc[data["ID"] == 1109].copy()

In [None]:
""" Notes for usage
    
    This script is not well optimized. For each given point it reads every image and
    extracts the pixel value. This is because on a small instance there is not enough
    memory to store a stack of images.
    
    All the images must use the same date format in their file names, because the script
    sorts the list of files according to their dates.
    
"""

In [None]:
# Collect every GeoTIFF of the processed area.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
image_files = glob.glob(
    '/home/dguerrero/pysmm_downloads/1_processed/ReducedAreas_107PHU/1109/*.tif'
)

In [None]:
# Order the paths lexicographically, in place.
image_files[:] = sorted(image_files)

In [None]:
# Open the first image to obtain the georeferencing used for every point.
# Assuming all images have the same extent.
if not image_files:
    raise FileNotFoundError("No .tif images were found; check the image directory.")

gdal_file_extent = gdal.Open(image_files[0], gdal.GA_ReadOnly)

# gdal.Open returns None instead of raising unless gdal.UseExceptions() is on.
if gdal_file_extent is None:
    raise RuntimeError(f"GDAL could not open raster: {image_files[0]!r}")


def read_images():
    """Open the first entry of ``image_files`` and look up band 1's nodata value.

    The nodata value is only bound to a local name; nothing is returned.
    NOTE(review): this looks unfinished -- confirm whether it should return the
    nodata value or be removed.
    """
    first_dataset = gdal.Open(image_files[0], gdal.GA_ReadOnly)
    # Keep the dataset bound to a name while its band is in use.
    first_band = first_dataset.GetRasterBand(1)
    nodata_from_file = first_band.GetNoDataValue()

In [None]:
def read_raster(raster_path):
    """Read band 1 of a raster file into a float32 NumPy array.

    Parameters
    ----------
    raster_path : str
        Path handed to ``gdal.Open`` in read-only mode.

    Returns
    -------
    numpy.ndarray
        The contents of band 1 cast to ``np.float32``.

    Raises
    ------
    RuntimeError
        If GDAL cannot open the file or cannot read band 1.
    """
    gdal_file = gdal.Open(raster_path, gdal.GA_ReadOnly)
    # gdal.Open returns None instead of raising unless gdal.UseExceptions() is on;
    # fail here with the offending path rather than with an AttributeError below.
    if gdal_file is None:
        raise RuntimeError(f"GDAL could not open raster: {raster_path!r}")

    raster_band = gdal_file.GetRasterBand(1).ReadAsArray()
    if raster_band is None:
        raise RuntimeError(f"GDAL could not read band 1 of: {raster_path!r}")

    return raster_band.astype(np.float32)

In [None]:
def get_sm(x, y):
    """Build the time series of one pixel across every file in ``image_files``.

    Each image is read in full with ``read_raster`` and the element at
    ``[x][y]`` is kept, keyed by the timestamp parsed from the file via
    ``pc.parse_other_files(image)[4]``. The collected values become a
    one-column DataFrame indexed by those timestamps, in which every 0 is
    replaced by ``np.nan``.

    Parameters
    ----------
    x : int
        First (outer) index into the raster array.
    y : int
        Second (inner) index into the raster array.

    Returns
    -------
    pandas.DataFrame
        One row per distinct timestamp; zeros replaced by NaN.
    """
    pixel_by_date = {}
    for image_path in image_files:
        timestamp = pd.to_datetime(pc.parse_other_files(image_path)[4])
        # An image with an already-seen timestamp overwrites the earlier value.
        pixel_by_date[timestamp] = read_raster(image_path)[x][y]

    series_frame = pd.DataFrame(pixel_by_date.values(), index=pixel_by_date.keys())
    return series_frame.replace(0, np.nan)

In [None]:
def add_xy(gdal_file_extent):
    """
    Convert every point's longitude/latitude into raster pixel offsets.

    The geotransform of ``gdal_file_extent`` is read once and, for each record
    of the module-level ``data`` frame, two columns are written in place:

    * ``'column'`` -- offset along the latitude (north-south) axis;
    * ``'row'``    -- offset along the longitude (east-west) axis.

    These labels are swapped with respect to the usual raster naming and are
    kept as they are for compatibility with code that already reads them.
    Both values carry a ``+ 1`` offset (they are 1-based), so subtract 1
    before using them as NumPy indices.

    Parameters
    ----------
    gdal_file_extent : object exposing ``GetGeoTransform()``
        Opened raster whose geotransform applies to every point.

    Returns
    -------
    None
        ``data`` is modified in place.
    """

    min_x, x_res, x_skew, max_y, y_skew, y_res = gdal_file_extent.GetGeoTransform()
    # NOTE(review): the skew terms are ignored, i.e. an unrotated raster is
    # assumed -- confirm for the images in use.

    # `point` (not `row`) so the record is not shadowed by the pixel offset below.
    for index, point in data.iterrows():

        lon = float(point['longitude'])
        lat = float(point['latitude'])

        # Offset along the latitude axis. The vertical pixel size (y_res,
        # negative for north-up rasters) is used instead of x_res so that
        # non-square pixels are handled; for square pixels the result is
        # identical to (max_y - lat) / x_res.
        column = int(((lat - max_y) / y_res) + 1)

        # Offset along the longitude axis.
        row = int(((lon - min_x) / x_res) + 1)

        data.at[index, 'row'] = row
        data.at[index, 'column'] = column


In [None]:
# Create the 'row' and 'column' fields of `data` from the point coordinates.
add_xy(gdal_file_extent=gdal_file_extent)

In [None]:
# Holds one time-series DataFrame per point; stays None for skipped points.
data['ts_data'] = None

# The context manager closes the progress bar even if reading an image fails.
with tqdm(total=len(data), desc="Retrieving...") as pbar:

    for index, row in data.iterrows():

        # add_xy stores the latitude-axis offset under 'column' and the
        # longitude-axis offset under 'row', both with a + 1 (1-based) offset.
        x = int(row['column'])
        y = int(row['row'])

        pbar.set_description(f"Retrieving for {x}, {y}...")

        # Offsets below 1 are not sampled.
        if x > 0 and y > 0:
            # NumPy arrays are 0-based, so shift the 1-based offsets back;
            # using them directly would sample the neighbouring pixel and
            # overflow on the last line/sample of the raster.
            ts_df = get_sm(x - 1, y - 1)
            data.at[index, 'ts_data'] = ts_df

        pbar.update(1)
    

In [None]:
# Persist the points together with their extracted time series.
# Pickle files should only be loaded back from trusted sources.
pd.to_pickle(data, 'data/pysmm_ts_SIPAGALA_points_2.pkl')