In [36]:
import ee
import geemap
import functions_process as funcs_process
import functions_masking as funcs_masking
import functions_sampling as funcs_sampling
from pathlib import Path
from tqdm.notebook import tqdm
import geopandas as gpd
import pandas as pd

In [37]:
# Authenticate against Google Earth Engine, then initialize the client library.
# NOTE(review): the recorded cell output shows a KeyboardInterrupt, i.e. this
# step was interrupted in the saved run — presumably during the interactive
# authentication prompt; confirm that the session is initialized before
# running the cells below.
ee.Authenticate()
ee.Initialize()

KeyboardInterrupt: 

In [None]:
%load_ext autoreload
%autoreload 2
import functions_process as funcs_process
import functions_sampling as funcs_sampling

# prepare insitu data
# The CSV is read without geometry handling; the 'geometry' column is then
# parsed from WKT into a GeoSeries with CRS EPSG:4326 and the table wrapped in
# a GeoDataFrame.
path_data = Path('data/insitu/parsed/lubw_rhine_stations.csv') # Baden-Württemberg Rhine stations (3)
#path_data = Path('data/insitu/parsed/bs_rhine_station.csv') # Basel-stadt Weil am Rhein station
df = gpd.read_file(path_data, ignore_geometry=True)
df['geometry'] = gpd.GeoSeries.from_wkt(df['geometry'], crs='epsg:4326')
gdf_data = gpd.GeoDataFrame(df)

# filter hour of day
# gdf_data['dt64'] = pd.to_datetime(gdf_data.dt64, utc=True)
# gdf_data = gdf_data.loc[pd.to_datetime(gdf_data.dt64).dt.hour==9]

# One Earth Engine table export is started per station; the task handles are
# collected in `tasks` so a later cell can poll their status.
tasks = []
for identifier in tqdm(gdf_data.station_id.unique()):
    # The station id doubles as the suffix of the export description.
    fn = identifier
    gdf_data_station = gdf_data.loc[gdf_data.station_id==identifier]
    # First and last value of the sorted 'dt64' column delimit the period
    # passed to the image-collection loader.
    # NOTE(review): 'dt64' is never converted to datetime in this cell, so the
    # sort is presumably lexicographic on strings — confirm the format is ISO-like.
    dates = gdf_data_station.sort_values('dt64').dt64
    start_date, end_date = dates.iloc[0], dates.iloc[-1]
    # Upload the station rows as an ee.FeatureCollection, using 'dt64' as the date column.
    fc_station = ee.FeatureCollection(geemap.gdf_to_ee(
        gdf_data_station, 
        date='dt64', date_format='YYYY-MM-dd'))
        #date='dt64', date_format="YYYY-MM-dd'T'HH:mm:ss'Z'"))
    bounds = fc_station.geometry()
    # Project helper; presumably returns a masked surface-temperature image
    # collection for the period and bounds — see functions_process for the
    # meaning of cld_buffer, water_buffer and watermask.
    ic_st = funcs_process.load_st_imcoll(start_date, end_date, bounds, cld_buffer=250, water_buffer=100, watermask='qa')
    # Project helpers; presumably pair each in-situ record with images within
    # `max_diff` (units not visible here — TODO confirm) and sample the image
    # at the station location.
    fc_matchups = funcs_sampling.get_matchups(fc_station, ic_st, max_diff=0.5) \
        .map(funcs_sampling.get_sample)
    # Export the matchup table to the Google Drive folder 'SOSW_LST_24042024'
    # under the description 'LST_<station id>'.
    task = ee.batch.Export.table.toDrive(**{
        'collection': fc_matchups, 
        'description': f'LST_{fn}',
        'folder': 'SOSW_LST_24042024'})
    task.start()
    tasks.append(task)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


  0%|          | 0/3 [00:00<?, ?it/s]

In [None]:
# Create an interactive geemap map, add two basemaps, and display the widget
# as the cell output (the bare `Map` expression on the last line).
Map = geemap.Map()
Map.add_basemap("Esri.WorldImagery")
Map.add_basemap("OpenTopoMap")
Map

Map(center=[0, 0], controls=(WidgetControl(options=['position', 'transparent_bg'], widget=SearchDataGUI(childr…

In [None]:
# Preview the first image of `ic_st`. `ic_st` is reassigned on every iteration
# of the export loop, so this is the collection of the last processed station.
img = ic_st.first()

# Visualization parameters for the 'ST' band.
# NOTE(review): 273.15–293.15 presumably corresponds to 0–20 °C with ST in
# Kelvin (a later cell subtracts 273.15 from ST) — confirm.
landsat_tirs = {
    'min': 273.15,
    'max': 293.15,
    'bands': ['ST'],
    'palette': ['006633', 'E5FFCC', '662A00', 'D8D8D8', 'F5F5F5']
}

# Pass the visualization parameters to the layer; previously an empty dict was
# passed and `landsat_tirs` was never used.
Map.addLayer(img, landsat_tirs, 'Landsat LST')

In [None]:
import time
from datetime import datetime, timedelta

def check_tasks_status(tasks):
    """Print a colored one-line status report per task and return the states.

    Parameters
    ----------
    tasks : iterable
        Objects exposing a ``status()`` method that returns a dict with at
        least a ``'state'`` entry (e.g. Earth Engine batch tasks). The
        ``'id'`` and ``'description'`` entries are optional; placeholders are
        printed when they are missing (e.g. for a task that was never started).

    Returns
    -------
    list of str
        The state of every task, in input order.
    """
    reset = '\033[0m'
    orange, red, green = '\033[93m', '\033[91m', '\033[92m'
    # Map each known state to its ANSI color; unknown states are printed uncolored.
    state_colors = {
        'COMPLETED': green,
        'RUNNING': orange,
        'READY': orange,
        'FAILED': red,
        'CANCEL_REQUESTED': red,
        'CANCELLED': red,
    }
    states = []
    for task in tasks:
        status = task.status()
        # Use .get() so a status dict without id/timestamps does not raise KeyError.
        state = status.get('state', 'UNKNOWN')
        task_id = status.get('id', 'No id')
        # The timestamp is taken per task because each status() call may take time.
        timestamp = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        status_msg = f"[{timestamp}] Task {task_id}" \
                     f"({status.get('description', 'No description')}): " \
                     f"{state_colors.get(state, reset)}{state}{reset}"
        print(status_msg)
        states.append(state)
    return states

# Poll the export tasks every 30 seconds until all of them reached a final state.
# 'CANCELLED' is included: without it a cancelled task would keep this loop
# running forever.
finished_states = ('COMPLETED', 'FAILED', 'CANCEL_REQUESTED', 'CANCELLED')
unsuccessful_states = ('FAILED', 'CANCEL_REQUESTED', 'CANCELLED')

all_completed = False
while not all_completed:
    # check_tasks_status() prints the report and returns the states, so the
    # tasks do not have to be queried a second time.
    states = check_tasks_status(tasks)
    all_completed = all(state in finished_states for state in states)
    if not all_completed:
        time.sleep(30)
n_failed = sum(state in unsuccessful_states for state in states)
print(f"All export tasks finished ({n_failed} tasks failed).")

[2024-04-25 13:25:11] Task OLGTPZJGYVRGVGSKACE4OSDD(LST_CXX334): [93mRUNNING[0m
[2024-04-25 13:25:11] Task WM66G3G346NKQE2YM35I4KGP(LST_CXX359): [93mRUNNING[0m
[2024-04-25 13:25:12] Task KZJX4WDBSUQVWD6F363MT67R(LST_CXX147): [93mREADY[0m
[2024-04-25 13:25:43] Task OLGTPZJGYVRGVGSKACE4OSDD(LST_CXX334): [93mRUNNING[0m
[2024-04-25 13:25:44] Task WM66G3G346NKQE2YM35I4KGP(LST_CXX359): [93mRUNNING[0m
[2024-04-25 13:25:44] Task KZJX4WDBSUQVWD6F363MT67R(LST_CXX147): [93mREADY[0m
[2024-04-25 13:26:15] Task OLGTPZJGYVRGVGSKACE4OSDD(LST_CXX334): [93mRUNNING[0m
[2024-04-25 13:26:16] Task WM66G3G346NKQE2YM35I4KGP(LST_CXX359): [93mRUNNING[0m
[2024-04-25 13:26:16] Task KZJX4WDBSUQVWD6F363MT67R(LST_CXX147): [93mREADY[0m
[2024-04-25 13:26:47] Task OLGTPZJGYVRGVGSKACE4OSDD(LST_CXX334): [93mRUNNING[0m
[2024-04-25 13:26:47] Task WM66G3G346NKQE2YM35I4KGP(LST_CXX359): [93mRUNNING[0m
[2024-04-25 13:26:48] Task KZJX4WDBSUQVWD6F363MT67R(LST_CXX147): [93mREADY[0m
[2024-04-25 13:27:19] Ta

In [None]:
import ast 

# Local Google Drive folder that holds the exported matchup tables.
path_drive = Path(r"G:\Meine Ablage\Earth Engine\SOSW_LST_24042024")
paths_csv = list(path_drive.glob('*.csv'))

# Read every export and concatenate once at the end. Concatenating inside the
# loop copies the growing frame on every iteration and leaves duplicated row
# labels; ignore_index=True gives the combined table a unique index.
frames = []
for path in paths_csv:
    print(f'Reading file {path.name}..')
    try:
        frames.append(pd.read_csv(path))
    except pd.errors.EmptyDataError:
        # Empty files are reported and skipped.
        print(f'Note: {path.name} was empty. Skipping.')

# pd.concat raises on an empty list, so fall back to an empty frame.
df = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame([])

def parse_str_dict(string):
    """Parse a ``'{key=value, key2=value2}'`` string into a dict.

    Each ``', '``-separated item is split at its first ``'='``. The literal
    ``null`` becomes ``None``; every other value is evaluated with
    ``ast.literal_eval`` (numbers, quoted strings, ...).

    Parameters
    ----------
    string : str
        Text of the form ``'{ST=290.5, QA=null}'``. A non-string input (e.g.
        NaN from an empty CSV cell) or an empty map ``'{}'`` yields ``{}``.

    Returns
    -------
    dict
        Mapping of key (str) to the parsed value.

    Raises
    ------
    ValueError, SyntaxError
        If a value is not a valid Python literal.
    """
    if not isinstance(string, str):
        return {}
    # Strip the surrounding braces.
    inner = string.strip()[1:-1].strip()
    if not inner:
        return {}
    parsed_dict = {}
    for item in inner.split(', '):
        # Split only at the first '=' so that the value is left untouched.
        key, _, raw_value = item.partition('=')
        parsed_dict[key] = None if raw_value == 'null' else ast.literal_eval(raw_value)
    return parsed_dict

# Expand the stringified 'values_eo' mapping into one column per key.
# Building the frame once from the list of dicts avoids creating a pandas
# Series per row, which is what `.apply(pd.Series)` does.
eo_records = df['values_eo'].apply(parse_str_dict)
df_values_eo = pd.DataFrame(eo_records.tolist(), index=df.index)
df = pd.concat([df.drop('values_eo', axis=1), df_values_eo], axis=1)
# NOTE(review): presumably converts 'ST' from Kelvin to degrees Celsius — confirm the unit of 'ST'.
df['ST_deg'] = df['ST']-273.15


Reading file LST_bs_war.csv..
Reading file LST_CXX359.csv..
Reading file LST_CXX334.csv..
Reading file LST_CXX147.csv..


In [83]:
import plotly.express as px
import plotly.graph_objects as go
from scipy.stats import linregress
import numpy as np
import math

# Columns compared in the scatter plot: x is 'ST_deg', y is 'temperature'
# (labelled T_Landsat and T_insitu below).
x_name, y_name = 'ST_deg', 'temperature'

# Decimal hour of day of every record (not used by the figure below).
ds_dt64 = pd.to_datetime(df.timestamp)
ds_hour = ds_dt64.dt.hour + ds_dt64.dt.minute / 60 + ds_dt64.dt.second / 3600

# One facet per station, colored by sensor, with an OLS trendline per trace.
fig = px.scatter(df, x=x_name, y=y_name,
                 # custom_data is documented as a list of column names
                 custom_data=['station_id'],
                 facet_col="station_id",
                 width=1200, height=400,
                 trendline="ols",
                 opacity=0.5,
                 color='sensor',
                 labels={
                     'temperature': "T<sub>insitu</sub>",
                     'ST_deg': "T<sub>Landsat</sub>"
                 },
                 template="simple_white")

fig.update_layout(
    margin=dict(l=20, r=20, t=20, b=20),
    xaxis=dict(scaleanchor="y", scaleratio=1),  # Ensuring equal scaling
    yaxis=dict(constrain='domain'),  # Ensuring y-axis adjusts to match x-axis scale
)

def get_lingress_dict(x, y):
    """Fit a linear regression ``y = slope * x + intercept`` and return statistics.

    Pairs where either ``x`` or ``y`` is missing are dropped before fitting.
    If the two series do not share the same index, ``y`` is aligned to the
    index of ``x`` first. The pairing is then done by position, so duplicated
    index labels do not multiply rows.

    Parameters
    ----------
    x, y : pandas.Series
        Predictor and response values.

    Returns
    -------
    dict
        Keys ``'intercept'``, ``'slope'``, ``'p'``, ``'r'``, ``'r2'``,
        ``'mse'`` and ``'rmse'``; the error measures refer to the residuals of
        ``y`` against the fitted line.
    """
    if not x.index.equals(y.index):
        y = y.reindex(x.index)
    # Keep only the pairs where both values are present.
    valid = x.notna().to_numpy() & y.notna().to_numpy()
    x_valid = x.to_numpy()[valid].astype(float)
    y_valid = y.to_numpy()[valid].astype(float)
    slope, intercept, r_value, p_value, std_err = linregress(x_valid, y_valid)
    predicted_y = slope * x_valid + intercept
    mse = np.mean((y_valid - predicted_y) ** 2)
    return {'intercept': intercept,
            'slope': slope,
            'p': p_value,
            'r': r_value,
            'r2': r_value**2,
            'mse': mse,
            'rmse': np.sqrt(mse)
            }

# Common range of both variables, used as the end points of the 1:1 line.
lims = [
    min(df[x_name].min(), df[y_name].min()),  # Find the lower bound
    max(df[x_name].max(), df[y_name].max())   # Find the upper bound
]

# sort=False keeps the stations in order of first appearance, which is the
# order plotly express uses for the facet columns; with the default sorted
# groups the statistics could be written into the wrong facet.
for idx, (station_id, df_group) in enumerate(df.groupby('station_id', sort=False)):
    x, y = df_group.ST_deg, df_group.temperature
    statdict = get_lingress_dict(x, y)
    # add annotation
    # Double quotes inside the single-quoted f-strings keep the code valid on
    # Python versions before 3.12.
    fig.add_annotation(x=2, y=25,
                align='left',
                xref=f'x{idx+1}',
                yref=f'y{idx+1}',
                text=(f'<b>R2:</b> {statdict["r2"]:0.2f}<br>'
                      f'<b>MSE:</b> {statdict["mse"]:0.2f}<br>'
                      f'<b>RMSE:</b> {statdict["rmse"]:0.2f}'),
                showarrow=False,
                yshift=10)
    # Dashed 1:1 reference line in every facet; only the first one gets a
    # legend entry so the legend does not repeat it per station.
    fig.add_trace(go.Scatter(x=lims, y=lims,
                             mode='lines', name='1:1 Line', line=dict(dash='dash'),
                             legendgroup='1:1 Line', showlegend=(idx == 0)),
                             row=1, col=idx+1)

fig.show()