In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np

In [None]:
# Read the raw time-series table; the column at position 1 is parsed as datetimes
# (presumably the 'Date' column used by later cells -- confirm against the CSV header).
ts_raw_path = '../data/raw_tmp/ts_raw_vor_plot.csv'
df_ts = pd.read_csv(ts_raw_path, parse_dates=[1])

In [None]:
# Flag rows that share the same (Id, Date) key:
#   'duplicated' -> every member of a duplicated key
#   'dup_first'  -> every member except the first occurrence
#   'dup_last'   -> every member except the last occurrence
dup_key_cols = ['Id', 'Date']
dup_flag_specs = (('duplicated', False), ('dup_first', 'first'), ('dup_last', 'last'))
for flag_name, keep_mode in dup_flag_specs:
    df_ts[flag_name] = df_ts.duplicated(subset=dup_key_cols, keep=keep_mode)

In [None]:
# Resolve duplicated (Id, Date) rows by calendar month: for Jan/Feb keep the last entry,
# for Nov/Dec keep the first entry. This corresponds to how data are smoothed within year
# while the extraction extends before/after each year.
# Note: duplicated rows dated in months 3-10 match none of the conditions below and are removed.
obs_month = df_ts['Date'].dt.month
is_dup = df_ts['duplicated']

keep_not_dup = ~is_dup
keep_year_start = (obs_month < 3) & is_dup & ~df_ts['dup_last']    # last occurrence in Jan/Feb
keep_year_end = (obs_month > 10) & is_dup & ~df_ts['dup_first']    # first occurrence in Nov/Dec

df_ts = df_ts[keep_not_dup | keep_year_start | keep_year_end]

# the helper flag columns are no longer needed
df_ts = df_ts.drop(columns=['duplicated', 'dup_last', 'dup_first'])

In [None]:
# Variable columns without a '_smooth' marker in their name; the first two columns are skipped.
candidate_cols = df_ts.columns[2:]
cols = [col_name for col_name in candidate_cols if not ('_smooth' in col_name)]

In [None]:
# Plot the raw observations (scatter) and the smoothed series (line) for a single plot,
# one panel per variable in `cols`.
plot = '15E_P3'
df_plot = df_ts[df_ts['Id'] == plot]  # filter once instead of twice per panel

# squeeze=False makes plt.subplots always return a 2-D array of Axes, so the loop below
# also works when `cols` holds exactly one variable (otherwise a bare Axes is returned,
# which cannot be iterated).
fig, axs = plt.subplots(figsize=(20, 3*len(cols)), nrows=len(cols), squeeze=False)
axs = axs.ravel()
for col, ax in zip(cols, axs):
    df_plot.plot.scatter(x='Date', y=col, ax=ax, c='black', s=10, alpha=0.5)
    df_plot.plot(x='Date', y=col + '_smooth', ax=ax, c='red')

In [None]:
# Vegetation indices whose '<name>_smooth' columns are differenced and exported by later cells.
veg_idxs = [
    'NDVI', 'NDII7', 'SAVI',
    'RDVI', 'MTVI1', 'PSRI',
    'NDWI', 'EVI', 'TCGI',
]

In [None]:
# Add the 1st derivatives of the specified vegetation indices.
# The derivative is the difference between consecutive smoothed values within each
# (calendar year, Id) group; it is NOT divided by the elapsed time between observations.
#
# diff() depends on row order, so the differences are computed on a view sorted by
# Id and Date rather than trusting the order of the CSV. The result is assigned back
# by index label, so the row order of df_ts itself is left unchanged.
ts_sorted = df_ts.sort_values(['Id', 'Date'], kind='mergesort')
ts_grouped = ts_sorted.groupby([ts_sorted['Date'].dt.year, 'Id'])  # built once, reused per index
for i in veg_idxs:
    df_ts[i + '_dv1'] = ts_grouped[i + '_smooth'].diff()

In [None]:
# Compare two plots across two variables: raw observations as points, smoothed series as
# lines, one panel per variable (plot1 in blue, plot2 in red).
plot1 = '7NW_P2'
plot2 = '20NW_P4'
var1 = 'NDTI'
var2 = 'SWIR1'

fig, axs = plt.subplots(figsize=(20, 6), nrows=2)
site_series = [
    (df_ts[df_ts['Id'] == plot1], 'blue'),
    (df_ts[df_ts['Id'] == plot2], 'red'),
]
for panel_var, panel_ax in zip((var1, var2), axs):
    # all scatter layers first, then all line layers, for each panel
    for site_df, site_color in site_series:
        site_df.plot.scatter(x='Date', y=panel_var, ax=panel_ax, c=site_color, s=10, alpha=0.5)
    for site_df, site_color in site_series:
        site_df.plot(x='Date', y=panel_var + '_smooth', ax=panel_ax, c=site_color)

# Draw vertical reference lines on both panels at two fixed calendar dates
# for every year from 2013 through 2022.
reference_dates = (('-06-12', 'g'), ('-10-5', 'orange'))
for ax in axs:
    for yr in range(2013, 2023):
        for date_suffix, line_color in reference_dates:
            ax.axvline(pd.Timestamp(str(yr) + date_suffix), color=line_color)

In [None]:
# Top panel: raw and smoothed values of var1 for one plot; bottom panel: var2 for the same plot.
plot = '15E_P3'
var1 = 'NDVI'
var2 = 'NDVI_dv1'

fig, axs = plt.subplots(figsize=(20, 6), nrows=2)
top_ax, bottom_ax = axs[0], axs[1]
single_plot_df = df_ts[df_ts['Id'] == plot]
single_plot_df.plot.scatter(x='Date', y=var1, ax=top_ax, c='black', s=10, alpha=0.5)
single_plot_df.plot(x='Date', y=var1 + '_smooth', ax=top_ax, c='red')
single_plot_df.plot(x='Date', y=var2, ax=bottom_ax, c='red')

In [None]:
# Read the training table; the columns at positions 2 and 3 are parsed as datetimes.
vor_path = '../data/training/vor_2013_2022_cln_2023_08_29_plot_hls_idxs.csv'
df_vor = pd.read_csv(vor_path, parse_dates=[2, 3])

In [None]:
# Left-join the derivative columns onto the training table by (Id, Date);
# every row of df_vor is kept, and unmatched rows get NaN derivatives.
dv1_cols = [i + '_dv1' for i in veg_idxs]
df_out = df_vor.merge(df_ts[['Id', 'Date'] + dv1_cols], how='left', on=['Id', 'Date'])

In [None]:
# Write the merged table to disk without the index column.
out_path = '../data/training/vor_2013_2022_cln_2023_08_29_plot_hls_idxs_dv1.csv'
df_out.to_csv(out_path, index=False)

In [None]:
# Per variable: mean absolute deviation of the raw values from the smoothed values,
# relative to the smoothed values, rounded to 2 decimals.
for c in cols:
    smooth_vals = df_ts[c + '_smooth']
    rel_dev = ((df_ts[c] - smooth_vals) / smooth_vals).abs()
    print(c + ': ', rel_dev.mean().round(2))

In [None]:
# Per variable: mean of 20*log10(|smoothed / (raw - smoothed)|), rounded to 2 decimals.
# The scaling by 20 is applied BEFORE rounding; rounding the mean log10 first and then
# multiplying by 20 would quantise the printed value to steps of 0.2 and can expose
# floating-point artefacts in the output.
# NOTE(review): rows where raw == smoothed give a zero denominator (inf), which
# propagates into the mean -- confirm whether such rows should be excluded.
for c in cols:
    smooth_vals = df_ts[c + '_smooth']
    residual = df_ts[c] - smooth_vals
    ratio_db = 20 * np.log10((smooth_vals / residual).abs())
    print(c + ': ', ratio_db.mean().round(2))

In [None]:
# Show the smoothed series for whichever variable `c` was left on by the preceding loop.
last_smooth_name = c + '_smooth'
df_ts[last_smooth_name]

In [None]:
# List the column labels of the merged output table (sanity check that the '_dv1' columns were added).
df_out.columns