In [None]:
import pandas as pd

In [None]:
# Load the NDVI table from a local pickle file.
# NOTE(review): presumably one row per plot per day, judging by the file name - confirm.
# NOTE(security): unpickling can execute arbitrary code; only load pickles from a trusted source.
df = pd.read_pickle('../data/ndvi/plots/ndvi_daily_interp.pkl')

In [None]:
# Keep only the rows that belong to one plot. The `.copy()` detaches the subset
# from the loaded frame so that later column assignments operate on an independent object.
# (A `year == 2024` condition was tried here earlier and is currently disabled.)
plot_id = 1356
df = df.loc[df['plot_id'] == plot_id].copy()

In [None]:
# Promote the 'index' column to the row index.
# NOTE(review): presumably this column holds timestamps - a later cell calls
# `.isocalendar()` on the resulting index - confirm.
# This cell is not re-runnable: after the first run the 'index' column no longer
# exists as a column, so a second run raises KeyError.
df = df.set_index('index')
# Bare last expression: render the frame as the cell output.
df

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Quick visual check of the smoothed, interpolated NDVI series for the selected plot.
# The Series is plotted against its own index (set from the 'index' column above).
plt.plot(df['ndvi_smooth_interp'])
plt.title(f'Smoothed NDVI of Plot {plot_id}')
plt.xlabel('Year')  # same x-axis label the regression figure uses for this index
plt.ylabel('NDVI')
plt.show()  # also keeps the `[<Line2D ...>]` repr out of the cell output

In [None]:
def decomp_ts(ts, period, model = 'additive'):
    """Decompose a series and return the components side by side.

    Calls ``sts.seasonal_decompose`` and collects the original series together
    with the ``trend``, ``seasonal`` and ``resid`` attributes of its result.

    NOTE(review): ``sts`` is not imported in the visible cells; presumably
    ``statsmodels.tsa.seasonal`` - confirm and add the import.

    Parameters
    ----------
    ts : series to decompose; its ``.index`` becomes the index of the result.
    period : forwarded to ``seasonal_decompose`` as ``period``.
    model : forwarded to ``seasonal_decompose`` as ``model`` (default ``'additive'``).

    Returns
    -------
    pandas.DataFrame with columns ``ts``, ``trend``, ``seasonal`` and ``resid``.
    """
    decomposition = sts.seasonal_decompose(ts, model = model, period = period)
    components = {
        'ts': ts,
        'trend': decomposition.trend,
        'seasonal': decomposition.seasonal,
        'resid': decomposition.resid,
    }
    return pd.DataFrame(components, index = ts.index)

def plot_acf_pacf(x, lags = 40):
    """Draw two panels side by side: ``splt.plot_acf`` on the left, ``splt.plot_pacf`` on the right.

    Missing values are removed from ``x`` before plotting.

    NOTE(review): ``splt`` is not imported in the visible cells; presumably
    ``statsmodels.graphics.tsaplots`` - confirm and add the import.

    Parameters
    ----------
    x : series to analyse (must support ``.notna()`` and boolean indexing).
    lags : forwarded to both plotting calls as ``lags`` (default 40).

    Returns
    -------
    None - the function is called for its plotting side effect only.
    """
    observed = x[x.notna()]
    _, (acf_ax, pacf_ax) = plt.subplots(1, 2, figsize = (15, 5))
    splt.plot_acf(observed, lags = lags, ax = acf_ax)
    splt.plot_pacf(observed, lags = lags, ax = pacf_ax)
    return None

def plot_ts_resid(x):
    """Plot a series as a line (left panel) and as a density histogram with KDE (right panel).

    Missing values are removed from ``x`` before plotting.

    The right panel previously used ``sns.distplot``, which seaborn has
    deprecated and announced for removal. It is replaced by the axes-level
    ``sns.histplot`` with the settings seaborn's migration guide gives as the
    equivalent (density-normalised bars plus a KDE curve with ``cut=3``).

    NOTE(review): ``sns`` is not imported in the visible cells; presumably
    ``seaborn`` - confirm and add the import.

    Parameters
    ----------
    x : series to plot (must support ``.notna()`` and boolean indexing).

    Returns
    -------
    None - the function is called for its plotting side effect only.
    """
    x = x[x.notna()]  # remove NAs
    _, (line_ax, dist_ax) = plt.subplots(1, 2, figsize = (15, 5))
    sns.lineplot(x, ax = line_ax)
    sns.histplot(x, kde = True, stat = 'density', kde_kws = dict(cut = 3), ax = dist_ax)
    return None

In [None]:
# Calendar features derived from the row index: the ISO week number as a plain int,
# and its square. Requires an index that provides `.isocalendar()`.
df['week_int'] = df.index.isocalendar()['week'].astype(int)
df['week_sq'] = df['week_int'].pow(2)

In [None]:
# One-hot encode the week number: one indicator column per distinct `week_int`
# value, each named with the "week" prefix. The variable is called `month_ohe`
# although it holds week indicators; the name is kept because later cells use it.
month_ohe = pd.get_dummies(df['week_int'], prefix = 'week')

# Attach the indicator columns to the right of the existing columns.
df = pd.concat(objs = [df, month_ohe], axis = 'columns')

In [None]:
# Z-score the week number and its square: subtract the column mean and divide by
# the column standard deviation, both computed over every row currently in `df`.
for col in ('week_int', 'week_sq'):
    df[f'{col}_norm'] = (df[col] - df[col].mean()) / df[col].std()

In [None]:
import numpy as np

In [None]:
# Build the regression target and its lag features, all in log space.
# Non-positive NDVI has no real logarithm, so it is masked to NaN up front
# instead of letting np.log produce -inf/NaN (and a RuntimeWarning).
df['ndvi_smooth_interp_log'] = np.log(
    df['ndvi_smooth_interp'].where(df['ndvi_smooth_interp'] > 0)
)
# Lags are taken by row position: shift(k) pairs each row with the value k rows earlier.
df['ndvi_smooth_interp_log_lag_1'] = df['ndvi_smooth_interp_log'].shift(1)
df['ndvi_smooth_interp_log_lag_2'] = df['ndvi_smooth_interp_log'].shift(2)

# Rows without a defined target or lag (the first two rows, plus any row touched by a
# masked value) are dropped. Zero-filling them would invent log-NDVI = 0, i.e. NDVI = 1,
# as if it had been observed.
df = df.dropna(subset = [
    'ndvi_smooth_interp_log',
    'ndvi_smooth_interp_log_lag_1',
    'ndvi_smooth_interp_log_lag_2',
])
# Any NaN left in other columns is still replaced with 0, as before.
df = df.fillna(0)

In [None]:
from pathlib import Path

from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler

# Chronological split: the final TEST_ROWS rows are held out, everything before is training data.
# NOTE(review): 366 presumably corresponds to one (leap) year of daily rows - confirm.
TEST_ROWS = 366
if len(df) <= TEST_ROWS:
    raise ValueError(
        f'need more than {TEST_ROWS} rows to leave a non-empty training set, got {len(df)}'
    )
train = df.iloc[:-TEST_ROWS].copy()
test = df.iloc[-TEST_ROWS:].copy()

# identify features and labels: normalised week terms, the two lagged targets,
# and every one-hot week indicator column
X_cols = [
    'week_int_norm',
    'week_sq_norm',
    'ndvi_smooth_interp_log_lag_1',
    'ndvi_smooth_interp_log_lag_2',
    *month_ohe.columns,
]
y_col = 'ndvi_smooth_interp_log'

# separate features and labels
X_train = train[X_cols].copy()
X_test = test[X_cols].copy()

y_train = train[y_col].copy()
y_test = test[y_col].copy()

# Standardise features; the scaler is fitted on the training rows only and then
# applied unchanged to the test rows.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# build and train model (the target is log-NDVI)
lr_model = LinearRegression()
lr_model.fit(X_train_scaled, y_train)

lr_test_preds = lr_model.predict(X_test_scaled)
lr_train_preds = lr_model.predict(X_train_scaled)

# Plot on the original NDVI scale: predictions and truth are exponentiated back from log space.
# The first training row is left out of the train curves, as in the original notebook
# (presumably because its lag features are not real observations).
plt.figure(figsize = (10,6))
plt.plot(df.index, np.exp(df[y_col]), label = 'Truth', linewidth = 6, alpha = 0.4)
plt.plot(X_train.index[1:], np.exp(lr_train_preds[1:]), label = 'Train Predictions', alpha = 1)
plt.plot(X_test.index, np.exp(lr_test_preds), label = 'Test Predictions', alpha = 1)
# array - Series yields a Series, so the residual curves are drawn against y_train / y_test's index
plt.plot(np.exp(lr_train_preds[1:]) - np.exp(y_train.iloc[1:]), label = 'Train Residuals')
plt.plot(np.exp(lr_test_preds) - np.exp(y_test), label = 'Test Residuals')
plt.legend(loc = 'center left')
plt.title(f'NDVI Time Series of Plot {plot_id}\nLinear Regression')
plt.xlabel('Year')
plt.ylabel('NDVI')

# savefig does not create missing directories, so make sure the target folder exists first
fig_path = Path('../img/ts_lr.png')
fig_path.parent.mkdir(parents = True, exist_ok = True)
plt.savefig(fig_path)
plt.show()

In [None]:
from sklearn.metrics import mean_squared_error, r2_score

# Scores for the linear regression fitted above. Both targets and predictions are
# log-NDVI, so the errors are in log units rather than NDVI units.
# mean_squared_error returns the MSE; take the square root so the value printed
# under the "RMSE" label really is a root-mean-squared error.
print("Test Linear Regression R2:", r2_score(y_test, lr_test_preds))
print("Test Linear Regression RMSE:", np.sqrt(mean_squared_error(y_test, lr_test_preds)))

print("train Linear Regression R2:", r2_score(y_train, lr_train_preds))
print("train Linear Regression RMSE:", np.sqrt(mean_squared_error(y_train, lr_train_preds)))
