In [36]:
import numpy as np
import pandas as pd

# pipeline core
from sklearn.model_selection import GridSearchCV

from pypots.data import load_specific_dataset
from pypots.imputation import SAITS, Transformer

import plotly.express as px
import plotly.graph_objs as go
from plotly.subplots import make_subplots

In [2]:
# Load the field time series from a local CSV file (nothing is downloaded here).
# The first CSV column arrives under the header "Unnamed: 0"; rename it to
# "date" so the later cells can refer to it by a meaningful name.
df = (
    pd.read_csv('../field8_4Year_time_series.csv')
    .rename(columns={"Unnamed: 0": "date"})
)

# Display (n_rows, n_columns) as the cell output.
df.shape


(1096, 12)

In [3]:
# Build the model input: every column except 'date', as one 3-D array of shape
# (n_samples=1, n_steps, n_features). The whole series is a single sample.
feature_frame = df.drop(columns=['date'])

# Take both dimensions from the frame instead of hard-coding the feature
# count, so the cell keeps working if columns are added to or removed from
# the CSV.
n_rows, n_cols = feature_frame.shape
X = feature_frame.to_numpy().reshape(1, n_rows, n_cols)

# NOTE: an earlier draft held out 10% of the observed values as ground truth
# (an `mcar(X, 0.1)` call followed by `masked_fill(X, 1 - missing_mask, np.nan)`).
# That step is disabled, so the model only sees the naturally missing values.
dataset = {"X": X}
X.shape

(1, 1096, 11)

In [25]:
# Model training: fit a PyPOTS Transformer imputation model and impute X.
#
# X is laid out as (n_samples, n_steps, n_features), so the model dimensions
# are read from X.shape. `len(X)` must NOT be used for n_steps: X is a 3-D
# array at this point and len(X) is the number of samples (1), not the number
# of time steps.
_, n_steps, n_features = X.shape

# NOTE: the variable keeps its historical name `saits`, but the model that is
# instantiated here is the Transformer imputer, not SAITS.
saits = Transformer(
    n_steps=n_steps,
    n_features=n_features,
    n_layers=2,
    d_model=256,
    d_inner=128,
    n_heads=4,
    d_k=64,
    d_v=64,
    dropout=0.0,
    epochs=10,
)

# The whole dataset is used as the training set because no ground truth is
# visible to the model; it could also be split into train/val/test sets.
saits.fit(dataset)

# Impute the missing values of the same dataset; the result is indexed with
# the 'imputation' key in the following cells.
imputation = saits.predict(dataset)


2023-11-11 05:14:56 [INFO]: No given device, using default device: cpu
2023-11-11 05:14:56 [INFO]: Model initialized successfully with the number of trainable parameters: 666,891
2023-11-11 05:14:56 [INFO]: epoch 0: training loss 88.5687
2023-11-11 05:14:56 [INFO]: epoch 1: training loss 88.6662
2023-11-11 05:14:56 [INFO]: epoch 2: training loss 86.9160
2023-11-11 05:14:57 [INFO]: epoch 3: training loss 86.9140
2023-11-11 05:14:57 [INFO]: epoch 4: training loss 87.8152
2023-11-11 05:14:57 [INFO]: epoch 5: training loss 83.3009
2023-11-11 05:14:57 [INFO]: epoch 6: training loss 86.5441
2023-11-11 05:14:57 [INFO]: epoch 7: training loss 86.0894
2023-11-11 05:14:58 [INFO]: epoch 8: training loss 85.0309
2023-11-11 05:14:58 [INFO]: epoch 9: training loss 84.8561
2023-11-11 05:14:58 [INFO]: Finished training.


In [30]:
# Sanity check: collapse the imputed array to 2-D with 11 feature columns and
# show the resulting (rows, columns) shape.
imputation['imputation'].reshape((-1, 11)).shape

(1096, 11)

In [34]:
# Wrap the imputed values in a DataFrame with one named column per feature,
# then attach the 'date' column of the original frame (aligned on the index).
inputed_df = pd.DataFrame(
    data=imputation['imputation'].reshape(-1, 11),
    columns=[
        'gdd', 'pmm', 'tmax', 'tmean', 'tmin', 'elevation',
        'exposure', 'slope', 'ndvi', 'ndwi', 'reci',
    ],
).assign(date=df['date'])

In [39]:
# Compare the imputed NDVI/NDWI series with the original observations.
fig = make_subplots(specs=[[{"secondary_y": True}]])

# One entry per trace: (legend label, source frame, column, line style).
# Each trace gets a unique legend label and a distinct, valid colour so the
# imputed and original curves can be told apart. Imputed series are dashed
# and are added first, so the original observations are drawn on top of them.
trace_specs = [
    ('NDVI (imputed)', inputed_df, 'ndvi', dict(color='rgb(255, 127, 14)', dash='dash')),
    ('NDWI (imputed)', inputed_df, 'ndwi', dict(color='rgb(90, 200, 70)', dash='dash')),
    # RGB channels must lie in 0-255; the former value 270 was out of range.
    ('NDWI (original)', df, 'ndwi', dict(color='rgb(90, 200, 255)')),
    ('NDVI (original)', df, 'ndvi', dict(color='rgb(31, 119, 180)')),
]

for trace_name, frame, column, line_style in trace_specs:
    fig.add_trace(go.Scatter(
        name=trace_name,
        x=frame['date'],
        y=frame[column],
        mode='lines',
        line=line_style,
    ))

# A figure should stand alone when the notebook is skimmed.
fig.update_layout(
    title_text='NDVI / NDWI: imputed vs. original',
    xaxis_title='date',
    yaxis_title='index value',
)