In [1]:
import fluxlib
import sys
import numpy as np
import pandas as pd
from datetime import datetime
from sklearn.ensemble import RandomForestRegressor
from sklearn.neural_network import MLPRegressor
import warnings
from sklearn.model_selection import train_test_split
from scipy import stats
from sklearn.metrics import r2_score, mean_squared_error
from matplotlib import pyplot as plt
from sklearn import preprocessing
from pathlib import Path

import numpy as np
from sklearn.datasets import make_classification
from torch import nn
import torch
import torch.nn.functional as F

from skorch import NeuralNetClassifier, NeuralNetRegressor

# Seed both numpy and torch: the models in this notebook are torch modules, so
# seeding numpy alone leaves weight initialisation and dropout unseeded.
np.random.seed(0)
torch.manual_seed(0)
# Silence every warning for the rest of the session.
warnings.simplefilter("ignore")

In [2]:
# load and format malaysia EC data
def load_dataset_malaysia(data_path, specified_columns, timestamp_format):
    """Read an eddy-covariance CSV and return selected columns indexed by time.

    Parameters
    ----------
    data_path : str or path-like
        CSV file to read. The line at position 1 (directly below the header)
        is skipped -- presumably a units row; confirm against the data files.
    specified_columns : list of str
        Columns to keep in the returned frame.
    timestamp_format : str
        ``strptime`` pattern matching "Year-Month-Day Hour:Minute" as assembled
        from the file's ``Year``, ``Month``, ``Day``, ``Hour`` and ``Minute``
        columns.

    Returns
    -------
    pandas.DataFrame
        The requested columns, with -9999 replaced by NaN and the parsed
        datetimes as index.
    """
    # -9999 is treated as the missing-value marker across the whole table.
    table = pd.read_csv(data_path, skiprows = [1]).replace(-9999, np.nan)

    # Stringify the five date/time columns and glue them into one text stamp.
    year, month, day, hour, minute = (
        table[part].map(str) for part in ("Year", "Month", "Day", "Hour", "Minute")
    )
    stamp_text = year + "-" + month + "-" + day + " " + hour + ":" + minute
    stamps = stamp_text.map(lambda text: datetime.strptime(text, timestamp_format))

    # Keep only the requested columns and index them by the parsed datetimes.
    return table.loc[:, specified_columns].set_index(stamps)

In [9]:
# run for Sebungan and Sabaju
#------------------------------------------------------------------------------------
# Full candidate driver set (not used in this run):
# ["Rg_f", "Tair_f", "Tsoil_f", "VPD_f", "H_f", "LE_f", "Ustar", "WTD_1_1_1"]
drivers = ["Rg_f", "Tair_f", "VPD_f"]
flux = ["NEE"]
rg = ["Rg_f"]
bench_flux = ["NEE_f", "NEE_fall"]
qc = ["qcNEE"]
timestamp_name = "Timestamp"
timestamp_format = r"%Y-%m-%d %H:%M"

data_path = "./data/Sebungan_gapfilled_Ustar_01.csv"
# Sebungan_gapfilled_Ustar_01
# Sabaju_gapfilled_Ustar_005

# Number of preceding records appended to every sample.
n_lags = 48


def min_max_scale(values):
    """Scale every column of a 2-D array to [0, 1]; a constant column maps to 0."""
    low = values.min(axis = 0)
    span = values.max(axis = 0) - low
    # Avoid 0/0 -> NaN for columns that never change.
    span[span == 0] = 1.0
    return (values - low) / span


#-------------------------------------------------
# load data:
df = load_dataset_malaysia(data_path, drivers + flux, timestamp_format)
#-------------------------------------------------
X = df[drivers]
y = df[flux]
n_features = X.shape[1]
n_targets = y.shape[1]

# Build one shifted copy of the driver table per lag and join them in a single
# concat. Lag k holds the drivers from k rows earlier; the first k rows have no
# history and are padded with NaN (dropped again below).
lagged = [X]
for count in range(n_lags):
    shift = count + 1
    move = X.values[0:-shift, :]
    pad = np.full((shift, n_features), np.nan)
    lagged.append(
        pd.DataFrame(
            np.vstack([pad, move]),
            index = X.index,
            columns = [str(count).zfill(3) + "_" + str(j) for j in range(n_features)]
        )
    )
X_ = pd.concat(lagged, axis = 1)

# Rows with a missing target or any missing lagged driver are discarded.
dataset = pd.concat([y, X_], axis = 1).dropna().values

# NOTE(review): scaling uses min/max of the full dataset, i.e. before any
# train/test split -- confirm this is acceptable for the evaluation.
y = min_max_scale(dataset[:, :n_targets])
X = min_max_scale(dataset[:, n_targets:])

# Resulting layout is (samples, n_lags + 1, n_features). Along axis 1, index 0 is
# the current record and index k is the record k rows earlier, so the sequence
# runs from newest to oldest.
# NOTE(review): confirm this ordering is what the sequence model should consume.
X = X.reshape(X.shape[0], -1, n_features)
print(X.shape, y.shape)

(18905, 49, 3) (18905, 1)


In [11]:
# Display the target array (min-max scaled in the previous cell) as a sanity check.
y

array([[0.47661032],
       [0.51365003],
       [0.49045449],
       ...,
       [0.4607306 ],
       [0.43921727],
       [0.47049564]])

In [12]:
# Cast both arrays to single precision -- presumably to match the dtype of the
# torch model's parameters; confirm.
X, y = X.astype(np.float32), y.astype(np.float32)
# Random hold-out split with a fixed seed; no test_size is given, so
# scikit-learn's default fraction applies.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state = 42)

class LSTM(nn.Module):
    """Stacked LSTM encoder followed by a fully connected head.

    Parameters
    ----------
    input_size : int
        Number of features per time step.
    hidden_size : int
        Width of the hidden state of every LSTM layer.
    output_size : int
        Number of values produced per sample.
    num_layers : int
        Number of stacked LSTM layers.
    dropout : float
        Dropout probability applied between stacked LSTM layers and inside
        the fully connected head.
    """

    def __init__(self, input_size = 3, hidden_size = 49, output_size = 1,
                 num_layers = 3, dropout = 0.2):
        super().__init__()
        self.lstm = torch.nn.LSTM(
            input_size = input_size,
            hidden_size = hidden_size,
            num_layers = num_layers,
            batch_first = True,
            # nn.LSTM applies dropout only between layers and warns when it is
            # requested for a single-layer network.
            dropout = dropout if num_layers > 1 else 0.0
        )
        # NOTE(review): there is no non-linearity between the last two Linear
        # layers, so together they form a single affine map -- confirm intended.
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, 512),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(512, 64),
            nn.Linear(64, output_size)
        )

    def forward(self, x):
        """Map a batch of sequences to one prediction vector per sequence.

        ``x`` is laid out as [batch_size, time_step, input_size] because the
        LSTM is built with ``batch_first=True``. The returned tensor is
        [batch_size, output_size].
        """
        # Shape notes (unidirectional case only):
        #   output: [batch_size, time_step, hidden_size]
        #   h_n:    [num_layers, batch_size, hidden_size] -- the first dimension
        #           stays num_layers even though batch_first=True
        #   c_n:    same layout as h_n
        _, (h_n, _) = self.lstm(x)
        # Final hidden state of the top layer; for this unidirectional LSTM it
        # equals output[:, -1, :].
        output_in_last_timestep = h_n[-1, :, :]
        return self.fc(output_in_last_timestep)

# Wrap the LSTM module in skorch's scikit-learn style regressor.
net = NeuralNetRegressor(
    LSTM,
    max_epochs=10,
    lr=0.1,
    # Shuffle training data on each epoch
    iterator_train__shuffle=True,
    batch_size = 512,
)

net.fit(X_train, y_train)
# `predict` is the regression entry point; there are no class probabilities here.
y_pred = net.predict(X_test)
# Legacy name, still read by the CSV export cell further down.
y_proba = y_pred
print("ok")

  epoch    train_loss    valid_loss      dur
-------  ------------  ------------  -------
      1        [36m0.0105[0m        [32m0.0061[0m  14.2109
      2        [36m0.0064[0m        [32m0.0061[0m  16.3914
      3        0.0064        0.0061  15.9147
      4        [36m0.0064[0m        0.0061  17.0964
      5        0.0064        [32m0.0061[0m  14.8674
      6        0.0064        [32m0.0061[0m  14.7738
      7        0.0064        0.0061  15.1738
      8        0.0064        [32m0.0061[0m  14.7612
      9        [36m0.0064[0m        0.0061  14.7224
     10        0.0064        0.0061  14.7303
ok


In [16]:
# Stack predictions and held-out targets as two rows, then transpose so that
# each sample becomes one CSV row (prediction first, target second).
prediction_vs_target = pd.DataFrame([y_proba.ravel(), y_test.ravel()])
prediction_vs_target.T.to_csv("Rgtest.csv")

In [20]:
import numpy as np
from sklearn.datasets import make_classification
from torch import nn
import torch
import torch.nn.functional as F

from skorch import NeuralNetClassifier, NeuralNetRegressor


# Synthetic sanity-check data: 1000 samples with 20 features, 10 of them informative.
X, y = make_classification(1000, 20, n_informative=10, random_state=0)
# Append a trailing axis to both arrays and cast them to float32.
X = X[:, :, np.newaxis].astype(np.float32)
y = y[:, np.newaxis].astype(np.float32)
print(X.shape, y.shape)

class LSTM(nn.Module):
    """Stacked LSTM encoder followed by a fully connected head.

    Parameters
    ----------
    input_size : int
        Number of features per time step.
    hidden_size : int
        Width of the hidden state of every LSTM layer.
    output_size : int
        Number of values produced per sample.
    num_layers : int
        Number of stacked LSTM layers.
    dropout : float
        Dropout probability applied between stacked LSTM layers and inside
        the fully connected head.
    """

    def __init__(self, input_size = 1, hidden_size = 20, output_size = 1,
                 num_layers = 3, dropout = 0.2):
        super().__init__()
        self.lstm = torch.nn.LSTM(
            input_size = input_size,
            hidden_size = hidden_size,
            num_layers = num_layers,
            batch_first = True,
            # nn.LSTM applies dropout only between layers and warns when it is
            # requested for a single-layer network.
            dropout = dropout if num_layers > 1 else 0.0
        )
        # NOTE(review): there is no non-linearity between the last two Linear
        # layers, so together they form a single affine map -- confirm intended.
        self.fc = nn.Sequential(
            nn.Linear(hidden_size, 512),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(512, 64),
            nn.Linear(64, output_size)
        )

    def forward(self, x):
        """Map a batch of sequences to one prediction vector per sequence.

        ``x`` is laid out as [batch_size, time_step, input_size] because the
        LSTM is built with ``batch_first=True``. The returned tensor is
        [batch_size, output_size].
        """
        # Shape notes (unidirectional case only):
        #   output: [batch_size, time_step, hidden_size]
        #   h_n:    [num_layers, batch_size, hidden_size] -- the first dimension
        #           stays num_layers even though batch_first=True
        #   c_n:    same layout as h_n
        _, (h_n, _) = self.lstm(x)
        # Final hidden state of the top layer; for this unidirectional LSTM it
        # equals output[:, -1, :].
        output_in_last_timestep = h_n[-1, :, :]
        return self.fc(output_in_last_timestep)

class MyModule(nn.Module):
    """Small fully connected classifier: 20 input features -> 2 class probabilities.

    Parameters
    ----------
    num_units : int
        Width of the first hidden layer.
    nonlin : callable
        Activation applied after the first hidden layer.
    """

    def __init__(self, num_units=10, nonlin=F.relu):
        super(MyModule, self).__init__()

        self.dense0 = nn.Linear(20, num_units)
        self.nonlin = nonlin
        self.dropout = nn.Dropout(0.5)
        self.dense1 = nn.Linear(num_units, 10)
        self.output = nn.Linear(10, 2)

    def forward(self, X, **kwargs):
        """Return class probabilities; the last axis of the result sums to 1.

        Extra keyword arguments are accepted and ignored.
        """
        X = self.nonlin(self.dense0(X))
        X = self.dropout(X)
        X = F.relu(self.dense1(X))
        # Normalise explicitly over the class axis. Without `dim`, softmax is
        # deprecated and picks the axis from the input rank (dim 0 for 3-D input).
        X = F.softmax(self.output(X), dim=-1)
        return X


# Fit the LSTM through skorch's regressor wrapper on the synthetic data as a
# smoke test of the model/wrapper plumbing.
net = NeuralNetRegressor(
    LSTM,
    max_epochs=10,
    lr=0.1,
    # Shuffle training data on each epoch
    iterator_train__shuffle=True,
    batch_size = 512,
)

net.fit(X, y)
# `predict` is the regression entry point; there are no class probabilities here.
y_pred = net.predict(X)
# Legacy name kept so existing references to `y_proba` keep working.
y_proba = y_pred
print("ok")
print(X.shape)

(1000, 20, 1) (1000, 1)
  epoch    train_loss    valid_loss     dur
-------  ------------  ------------  ------
      1        [36m0.5091[0m        [32m0.2690[0m  0.2024
      2        [36m0.2533[0m        [32m0.2514[0m  0.2084
      3        [36m0.2505[0m        [32m0.2491[0m  0.1956
      4        [36m0.2503[0m        0.2564  0.2394
      5        0.2505        0.2505  0.2104
      6        [36m0.2496[0m        0.2523  0.1965
      7        0.2499        0.2532  0.2074
      8        [36m0.2495[0m        0.2526  0.2005
      9        [36m0.2489[0m        0.2504  0.2094
     10        [36m0.2482[0m        0.2539  0.2094
ok
(1000, 20, 1)
