In [1]:
import pandas as pd
import numpy as np
from preprocess import prepare_df
import torch.nn as nn
import torch
import torch.nn.functional as F


In [2]:
DEVICE = torch.device("cuda:" + str(1))

In [3]:
data = pd.read_csv('utils/df_imputed.csv', index_col=0).drop(columns=['date'])


In [4]:
df_sensor, df_meta, df_gpp = prepare_df(data, meta_columns=['classid'])

In [29]:
df_sensor[0].columns

Index(['TA_F', 'TA_F_DAY', 'TA_F_NIGHT', 'SW_IN_F', 'LW_IN_F', 'VPD_F', 'PA_F',
       'P_F', 'WS_F', 'wscal', 'fpar'],
      dtype='object')

Unnamed: 0_level_0,TA_F,TA_F_DAY,TA_F_NIGHT,SW_IN_F,LW_IN_F,VPD_F,PA_F,P_F,WS_F,wscal,fpar
sitename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AU-ASM,1.628606,1.516775,1.750389,0.909396,1.579206,1.456687,-1.513130,-0.234035,-1.913783,0.167922,-1.751617
AU-ASM,1.673539,1.627677,1.712027,1.341873,1.244163,1.815236,-1.068557,-0.234035,0.750865,0.104941,-1.701092
AU-ASM,1.364473,1.249931,1.480321,1.447249,0.700057,1.621732,-0.740375,-0.234035,2.481649,0.046282,-1.650722
AU-ASM,1.197645,1.086586,1.302843,0.644752,1.061455,1.238843,-1.038028,-0.234035,1.697929,-0.008665,-1.600504
AU-ASM,0.251624,-0.132874,0.674431,-1.999308,1.118866,-0.755737,-0.942626,4.751457,0.704667,0.747387,-1.550436
...,...,...,...,...,...,...,...,...,...,...,...
AU-ASM,0.180278,0.159266,0.234280,0.708357,-0.671450,0.553107,-0.679317,-0.234035,-0.275396,-0.625298,-1.277838
AU-ASM,-0.032395,0.028003,-0.066291,0.753189,-0.841547,0.391807,-0.265273,-0.234035,-1.052516,-0.629015,-1.264518
AU-ASM,0.087680,0.149703,0.056033,0.745946,-0.672326,0.540188,-0.148883,-0.234035,-0.237447,-0.632771,-1.248024
AU-ASM,0.420730,0.558297,0.323789,0.709199,-0.469270,0.958183,-0.194676,-0.234035,-0.781927,-0.636709,-1.228974


In [82]:
sites_to_train = list(range(len(df_sensor)))
sites_to_train.remove(33)
sites_to_test = [33]

x_train = [df_sensor[i].values for i in sites_to_train]
conditional_train = [df_meta[i].values for i in sites_to_train]
y_train = [df_gpp[i].values.reshape(-1,1) for i in sites_to_train]

x_test = [df_sensor[i].values for i in sites_to_test]
conditional_test = [df_meta[i].values for i in sites_to_test]
y_test = [df_gpp[i].values.reshape(-1,1) for i in sites_to_test]

In [83]:
for i in sites_to_train:
    x_train.append(df_sensor[i].shift(100).dropna().values)
    y_train.append(df_gpp[i].shift(100).dropna().values.reshape(-1,1))

In [84]:
len(x_train)

104

In [40]:
HIDDEN_DIM = 64
CONDITIONAL_FEATURES = len(df_meta[0].columns)

In [87]:
import torch.nn as nn
import torch


class Model(nn.Module):
    def __init__(self, input_dim, conditional_dim, hidden_dim, conditional):
        super().__init__()
        self.lstm = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim, dropout=0.5,num_layers=5)
        self.conditional = conditional
        if self.conditional:
            self.fc1 = nn.Sequential(
            nn.Linear(in_features=hidden_dim, out_features=64),
            nn.ReLU()
        )
        else:
            self.fc1 = nn.Sequential(
            nn.Linear(in_features=hidden_dim, out_features=64),
            nn.ReLU()
        )

        self.fc2 = nn.Sequential(
            nn.Linear(in_features=64, out_features=32),
            nn.BatchNorm1d(32),
            nn.ReLU()
        )
        self.fc3= nn.Sequential(
            nn.Linear(in_features=32, out_features=16),
            nn.BatchNorm1d(16),
            nn.ReLU()
        )
        
        self.fc4 = nn.Linear(16+conditional_dim, 1)
        
    def forward(self, x, c):
        out, (h,d) = self.lstm(x.unsqueeze(1))
        out = out.squeeze(1)
        
        y = self.fc1(out)
        y = self.fc2(y)
        y = self.fc3(y)
        y = self.fc4(torch.cat([y,c], dim=1))
        return y

In [85]:
INPUT_FEATURES = 11
model = Model(INPUT_FEATURES,CONDITIONAL_FEATURES, HIDDEN_DIM, True ).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)

In [None]:
import time
from sklearn.metrics import r2_score


for epoch in range(300):
    train_loss = 0.0
    train_r2 = 0.0
    start = time.time()
    model.train()
    for (x, y, conditional) in zip(x_train, y_train, conditional_train):
        x = torch.FloatTensor(x).to(DEVICE)
        y = torch.FloatTensor(y).to(DEVICE)
        c = torch.FloatTensor(conditional).to(DEVICE)
        
        y_pred = model(x,c)

        optimizer.zero_grad()
        loss = F.mse_loss( y_pred, y)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()
        train_r2 += r2_score(y_true=y.detach().cpu().numpy(), y_pred=y_pred.detach().cpu().numpy())
    
    model.eval()
    with torch.no_grad():
          for (x, y, conditional) in zip(x_test, y_test, conditional_test):
            x = torch.FloatTensor(x).to(DEVICE)
            y = torch.FloatTensor(y).to(DEVICE)
            c = torch.FloatTensor(conditional).to(DEVICE)
            

            y_pred = model(x,c)


            test_loss = F.mse_loss( y_pred, y)
            test_r2 = r2_score(y_true=y.detach().cpu().numpy(), y_pred=y_pred.detach().cpu().numpy())
    
    end = time.time()       
    print(f"Epoch: {epoch+1} ({end-start:.2f}s)")
    print(f"Train loss: {train_loss / len(sites_to_train):.4f} | R2: {train_r2 / len(sites_to_train):.4f}")
    print(f"Test loss: {test_loss:.4f} | R2: {test_r2:.4f}") 

Epoch: 1 (25.76s)
Train loss: 0.3174 | R2: 0.6825
Test loss: 1.0007 | R2: -0.0009
Epoch: 2 (24.45s)
Train loss: 0.3203 | R2: 0.6795
Test loss: 0.9862 | R2: 0.0136
Epoch: 3 (24.62s)
Train loss: 0.3141 | R2: 0.6858
Test loss: 1.0287 | R2: -0.0290
Epoch: 4 (23.11s)
Train loss: 0.3104 | R2: 0.6895
Test loss: 1.0159 | R2: -0.0161
Epoch: 5 (22.99s)
Train loss: 0.3040 | R2: 0.6958
Test loss: 1.0274 | R2: -0.0277
Epoch: 6 (24.36s)
Train loss: 0.2987 | R2: 0.7012
Test loss: 1.0626 | R2: -0.0628
Epoch: 7 (22.96s)
Train loss: 0.2938 | R2: 0.7061
Test loss: 1.0458 | R2: -0.0461
Epoch: 8 (21.62s)
Train loss: 0.2897 | R2: 0.7102
Test loss: 1.0482 | R2: -0.0485
Epoch: 9 (21.01s)
Train loss: 0.2843 | R2: 0.7156
Test loss: 1.0477 | R2: -0.0479
Epoch: 10 (24.70s)
Train loss: 0.2802 | R2: 0.7197
Test loss: 1.0507 | R2: -0.0509
Epoch: 11 (22.78s)
Train loss: 0.2761 | R2: 0.7237
Test loss: 1.0279 | R2: -0.0282
Epoch: 12 (23.91s)
Train loss: 0.2705 | R2: 0.7294
Test loss: 1.0027 | R2: -0.0029
Epoch: 13 (24.

In [68]:
x = torch.FloatTensor(x_test[0]).to(DEVICE)
c = torch.FloatTensor(conditional_test[0]).to(DEVICE)
y_pred = model(x,c).detach().cpu()


In [69]:
import plotly.graph_objects as go
fig = go.Figure()


fig.add_trace(go.Scatter( y=y_test[0].reshape(-1),
                    mode='markers',
                    marker = dict(
                        color = 1,
                        size=4
                    ),
                    name='gt'))

fig.add_trace(go.Scatter( y=y_pred.reshape(-1),
                    mode='markers',
                    marker = dict(
                        color = 1,
                        size=4
                    ),
                    name='gt'))

In [17]:
df.shift()


torch.Size([1461, 1])