In [1]:
import pandas as pd
import numpy as np
from preprocess import prepare_df
import torch.nn as nn
import torch
import torch.nn.functional as F


In [3]:
# Preferred CUDA device index (the notebook was originally run on GPU 3).
GPU_INDEX = 3

# Use the preferred GPU only when it actually exists; otherwise fall back to the
# CPU so the notebook does not fail at the first `.to(DEVICE)` call on machines
# with fewer GPUs (or none at all).
if torch.cuda.is_available() and torch.cuda.device_count() > GPU_INDEX:
    DEVICE = torch.device(f"cuda:{GPU_INDEX}")
else:
    DEVICE = torch.device("cpu")

In [4]:
# Load the imputed dataset, using the first CSV column as the index, and then
# discard the 'date' column so it is not passed on to `prepare_df`.
data = pd.read_csv("utils/df_imputed.csv", index_col=0)
data = data.drop(columns=["date"])


In [37]:
# Split the raw frame into three per-site collections via the project helper.
# NOTE(review): judging from how they are indexed below, each result looks like
# a sequence of pandas objects, one entry per site -- confirm against
# `preprocess.prepare_df`.
(
    df_sensor,
    df_meta,
    df_gpp,
) = prepare_df(data, meta_columns=["classid"])

In [38]:
# Leave-site-out split: the held-out site indices are declared once and the
# training sites are derived from them, so changing the test site(s) only
# requires editing `sites_to_test`.
sites_to_test = [0]
sites_to_train = [site for site in range(len(df_sensor)) if site not in sites_to_test]

# One array per site. Targets are reshaped to a single column so they line up
# with the model output, which has one value per row.
x_train = [df_sensor[i].values for i in sites_to_train]
conditional_train = [df_meta[i].values for i in sites_to_train]
y_train = [df_gpp[i].values.reshape(-1, 1) for i in sites_to_train]

x_test = [df_sensor[i].values for i in sites_to_test]
conditional_test = [df_meta[i].values for i in sites_to_test]
y_test = [df_gpp[i].values.reshape(-1, 1) for i in sites_to_test]

In [39]:
# Network sizing. The feature counts are read from the first site's frames, so
# every site is assumed to share the same columns -- TODO confirm.
HIDDEN_DIM = 256
INPUT_FEATURES = len(df_sensor[0].columns)
CONDITIONAL_FEATURES = len(df_meta[0].columns)

In [43]:
import torch.nn as nn
import torch


class Model(nn.Module):
    """Two-layer LSTM followed by a dense head that also sees conditional features.

    The LSTM output at every time step goes through three Linear+ReLU stages
    (hidden_dim -> 64 -> 32 -> 16). The 16-dim result is concatenated with the
    conditional features and projected to a single value per time step.

    Args:
        input_dim: Number of features per time step fed to the LSTM.
        conditional_dim: Number of conditional features concatenated before the
            final linear layer.
        hidden_dim: LSTM hidden size.
        conditional: Stored on the instance as ``self.conditional``. The layers
            built here are the same for both values of the flag, and ``forward``
            always concatenates ``c``.
    """

    def __init__(self, input_dim, conditional_dim, hidden_dim, conditional):
        super().__init__()
        self.lstm = nn.LSTM(
            input_size=input_dim,
            hidden_size=hidden_dim,
            dropout=0.3,
            num_layers=2,
        )
        self.conditional = conditional

        # The original code built this layer in an if/else on `conditional`
        # with two identical branches; a single definition is equivalent.
        self.fc1 = nn.Sequential(
            nn.Linear(in_features=hidden_dim, out_features=64),
            nn.ReLU(),
        )
        self.fc2 = nn.Sequential(
            nn.Linear(in_features=64, out_features=32),
            nn.ReLU(),
        )
        self.fc3 = nn.Sequential(
            nn.Linear(in_features=32, out_features=16),
            nn.ReLU(),
        )

        # The final projection sees the 16 learned features plus the raw
        # conditional features.
        self.fc4 = nn.Linear(16 + conditional_dim, 1)

    def forward(self, x, c):
        """Predict one value per row of ``x``.

        Args:
            x: Input features; a dimension of size 1 is inserted at position 1
                before the LSTM. With the default ``batch_first=False`` layout
                this makes the rows of a 2-D ``x`` the sequence axis and the
                batch size 1.
            c: Conditional features, concatenated with the 16-dim hidden
                features along ``dim=1``; needs the same number of rows as ``x``.

        Returns:
            Output of the final linear layer, with a last dimension of size 1.
        """
        # The final hidden/cell states are not needed, only the per-step outputs.
        out, _ = self.lstm(x.unsqueeze(1))
        out = out.squeeze(1)

        y = self.fc1(out)
        y = self.fc2(y)
        y = self.fc3(y)
        y = self.fc4(torch.cat([y, c], dim=1))
        return y

In [44]:
# Seed PyTorch before the weights are created so that initialisation (and the
# dropout masks drawn during training) are repeatable on a fresh kernel.
SEED = 0
torch.manual_seed(SEED)

# Build the network on the selected device; Adam runs with its default
# hyper-parameters.
model = Model(
    input_dim=INPUT_FEATURES,
    conditional_dim=CONDITIONAL_FEATURES,
    hidden_dim=HIDDEN_DIM,
    conditional=True,
).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters())

In [45]:
import time
from sklearn.metrics import r2_score


# Train for a fixed number of epochs. Each site is one full sequence and gets
# its own optimizer step; after every epoch the held-out site(s) are evaluated.
for epoch in range(300):
    train_loss = 0.0
    train_r2 = 0.0
    start = time.time()

    # ---- training pass ----
    model.train()
    for (x, y, conditional) in zip(x_train, y_train, conditional_train):
        x = torch.FloatTensor(x).to(DEVICE)
        y = torch.FloatTensor(y).to(DEVICE)
        c = torch.FloatTensor(conditional).to(DEVICE)

        optimizer.zero_grad()
        y_pred = model(x, c)
        loss = F.mse_loss(y_pred, y)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        # R2 is computed from the predictions made before this optimizer step.
        train_r2 += r2_score(
            y_true=y.detach().cpu().numpy(),
            y_pred=y_pred.detach().cpu().numpy(),
        )

    # ---- evaluation pass ----
    # Metrics are accumulated over all test sites and averaged. Previously they
    # were overwritten on every iteration, so only the last site was reported.
    model.eval()
    test_loss = 0.0
    test_r2 = 0.0
    with torch.no_grad():
        for (site, x, y, conditional) in zip(sites_to_test, x_test, y_test, conditional_test):
            x = torch.FloatTensor(x).to(DEVICE)
            y = torch.FloatTensor(y).to(DEVICE)
            c = torch.FloatTensor(conditional).to(DEVICE)

            y_pred = model(x, c)

            # The loss is taken over every row; only R2 is restricted by the mask.
            test_loss += F.mse_loss(y_pred, y).item()

            # NOTE(review): `masks` is not defined in any visible cell of this
            # notebook; it must exist in the kernel before this cell runs.
            # It is presumably indexed by site id (for the default test site 0
            # this is `masks[0]`, as before) -- confirm.
            site_mask = masks[site]
            test_r2 += r2_score(
                y_true=y.detach().cpu().numpy()[site_mask],
                y_pred=y_pred.detach().cpu().numpy()[site_mask],
            )

    # Guard against an empty test set so the report below cannot divide by zero.
    n_test_sites = max(len(sites_to_test), 1)
    test_loss /= n_test_sites
    test_r2 /= n_test_sites

    end = time.time()
    print(f"Epoch: {epoch+1} ({end-start:.2f}s)")
    print(f"Train loss: {train_loss / len(sites_to_train):.4f} | R2: {train_r2 / len(sites_to_train):.4f}")
    print(f"Test loss: {test_loss:.4f} | R2: {test_r2:.4f}")

Epoch: 1 (14.29s)
Train loss: 0.6164 | R2: 0.3833
Test loss: 0.7693 | R2: 0.2530
Epoch: 2 (14.12s)
Train loss: 0.4918 | R2: 0.5080
Test loss: 0.7493 | R2: 0.2758
Epoch: 3 (14.04s)
Train loss: 0.4263 | R2: 0.5735
Test loss: 0.6458 | R2: 0.3987
Epoch: 4 (14.13s)
Train loss: 0.3933 | R2: 0.6066
Test loss: 0.6152 | R2: 0.4324
Epoch: 5 (14.12s)
Train loss: 0.3706 | R2: 0.6293
Test loss: 0.5781 | R2: 0.4799
Epoch: 6 (13.71s)
Train loss: 0.3551 | R2: 0.6448
Test loss: 0.5595 | R2: 0.5024
Epoch: 7 (14.49s)
Train loss: 0.3470 | R2: 0.6529
Test loss: 0.5359 | R2: 0.5379
Epoch: 8 (14.65s)
Train loss: 0.3380 | R2: 0.6619
Test loss: 0.5136 | R2: 0.5714
Epoch: 9 (14.04s)
Train loss: 0.3336 | R2: 0.6662
Test loss: 0.5129 | R2: 0.5925
Epoch: 10 (13.88s)
Train loss: 0.3285 | R2: 0.6714
Test loss: 0.5020 | R2: 0.6185
Epoch: 11 (13.70s)
Train loss: 0.3249 | R2: 0.6750
Test loss: 0.4889 | R2: 0.6332
Epoch: 12 (14.00s)
Train loss: 0.3199 | R2: 0.6799
Test loss: 0.4856 | R2: 0.6404
Epoch: 13 (13.94s)
Train 

KeyboardInterrupt: 

In [14]:
# Predict on the first held-out site.
# The training loop above was interrupted, so the model may still be in train
# mode (the LSTM uses dropout). Switch to eval mode explicitly and disable
# gradient tracking so the predictions are deterministic and cheap to compute.
model.eval()
with torch.no_grad():
    x = torch.FloatTensor(x_test[0]).to(DEVICE)
    c = torch.FloatTensor(conditional_test[0]).to(DEVICE)
    y_pred = model(x, c).detach().cpu()


In [36]:
import plotly.graph_objects as go
# Scatter plot of the targets of the first training site, restricted by a mask.
# NOTE(review): `masks` is not defined in any visible cell; it must already
# exist in the kernel. 'gt' presumably stands for ground truth -- confirm.
fig = go.Figure()

gt_values = y_train[0][masks[1]].reshape(-1)
gt_trace = go.Scatter(
    y=gt_values,
    mode="markers",
    marker={"color": 1, "size": 4},
    name="gt",
)

# `add_trace` returns the figure, so as the last expression it renders the plot.
fig.add_trace(gt_trace)

In [17]:
# Display the shape of `y_pred` (bare last expression, so Jupyter renders it).
y_pred.shape


torch.Size([1461, 1])