In [1]:
#from model.model import EncoderWithTime, Reparametrize, DecoderNoTime, Regressor, Model
from sklearn.metrics import r2_score
from utils.preprocess import prepare_df, normalize
from utils.visualization import plot_site
from model.loss import loss_fn
from tqdm import tqdm
import json
import torch
import pandas as pd
import argparse
import numpy as np
import time

In [17]:
DEVICE = torch.device("cuda:" + str(4))

In [450]:
data = pd.read_csv('../data/df_final.csv', index_col=0).drop(columns=['lat', 'lon', 'elv','date','c4','whc'])                                                                       

In [474]:
df_sensor, df_meta, df_gpp = prepare_df(data,sites=good_sites)

In [379]:
good_sites = pd.read_csv("../data/df_20210507.csv")['sitename'].unique()


Columns (14) have mixed types.Specify dtype option on import or set low_memory=False.



In [353]:
data['plant_functional_type'].unique()

array([nan, 'Grass', 'Shrub', 'Evergreen Broadleaf Trees',
       'Deciduous Broadleaf Trees', 'Cereal crop',
       'Evergreen Needleleaf Trees', 'Water'], dtype=object)

In [406]:
data[data.plant_functional_type == 'Cereal crop'].index.unique()

Index(['CH-Oe1', 'CZ-wet', 'DE-Geb', 'DE-Kli', 'IT-PT1', 'JP-SMF'], dtype='object', name='sitename')

In [407]:
list(good_sites).index('JP-SMF')

40

In [475]:
import torch.nn as nn
import torch

class EncoderWithTime(nn.Module):
    def __init__(self, input_features, output_features):
        super().__init__()
        self.input_features = input_features
        self.output_features = output_features

        self.rnn = nn.LSTM(input_size=input_features, hidden_size=output_features)
    
    def forward(self, x):
        outputs, (h, c) = self.rnn(x)

        return outputs.squeeze(1) #shape=(seq_len, batch=1 i think, num_dir * output_features)

class Reparametrize(nn.Module):
    def __init__(self, encoder_output, latent_size, conditional_size):
        super().__init__()
        self.encoder_output = encoder_output
        self.latent_size = latent_size

        self.fc_to_mean = nn.Linear(encoder_output+conditional_size, latent_size)
        self.fc_to_logvar = nn.Linear(encoder_output+conditional_size, latent_size)

        nn.init.xavier_uniform_(self.fc_to_mean.weight)
        nn.init.xavier_uniform_(self.fc_to_logvar.weight)

    def forward(self, x, condition):
        x =  torch.cat([x, condition], dim=1)
        self.mean = self.fc_to_mean(x)
        self.logvar = self.fc_to_logvar(x)

        std = torch.exp(0.5 * self.logvar)
        eps = torch.randn_like(std)
        z = eps.mul(std).add_(self.mean)
        return z, self.mean, self.logvar

class DecoderNoTime(nn.Module):
    def __init__(self, latent_size, input_features, condition_features, condition_decoder):
        super().__init__()
        self.latent_size = latent_size
        self.input_features = input_features
        self.condition_decoder = condition_decoder
    
        if self.condition_decoder:
            self.fc1 = nn.Sequential(
                nn.Linear(in_features=latent_size + condition_features, out_features=64),
                nn.ReLU()
            )
        else:
            self.fc1 = nn.Sequential(
                nn.Linear(in_features=latent_size, out_features=64),
                nn.ReLU()
            )
        self.fc2 = nn.Sequential(
            nn.Linear(in_features=64, out_features=64),
            nn.ReLU()
        )
        self.fc3 = nn.Sequential(
            nn.Linear(in_features=64, out_features=64),
            nn.ReLU()
        )
        self.fc4 = nn.Sequential(
            nn.Linear(in_features=64, out_features=64),
            nn.ReLU()
        )
        self.fc5 = nn.Linear(in_features=64, out_features=input_features)

    def forward(self, z, condition):
        if self.condition_decoder:
            z = torch.cat([z, condition], dim=1)
        z = self.fc1(z)
        z = self.fc2(z)
        z = self.fc3(z)
        z = self.fc4(z)
        x = self.fc5(z)

        return x

class Regressor(nn.Module):
    def __init__(self, input_features, conditional_features):
        super().__init__()        
        self.fc1 = nn.Sequential(
            nn.Linear(in_features=input_features+conditional_features, out_features=32),
            nn.ReLU()
        )

        self.fc2 = nn.Sequential(
            nn.Linear(in_features=32, out_features=32),
            nn.ReLU()
        )

        self.fc3 = nn.Sequential(
            nn.Linear(in_features=32, out_features=32),
            nn.ReLU()
        )

        self.fc4 = nn.Sequential(
            nn.Linear(in_features=32, out_features=32),
            nn.ReLU()
        )

        self.fc5 = nn.Sequential(
            nn.Linear(in_features=32, out_features=16),
            nn.ReLU()
        )
        
        self.fc6 = nn.Linear(16, 1)
    
    def forward(self, x, conditional):
        x = torch.cat([x, conditional], dim=1)
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.fc4(x)
        x = self.fc5(x)
        x = self.fc6(x)

        return x

class Model(nn.Module):
    def __init__(self, encoder, reparametrize, decoder, regressor):
        super().__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.reparametrize = reparametrize
        self.regressor = regressor

    def forward(self, x, conditional):
        x = self.encoder(x)
        z, mean, logvar = self.reparametrize(x, conditional)
        x = self.decoder(z, conditional)
        y = self.regressor(z, conditional)
        return x, mean, logvar, y
   

In [501]:
ENCODER_OUTPUT_SIZE = 256
LATENT_SIZE = 64
CONDITIONAL_FEATURES = len(df_meta[0].columns)
CONDITION_DECODER = True
INPUT_FEATURES = len(df_sensor[0].columns) 

In [502]:
sites_to_train = list(range(len(df_sensor)))
sites_to_train.remove(3)
sites_to_test = [3]

x_train = [df_sensor[i].values for i in sites_to_train]
conditional_train = [df_meta[i].values for i in sites_to_train]
y_train = [df_gpp[i].values.reshape(-1,1) for i in sites_to_train]

x_test = [df_sensor[i].values for i in sites_to_test]
conditional_test = [df_meta[i].values for i in sites_to_test]
y_test = [df_gpp[i].values.reshape(-1,1) for i in sites_to_test]

In [503]:
encoder = EncoderWithTime(INPUT_FEATURES, ENCODER_OUTPUT_SIZE).to(DEVICE)
reparam = Reparametrize(ENCODER_OUTPUT_SIZE, LATENT_SIZE, CONDITIONAL_FEATURES).to(DEVICE)
decoder = DecoderNoTime(LATENT_SIZE, len(df_sensor[0].columns), CONDITIONAL_FEATURES, CONDITION_DECODER).to(DEVICE)
regressor = Regressor(LATENT_SIZE, CONDITIONAL_FEATURES)
model = Model(encoder, reparam, decoder, regressor).to(DEVICE)
  

optimizer = torch.optim.Adam(model.parameters())


In [504]:
for epoch in range(100):
    train_loss = 0.0
    train_kl_loss = 0.0
    train_recon_loss = 0.0
    train_reg_loss = 0.0
    train_r2 = 0.0
    start = time.time()
    model.train()
    for (x, y, conditional) in zip(x_train, y_train, conditional_train):
        x = torch.FloatTensor(x).unsqueeze(1).to(DEVICE)
        y = torch.FloatTensor(y).to(DEVICE)
        conditional = torch.FloatTensor(conditional).to(DEVICE)
        if x.shape[0] == 0:
            continue
        outputs, mean, logvar, y_pred = model(x, conditional)
        x = x.squeeze(1)

        optimizer.zero_grad()
        loss, recon_loss, kl_loss, reg_loss = loss_fn(outputs, x, y_pred, y, mean, logvar, 1)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        train_r2 += r2_score(y_true=y.detach().cpu().numpy(), y_pred=y_pred.detach().cpu().numpy())
    model.eval()
    with torch.no_grad():
          for (x, y, conditional) in zip(x_test, y_test, conditional_test):
            x = torch.FloatTensor(x).unsqueeze(1).to(DEVICE)
            y = torch.FloatTensor(y).to(DEVICE)
            conditional = torch.FloatTensor(conditional).to(DEVICE)
            outputs, mean, logvar, y_pred = model(x, conditional)


            x = x.squeeze(1)
            test_loss, test_recon_loss, test_kl_loss, test_reg_loss = loss_fn(outputs, x, y_pred, y, mean, logvar, 1)
            test_r2 = r2_score(y_true=y.detach().cpu().numpy(), y_pred=y_pred.detach().cpu().numpy())
    
    end = time.time()       
    print(f"Epoch: {epoch+1} ({end-start:.2f}s)")
    print(f"Train loss: {train_loss / len(sites_to_train):.2f} | R2: {train_r2 / len(sites_to_train):.2f}")
    print(f"Test loss: {test_loss:.2f} | R2: {test_r2:.2f}")      

Epoch: 1 (6.23s)
Train loss: 1.59 | R2: 0.23
Test loss: 1.16 | R2: 0.58
Epoch: 2 (6.17s)
Train loss: 0.96 | R2: 0.67
Test loss: 0.96 | R2: 0.77
Epoch: 3 (6.15s)
Train loss: 0.84 | R2: 0.73
Test loss: 0.88 | R2: 0.82
Epoch: 4 (6.19s)
Train loss: 0.80 | R2: 0.74
Test loss: 0.85 | R2: 0.82
Epoch: 5 (6.15s)
Train loss: 0.77 | R2: 0.75
Test loss: 0.82 | R2: 0.83
Epoch: 6 (6.16s)
Train loss: 0.74 | R2: 0.76
Test loss: 0.79 | R2: 0.83
Epoch: 7 (6.16s)
Train loss: 0.70 | R2: 0.76
Test loss: 0.74 | R2: 0.84
Epoch: 8 (6.17s)
Train loss: 0.67 | R2: 0.76
Test loss: 0.70 | R2: 0.85
Epoch: 9 (6.18s)
Train loss: 0.64 | R2: 0.77
Test loss: 0.70 | R2: 0.84
Epoch: 10 (5.68s)
Train loss: 0.62 | R2: 0.77
Test loss: 0.68 | R2: 0.85
Epoch: 11 (5.65s)
Train loss: 0.60 | R2: 0.78
Test loss: 0.71 | R2: 0.82
Epoch: 12 (5.65s)
Train loss: 0.58 | R2: 0.79
Test loss: 0.66 | R2: 0.89
Epoch: 13 (5.65s)
Train loss: 0.57 | R2: 0.79
Test loss: 0.61 | R2: 0.89
Epoch: 14 (5.64s)
Train loss: 0.55 | R2: 0.79
Test loss: 0.5

KeyboardInterrupt: 

In [None]:
x = torch.FloatTensor(x_test[0]).unsqueeze(1).to(DEVICE)
conditional = torch.FloatTensor(conditional_test[0]).to(DEVICE)



outputs, mean, logvar, y_pred = model(x,conditional)

In [415]:
fig = go.Figure()

# Add traces
fig.add_trace(go.Scatter(x=mean[:,1].detach().cpu(), y=mean[:,0].detach().cpu(),
                    mode='markers',
                    marker = dict(
                        color = y_test[0].reshape(-1),
                        colorscale ="viridis",
                        showscale = True
                    ),
                    
                    name='reconstructed'))

In [583]:
x = torch.FloatTensor(x_train[1]).unsqueeze(1).to(DEVICE)
conditional = torch.FloatTensor(conditional_train[1]).to(DEVICE)



outputs, mean1, logvar, y_pred = model(x,conditional)

In [584]:
x = torch.FloatTensor(x_train[20]).unsqueeze(1).to(DEVICE)
conditional = torch.FloatTensor(conditional_train[20]).to(DEVICE)



outputs, mean2, logvar, y_pred = model(x,conditional)

In [585]:
x = torch.FloatTensor(x_train[41]).unsqueeze(1).to(DEVICE)
conditional = torch.FloatTensor(conditional_train[41]).to(DEVICE)



outputs, mean3, logvar, y_pred = model(x,conditional)

In [586]:
mean2.shape

torch.Size([5113, 64])

In [587]:
from sklearn.manifold import TSNE
from sklearn.decomposition import PCA
d = TSNE(n_components=2)
emb = d.fit_transform(np.vstack((mean1.detach().cpu()[:1000,:],mean2.detach().cpu()[:1000,:], mean3.detach().cpu()[:1000,:])))


In [588]:
emb.shape

(3000, 2)

mean2.shape

In [589]:
mean1 = emb[:1000,:]
mean2 = emb[1000:,:]
mean3 = emb[2000:,:]

In [590]:
fig = go.Figure()
fig.add_trace(go.Scatter(x=mean1[:,0], y=mean1[:,1],
                    mode='markers',
                    marker = dict(
                        color = 1,
                        size=3
                    ),
                    name='site1'))
fig.add_trace(go.Scatter(x=mean2[:,0], y=mean2[:,1],
                    mode='markers',
                    marker = dict(
                        color = 'coral',
                        size=3
                    ),
                    name='site2'))
fig.add_trace(go.Scatter(x=mean3[:,0], y=mean3[:,1],
                    mode='markers',
                    marker = dict(
                        color = 'purple',
                        size=3
                    ),
                    name='site3'))