In [1]:
import pandas as pd
import numpy as np
import random
import time
import torch
from preprocess import prepare_df
from sklearn.metrics import r2_score
from model.model_alex import Encoder, Decoder, Reparametrize,Model, loss_fn


In [4]:
# Target device for the model and tensors: the CUDA device with ordinal 1.
GPU_INDEX = 1
DEVICE = torch.device("cuda", GPU_INDEX)

In [2]:
# Load the imputed dataset (the first CSV column becomes the index) and discard
# the 'date' column; the dates are read again on their own in a later cell.
data = (
    pd.read_csv('utils/df_imputed.csv', index_col=0)
    .drop('date', axis=1)
)


In [3]:
# Columns handed to prepare_df as metadata; they are later used as the
# conditional input of the model.
META_COLUMNS = ['classid', 'igbp_land_use']

# prepare_df returns three collections that the cells below index by integer
# site position (df_sensor[i], df_meta[i], df_gpp[i]).
# NOTE(review): the exact container type comes from preprocess.prepare_df - confirm there.
df_sensor, df_meta, df_gpp = prepare_df(data, meta_columns=META_COLUMNS)

In [24]:
# Un-imputed GPP column, used only to know where real measurements exist.
gpp_all_sites = pd.read_csv('../data/df_20210510.csv', index_col=0)['GPP_NT_VUT_REF']

# Drop every row whose index label is 'CN-Cng'.
# NOTE(review): presumably this site is also absent from df_imputed.csv - confirm.
raw = gpp_all_sites.loc[gpp_all_sites.index != 'CN-Cng']

In [25]:
import operator

# Distinct index labels of `raw`, in order of first appearance.
sites = raw.index.unique()

# One list of booleans per site: True where the raw GPP value is present
# (i.e. the logical negation of isna()).
masks = [
    [operator.not_(is_missing) for is_missing in raw[raw.index == site_name].isna().values]
    for site_name in sites
]

In [28]:
# Shell escape: print the GPU status table (memory usage, utilisation) for this machine.
!nvidia-smi

Wed May 12 11:25:40 2021       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 450.80.02    Driver Version: 450.80.02    CUDA Version: 11.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  TITAN X (Pascal)    Off  | 00000000:04:00.0 Off |                  N/A |
| 32%   52C    P2    56W / 250W |   8557MiB / 12196MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  TITAN X (Pascal)    Off  | 00000000:05:00.0 Off |                  N/A |
| 23%   27C    P8     9W / 250W |      2MiB / 12196MiB |      0%      Default |
|       

In [27]:
# Sanity check: every per-site mask must have exactly one entry per row of the
# sensor frame stored at the same position; print a marker for each mismatch.
for site_idx, site_mask in enumerate(masks):
    n_rows = df_sensor[site_idx].values.shape[0]
    if n_rows != len(site_mask):
        print("problem")


In [19]:
# (rows, columns) of the sensor frame at position 4.
df_sensor[4].shape

(5113, 11)

In [51]:
# Leave-one-site-out split. The held-out positions are listed once and the
# training positions are derived from them, so the two lists cannot drift apart.
sites_to_test = [0]
sites_to_train = [i for i in range(len(df_sensor)) if i not in sites_to_test]


def _site_arrays(site_ids):
    """Build the per-site model arrays for the given site positions.

    Parameters
    ----------
    site_ids : iterable of int
        Positions into df_sensor / df_meta / df_gpp.

    Returns
    -------
    tuple of three lists, each with one numpy array per site:
        features     - sensor columns followed by the meta columns
        conditionals - the meta columns only
        targets      - GPP values reshaped to a single column
    """
    features = [pd.concat((df_sensor[i], df_meta[i]), axis=1).values for i in site_ids]
    conditionals = [df_meta[i].values for i in site_ids]
    targets = [df_gpp[i].values.reshape(-1, 1) for i in site_ids]
    return features, conditionals, targets


x_train, conditional_train, y_train = _site_arrays(sites_to_train)
x_test, conditional_test, y_test = _site_arrays(sites_to_test)



In [52]:
# --- Layer sizes ------------------------------------------------------------
# Encoder consumes one row of x (sensor + meta columns).
ENCODER_INPUT_DIM = x_train[0].shape[1]
ENCODER_OUTPUT_DIM = 16

# The reparametrisation layer maps the encoder output to the latent space.
LATENT_DIM = 32
REPARAM_INPUT_DIM = ENCODER_OUTPUT_DIM
REPARAM_OUTPUT_DIM = LATENT_DIM

# Decoder input is sized as latent width plus the number of conditional (meta)
# columns; its output has one value per sensor column.
DECODER_INPUT_DIM = LATENT_DIM + conditional_train[0].shape[1]
DECODER_OUTPUT_DIM = df_sensor[0].shape[1]

# NOTE(review): this rebinds DEVICE to a different GPU than the earlier
# `DEVICE = torch.device("cuda:" + str(1))` cell; whichever cell ran last wins.
# Confirm which device is intended and keep a single definition.
DEVICE = 'cuda:7'

# --- Model and optimiser -------------------------------------------------------
encoder = Encoder(ENCODER_INPUT_DIM, ENCODER_OUTPUT_DIM).to(DEVICE)
reparam = Reparametrize(REPARAM_INPUT_DIM, REPARAM_OUTPUT_DIM).to(DEVICE)
decoder = Decoder(DECODER_INPUT_DIM, DECODER_OUTPUT_DIM).to(DEVICE)
model = Model(encoder, decoder, reparam).to(DEVICE)

optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)

In [None]:
EPOCHS = 200

# Number of sensor columns. Each row of x is [sensor columns | meta columns];
# only the sensor part is used as the reconstruction target.
n_sensor_cols = len(df_sensor[0].columns)

for epoch in range(EPOCHS):
    # Per-epoch accumulators; each is averaged over the number of sites below.
    train_loss = 0.0
    train_kl_loss = 0.0
    train_recon_loss = 0.0
    train_label_loss = 0.0
    train_r2 = 0.0

    test_loss = 0.0
    test_kl_loss = 0.0
    test_recon_loss = 0.0
    test_label_loss = 0.0
    test_r2 = 0.0

    # ---- Training: one optimiser step per site (a whole site series is one batch) ----
    model.train()
    for (x, c, y) in zip(x_train, conditional_train, y_train):
        # Convert to tensors; unsqueeze(1) inserts a singleton axis after the row axis
        x = torch.FloatTensor(x).unsqueeze(1).to(DEVICE)
        c = torch.FloatTensor(c).to(DEVICE)
        y = torch.FloatTensor(y).to(DEVICE)

        # Get predictions
        out, mean, logvar, pz_mean, r_mean, r_logvar = model(x, c)

        # Remove the conditional from x
        x = x.squeeze(1)[:, :n_sensor_cols]

        # Get loss and update.
        # NOTE(review): the literal 1 is the 5th positional argument of loss_fn;
        # presumably a loss weight - confirm in model.model_alex.
        optimizer.zero_grad()
        loss, kl_loss, recon_loss, label_loss = loss_fn(out, x, mean, logvar, 1, pz_mean, r_mean, r_logvar, y)
        loss.backward()
        optimizer.step()

        # Update losses
        train_loss += loss.item()
        train_kl_loss += kl_loss.item()
        train_recon_loss += recon_loss.item()
        train_label_loss += label_loss.item()
        # NOTE(review): training R2 is computed over every row, whereas the test
        # R2 below is restricted to rows with an observed GPP value.
        train_r2 += r2_score(y.detach().cpu().numpy(), r_mean.detach().cpu().numpy())

    # ---- Evaluation on the held-out site(s) ----
    model.eval()
    with torch.no_grad():
        for site_idx, x, c, y in zip(sites_to_test, x_test, conditional_test, y_test):
            # Convert to tensors
            x = torch.FloatTensor(x).unsqueeze(1).to(DEVICE)
            c = torch.FloatTensor(c).to(DEVICE)
            y = torch.FloatTensor(y).to(DEVICE)

            # Get predictions
            out, mean, logvar, pz_mean, r_mean, r_logvar = model(x, c)

            # Remove the conditional from x
            x = x.squeeze(1)[:, :n_sensor_cols]

            # Get loss
            loss, kl_loss, recon_loss, label_loss = loss_fn(out, x, mean, logvar, 1, pz_mean, r_mean, r_logvar, y)

            # Update losses
            test_loss += loss.item()
            test_kl_loss += kl_loss.item()
            test_recon_loss += recon_loss.item()
            test_label_loss += label_loss.item()

            # Score only the rows where the raw GPP was observed, using the mask
            # that belongs to the site being evaluated (not a fixed masks[0]).
            observed = masks[site_idx]
            test_r2 += r2_score(y.detach().cpu().numpy()[observed], r_mean.detach().cpu().numpy()[observed])

    # ---- Average over sites ----
    train_loss /= len(x_train)
    train_kl_loss /= len(x_train)
    train_recon_loss /= len(x_train)
    train_label_loss /= len(x_train)
    train_r2 /= len(x_train)

    test_loss /= len(x_test)
    test_kl_loss /= len(x_test)
    test_recon_loss /= len(x_test)
    test_label_loss /= len(x_test)
    test_r2 /= len(x_test)

    print(f"Epoch: {epoch+1}/{EPOCHS}")
    print(f"Train loss: {train_loss:.4f} | Recon loss: {train_recon_loss:.4f} | KL loss: {train_kl_loss:.4f} | Label loss: {train_label_loss:.4f}")
    print(f"Test loss: {test_loss:.4f} | Recon loss: {test_recon_loss:.4f} | KL loss: {test_kl_loss:.4f} | Label loss: {test_label_loss:.4f}")
    print(f"Train R2: {train_r2:.4f}")
    print(f"Test R2: {test_r2:.4f}")

Epoch: 1/200
Train loss: 1.8352 | Recon loss: 1.0181 | KL loss: 0.3207 | Label loss: 0.4964
Test loss: 1.9411 | Recon loss: 1.0079 | KL loss: 0.3044 | Label loss: 0.6287
Train R2: 0.0561
Test R2: -0.1786
Epoch: 2/200
Train loss: 1.6677 | Recon loss: 0.9941 | KL loss: 0.2772 | Label loss: 0.3965
Test loss: 1.8903 | Recon loss: 0.9871 | KL loss: 0.2802 | Label loss: 0.6230
Train R2: 0.2333
Test R2: -0.1930
Epoch: 3/200
Train loss: 1.4965 | Recon loss: 0.9501 | KL loss: 0.2415 | Label loss: 0.3049
Test loss: 1.7980 | Recon loss: 0.9333 | KL loss: 0.2477 | Label loss: 0.6170
Train R2: 0.3417
Test R2: -0.2183
Epoch: 4/200
Train loss: 1.2678 | Recon loss: 0.8428 | KL loss: 0.2179 | Label loss: 0.2072
Test loss: 1.6565 | Recon loss: 0.8330 | KL loss: 0.2176 | Label loss: 0.6060
Train R2: 0.4084
Test R2: -0.2378
Epoch: 5/200
Train loss: 1.0513 | Recon loss: 0.7329 | KL loss: 0.1905 | Label loss: 0.1279
Test loss: 1.5512 | Recon loss: 0.7920 | KL loss: 0.1811 | Label loss: 0.5781
Train R2: 0.44

In [138]:
# Run the trained model on two training sites to obtain their latent means.
# NOTE(review): position 36 in x_train must correspond to the `site` name used
# in the seasonal colouring cell and to y_train[36] in the plot cell - confirm.

# Put the model in evaluation mode explicitly: if the training cell was
# interrupted mid-epoch the model may still be in train() mode.
model.eval()

# Pure inference: no autograd graph is needed for these forward passes, so
# gradient tracking is disabled to avoid holding the graph in GPU memory.
with torch.no_grad():
    x1 = torch.FloatTensor(x_train[0]).unsqueeze(1).to(DEVICE)
    c1 = torch.FloatTensor(conditional_train[0]).to(DEVICE)

    x2 = torch.FloatTensor(x_train[36]).unsqueeze(1).to(DEVICE)
    c2 = torch.FloatTensor(conditional_train[36]).to(DEVICE)

    # Only the reconstruction and the latent mean / log-variance are kept.
    out1, mean1, logvar1, _, _, _ = model(x1, c1)
    out2, mean2, logvar2, _, _, _ = model(x2, c2)



In [31]:
# The 'date' column of the imputed dataset, indexed by the first CSV column
# (compared against site names in the seasonal colouring cell).
dates = pd.read_csv('utils/df_imputed.csv', index_col=0).loc[:, 'date']



In [128]:
# Display the sensor frame stored at position 1 (pandas abbreviates the middle rows).
df_sensor[1]

Unnamed: 0_level_0,TA_F,TA_F_DAY,TA_F_NIGHT,SW_IN_F,LW_IN_F,VPD_F,PA_F,P_F,WS_F,wscal,fpar
sitename,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
AU-DaP,0.590463,0.453090,0.559006,-0.016712,1.124821,-0.820642,-0.582705,0.026965,-1.500803,1.112448,-0.141235
AU-DaP,0.881695,0.592673,0.995299,-0.269499,1.056200,-0.465562,-0.721137,-0.334313,-0.015120,1.064970,-0.099243
AU-DaP,1.153721,0.940978,1.191533,1.154609,0.915528,0.090558,-0.843284,-0.292120,0.454474,1.022847,-0.058022
AU-DaP,1.316249,1.123936,1.333106,1.140427,1.032234,0.321703,-0.905714,-0.264749,0.191043,0.984703,-0.017569
AU-DaP,1.379598,1.074691,1.505457,0.788985,1.073150,0.192750,-0.875856,0.043060,-0.515802,0.982577,0.022119
...,...,...,...,...,...,...,...,...,...,...,...
AU-DaP,-0.243675,-1.009268,0.341105,-3.363518,0.930481,-1.437810,-0.362841,-0.251018,-0.839772,1.601560,-0.467591
AU-DaP,0.222984,-0.255260,0.525520,-1.058786,1.091227,-1.160670,-0.398128,-0.120620,-1.122838,1.568323,-0.496684
AU-DaP,0.615401,0.188274,0.865297,-0.024410,1.153063,-0.744374,-0.313982,-0.040962,-1.814957,1.542335,-0.527445
AU-DaP,0.572691,0.075760,0.890411,0.239495,1.017947,-0.906276,-0.221694,0.049153,-1.580978,1.526360,-0.559869


In [139]:
# Month numbers per season, northern-hemisphere convention (winter = Dec-Feb,
# summer = Jun-Aug, matching the legend labels of the latent-space figure).
# For a southern-hemisphere site swap winter<->summer and spring<->autumn.
season = {"winter": [12, 1, 2], "spring": [3, 4, 5], "summer": [6, 7, 8], "autumn": [9, 10, 11]}
site = "IT-Tor"

# One named colour per season, so that `colors` holds only colour strings.
season_colors = {
    "winter": "cornflowerblue",
    "spring": "mediumseagreen",
    "summer": "coral",
    "autumn": "goldenrod",
}

# Reverse lookup: month number -> season name.
month_to_season = {month: name for name, months in season.items() for month in months}

# Date strings of the selected site; the month is the second '-'-separated field.
site_dates = dates[dates.index == site].values

colors = []       # one colour per row of the site
summer_idx = []   # row positions that fall in summer
winter_idx = []   # row positions that fall in winter
for i, date in enumerate(site_dates):
    month = int(date.split('-')[1])
    season_name = month_to_season[month]
    colors.append(season_colors[season_name])
    if season_name == "summer":
        summer_idx.append(i)
    elif season_name == "winter":
        winter_idx.append(i)
   

In [140]:
from sklearn.decomposition import PCA

# Project the latent means of the second inspected site onto their first two
# principal components for plotting.
latent_means = mean2.detach().cpu()
pca = PCA(n_components=2)
mean = pca.fit_transform(latent_means)

In [25]:
from plotly import graph_objects as go

# Scratch figure: histogram of the integers 50..69 with a vertical line at their mean.
r = list(range(50, 70))

fig = go.Figure()
fig.add_trace(go.Histogram(x=r, nbinsx=20))
fig.add_vline(x=np.mean(r))


In [143]:
from plotly import graph_objects as go
from plotly.subplots import make_subplots

# Shared lookups for the traces below.
color_array = np.asarray(colors)
both_idx = winter_idx + summer_idx  # winter rows first, then summer rows

fig = make_subplots(rows=2, cols=1)

# Row 1: the 2-D PCA projection of the latent means, one trace per season,
# coloured by season.
seasonal_traces = (
    ('Winter (Dec, Jan, Feb)', winter_idx),
    ('Summer (Jun, Jul, Aug) ', summer_idx),
)
for trace_name, idx in seasonal_traces:
    fig.add_trace(
        go.Scatter(
            x=mean[idx, 0],
            y=mean[idx, 1],
            mode='markers',
            name=trace_name,
            marker=dict(size=4, color=color_array[idx]),
        ),
        row=1, col=1,
    )

# Row 2: the same winter + summer points, coloured by the GPP target instead.
gpp_values = y_train[36][both_idx].reshape(-1)
fig.add_trace(
    go.Scatter(
        x=mean[both_idx, 0],
        y=mean[both_idx, 1],
        mode='markers',
        name='GPP',
        marker=dict(size=4, color=gpp_values),
    ),
    row=2, col=1,
)

fig.update_layout(
    title="Latent Space at site: IT-Tor",
    legend_title="Seasons",
)
