In [146]:
import pandas as pd
import numpy as np
from preprocess import prepare_df
import torch.nn as nn
import torch
import torch.nn.functional as F
from model.model import Model
import torch.optim as optim
from sklearn.metrics import r2_score



In [16]:
# Preferred GPU for this notebook. Fall back to the CPU when CUDA is missing or
# the machine exposes fewer devices than GPU_INDEX + 1, so that later
# `.to(DEVICE)` calls do not fail on a different machine.
GPU_INDEX = 3
if torch.cuda.is_available() and torch.cuda.device_count() > GPU_INDEX:
    DEVICE = torch.device(f"cuda:{GPU_INDEX}")
else:
    DEVICE = torch.device("cpu")

In [12]:
# Imputed table, indexed by its first CSV column; the 'date' column is not needed.
data = pd.read_csv('utils/df_imputed.csv', index_col=0)
data = data.drop(columns=['date'])

import operator
# Raw 'GPP_NT_VUT_REF' series, with the 'CN-Cng' rows filtered out.
raw = pd.read_csv('../data/df_20210510.csv', index_col=0)['GPP_NT_VUT_REF']
raw = raw.loc[raw.index != 'CN-Cng']
sites = raw.index.unique()

# One list of booleans per site: True where the raw value is present (not NaN).
masks = [
    [operator.not_(is_missing) for is_missing in raw[raw.index == site_name].isna().values]
    for site_name in sites
]

In [13]:
# Split the imputed table into three collections. Later cells index each of them
# by an integer site position and read `.values` from the selected element.
# NOTE(review): prepare_df is project-local; judging by the names these are the
# sensor inputs, site metadata and GPP target per site — confirm against preprocess.py.
df_sensor, df_meta, df_gpp = prepare_df(data)

Index(['AU-ASM', 'CN-Qia', 'DE-Obe', 'DE-Tha', 'FI-Hyy', 'FI-Sod', 'FR-LBr',
       'IT-Lav', 'IT-SR2', 'IT-SRo', 'NL-Loo', 'RU-Fyo', 'US-GLE', 'US-Me2'],
      dtype='object', name='sitename')

In [198]:
# Class held out as the test (target) set; every other class is used for training.
TEST_CLASS = "CRO"

# Site names in order of first appearance. A site's position in this index is the
# integer later used to pick its entry from df_sensor / df_meta / df_gpp.
# NOTE(review): assumes prepare_df keeps the same site order — confirm.
all_sites = data.index.unique()

# Computed once instead of rebuilding `data.index.unique()` for every site.
train_site_names = data[data.classid != TEST_CLASS].index.unique()
test_site_names = data[data.classid == TEST_CLASS].index.unique()

# get_indexer returns the position of each name inside all_sites.
sites_to_train = list(all_sites.get_indexer(train_site_names))
sites_to_test = list(all_sites.get_indexer(test_site_names))

In [188]:
# NOTE(review): x_train is only assigned in a later cell of this notebook, so this
# cell raises NameError on Restart & Run All; move it below the split cell or drop it.
len(x_train)

39

In [199]:
# Alternative split kept for reference (hold out only the site at position 0):
#sites_to_train = list(range(len(df_sensor)))
#sites_to_train.remove(0)
#sites_to_test = [0]

# Training sites: one array per site for inputs, conditioning data and target.
# The target is reshaped to a single column.
x_train, conditional_train, y_train = [], [], []
for site_idx in sites_to_train:
    x_train.append(df_sensor[site_idx].values)
    conditional_train.append(df_meta[site_idx].values)
    y_train.append(df_gpp[site_idx].values.reshape(-1, 1))

# Test sites, built the same way.
x_test, conditional_test, y_test = [], [], []
for site_idx in sites_to_test:
    x_test.append(df_sensor[site_idx].values)
    conditional_test.append(df_meta[site_idx].values)
    y_test.append(df_gpp[site_idx].values.reshape(-1, 1))

In [79]:
from torch.autograd import Function


class ReverseLayerF(Function):
    """Gradient reversal layer.

    The forward pass returns a view of the input with unchanged values; the
    backward pass negates the incoming gradient and scales it by ``alpha``.
    """

    @staticmethod
    def forward(ctx, x, alpha):
        # Keep the scaling factor around for the backward pass.
        ctx.alpha = alpha
        return x.view(x.size())

    @staticmethod
    def backward(ctx, grad_output):
        flipped = -grad_output
        # Second return value: no gradient is propagated to ``alpha``.
        return flipped * ctx.alpha, None


In [153]:
import torch.nn as nn


class CNNModel(nn.Module):
    """Domain-adversarial regressor built on an LSTM feature extractor.

    The LSTM output feeds two heads: a regression head producing one value per
    row, and a domain classifier that receives the same features through the
    gradient reversal layer ``ReverseLayerF``. The class name is kept for
    compatibility with existing callers even though it has no convolutions.

    Args:
        input_dim: number of input features per row.
        hidden_dim: LSTM hidden size, which is also the input width of both
            heads. Defaults to 256.
        num_domains: number of classes predicted by the domain classifier.
            Defaults to 2.
    """

    def __init__(self, input_dim, hidden_dim=256, num_domains=2):
        super().__init__()
        # Sub-module names are unchanged so existing state_dict keys still match.
        self.feature = nn.Sequential()
        self.feature.add_module('f_lstm', nn.LSTM(input_size=input_dim, hidden_size=hidden_dim))

        self.regressor = nn.Sequential()
        self.regressor.add_module('c_fc1', nn.Linear(hidden_dim, 64))
        self.regressor.add_module('c_relu1', nn.ReLU(inplace=True))
        self.regressor.add_module('c_fc2', nn.Linear(64, 16))
        self.regressor.add_module('c_relu2', nn.ReLU(inplace=True))
        self.regressor.add_module('c_fc3', nn.Linear(16, 1))

        self.domain_classifier = nn.Sequential()
        self.domain_classifier.add_module('d_fc1', nn.Linear(hidden_dim, 100))
        self.domain_classifier.add_module('d_relu1', nn.ReLU(inplace=True))
        self.domain_classifier.add_module('d_fc2', nn.Linear(100, num_domains))
        self.domain_classifier.add_module('d_softmax', nn.LogSoftmax(dim=1))

    def forward(self, input_data, alpha):
        """Run both heads on ``input_data``.

        Args:
            input_data: tensor whose last dimension is ``input_dim``. A new
                dimension is inserted at position 1 before the LSTM, which uses
                the default ``batch_first=False``; a 2-D input is therefore
                processed as one sequence with batch size 1.
                NOTE(review): assumes a 2-D (rows, input_dim) input — confirm.
            alpha: scale applied to the reversed gradient that flows from the
                domain classifier back into the feature extractor.

        Returns:
            Tuple ``(gpp_output, domain_output)``: the regression output with a
            last dimension of 1, and log-probabilities over the domains.
        """
        # The LSTM also returns its final (h, c) state, which is not used here.
        feature, _ = self.feature(input_data.unsqueeze(1))
        feature = feature.squeeze(1)
        reverse_feature = ReverseLayerF.apply(feature, alpha)
        gpp_output = self.regressor(feature)
        domain_output = self.domain_classifier(reverse_feature)

        return gpp_output, domain_output

In [200]:
# Build the network for 11 input features and move its parameters to DEVICE.
# NOTE(review): 11 presumably matches the column count of the df_sensor frames — confirm.
model = CNNModel(input_dim=11)
model = model.to(DEVICE)

# A single Adam optimizer updates the feature extractor and both heads.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)


In [201]:
# Mean squared error for the regression head.
loss_regression = nn.MSELoss(reduction='mean')
# Negative log-likelihood for the domain head, whose output is log-probabilities.
loss_domain = nn.NLLLoss(reduction='mean')

In [202]:
# Domain-adversarial training. Each epoch accumulates, over all sites, the
# regression loss and domain loss on the training (source) sites plus the domain
# loss on the test (target) sites, then takes ONE optimizer step on the summed loss.
# Target labels are used only for the R2 report, never for the loss.
N_EPOCHS = 100
# NOTE(review): the alpha schedule is normalised by 50 epochs while training runs
# for N_EPOCHS = 100, so `p` grows up to 2.0 — confirm this is intended.
ALPHA_RAMP_EPOCHS = 50
ALPHA_SCALE = 0.9

# Convert every site to a tensor on DEVICE once instead of once per epoch.
train_tensors = [
    (torch.FloatTensor(x).to(DEVICE), torch.FloatTensor(y).to(DEVICE))
    for (x, y) in zip(x_train, y_train)
]
test_tensors = [
    (torch.FloatTensor(x).to(DEVICE), torch.FloatTensor(y).to(DEVICE))
    for (x, y) in zip(x_test, y_test)
]

for e in range(N_EPOCHS):
    model.train()
    # One backward pass per epoch, so gradients are cleared once per epoch.
    optimizer.zero_grad()
    err_s_reg = 0
    err_s_domain = 0
    err_t_domain = 0

    # Source sites: regression loss + domain loss with label 0.
    for i, (x, y) in enumerate(train_tensors, start=1):
        domain_label = torch.zeros(x.shape[0]).long().to(DEVICE)
        # Sigmoid-shaped ramp of the gradient-reversal strength.
        p = float(i + e * len(train_tensors)) / ALPHA_RAMP_EPOCHS / len(train_tensors)
        alpha = (2. / (1. + np.exp(-10 * p)) - 1) * ALPHA_SCALE

        regression_output, domain_output = model(input_data=x, alpha=alpha)
        # (prediction, target) is the documented argument order for MSELoss.
        err_s_reg += loss_regression(regression_output, y)
        err_s_domain += loss_domain(domain_output, domain_label)

    # Target sites: domain loss only, with label 1. `alpha` is the value left
    # over from the last source site of this epoch.
    for (x_t, _) in test_tensors:
        domain_label = torch.ones(x_t.shape[0]).long().to(DEVICE)
        _, domain_output = model(input_data=x_t, alpha=alpha)
        err_t_domain += loss_domain(domain_output, domain_label)

    err = err_t_domain + err_s_domain + err_s_reg
    err.backward()
    optimizer.step()

    # Evaluation: mean per-site R2 on the target sites. no_grad avoids building
    # autograd graphs that are never used.
    r2 = 0
    model.eval()
    with torch.no_grad():
        for (x, y) in test_tensors:
            regression_output, domain_output = model(input_data=x, alpha=alpha)
            r2 += r2_score(y_true=y.cpu().numpy(), y_pred=regression_output.cpu().numpy())
    r2 /= len(test_tensors)
    print(f"Epoch {e}")
    print(f" err_t_domain: {err_t_domain:.3f} | err_s_domain:{err_s_domain:.3f} |err_s_reg: {err_s_reg:.3f}")
    print(f"Test R2: {r2}")

Epoch 0
 err_t_domain: 1.260 | err_s_domain:38.807 |err_s_reg: 51.721
Test R2: -0.008042988035887544
Epoch 1
 err_t_domain: 1.283 | err_s_domain:38.139 |err_s_reg: 51.320
Test R2: -0.0004105433270125691
Epoch 2
 err_t_domain: 1.304 | err_s_domain:37.562 |err_s_reg: 50.878
Test R2: 0.009130785000467989
Epoch 3
 err_t_domain: 1.324 | err_s_domain:37.009 |err_s_reg: 50.321
Test R2: 0.02215661434211208


KeyboardInterrupt: 