In [1]:
import pandas as pd
from preprocess import prepare_df
import torch
import torch.nn as nn
import torch.nn.functional as F

import torch.optim as optim
from torch.autograd import Function
from sklearn.metrics import r2_score
import numpy as np

In [2]:
data = pd.read_csv('../lstm/utils/df_imputed.csv', index_col=0).drop(columns=['date'])

In [3]:
df_sensor_s, df_sensor_t, df_gpp_s, df_gpp_t, df_domain_s, df_domain_t, masks = prepare_df(data)

In [4]:
DEVICE = torch.device("cpu")

In [5]:
# Part 1: feature extraction
class CausalConv1d(nn.Module):
    def __init__(self, in_channels, out_channels, kernel_size, dilation=1, **kwargs):
        super().__init__()
        self.pad = (kernel_size - 1) * dilation
        self.conv = nn.Conv1d(in_channels, out_channels, kernel_size, padding=self.pad, dilation=dilation, **kwargs)
    
    def forward(self, x):
        x = self.conv(x)
        return x[:, :, :-self.conv.padding[0]]

class ConvFeatureExtractor(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv_blocks = nn.Sequential(
            CausalConv1d(11, 32, 2),
            nn.ReLU(),
            CausalConv1d(32, 64, 2),
            nn.ReLU(),
            CausalConv1d(64, 128, 2),
        )
    def forward(self, x):
        return self.conv_blocks(x)

In [6]:
# top branch: regressor 
class RecurrentRegressor(nn.Module):
    # gets a sequence of shape (seq len, batch size, n_channels)
    def __init__(self, input_dim, hidden_dim, num_layers, bidirectional):
        super().__init__()
        self.input_dim = input_dim
        self.hidden_dim = hidden_dim
        self.num_layers = num_layers
        self.bidirectional = bidirectional
        self.num_directions = 2 if self.bidirectional else 1

        self.rnn = nn.LSTM(input_size=input_dim, hidden_size=hidden_dim, num_layers=num_layers, bidirectional=bidirectional)
        self.fc = nn.Sequential(
            nn.Linear(self.num_directions * hidden_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        )

    def forward(self, x):
        output, (h, c) = self.rnn(x)
        # output: seq_len, batch, num_directions * hidden_size
        T, N, C = output.shape
        output = output.view(T*N, -1) # seq_len * batch, num_directions * hidden_size
        output = self.fc(output).view(T, N, -1) #  seq_len, batch, 1
        return output

In [7]:
# one domain discriminator for every source domain
class DomainClassifier(nn.Module):
    # gets a sequence of shape (seq len, batch size, n_channels) --> (batch_size, seq_len * n_channels)
    def __init__(self, n_channels):
        super().__init__()
        self.n_channels = n_channels
        self.fc = nn.Sequential(
            nn.Linear(n_channels, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
            nn.Sigmoid()
        )
    def forward(self, x):
        # x: (seq len, batch size, n_channels)
        # average over the seq_len dim
        # 1 prediction for each time step
        #x = torch.mean(x, dim=1) # x: (batch_size, n_channels)
        y_pred = self.fc(x)
        return y_pred

In [8]:
class ReverseLayerF(Function):
    @staticmethod
    def forward(ctx, x, alpha):
        ctx.alpha = alpha

        return x.view_as(x)

    @staticmethod
    def backward(ctx, grad_output):
        output = grad_output.neg() * ctx.alpha

        return output, None

In [9]:
class Model(nn.Module):
    def __init__(self, extractor, regressor, classifiers):
        super().__init__()
        self.extractor = extractor
        self.regressor = regressor
        self.classifiers = classifiers

    def forward(self, s, t, alpha, k):
        s = s.permute(0, 2, 1)
        t = t.permute(0, 2, 1)

        s_features = self.extractor(s)
        t_features = self.extractor(t)
        
        s_features = s_features.permute(2, 0, 1)
        t_features = t_features.permute(2, 0, 1)
        y_pred = self.regressor(s_features)
        
        rev_s_features = ReverseLayerF.apply(s_features, alpha)
        rev_t_features = ReverseLayerF.apply(t_features, alpha)
    

        s_domain_pred = self.classifiers[k](rev_s_features)
        t_domain_pred = self.classifiers[k](rev_t_features)

        return y_pred, s_domain_pred, t_domain_pred
    
    def inference(self, x):
        x = x.permute(0, 2, 1)
        x_features = self.extractor(x)
        x_features = x_features.permute(2, 0, 1)
        y_pred = self.regressor(x_features)
        return y_pred.squeeze()


In [23]:
def loss_fn(source_domains, target_domains, y, source_preds, target_preds, y_pred):
    reg_loss = nn.MSELoss()(y_pred, y)
    loss2 = nn.BCELoss()(source_preds.squeeze(), source_domains.squeeze())
    loss3 = nn.BCELoss()(target_preds.squeeze(), target_domains.squeeze())
    domain_loss = loss2 + loss3
    loss = reg_loss + domain_loss
    return loss, domain_loss, reg_loss

In [11]:
# drop domains with <= 2 samples
unwanted = [1, 6, 8]
  
for ele in sorted(unwanted, reverse = True): 
    del df_sensor_s[ele]
    del df_gpp_s[ele]

In [31]:
DOMAINS = 6
feature_extractor = ConvFeatureExtractor()
regressor = RecurrentRegressor(128, 256, 1, False)
classifiers = []
for t in range(DOMAINS):
    classifiers.append(DomainClassifier(128).to(DEVICE))

model = Model(feature_extractor, regressor, classifiers).to(DEVICE)
optimizer = torch.optim.RMSprop(model.parameters())

In [32]:
import pdb
EPOCHS = 1000
gamma = 1e-2
mu= 1e-2
alpha = 1 
for epoch in range(EPOCHS):
    train_loss = 0.0
    train_reg_loss = 0.0
    train_domain_loss = 0.0
    train_r2 = 0.0
    target_r2 = 0.0
    model.train()
    for t in range(len(df_sensor_t)):
        domain_losses = []
        x_t = torch.FloatTensor(df_sensor_t[t].values).unsqueeze(1).to(DEVICE)
        y_t = torch.FloatTensor(df_gpp_t[t].values).to(DEVICE)
        domain_t  = torch.ones(x_t.shape[0]).to(DEVICE)
        for k in range(DOMAINS):
            s_k = np.random.choice(range(len(df_sensor_s[k])))
            x_s  = torch.FloatTensor(df_sensor_s[k][s_k].values).unsqueeze(1).to(DEVICE)
            y_s  = torch.FloatTensor(df_gpp_s[k][s_k].values).to(DEVICE)
            domain_s = torch.zeros(x_s.shape[0]).to(DEVICE)
            y_pred, s_preds, t_preds = model(x_s, x_t, alpha, k)
            y_pred = y_pred.squeeze()
         
            # Get loss and update
            loss = loss_fn(domain_s, domain_t, y_s, s_preds, t_preds, y_pred)
            domain_losses.append(loss)
            train_r2 += r2_score(y_true=y_s.detach().cpu().numpy(), y_pred=y_pred.detach().cpu().numpy())
        losses = torch.stack([domain_losses[j][2] for j in range(DOMAINS)])
        d_losses = torch.stack([domain_losses[j][1] for j in range(DOMAINS)])
        print(losses)
        print(d_losses)
        optimizer.zero_grad()
        loss = torch.log(torch.sum(torch.exp(gamma* (losses + mu * d_losses))))/gamma
        print(loss)
        loss.backward()
        optimizer.step() 
        train_loss += loss.item()
    
    model.eval()
    for t in range(len(df_sensor_t)):
        x_t = torch.FloatTensor(df_sensor_t[t].values).unsqueeze(1).to(DEVICE)
        y_t = torch.FloatTensor(df_gpp_t[t].values).to(DEVICE)        
        y_pred = model.inference(x_t)
        target_r2 += r2_score(y_true=y_t.detach().cpu().numpy()[masks[t]], y_pred=y_pred.detach().cpu().numpy()[masks[t]])
    
    train_loss /= len(df_sensor_t)
    train_r2 /= len(df_sensor_t)*DOMAINS
    target_r2 /= len(df_sensor_t)
    print(f"Epoch: {epoch+1}/{EPOCHS}")
    print(f"Train loss: {train_loss:.4f} | R2: {train_r2:.4f}")
    print(f"Target R2: {target_r2:.4f}")


        


tensor([1.0060, 1.0056, 1.0058, 1.0053, 1.0061, 1.0048],
       grad_fn=<StackBackward>)
tensor([1.3931, 1.3892, 1.3877, 1.3866, 1.3932, 1.3900],
       grad_fn=<StackBackward>)
tensor(180.1954, grad_fn=<DivBackward0>)
tensor([131.3430, 127.2142, 124.5172, 129.8040, 142.7207, 145.7296],
       grad_fn=<StackBackward>)
tensor([1.4089, 1.5767, 1.4233, 1.4098, 1.3839, 1.3959],
       grad_fn=<StackBackward>)
tensor(313.0600, grad_fn=<DivBackward0>)
tensor([508.1199, 503.4884, 504.1837, 515.5352, 507.0719, 507.4742],
       grad_fn=<StackBackward>)
tensor([1.3925, 1.4737, 1.4329, 1.3872, 1.3973, 1.3892],
       grad_fn=<StackBackward>)
tensor(686.9134, grad_fn=<DivBackward0>)
tensor([8.5085, 8.5081, 8.5083, 8.5089, 8.5099, 8.5094],
       grad_fn=<StackBackward>)
tensor([1.4471, 1.4087, 1.4983, 1.3865, 1.4635, 1.4276],
       grad_fn=<StackBackward>)
tensor(187.6992, grad_fn=<DivBackward0>)
tensor([55.2010, 55.2044, 55.1956, 55.1997, 55.2060, 55.2062],
       grad_fn=<StackBackward>)
tenso

tensor([0.9998, 0.9995, 0.9998, 0.9991, 0.9999, 0.9998],
       grad_fn=<StackBackward>)
tensor([18.3756,  4.3100,  6.9062, 10.6044,  7.0134, 31.8287],
       grad_fn=<StackBackward>)
tensor(180.3074, grad_fn=<DivBackward0>)
tensor([0.9998, 0.9997, 0.9998, 0.9991, 0.9994, 0.9999],
       grad_fn=<StackBackward>)
tensor([23.0701,  4.6911,  7.2841, 12.9055,  7.5466, 34.5245],
       grad_fn=<StackBackward>)
tensor(180.3256, grad_fn=<DivBackward0>)
tensor([0.9998, 0.9997, 0.9998, 0.9993, 0.9999, 0.9986],
       grad_fn=<StackBackward>)
tensor([25.8409,  5.0757,  7.8002, 15.8684,  8.2182, 38.1150],
       grad_fn=<StackBackward>)
tensor(180.3437, grad_fn=<DivBackward0>)
tensor([0.9999, 0.9991, 0.9998, 0.9998, 0.9999, 0.9987],
       grad_fn=<StackBackward>)
tensor([23.1330,  4.4040,  8.2010, 18.5322,  8.7708, 39.9016],
       grad_fn=<StackBackward>)
tensor(180.3471, grad_fn=<DivBackward0>)
tensor([0.9998, 0.9997, 0.9998, 0.9991, 0.9997, 0.9998],
       grad_fn=<StackBackward>)
tensor([25.

KeyboardInterrupt: 

In [None]:
x_t


In [13]:
y_pred = model.inference(x_t)





RuntimeError: CUDA error: device-side assert triggered

In [183]:
from plotly import graph_objects as go

fig = go.Figure()

fig.add_trace(go.Scatter(y=y_t.detach().cpu(),
                    mode='markers',
                    name='gt', 
                    marker=dict(size=3)))

fig.add_trace(go.Scatter(y=y_pred.detach().cpu(),
                    mode='markers',
                    name='pred', 
                    marker=dict(size=3)))

In [204]:
for k in range(len(df_sensor_s)):
    print(len(df_gpp_s[k]))

14
3
3
4
5
9
