In [13]:
import pandas as pd
from sklearn import preprocessing
import torch
from torch import optim

from utils import utils
DEVICE = utils.DEVICE

# 0. Prep Data

In [14]:
source_idx = 1
target_idx = 2
winter_idx = 0

train_source_X = pd.read_csv(f"./deep_occupancy_detection/data/{source_idx}_X_train.csv")
target_X = pd.read_csv(f"./deep_occupancy_detection/data/{target_idx}_X_train.csv")
train_source_y_task = pd.read_csv(f"./deep_occupancy_detection/data/{source_idx}_Y_train.csv")[train_source_X.Season==winter_idx].values.reshape(-1)
target_y_task = pd.read_csv(f"./deep_occupancy_detection/data/{target_idx}_Y_train.csv")[target_X.Season==winter_idx].values.reshape(-1)
train_source_X = train_source_X[train_source_X.Season==winter_idx]
target_X = target_X[target_X.Season==winter_idx]

In [15]:
train_source_X

Unnamed: 0,Energy,Season,Time
1248,227903.247,0,12
1249,310118.934,0,13
1250,371554.363,0,14
1251,851215.054,0,15
1252,731550.916,0,16
...,...,...,...
2715,749873.285,0,39
2716,2196655.709,0,40
2717,691079.162,0,41
2718,937151.981,0,42


In [16]:
target_X

Unnamed: 0,Energy,Season,Time
2176,168649.947,0,12
2177,169192.356,0,13
2178,168560.052,0,14
2179,130375.163,0,15
2180,132788.967,0,16
...,...,...,...
3131,996765.945,0,39
3132,1305499.967,0,40
3133,886801.848,0,41
3134,991335.091,0,42


In [17]:
scaler = preprocessing.StandardScaler()
scaler.fit(train_source_X)
train_source_X = scaler.transform(train_source_X)
target_X = scaler.transform(target_X)

train_source_X, train_source_y_task = utils.apply_sliding_window(train_source_X, train_source_y_task, filter_len=6)
target_X, target_y_task = utils.apply_sliding_window(target_X, target_y_task, filter_len=6)

In [18]:
source_loader, target_loader, _, source_X, target_X, _ = utils.get_loader(train_source_X, target_X, train_source_y_task, target_y_task, shuffle=True)

# 1. H-divergence
## 1.1 Model Fit


In [19]:
feature_extractor = utils.Conv1d(input_size=train_source_X.shape[2]).to(DEVICE)
domain_classifier = utils.Decoder(input_size=128, output_size=1).to(DEVICE)
criterion = torch.nn.BCELoss()

feature_optimizer = optim.Adam(feature_extractor.parameters(), lr=0.0001)
domain_optimizer = optim.Adam(domain_classifier.parameters(), lr=0.0001)

In [20]:
for epoch in range(30):
    for i, (source_data, target_data) in enumerate(zip(source_loader, target_loader)):
        source_X_tmp, source_y = source_data
        source_y = source_y[:, 1]
        target_X, target_y = target_data

        source_feature = feature_extractor(source_X_tmp)
        target_feature = feature_extractor(target_X)

        source_output = domain_classifier(source_feature)
        target_output = domain_classifier(target_feature)

        source_output = torch.sigmoid(source_output).reshape(-1)
        target_output = torch.sigmoid(target_output).reshape(-1)
        loss = criterion(source_output, torch.zeros_like(source_output))
        loss += criterion(target_output, torch.ones_like(target_output))
        feature_optimizer.zero_grad()
        domain_optimizer.zero_grad()
        loss.backward()
        feature_optimizer.step()
        domain_optimizer.step()
    if epoch % 10 == 0:
        print(f"Loss: {loss.item()}")

Loss: 1.3868377208709717
Loss: 1.37479567527771
Loss: 1.0229378938674927


## 1.2 Calc Error Rate

In [21]:
pred_y = domain_classifier(feature_extractor(source_X))
pred_y = torch.sigmoid(pred_y).reshape(-1)
pred_y = pred_y > 0.5
source_y = torch.zeros_like(pred_y)
acc_source = sum(pred_y == source_y) / pred_y.shape[0]
err_source = 1-acc_source

In [22]:
pred_y = domain_classifier(feature_extractor(target_X))
pred_y = torch.sigmoid(pred_y).reshape(-1)
pred_y = pred_y > 0.5
target_y = torch.ones_like(pred_y)
acc_target = sum(pred_y == target_y) / pred_y.shape[0]
err_target = 1-acc_target

In [23]:
h_divergence = 2*(1- (err_source+err_target))

In [24]:
h_divergence

tensor(1.9959, device='cuda:0')