## Load feature

In [1]:
import joblib
import numpy as np
import torch

from torch.utils.data import Dataset, DataLoader

import torch.nn as nn
import torch.nn.functional as F
import torch

AUTOGRAPHER_FILE = 'autographer_embeded_ft_train.joblib'
TABULAR_FILE = 'tabular_embeded_ft_train.joblib'
TAB_AND_IMG_FILE = 'tabular_with_images_train.joblib' # No need for now

In [2]:
autographer_feat = joblib.load(AUTOGRAPHER_FILE)
tabular_feat = joblib.load(TABULAR_FILE)
df = joblib.load(TAB_AND_IMG_FILE)
groundtruths = {}
for i, l in zip(df['image_path'], df['label']):
    groundtruths['_'.join(i.split('_')[:-1])] = l

In [3]:
class Instance:
    def __init__(self, id, key):
        self.id = id
        self.tab = tabular_feat[key]
        self.images = np.stack(autographer_feat[key]['features'])
        self.images_path = autographer_feat[key]['images']
        self.label = groundtruths[key]
instance = Instance(0, '1001_trainA_act01')
instance.images.shape

(8, 512)

In [4]:
class Embedder(nn.Module):
    def __init__(self):
        super(Embedder, self).__init__()
        self.mlp = nn.Sequential(
            nn.Linear(512 + 128, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(0.5))
            
        self.rnn = nn.GRU(512, 512, 2, dropout=0.5, bidirectional=True)
        self.mlp2 = nn.Linear(512, 512)
        
    def forward(self, tab, images):
        # images: seq len, batch size, 512
        # tab: batch size, 128
        tab = tab.repeat((images.shape[0], 1, 1))
        features = self.mlp(torch.cat([images, tab], dim=2))
        _, hidden = self.rnn(features)
        
        return self.mlp2(hidden[-1]) # batch size, 512
    
class Discriminator(nn.Module):
    def __init__(self):
        super(Discriminator, self).__init__()
        self.mlp = nn.Sequential(
            nn.Linear(2048, 1024),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(0.5),
            nn.Linear(1024, 512),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Dropout(0.5),
            nn.Linear(512, 1),
            nn.Sigmoid()
        )
    def forward(self, feat1, feat2):
        dist = torch.abs(feat1-feat2)
        mul = torch.mul(feat1, feat2)
        return self.mlp(torch.cat([feat1, feat2, dist, mul], dim=1))


In [5]:
from sklearn.model_selection import StratifiedShuffleSplit
import random
import itertools
from tqdm import tqdm
sss = StratifiedShuffleSplit(n_splits=5, test_size=80, random_state=0)
train_instances = np.array([Instance(i, key) for i, key in enumerate(tabular_feat.keys())])

class PairwiseDataset(Dataset):
    def __init__(self, instances, k, valid=False):
        super(PairwiseDataset, self).__init__()
        self.instances = instances
        self.combinations = list(itertools.product(range(len(instances)), repeat=2))
        self.weights = [10 if instances[comb[0]].label == instances[comb[1]].label else 1 for comb in self.combinations]
        self.samples = random.choices(self.combinations, weights=self.weights, k=k)
        self.k = k
        self.valid = valid

    def __len__(self):
        if self.valid:
            return len(self.combinations)
        return len(self.samples)

    def shuffle(self):
        self.samples = random.choices(self.combinations, weights=self.weights, k=self.k)

    def __getitem__(self, i):
        samples = self.samples
        if self.valid:
            samples = self.combinations
        instances1 = self.instances[samples[i][0]]
        instances2 = self.instances[samples[i][1]]
        if instances1.label == instances2.label:
            label = 1 - (1 - self.valid) * random.random() / 20
        else:
            label = (1 - self.valid) * random.random() / 20
        return instances1, instances2, label

def collate_fn(pairs):
    instances1, instances2, labels = zip(*pairs)
    tab1 = torch.tensor([instance.tab for instance in instances1]).squeeze(1).float().cuda()
    tab2 = torch.tensor([instance.tab for instance in instances2]).squeeze(1).float().cuda()
    images1 = nn.utils.rnn.pad_sequence([torch.tensor(instance.images) for instance in instances1]).float().cuda()
    images2 = nn.utils.rnn.pad_sequence([torch.tensor(instance.images) for instance in instances2]).float().cuda()
    labels = torch.tensor(labels).float().cuda()
    return tab1, images1, tab2, images2, labels

def f1_loss(y_true:torch.Tensor, y_pred:torch.Tensor, is_training=False) -> torch.Tensor:
        assert y_true.ndim == 1
        assert y_pred.ndim == 1 

        tp = (y_true * y_pred).sum().to(torch.float32)
        tn = ((1 - y_true) * (1 - y_pred)).sum().to(torch.float32)
        fp = ((1 - y_true) * y_pred).sum().to(torch.float32)
        fn = (y_true * (1 - y_pred)).sum().to(torch.float32)

        epsilon = 1e-7

        precision = tp / (tp + fp + epsilon)
        recall = tp / (tp + fn + epsilon)

        f1 = 2* (precision*recall) / (precision + recall + epsilon)
        f1.requires_grad = is_training
        return f1
    
for split, (train_index, test_index) in enumerate(sss.split(train_instances, [instance.label for instance in train_instances])):
    train = PairwiseDataset(train_instances[train_index], k=80000)
    valid = PairwiseDataset(train_instances[test_index], k=1000, valid=True)
    bce = nn.BCELoss()
    bce.cuda()
    embedder = Embedder()
    discriminator = Discriminator()
    embedder.cuda()
    discriminator.cuda()
    optimizer = torch.optim.Adam(itertools.chain(embedder.parameters(), discriminator.parameters()), lr=1e-3)
    print("=" * 80)
    print("SPLIT", split)
    best_val = 100
    for epoch in range(5):
        train.shuffle()
        train_dataloader = DataLoader(train, batch_size=64, collate_fn=collate_fn)
        embedder.train()
        discriminator.train()
        total_loss = []
        for i, (tab1, images1, tab2, images2, labels) in enumerate(train_dataloader):
            optimizer.zero_grad()
            emb1 = embedder(tab1, images1)
            emb2 = embedder(tab2, images2)
            preds = discriminator(emb1, emb2)
            loss = bce(preds.squeeze(), labels.squeeze())
            loss.backward()
            total_loss.append(loss.item())
            optimizer.step()
        print(f"Epoch {epoch}: [train loss: {np.mean(total_loss):.4f}]", end=" ")
        acc = 0
        total_loss = []
        f1 = []
        with torch.no_grad():
            embedder.eval()
            discriminator.eval()
            count = 0
            valid_dataloader = DataLoader(valid, batch_size=64, collate_fn=collate_fn)
            for i, (tab1, images1, tab2, images2, labels) in enumerate(valid_dataloader):
                emb1 = embedder(tab1, images1)
                emb2 = embedder(tab2, images2)
                preds = discriminator(emb1, emb2)
                acc += sum((preds.round().squeeze() == labels.squeeze()).float()).item()
                f1.append(f1_loss(labels.squeeze(), preds.round().squeeze()).item())
                total_loss.append(bce(preds.squeeze(), labels.squeeze()).item())
                count += 1
            total_loss = np.mean(total_loss)
            print(f"[val loss: {acc/len(valid):.4f}, {np.mean(f1):.4f}, {total_loss:.4f}]")
            if total_loss < best_val:
                best_val = total_loss
                torch.save({"embedder": embedder.state_dict(),
                "discriminator": discriminator.state_dict()}, f"model_{split}.bin")
                print("Saved")
            else:
                break



SPLIT 0
Epoch 0: [train loss: 0.1328] [val loss: 1.0000, 1.0000, 0.0231]
Saved
Epoch 1: [train loss: 0.1195] [val loss: 1.0000, 1.0000, 0.0267]
Epoch 2: [train loss: 0.1199] [val loss: 1.0000, 1.0000, 0.0303]
Epoch 3: [train loss: 0.1465] [val loss: 0.9894, 0.9249, 0.0610]
Epoch 4: [train loss: 0.1228] [val loss: 0.9969, 0.9664, 0.0381]
SPLIT 1
Epoch 0: [train loss: 0.1359] [val loss: 1.0000, 1.0000, 0.0258]
Saved
Epoch 1: [train loss: 0.1197] [val loss: 1.0000, 1.0000, 0.0202]
Saved
Epoch 2: [train loss: 0.1673] [val loss: 0.9955, 0.9604, 0.0399]
Epoch 3: [train loss: 0.1280] [val loss: 1.0000, 1.0000, 0.0301]
Epoch 4: [train loss: 0.1195] [val loss: 1.0000, 1.0000, 0.0283]
SPLIT 2
Epoch 0: [train loss: 0.1341] [val loss: 0.9992, 0.9855, 0.0276]
Saved
Epoch 1: [train loss: 0.1196] [val loss: 0.9991, 0.9835, 0.0285]
Epoch 2: [train loss: 0.1198] [val loss: 0.9991, 0.9835, 0.0314]
Epoch 3: [train loss: 0.1449] [val loss: 0.9967, 0.9671, 0.0278]
Epoch 4: [train loss: 0.1194] [val loss: 0

In [6]:
states = torch.load("model_full.bin")
embedder.load_state_dict(states["embedder"])
discriminator.load_state_dict(states["discriminator"])

from collections import defaultdict
embedder.eval()
discriminator.eval()

def rank(embedder, discriminator, example):
    tab1 = torch.tensor([example.tab]).squeeze(1).float().cuda()
    images1 = torch.tensor(example.images).float().unsqueeze(1).float().cuda()
    emb1 = embedder(tab1, images1)
    scores = defaultdict(lambda: [])
    for instance in train_instances:
        if instance.id != example.id:
            tab2 = torch.tensor([instance.tab]).squeeze(1).float().cuda()
            images2 = torch.tensor(instance.images).unsqueeze(1).float().cuda()
            emb2 = embedder(tab2, images2)
            pred = discriminator(emb1, emb2)
            scores[instance.label].append(pred.item())
    ave_scores = np.zeros(20)
    for label in scores:
        ave_scores[int(label) - 1] = np.mean(scores[label])
    return np.argmax(ave_scores) + 1, ave_scores

# acc = 0
# for i, example in enumerate(train_instances[200:]):
#     pred, scores = rank(example)
#     acc += pred == example.label
#     print(f"{i}, {pred}({scores[pred - 1]:.4f}), {example.label}({scores[example.label-1]:.4f})")
# #     print(scores)
    
# print(acc, len(instances[200:]))

In [7]:
autographer_feat_test = joblib.load("autographer_embeded_ft_test.joblib")
tabular_feat_test = joblib.load('tabular_embeded_ft_test.joblib')
class TestInstance:
    def __init__(self, id, key):
        self.id = -1
        self.tab = tabular_feat_test['_test_'.join(key.split('_'))]
        self.images = np.stack(autographer_feat_test[key]['features'])
        self.images_path = autographer_feat_test[key]['images']

In [None]:
test_instances = [TestInstance(i, key) for i, key in enumerate(autographer_feat_test.keys())]
acts = {'act00': 'calibration',
                'act01': 'write an email',
                'act02': 'read on screen',
                'act03': 'edit/create presentation',
                'act04': 'zone out and fixate',
                'act05': 'use a calculator to add up numbers on sheet',
                'act06': 'physical precision task',
                'act07': 'put documents in order',
                'act08': 'read text/numbers on page',
                'act09': 'arrange money in change jar',
                'act10': 'write on paper with pen',
                'act11': 'watch a youtube video',
                'act12': 'go to a news website and browse',
                'act13': 'have conversation with experimenter in room',
                'act14': 'make a telephone call',
                'act15': 'drink/eat for 2 minutes',
                'act16': 'close eyes and sit still',
                'act17': 'clean e.g. sweaping the floor, wipe, ...',
                'act18': 'exercise: sit up/stand down repeatedly',
                'act19': 'hand-eye coordination (tennis ball)',
                'act20': 'pace the room',
                }
acts = list(acts.values())

results = np.zeros((5, 140, 20))
for split in range(5):
    states = torch.load(f"model_{split}.bin")
    embedder.load_state_dict(states["embedder"])
    discriminator.load_state_dict(states["discriminator"])
    embedder.eval()
    discriminator.eval()
    print("Split", split)
    for i, (example, key) in tqdm(enumerate(zip(test_instances, autographer_feat_test.keys()))):
        pred, scores = rank(embedder, discriminator, example)
        results[split][i] = scores

results = np.mean(results, axis=0)

In [21]:
for i, (example, key) in tqdm(enumerate(zip(test_instances, autographer_feat_test.keys()))):
    scores = results[i]
    pred = np.argmax(results[i]) + 1
    print(f"{key}, {acts[pred]}({scores[pred - 1]:.4f})")
    print(scores)

140it [00:00, 3609.42it/s]

1001_pred13, close eyes and sit still(0.7701)
[0.02461336 0.03841531 0.02636391 0.03541568 0.02967007 0.02600539
 0.02593976 0.02633449 0.02674963 0.02441375 0.04248182 0.02974187
 0.02678906 0.02624479 0.02636072 0.77014856 0.02794314 0.02906855
 0.03234492 0.02919026]
1001_pred5, edit/create presentation(0.9790)
[0.01928134 0.0190859  0.97895494 0.02033642 0.02256728 0.02106243
 0.02185508 0.02149156 0.02059488 0.01970595 0.02188001 0.0209347
 0.02303529 0.01930667 0.02100879 0.02016639 0.02409818 0.02201975
 0.02410177 0.02004935]
1001_pred1, hand-eye coordination (tennis ball)(0.3693)
[0.02309726 0.02268305 0.0244991  0.02392099 0.02548504 0.02422896
 0.07877237 0.02685663 0.05623076 0.02460905 0.02537168 0.02609237
 0.08519916 0.02352702 0.02763739 0.02384712 0.05051963 0.02642674
 0.36927993 0.02657825]
1001_pred0, read on screen(0.6760)
[0.0246887  0.6759922  0.02579843 0.02597566 0.0289671  0.02648293
 0.02607246 0.02766692 0.02460387 0.02542542 0.08749788 0.10464744
 0.0273428




In [22]:
joblib.dump(results, "results.joblib")

['results.joblib']