In [1]:
import numpy as np 
import torch 
import torch.nn.functional as F 
from torch import nn, Tensor 

from random import sample, choice
from collections import defaultdict
from copy import deepcopy

In [7]:
# Run on the first CUDA GPU when PyTorch can see one, otherwise fall back to the CPU.
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")

In [9]:
class BiLSTMEncoder(nn.Module):
    """Encode a sequence into a fixed-size embedding.

    Pipeline: a stack of ``nn.Linear`` layers (no activation in between) applied to the
    feature axis -> an LSTM -> a linear projection of the LSTM's final hidden and cell
    states to ``embedding_size``.

    Args:
        seq_len: stored on the instance; not used by the encoder itself.
        input_size: number of features per time step.
        hidden_size: LSTM hidden size.
        linear_filters: output widths of the linear layers placed before the LSTM.
            May be empty, in which case the LSTM reads the raw input.
        embedding_size: width of the returned embedding.
        num_layers: number of stacked LSTM layers.
        bidirectional: whether the LSTM is bidirectional.
        batch_size: stored on the instance; not used by the encoder itself.
    """

    def __init__(self,seq_len, input_size, hidden_size,linear_filters,embedding_size:int, num_layers = 1,bidirectional=True,batch_size=32):
        super(BiLSTMEncoder, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.linear_filters = linear_filters
        self.embedding_size = embedding_size
        self.bidirectional = bidirectional
        self.batch_size = batch_size
        self.seq_len = seq_len

        # Linear layers; the plain list is kept as an attribute, the modules themselves
        # are registered through self.net below.
        self.layers = []
        in_features = self.input_size
        for layer_out in self.linear_filters:
            self.layers.append(nn.Linear(in_features, layer_out))
            in_features = layer_out

        # LSTM reads the output of the last linear layer (or the raw input when
        # linear_filters is empty).
        self.lstm = nn.LSTM(input_size = in_features, hidden_size = self.hidden_size,
                            num_layers = self.num_layers, bidirectional=self.bidirectional,
                            batch_first=True)

        self.net = nn.Sequential(*self.layers)

        # h_n and c_n each have shape (num_layers * num_directions, batch, hidden_size),
        # so the flattened state holds 2 * num_layers * num_directions * hidden_size
        # values per sample (4 * hidden_size for the default bidirectional, 1-layer case).
        num_directions = 2 if self.bidirectional else 1
        state_width = 2 * self.num_layers * num_directions * self.hidden_size
        self.out_linear = nn.Linear(state_width, self.embedding_size)

    def forward(self, x_input):
        '''
        : param x_input:    input of shape (# in batch, seq_len, input_size); the LSTM is batch_first
        : return lstm_out, embedding:
                            lstm_out holds the LSTM output for every time step,
                            shape (# in batch, seq_len, num_directions * hidden_size);
                            embedding is the linear projection of the final hidden and cell
                            states, shape (# in batch, embedding_size)
        '''
        x = self.net(x_input)
        lstm_out, self.hidden = self.lstm(x)

        # (h_n, c_n) -> (2 * layers * directions, batch, hidden) -> (batch, state_width)
        hidden_transformed = torch.cat(self.hidden, 0)
        hidden_transformed = torch.transpose(hidden_transformed, 0, 1)
        hidden_transformed = torch.flatten(hidden_transformed, start_dim=1)

        hidden_transformed = self.out_linear(hidden_transformed)

        return lstm_out, hidden_transformed

    
class BiLSTMDecoder(nn.Module):
    """Decode an embedding back into a sequence.

    The embedding is projected to the LSTM's initial hidden and cell states, the LSTM is
    run over a random dummy input of length ``seq_len``, and a stack of ``nn.Linear``
    layers maps the LSTM output back to ``input_size`` features per time step.

    Args:
        seq_len: length of the generated sequence.
        input_size: number of features per time step in the output.
        hidden_size: LSTM hidden size.
        linear_filters: widths of the encoder-side linear layers; used in reverse order.
        embedding_size: width of the embedding passed to ``forward``.
        num_layers: number of stacked LSTM layers.
        bidirectional: whether the LSTM is bidirectional.
        batch_size: stored on the instance; ``forward`` reads the batch size from its input.
        device: stored on the instance; ``forward`` creates tensors on its input's device.
    """

    def __init__(self,seq_len, input_size, hidden_size, linear_filters,embedding_size:int, num_layers = 1,bidirectional=True,batch_size=32, device='cpu'):
        super(BiLSTMDecoder, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.linear_filters = linear_filters[::-1]
        self.embedding_size = embedding_size
        self.bidirectional = bidirectional
        self.batch_size = batch_size
        self.seq_len = seq_len
        self.device = device

        num_directions = 2 if self.bidirectional else 1

        # The embedding is expanded into h_0 and c_0, each of shape
        # (num_layers * num_directions, batch, hidden_size).
        state_width = 2 * self.num_layers * num_directions * self.hidden_size
        self.input_linear = nn.Linear(self.embedding_size, state_width)

        # Plain list kept as an attribute; modules are registered through self.net below.
        self.layers = []

        # The original passed bidirectional=True and batch_first=bidirectional, which only
        # happened to be right when bidirectional was True.
        self.lstm = nn.LSTM(input_size = self.linear_filters[0], hidden_size = self.hidden_size,
                            num_layers = self.num_layers, bidirectional=self.bidirectional,
                            batch_first=True)

        # First layer consumes the LSTM output, the rest walk the reversed filter widths
        # and finish at input_size.
        self.layers.append(nn.Linear(num_directions * self.hidden_size, self.linear_filters[0]))
        out_sizes = list(self.linear_filters[1:]) + [self.input_size]
        for layer_in, layer_out in zip(self.linear_filters, out_sizes):
            self.layers.append(nn.Linear(layer_in, layer_out))

        self.net = nn.Sequential(*self.layers)

    def forward(self,encoder_hidden):
        '''
        : param encoder_hidden:   embedding of shape (# in batch, embedding_size)
        : return x:               reconstructed sequence of shape (# in batch, seq_len, input_size)

        NOTE: the LSTM input is freshly sampled from torch.rand on every call, so the output
        is not deterministic unless the RNG is seeded.
        '''
        # Use the actual batch size so batches other than self.batch_size also work.
        batch = encoder_hidden.shape[0]
        states = self.input_linear(encoder_hidden)

        # (batch, 2*L*D*H) -> (2*L*D, batch, H); first half is h_0, second half is c_0.
        states = states.view(batch, -1, self.hidden_size).transpose(0, 1)
        h, c = torch.chunk(states, 2, dim=0)
        h, c = h.contiguous(), c.contiguous()

        # The feature width must match the LSTM's input_size, not hidden_size.
        dummy_input = torch.rand((batch, self.seq_len, self.lstm.input_size),
                                 dtype=encoder_hidden.dtype, device=encoder_hidden.device)

        lstm_out, self.hidden = self.lstm(dummy_input,(h,c))
        x = self.net(lstm_out)

        return x

class BiLSTMEncDecModel(nn.Module):
    """Sequence autoencoder: a BiLSTMEncoder followed by a BiLSTMDecoder.

    Args:
        seq_len: sequence length, passed to both sub-modules.
        input_size: number of features per time step.
        hidden_size: LSTM hidden size used by both sub-modules.
        linear_filters: widths of the encoder's linear layers (the decoder reverses them).
        embedding_size: width of the embedding produced by the encoder.
        num_layers: number of stacked LSTM layers.
        bidirectional: whether the LSTMs are bidirectional.
        batch_size: batch size passed to both sub-modules.
        device: device passed to the decoder.

    NOTE: num_layers, bidirectional and batch_size are passed straight to the encoder and
    decoder constructors (previously they were ignored in favour of hard-coded
    1 / True / 32), so non-default values rely on those classes supporting them.
    """

    def __init__(self,seq_len, input_size, hidden_size, linear_filters=[128,256,512],embedding_size:int=256, num_layers = 1,bidirectional=True, batch_size=32, device='cpu'):
        super(BiLSTMEncDecModel, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        # Stored reversed; the sub-modules receive the original order.
        self.linear_filters = linear_filters[::-1]
        self.embedding_size = embedding_size
        self.bidirectional = bidirectional
        self.batch_size = batch_size
        self.seq_len = seq_len

        self.encoder = BiLSTMEncoder(seq_len, input_size, hidden_size, linear_filters, embedding_size,
                                     num_layers=num_layers, bidirectional=bidirectional,
                                     batch_size=batch_size)
        self.decoder = BiLSTMDecoder(seq_len, input_size, hidden_size, linear_filters, embedding_size,
                                     num_layers=num_layers, bidirectional=bidirectional,
                                     batch_size=batch_size, device=device)

    def forward(self,x):
        """Return ``(decoder_out, embedding)`` for the input batch ``x``."""
        # The per-step LSTM output of the encoder is not needed here.
        _, embedding = self.encoder(x)
        decoder_out = self.decoder(embedding)

        return decoder_out, embedding

In [63]:
class DistanceCCELoss(nn.Module):
    """Cross-entropy over distances between predictions and per-class reference vectors.

    ``reference_mat`` has shape (n, c, f): n reference vectors for each of c classes, each
    of width f. For every prediction the distance to all n * c references is computed,
    averaged over the n references of each class, and the negated class distances are
    used as logits for ``nn.CrossEntropyLoss``.

    Args:
        reference_mat: tensor of shape (n, c, f).
        distance: 'cosine' uses ``1 - |cosine similarity|``; any other value uses the
            Euclidean ``nn.PairwiseDistance(p=2)``.
        theta: stored on the instance; not used by this class.
        agg: stored on the instance; not used by this class.
        device: stored on the instance; ``forward`` follows the device of its input.
    """

    # def __init__(self, action_feats, label_map, pos_thr=0.05, neg_thr=0.95, agg="mean"):
    def __init__(self, reference_mat, distance='cosine', theta=1e-4, agg="mean", device="cpu"):
        super(DistanceCCELoss, self).__init__()
        self.n, self.c, f = reference_mat.shape
        # (n*c, 1, f): the singleton axis broadcasts against the batch axis of the
        # predictions. reshape (not view) so non-contiguous references are accepted.
        self.ref_mat = reference_mat.reshape(self.n*self.c, 1, f)

        self.distance = distance
        self.get_disMet(distance)

        self.agg = agg
        self.device = device
        self.theta = theta
        self.cceLoss = nn.CrossEntropyLoss()

    def get_disMet(self, distance):
        """Select the metric module stored in ``self.disMet``."""
        if distance == 'cosine':
            self.disMet = nn.CosineSimilarity(dim=2, eps=1e-6)
        else:
            self.disMet = nn.PairwiseDistance(p=2)

    def forward(self, pred_fts, y):
        """Return the scalar loss.

        Args:
            pred_fts: predictions of shape (batch, f).
            y: class indices of shape (batch,), each in [0, c).
        """
        # Keep the references on the same device / dtype as the predictions.
        ref_mat = self.ref_mat.to(device=pred_fts.device, dtype=pred_fts.dtype)

        dist = self.disMet(pred_fts, ref_mat)                       # (n*c, batch)
        if self.distance == 'cosine':
            dist = 1- torch.abs(dist)

        class_dist = dist.reshape(self.n, self.c, -1)               # (n, c, batch)
        mean_dist = torch.transpose(class_dist.mean(dim=0), 0, 1)   # (batch, c)

        # CrossEntropyLoss treats its input as logits (larger = more likely), so a small
        # distance must become a large logit. Feeding the raw distances would reward
        # moving predictions away from the true class.
        cce_loss = self.cceLoss(-mean_dist, y)
        return cce_loss

In [64]:
# Smoke-test instance: random reference matrix with 10 references for each of 18 classes,
# 128 features per reference.
loss_func = DistanceCCELoss(
    reference_mat=torch.randn((10, 18, 128)),
    distance='cosine',
)

In [67]:
# Random batch of 32 predictions, 128 features each.
sample_input = torch.randn((32, 128))
# Labels must be valid class indices for the 18-class reference matrix, i.e. in [0, 18).
# abs(randn * 8).long() could produce values >= 18, which CrossEntropyLoss rejects.
sample_classes = torch.randint(low=0, high=18, size=(32,))

In [68]:
# Run the loss once on the random batch.
# NOTE(review): the saved output below shows a torch.Size line, presumably from a debug
# print that is no longer in DistanceCCELoss.forward — re-run to refresh the output.
loss = loss_func(sample_input, sample_classes)

torch.Size([10, 18, 32])


In [69]:
# Display the scalar loss tensor computed in the previous cell.
loss

tensor(2.8897)

In [None]:
import torch

# Shape experiment: distances between a batch of 32 vectors and a (10, 18, 128)
# reference tensor.
tensor1 = torch.randn(32, 128)
tensor2 = torch.randn(10, 18, 128)

# Flatten the two leading axes and add a singleton axis, giving (10*18, 1, 128), so the
# references broadcast against the 32 batch rows.
tensor2 = tensor2.reshape(10*18, 1, 128)

# Cosine distance = 1 - cosine similarity over the feature axis -> shape (10*18, 32).
# (The original used nn.PairwiseDistance here, which is Euclidean, not cosine.)
cos_sim = nn.CosineSimilarity(dim=2, eps=1e-6)
cosine_distance = 1 - cos_sim(tensor1, tensor2)

class_distances = cosine_distance.view(10, 18, 32)
class_distances.shape


torch.Size([10, 18, 32])

In [38]:
# Reducing over dim 0 removes the axis of size 10 and leaves (18, 32).
class_distances.sum(dim=0).shape

torch.Size([18, 32])

In [58]:
# Mean over dim 0, then swap the remaining axes: (18, 32) -> (32, 18).
# NOTE(review): the shape is assigned to `t`, so this cell displays nothing as written;
# the saved output below presumably comes from an earlier version of the cell.
t = torch.transpose(class_distances.mean(dim=0), 0, 1).shape

torch.Size([32, 18])

---

In [None]:
class DistanceCCELoss(nn.Module):
    """Cross-entropy over distances between predictions and per-class mean vectors.

    ``action_feats`` maps a class key to a 2-D collection of feature vectors (rows are
    samples). The row-wise mean of each entry becomes that class's reference vector;
    references are stacked in ascending key order, so row i of ``ref_mat`` belongs to the
    i-th smallest key.

    Args:
        action_feats: dict {class key: array or tensor of shape (num_samples, f)}.
        label_map: stored on the instance; not used by this class.
        distance: 'cosine' uses ``1 - |cosine similarity|``; any other value uses the
            Euclidean ``nn.PairwiseDistance(p=2)``.
        k, pos_thr, neg_thr, agg: stored on the instance; not used by this class.
        device: device the reference matrix is created on.
    """

    # def __init__(self, action_feats, label_map, pos_thr=0.05, neg_thr=0.95, agg="mean"):
    def __init__(self, action_feats, label_map, distance='cosine', k=10, pos_thr=0.05, neg_thr=0.95, agg="mean", device="cpu"):
        super(DistanceCCELoss, self).__init__()
        # Set plain attributes first: avg_vector needs self.device.
        self.device = device
        self.action_feats = deepcopy(action_feats)
        self.label_map = label_map
        self.distance = distance
        self.k = k
        self.pos_thr = pos_thr
        self.neg_thr = neg_thr
        self.agg = agg
        self.cceLoss = nn.CrossEntropyLoss()

        self.get_disMet(distance)
        self.avg_vector(action_feats)

    def get_disMet(self, distance):
        """Select the metric module stored in ``self.disMet``."""
        if distance == 'cosine':
            self.disMet = nn.CosineSimilarity(dim=-1, eps=1e-6)
        else:
            self.disMet = nn.PairwiseDistance(p=2)

    def avg_vector(self, action_feats):
        """Build ``self.ref_mat`` of shape (num_classes, f) from the per-class means."""
        keys = sorted(action_feats.keys())
        # as_tensor accepts both numpy arrays and tensors; stacking tensors avoids the
        # slow / fragile torch.Tensor(list_of_arrays) construction.
        class_means = [
            torch.as_tensor(action_feats[key], dtype=torch.float32).mean(dim=0)
            for key in keys
        ]
        # Use the instance's device rather than a notebook-global `device`.
        self.ref_mat = torch.stack(class_means).to(self.device)

    def forward(self, pred_fts, y):
        """Return ``(loss, pred_y)``.

        Args:
            pred_fts: predictions of shape (batch, f).
            y: target indices of shape (batch,); each is a row index into ``ref_mat``
               (position of the class key in sorted order).

        Returns:
            The scalar cross-entropy loss and, per sample, the index of the nearest
            reference vector.
        """
        pred_fts = torch.unsqueeze(pred_fts, 1)                      # (batch, 1, f)
        ref_mat = self.ref_mat.to(device=pred_fts.device, dtype=pred_fts.dtype)
        dist = self.disMet(pred_fts, ref_mat)                        # (batch, num_classes)
        if self.distance == 'cosine':
            dist = 1- torch.abs(dist)

        # CrossEntropyLoss expects logits (larger = more likely); negate the distances so
        # the nearest class gets the largest logit, consistent with the argmin below.
        cce_loss = self.cceLoss(-dist, y)
        pred_y = torch.argmin(dist, dim=-1)
        return cce_loss, pred_y

In [6]:
# Quick check of ascending integer ordering.
l = sorted([9, 3, 5, 6, 10, 2, 0, 1, 12, 11])
l

[0, 1, 2, 3, 5, 6, 9, 10, 11, 12]

---

In [3]:
import torch
from torch import nn

In [4]:
# Row-wise cosine similarity between two random (100, 128) batches; the result has one
# value per row, i.e. shape (100,).
input1, input2 = torch.randn(100, 128), torch.randn(100, 128)
cos = nn.CosineSimilarity(dim=1, eps=1e-6)
output = cos(input1, input2)
output

tensor([ 1.5105e-01,  9.9553e-02,  1.4555e-02, -4.6371e-02,  3.1975e-02,
         1.3174e-01, -8.3854e-03,  1.3381e-01, -5.5751e-02, -3.1016e-02,
        -2.5598e-02, -2.9823e-02,  5.5216e-02,  8.6839e-03, -1.1508e-01,
        -1.0554e-03, -8.1863e-02,  7.1212e-02,  1.1283e-01, -2.0324e-02,
         2.8713e-02, -7.7041e-02,  5.6254e-02,  1.9283e-01,  1.0349e-01,
        -3.2376e-03, -2.0324e-01,  2.8374e-02,  4.5663e-02,  7.7501e-03,
         2.5818e-02,  3.5124e-02,  1.0218e-01, -2.4832e-02,  1.0733e-01,
         6.1876e-02,  5.0084e-02,  1.8377e-01,  5.5532e-02,  3.1180e-03,
        -1.2405e-01, -2.0716e-01, -6.0127e-03,  3.5816e-03, -1.3235e-01,
         1.5695e-02,  5.4732e-02,  1.4135e-02,  8.7572e-02, -8.4399e-02,
         5.5877e-02,  5.8034e-02,  2.4401e-02, -1.1981e-01,  3.0698e-02,
        -2.9763e-02, -1.0775e-01, -2.3126e-02,  1.1734e-01, -1.6157e-02,
         1.0567e-01,  4.4563e-02, -4.5023e-02,  4.9310e-02, -3.2808e-02,
        -6.1295e-02, -9.3148e-03,  4.6464e-02,  4.7

---

In [4]:
# (id, activity name) pairs; the first element is presumably the dataset's own activity
# id (note the gaps) — verify against the data source.
label_map = [
    (1, 'lying'),
    (2, 'sitting'),
    (3, 'standing'),
    (4, 'walking'),
    (5, 'running'),
    (6, 'cycling'),
    (7, 'Nordic walking'),
    (9, 'watching TV'),
    (10, 'computer work'),
    (11, 'car driving'),
    (12, 'ascending stairs'),
    (13, 'descending stairs'),
    (16, 'vacuum cleaning'),
    (17, 'ironing'),
    (18, 'folding laundry'),
    (19, 'house cleaning'),
    (20, 'playing soccer'),
    (24, 'rope jumping'),
]

# Activity name -> zero-based position in label_map (not the id stored in the pair).
label2Id = {name: position for position, (_activity_id, name) in enumerate(label_map)}
# Group sample indices by class id (action_dict) and record the class id of every
# sample in order (skeleton_Ids).
skeleton_Ids = []
action_dict = defaultdict(list)
for sample_idx, class_name in enumerate(skeleton_classes):
    class_id = label2Id[class_name]
    action_dict[class_id].append(sample_idx)
    skeleton_Ids.append(class_id)

In [5]:
def get_class_ft(data, model, device, bs=32):
    """Run ``model`` over ``data`` in fixed-size batches and return its embeddings.

    Every batch fed to the model has exactly ``bs`` rows: the last (or only) batch is
    zero-padded along the first axis and the outputs for the padding rows are dropped.
    Inputs with more than ``bs`` samples are processed in several batches instead of
    being silently cropped.

    Args:
        data: 3-D tensor whose first axis indexes samples.
        model: callable returning a pair whose second element has one row per input row.
        device: device the batches are moved to before calling the model.
        bs: number of rows in every batch passed to the model.

    Returns:
        numpy array with one row per sample in ``data``.
    """
    ns, _, _ = data.shape
    feats = []
    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        # max(ns, 1) keeps a single (all-padding) pass for empty input so the result
        # still has the model's feature width.
        for start in range(0, max(ns, 1), bs):
            chunk = data[start:start + bs]
            n_valid = chunk.shape[0]
            # pad is given last-axis-first; only the end of the first axis is padded.
            padded_mat = F.pad(input=chunk, pad=(0,0,0,0,0,bs-n_valid), mode='constant', value=0)
            _, vector_out = model(padded_mat.float().to(device))
            feats.append(vector_out[:n_valid, :].cpu())
    action_feat_mat = torch.cat(feats, dim=0).numpy()
    return action_feat_mat

In [10]:
# Autoencoder configuration; it has to match the checkpoint loaded below.
ae_model = BiLSTMEncDecModel(seq_len=50, input_size=36, hidden_size=512, linear_filters=[128,256,512], embedding_size=256, num_layers=1,bidirectional=True,batch_size=32, device=device)
prep_dir = '../tmp/random_input_100_epochs.pt'

# map_location remaps the stored tensors onto the current device, so a checkpoint saved
# on a GPU also loads on a CPU-only machine.
# NOTE: torch.load unpickles the file — only load checkpoints from a trusted source.
ae_model.load_state_dict(torch.load(prep_dir, map_location=device))

<All keys matched successfully>

In [11]:
# Move the autoencoder onto the selected device before extracting features.
ae_model = ae_model.to(device)

# For every class id, select that class's samples from skeleton_mov (rows chosen by the
# index list stored in action_dict) and encode them into one feature matrix.
action_ft_dict = {
    class_id: get_class_ft(torch.from_numpy(skeleton_mov[sample_ids, ...]), ae_model, device)
    for class_id, sample_ids in action_dict.items()
}

In [12]:
# Build the loss from the per-class feature matrices.
# NOTE(review): ActionTripletLoss is not defined in the visible part of this notebook —
# confirm it is defined or imported before this cell runs.
trpLoss =ActionTripletLoss(action_ft_dict, distance='cosine', k=10, pos_thr=0.90, neg_thr=0.05, agg="mean", device=device)

In [13]:
# Dummy batch: 8 random vectors of width 256 (the embedding_size used for ae_model).
pred_batch = torch.randn(8, 256)
# One label per row of pred_batch.
# NOTE(review): presumably these are class ids as used for the keys of action_ft_dict — confirm.
true_actions = [1, 4, 3, 6, 10, 5, 3, 1]

In [14]:
# Evaluate the loss on the dummy batch.
# NOTE(review): true_actions is a plain Python list here; confirm the loss accepts a list
# rather than a tensor of labels.
loss = trpLoss(pred_batch, true_actions)