In [146]:
import time
from typing import *
import torch
import torch.nn as nn
import torch.optim as optim
import matplotlib.pyplot as plt
import torch.nn.functional as F
import pandas as pd
import numpy as np
import torch.utils.data as data_utils
from sklearn.metrics import roc_auc_score
from argparse import ArgumentParser
from midas_cores import CMSCounter, MidasR
from online_autoencoder import lstmautoencoder
import midas

In [2]:
batch_size=10000
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [3]:
class AE(nn.Module):
    def __init__(self, **kwargs):
        super().__init__()
        self.encoder_hidden_layer = nn.Linear(
            in_features=1, out_features=10
        )
        self.encoder_output_layer = nn.Linear(
            in_features=10, out_features=10
        )
        self.decoder_hidden_layer = nn.Linear(
            in_features=10, out_features=10
        )
        self.decoder_output_layer = nn.Linear(
            in_features=10, out_features=1
        )

    def forward(self, features):
        activation = self.encoder_hidden_layer(features)
        activation = torch.relu(activation)
        code = self.encoder_output_layer(activation)
        code = torch.relu(code)
        activation = self.decoder_hidden_layer(code)
        activation = torch.relu(activation)
        activation = self.decoder_output_layer(activation)
        reconstructed = activation
        return reconstructed

In [145]:
edges = pd.read_csv("../data/darpa_processed.csv")
labels = pd.read_csv("../data/darpa_ground_truth.csv")

ed_tens = torch.tensor(edges[["source"]].values)
label_tens = torch.tensor(edges["dest"].values)

(ed_tens.size(), label_tens.size())

train = data_utils.TensorDataset(ed_tens, label_tens)
train_loader = data_utils.DataLoader(train, batch_size=batch_size, shuffle=False)

In [138]:
edges.columns

Index(['source', 'dest', 'time'], dtype='object')

In [158]:
class LSTMAE(nn.Module):

    def __init__(self, embedding_dim, hidden_dim, node_count):
        super().__init__()
        self.hidden_dim = hidden_dim

        self.edge_embed = nn.Embedding(node_count, embedding_dim)

        # takes word embeddings as inputs, and outputs hidden states
        # with dimensionality hidden_dim.
        self.fc1 = nn.Linear(embedding_dim, hidden_dim)

        # The linear layer that maps from hidden state space to tag space
        self.output = nn.Linear(hidden_dim, embedding_dim)
    def embed(self, node):
        return self.edge_embed(node)
    def forward(self, node):
        embeds = F.relu(self.edge_embed(node))
        hidden = F.relu(self.fc1(embeds))
        output = self.output(hidden)
        return output

In [165]:
losses = np.array([])
model = LSTMAE(4,50,100000).to(device)
optimizer = optim.Adam(model.parameters(), lr=0.01)
loss_function = nn.HingeEmbeddingLoss(reduction='none')
for batch, label in train_loader:
    batch, label = batch.view(batch.size(0),-1).to(device), label.to(device)
    optimizer.zero_grad()
    y_pred = model(batch)
    y = model.embed(label)
    loss = loss_function(y_pred.float().view(batch.size(0),-1), y)
    print ("predicted: {}, expected:{}, loss:{}".format(y_pred[0].data.cpu().numpy(), y[0].data.cpu().numpy(), loss[0].data.cpu().numpy()))
    losses = np.append(losses, loss.data.cpu().numpy().flatten())
    
    a = loss.mean()
    a.backward()
    optimizer.step()


predicted: [[-0.13104497  0.5213153  -0.38610598  0.03463575]], expected:[ 0.08766886 -0.5457511  -0.88715315  0.65416604], loss:[1. 1. 1. 1.]
predicted: [[-0.02882207  0.25915146 -0.2934159  -0.2840139 ]], expected:[0.5310138  2.3501315  1.9149865  0.86543214], loss:[1. 1. 1. 1.]
predicted: [[ 0.26369965  0.51262605 -0.2146128   0.42120662]], expected:[0.5310138  2.3501315  1.9149865  0.86543214], loss:[1. 1. 1. 1.]
predicted: [[-0.43657532  0.8915827  -0.6255714   0.05767796]], expected:[ 1.2250818   0.71341157 -0.90433407 -2.236217  ], loss:[1. 1. 1. 1.]
predicted: [[ 0.100502    0.15714262 -0.31877506 -0.01581306]], expected:[-0.42599696  0.5900729   1.3262836   1.6268375 ], loss:[1. 1. 1. 1.]
predicted: [[ 0.08909777  0.14802358 -0.32085142 -0.04069541]], expected:[-0.48068404 -1.1988721  -1.301705    1.5487667 ], loss:[1. 1. 1. 1.]
predicted: [[-0.10043147  0.76871186 -0.29419228  0.24810156]], expected:[-0.6473041  0.80212   -1.5196803  1.4479843], loss:[1. 1. 1. 1.]
predicted: 

In [162]:
roc_auc_score(labels, losses)

ValueError: Found input variables with inconsistent numbers of samples: [4554344, 18217376]

In [73]:
losses.size()

AttributeError: 'list' object has no attribute 'size'

In [60]:
inputs = [torch.randn(1, 3) for _ in range(5)]
print(inputs)

[tensor([[-0.8254,  0.4357, -0.9170]]), tensor([[-1.0914,  0.2126, -0.8948]]), tensor([[-0.6384,  1.2218, -0.1048]]), tensor([[ 0.3744, -0.3111,  0.7463]]), tensor([[ 0.2344,  0.1531, -0.4834]])]
