In [None]:
%matplotlib inline

import matplotlib
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def iterate_data(data, batchsize, shuffle=False):
    """Yield successive mini-batches taken along the first axis of ``data``.

    Arguments:
        data: array-like indexed as ``data[rows, :, :]`` and exposing
            ``shape[0]`` (so it must have three axes).
        batchsize (int): number of rows per yielded batch.
        shuffle (bool): when true, rows are visited in a random order drawn
            from NumPy's global random state; otherwise in their stored order.

    Yields:
        ``data[rows, :, :]`` for each full batch. Trailing rows that do not
        fill a complete batch are never yielded.
    """
    n_rows = data.shape[0]
    order = None
    if shuffle:
        order = np.arange(n_rows)
        np.random.shuffle(order)
    for first in range(0, n_rows - batchsize + 1, batchsize):
        last = first + batchsize
        # Contiguous slice when unshuffled, fancy index into the permutation otherwise.
        rows = slice(first, last) if order is None else order[first:last]
        yield data[rows, :, :]

In [None]:
import torch
import torch.nn as nn
from torch.nn.utils import weight_norm
from torch.autograd import Variable
from torch.distributions import Categorical
import numpy as np
import math

def log_sum_exp(x, dim=1):
    """Compute ``log(sum(exp(x)))`` along ``dim`` in a numerically stable way.

    The maximum along ``dim`` is subtracted before exponentiating and added
    back afterwards, so large entries do not overflow.

    Arguments:
        x (Tensor): input values.
        dim (int): dimension to reduce (default 1).

    Returns:
        Tensor with the same shape as ``x`` except that ``dim`` has size 1.
    """
    peak = torch.max(x, dim, keepdim=True)[0]
    shifted = x - peak.expand_as(x)
    total = torch.exp(shifted).sum(dim=dim, keepdim=True)
    return peak + total.log()

class MDN(nn.Module):
    """Mixture density network head.

    Maps each input vector to the parameters of a mixture of G Gaussians with
    O output dimensions and diagonal covariance.

    Arguments:
        in_features (int): the number of dimensions in the input
        out_features (int): the number of dimensions in the output (O)
        num_gaussians (int): the number of mixture components (G)
    Input:
        minibatch (BxD): B is the batch size and D is the number of input
            dimensions.
    Output:
        (pi, sigma, mu) (BxG, BxGxO, BxGxO):
            pi holds *log* mixture weights (it is the output of a LogSoftmax
            over the G components).
            sigma holds clamped *log* standard deviations; the static methods
            of this class exponentiate it before use.
            mu holds the component means.
    """

    def __init__(self, in_features, out_features, num_gaussians):
        super(MDN, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.num_gaussians = num_gaussians
        # One linear head per parameter group; the attribute names are part of
        # the module's state-dict keys.
        self.pi = nn.Sequential(
            nn.Linear(in_features, num_gaussians),
            nn.LogSoftmax(dim=1),
        )
        self.sigma = nn.Linear(in_features, out_features * num_gaussians)
        self.mu = nn.Linear(in_features, out_features * num_gaussians)

    def forward(self, minibatch):
        """Return ``(pi, sigma, mu)`` for ``minibatch`` (see the class docstring)."""
        log_pi = self.pi(minibatch)
        # Clamp the log-std to [log(sqrt(1e-3)), 50]. Earlier experiments noted
        # in this file used log(sqrt(1e-4)) as the lower bound and 1e8 or 1e4
        # as the upper bound.
        log_sigma = torch.clamp(
            self.sigma(minibatch), np.log(np.sqrt(1e-3)), 5e1)
        per_component = (-1, self.num_gaussians, self.out_features)
        log_sigma = log_sigma.view(*per_component)
        mu = self.mu(minibatch).view(*per_component)
        return log_pi, log_sigma, mu

    @staticmethod
    def gaussian_probability(sigma, x_mu, x):
        """Return the element-wise Gaussian log-density of ``x``.

        Arguments:
            sigma (BxGxO): log standard deviations of the components.
            x_mu (BxGxO): means of the components.
            x (BxO): a batch of targets; it is broadcast across the G
                components.
        Returns:
            log-densities (BxGxO): one value per sample, component and output
                dimension. A constant 1e-4 is added to the denominator
                ``2 * variance``.
        """
        target = x.unsqueeze(1).expand_as(sigma)
        variance = torch.exp(sigma) ** 2
        squared_error = (target - x_mu) ** 2
        log_norm = math.log(math.sqrt(2 * math.pi))
        return -squared_error / (2 * variance + 1e-4) - sigma - log_norm

    @staticmethod
    def mdn_loss(pi, sigma, mu, target):
        """Mean negative log-likelihood of ``target`` under the mixture.

        The log mixture weights are added to the per-dimension component
        log-densities, reduced over the component axis with ``log_sum_exp``,
        summed over the last axis, negated and averaged.
        """
        component_ll = MDN.gaussian_probability(sigma, mu, target)
        mixture_ll = log_sum_exp(pi.unsqueeze(2) + component_ll)
        per_sample_nll = -torch.sum(mixture_ll, dim=-1)
        return torch.mean(per_sample_nll)

    @staticmethod
    def sample(pi, sigma, mu):
        """Draw one sample per batch row from the mixture.

        A component index is drawn for every row from the categorical
        distribution ``exp(pi)``; standard-normal noise is then scaled by that
        component's ``exp(sigma)`` and shifted by its mean.
        """
        picks = list(Categorical(torch.exp(pi)).sample().data)
        std = torch.exp(sigma)
        draws = Variable(std.data.new(std.size(0), std.size(2)).normal_())
        for row, component in enumerate(picks):
            scaled = draws[row].mul(std[row, component])
            draws[row] = scaled.add(mu[row, component])
        return draws

In [None]:
def initialize_weights(model):
    """Apply Xavier-normal initialisation to the weight matrices inside ``model``.

    ``model`` and all of its sub-modules are visited:

    * ``nn.Linear``: ``weight`` is re-initialised.
    * ``nn.LSTM`` / ``nn.RNN`` / ``nn.GRU``: every parameter whose name starts
      with ``weight_`` is re-initialised, i.e. the input-hidden and
      hidden-hidden matrices of *all* layers, not only layer 0.

    Biases are left untouched. Walking the sub-modules means that passing a
    container (for example a module that merely holds several ``nn.Linear``
    layers) initialises the layers inside it instead of silently doing nothing.

    Arguments:
        model (nn.Module): module to initialise in place.
    """
    for module in model.modules():
        if isinstance(module, nn.Linear):
            nn.init.xavier_normal_(module.weight)
        elif isinstance(module, (nn.LSTM, nn.RNN, nn.GRU)):
            for name, param in module.named_parameters():
                # Recurrent weights are named weight_ih_l{k} / weight_hh_l{k};
                # bias_* vectors are 1-D and cannot take Xavier initialisation.
                if name.startswith('weight_'):
                    nn.init.xavier_normal_(param)

In [None]:
import torch.autograd as autograd

class SimpleRNN(torch.nn.Module):
    """Stacked LSTM whose last-step output feeds a mixture density head.

    Arguments:
        n_features (int): size of the feature vector at each time step.
        n_outputs (int): output dimensionality handed to the ``MDN`` head.
        hidden_dim (int): LSTM hidden size. Defaults to 128, the value that
            used to be hard-coded (an earlier note in this file says 32 was
            used for all the simulated data).
        num_layers (int): number of stacked LSTM layers (default 2).
        num_gaussians (int): number of mixture components in the head
            (default 5).
    """

    def __init__(self, n_features, n_outputs, hidden_dim=128, num_layers=2,
                 num_gaussians=5):
        super(SimpleRNN, self).__init__()
        # Attribute names `rnn` and `mdn` are kept: they are part of the
        # state-dict keys of previously saved models.
        self.rnn = torch.nn.LSTM(n_features, hidden_dim, num_layers=num_layers)
        self.mdn = MDN(hidden_dim, n_outputs, num_gaussians)

        initialize_weights(self.rnn)
        initialize_weights(self.mdn)

    def step(self, inputs, hidden=None, verbose=False):
        """Run the LSTM over a batch-first sequence and apply the head.

        Arguments:
            inputs (Tensor): batch-first input, indexed (batch, time, feature).
            hidden: optional initial LSTM state, passed through to the LSTM.
            verbose (bool): print intermediate shapes when true.

        Returns:
            (output, hidden): the head's output for the final time step and
            the LSTM's final state.
        """
        if verbose:
            print("Step 0:")
            print(inputs.shape)
        # The LSTM is built without batch_first, so move time to axis 0.
        inputs = inputs.permute([1, 0, 2])
        if verbose:
            print("Step 1:")
            print(inputs.shape)
        output, hidden = self.rnn(inputs, hidden)
        # Keep only the final time step. The result is already 2-D
        # (batch, hidden); it is deliberately NOT squeezed, because squeezing
        # removes the batch axis when the batch holds a single sequence and
        # the head's LogSoftmax(dim=1) then fails.
        output = output[-1, :, :]
        if verbose:
            print("Step 2:")
            print(output.shape)
            print(output)
        output = self.mdn(output)
        return output, hidden

    def forward(self, inputs, hidden=None, verbose=False):
        """Forward pass; see ``step`` for arguments and return value."""
        if verbose:
            print("inputs size: ", inputs.size())
        output, hidden = self.step(inputs, hidden, verbose=verbose)
        return output, hidden

In [None]:
# Number of vision bins; it determines the input width handed to the model
# below: 5 + vision_bins * 8 features per time step.
vision_bins = 16
#rnn = SimpleRNN(n_features=5 + (vision_bins * 8), n_outputs=4).cuda()
# NOTE(review): this freshly constructed GPU model is replaced by the
# `torch.load` result two statements below, so its weights are never used.
rnn = SimpleRNN(n_features=5 +(vision_bins * 8), n_outputs=4).cuda()
#rnn = torch.load('asd.pt')
# NOTE(review): `torch.load` unpickles the file, which can execute arbitrary
# code; only load checkpoints from a trusted source. Presumably the file holds
# a whole pickled module (the result is used as a model directly) — confirm.
rnn = torch.load('simplemodel-rnn.pt')
# Switch the loaded object to evaluation mode.
rnn.eval()

# T-SNE Simulation

In [None]:
import tables

# Number of leading sequences in `root.train` whose final-step label is inspected.
n_inspect = 5100
labels = []
# The context manager closes the HDF5 file even if reading a row raises.
with tables.open_file('pathtofile.h5', mode='r') as hdf5_file:
    train = hdf5_file.root.train
    print(train.shape)
    for i in range(0, n_inspect):
        batch_data = train[i, :, :]
        # Columns 0-3 of the last time step are compared against 1; the index
        # of every matching column is recorded (a row can contribute several).
        for l in np.where(batch_data[-1, 0:4] == 1)[0]:
            labels.append(l)

labels = np.asarray(labels)
print("lshape", labels.shape)

unique, counts = np.unique(labels, return_counts=True)
# Row layout noted by the original author (producer code, not part of this file):
# array = np.array([self.getX(), self.getY(), self.orientation, 0.333, 0, 0,
#                          0, dx, dy, np.cos(self.orientation), np.sin(self.orientation)])
#        self.history.append(np.concatenate([np.concatenate([self.label, array]), self.vision]))

print("labels: ", unique)
print("counts: ", counts)

In [None]:
# Histogram of whatever `labels` currently holds (matplotlib's default binning).
plt.hist(labels)

In [None]:
# Scratch check of the `np.where(... == 1)[0]` idiom used in the label loops:
# for the one-hot vector [0, 0, 1, 0] it yields the single index 2.
for l in np.where(np.array([0, 0, 1, 0]) == 1)[0]:
    print(l)

In [None]:
import tables 
import numpy as np
import torch
from IPython.display import clear_output

# Number of leading rows of `root.train` to push through the network.
n_samples = 100000
# Probability of keeping a row. np.random.uniform(0, 1) is always < 1, so
# p = 1 keeps every row; lower it to subsample.
p = 1
hidds = []
labels = []
labels_wall = []
# The context manager closes the HDF5 file even if a forward pass raises,
# which a trailing `hdf5_file.close()` does not guarantee.
with tables.open_file('pathtofile.h5', mode='r') as hdf5_file:
    # features; 0: age; 1 bee_id; rest: normal data
    train = hdf5_file.root.train
    print(train.shape)
    for i in range(0, n_samples):
        # Report progress every 1000 rows as a fraction of the rows requested.
        if i > 0 and i % 1000 == 0:
            clear_output()
            print('Progress report: ', i / n_samples)
        batch_data = train[i, :, :]
        label = batch_data[-1, :4]
        if p >= np.random.uniform(0.0, 1.0):
            # All time steps but the last, columns 10 onward, as a batch of one.
            batch_X = batch_data[:-1, 10:].astype(np.float32)[None, :, :]
            # Duplicate the sequence so the batch holds two identical rows;
            # only row 0 is read back below.
            batch_X = np.insert(batch_X,[1],batch_X[0],axis=0)
            batch_X = torch.from_numpy(batch_X).cuda()
            # Inference only: do not record an autograd graph.
            with torch.no_grad():
                Y_predicted, hidden = rnn(batch_X, verbose=False)
            # hidden[0] / hidden[1] are the LSTM's final hidden and cell states;
            # take index 1 on the first axis and batch row 0 of each.
            h_n, c_n = hidden
            hidds.append(np.concatenate((h_n.detach().cpu().numpy()[1, 0, :],
                                         c_n.detach().cpu().numpy()[1, 0, :])))
            active = np.where(label == 1)[0]
            if active.shape[0] > 1:
                # Several columns set: use the second one, shifted by one so that 0 means none.
                labels.append(active[1] + 1)
            elif active.shape[0] > 0:
                labels.append(active[0] + 1)
            else:
                labels.append(0)
            labels_wall.append(1 if np.sum(label) > 0 else 0)

In [None]:
# Bar positions for the two classes.
x = np.arange(2)
# Bin edges [0, 1, 2] give two bins, so this counts the 0 entries and the
# 1 entries of `labels_wall`.
money = np.histogram(labels_wall, bins= [0, 1, 2])[0]


# NOTE(review): `millions` is never used — the two lines that would install it
# as a tick formatter are commented out. It looks like leftover example code.
def millions(x, pos):
    'The two args are the value and tick position'
    return '$%1.1fM' % (x * 1e-6)


#formatter = FuncFormatter(millions)

fig, ax = plt.subplots()
#ax.yaxis.set_major_formatter(formatter)
# Bar 0 is labelled 'normal walk' and bar 1 'avoid wall'.
plt.bar(x, money)
plt.xticks(x, ('normal walk', 'avoid wall'))
plt.show()

In [None]:
# Convert the collected Python lists to NumPy arrays (rebinding the same
# names) and print the shapes of the feature matrix and the label vector.
hidds = np.asarray(hidds)
labels = np.asarray(labels)
labels_wall = np.asarray(labels_wall)
print(hidds.shape)
print(labels.shape)

In [None]:
#from sklearn.manifold import TSNE
# MulticoreTSNE is used in place of scikit-learn's TSNE:
# https://github.com/DmitryUlyanov/Multicore-TSNE
from MulticoreTSNE import MulticoreTSNE as TSNE

# Embed the rows of `hidds` in 2-D with perplexity 50.
# NOTE(review): no random seed is passed, so the embedding may differ between runs.
X_embedded = TSNE(n_components=2, perplexity = 50).fit_transform(hidds)
X_embedded.shape

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import matplotlib.patches as mpatches

# Two-entry palette used with the 0/1 values of `labels_wall`
# (0 -> blue, 1 -> red), matching the legend patches below.
colors = ['blue', 'red']


fig = plt.figure(figsize=(8,8))
# Scatter of the 2-D t-SNE embedding, coloured by `labels_wall`.
plt.scatter(X_embedded[:, 0], X_embedded[:, 1], c=np.round(labels_wall), cmap=matplotlib.colors.ListedColormap(colors), s=2)
# Manual legend entries, since a single scatter call provides no per-class handles.
red_patch = mpatches.Patch(color='red', label='avoid wall')
blue_patch = mpatches.Patch(color='blue', label='normal walk')
plt.legend(handles=[red_patch, blue_patch])


In [None]:
from matplotlib.ticker import NullFormatter
import time

# Perplexity acts as the effective number of neighbours in t-SNE, so 0 is not
# a meaningful setting and has been dropped from the sweep.
perplexities = [5, 30, 50, 100]
(fig, subplots) = plt.subplots(1, len(perplexities), figsize=(15, 8))


for i, perplexity in enumerate(perplexities):
    ax = subplots[i]

    # Time each embedding so the cost of every perplexity setting is visible.
    t0 = time.time()
    X_embedded = TSNE(n_components=2, perplexity = perplexity).fit_transform(hidds)
    t1 = time.time()
    print("circles, perplexity=%d in %.2g sec" % (perplexity, t1 - t0))
    ax.set_title("Perplexity=%d" % perplexity)
    # NOTE(review): `labels` takes values 0..4 but `colors` has only two
    # entries, so several classes share a colour. Confirm whether
    # `labels_wall` (0/1) was intended here, as in the single t-SNE plot.
    ax.scatter(X_embedded[:, 0], X_embedded[:, 1], s=5, c=np.round(labels), cmap=matplotlib.colors.ListedColormap(colors))
    ax.xaxis.set_major_formatter(NullFormatter())
    ax.yaxis.set_major_formatter(NullFormatter())
    ax.axis('tight')

# UMAP

In [None]:
import tables 
import numpy as np
import torch
from IPython.display import clear_output

# Number of leading rows of `root.train` to push through the network.
n_samples = 2500
# Probability of keeping a row. np.random.uniform(0, 1) is always < 1, so
# p = 1 keeps every row; lower it to subsample.
p = 1
hidds = []
labels = []
labels_wall = []
# The context manager closes the HDF5 file even if a forward pass raises,
# which a trailing `hdf5_file.close()` does not guarantee.
with tables.open_file('pathtofile.h5', mode='r') as hdf5_file:
    # features; 0: age; 1 bee_id; rest: normal data
    train = hdf5_file.root.train
    print(train.shape)
    for i in range(0, n_samples):
        # Report progress every 1000 rows as a fraction of the rows actually
        # requested (previously a fixed 100000 regardless of the loop length).
        if i > 0 and i % 1000 == 0:
            clear_output()
            print('Progress report: ', i / n_samples)
        batch_data = train[i, :, :]
        label = batch_data[-1, :4]
        if p >= np.random.uniform(0.0, 1.0):
            # All time steps but the last, columns 10 onward, as a batch of one.
            batch_X = batch_data[:-1, 10:].astype(np.float32)[None, :, :]
            # Duplicate the sequence so the batch holds two identical rows;
            # only row 0 is read back below.
            batch_X = np.insert(batch_X,[1],batch_X[0],axis=0)
            batch_X = torch.from_numpy(batch_X).cuda()
            # Inference only: do not record an autograd graph.
            with torch.no_grad():
                Y_predicted, hidden = rnn(batch_X, verbose=False)
            # hidden[0] / hidden[1] are the LSTM's final hidden and cell states;
            # take index 1 on the first axis and batch row 0 of each.
            h_n, c_n = hidden
            hidds.append(np.concatenate((h_n.detach().cpu().numpy()[1, 0, :],
                                         c_n.detach().cpu().numpy()[1, 0, :])))
            active = np.where(label == 1)[0]
            if active.shape[0] > 1:
                # Several columns set: use the second one, shifted by one so that 0 means none.
                labels.append(active[1] + 1)
            elif active.shape[0] > 0:
                labels.append(active[0] + 1)
            else:
                labels.append(0)
            labels_wall.append(1 if np.sum(label) > 0 else 0)

In [None]:
# Convert the collected Python lists to NumPy arrays (rebinding the same
# names) and print the shapes of the feature matrix and the label vector.
hidds = np.asarray(hidds)
labels = np.asarray(labels)
labels_wall = np.asarray(labels_wall)
print(hidds.shape)
print(labels.shape)

In [None]:
import umap

# UMAP reducer constructed with the library's default settings (no arguments given).
reducer = umap.UMAP()

In [None]:
#import warnings
#warnings.filterwarnings('ignore')

In [None]:
# Fit the reducer on `hidds` and keep the resulting embedding; the bare
# `.shape` expression is displayed as the cell output.
embedding = reducer.fit_transform(hidds)
embedding.shape

In [None]:
# Colour by the multi-class `labels` (values 0..4): the colour bar below is
# laid out for five classes and the title refers to the normal labels, so the
# binary `labels_wall` did not match either.
plt.scatter(embedding[:, 0], embedding[:, 1], c=labels, cmap='Spectral', s=2)
plt.gca().set_aspect('equal', 'datalim')
# Five discrete colour-bar cells centred on the integer class values 0..4.
plt.colorbar(boundaries=np.arange(6)-0.5).set_ticks(np.arange(5))
plt.title('UMAP projection with normal Labels', fontsize=24);

# UMAP WITH LABELS

In [None]:
import tables 
import numpy as np
import torch
from IPython.display import clear_output

# Number of leading rows of `root.train` to push through the network.
n_samples = 100000
# Probability of keeping a row. np.random.uniform(0, 1) is always < 1, so
# p = 1 keeps every row; lower it to subsample.
p = 1
hidds = []
labels = []
labels_wall = []
# The context manager closes the HDF5 file even if a forward pass raises,
# which a trailing `hdf5_file.close()` does not guarantee.
with tables.open_file('pathtofile.h5', mode='r') as hdf5_file:
    # features; 0: age; 1 bee_id; rest: normal data
    train = hdf5_file.root.train
    print(train.shape)
    for i in range(0, n_samples):
        # Report progress every 1000 rows as a fraction of the rows requested.
        if i > 0 and i % 1000 == 0:
            clear_output()
            print('Progress report: ', i / n_samples)
        batch_data = train[i, :, :]
        label = batch_data[-1, :4]
        if p >= np.random.uniform(0.0, 1.0):
            # All time steps but the last, columns 10 onward, as a batch of one.
            batch_X = batch_data[:-1, 10:].astype(np.float32)[None, :, :]
            # Duplicate the sequence so the batch holds two identical rows;
            # only row 0 is read back below.
            batch_X = np.insert(batch_X,[1],batch_X[0],axis=0)
            batch_X = torch.from_numpy(batch_X).cuda()
            # Inference only: do not record an autograd graph.
            with torch.no_grad():
                Y_predicted, hidden = rnn(batch_X, verbose=False)
            # hidden[0] / hidden[1] are the LSTM's final hidden and cell states;
            # take index 1 on the first axis and batch row 0 of each.
            h_n, c_n = hidden
            hidds.append(np.concatenate((h_n.detach().cpu().numpy()[1, 0, :],
                                         c_n.detach().cpu().numpy()[1, 0, :])))
            active = np.where(label == 1)[0]
            if active.shape[0] > 1:
                # Several columns set: use the second one, shifted by one so that 0 means none.
                labels.append(active[1] + 1)
            elif active.shape[0] > 0:
                labels.append(active[0] + 1)
            else:
                labels.append(0)
            labels_wall.append(1 if np.sum(label) > 0 else 0)

In [None]:
# Training matrix for the supervised UMAP fit, plus the label vectors as arrays.
data = np.asarray(hidds)
labels = np.asarray(labels)
labels_wall = np.asarray(labels_wall)
print(labels.shape)

In [None]:
import tables 
import numpy as np
import torch
from IPython.display import clear_output

# Number of leading rows of `root.train` to push through the network.
n_samples = 25000
# Probability of keeping a row. np.random.uniform(0, 1) is always < 1, so
# p = 1 keeps every row; lower it to subsample.
p = 1
hidds_test = []
labels_test = []
labels_wall_test = []
# The context manager closes the HDF5 file even if a forward pass raises,
# which a trailing `hdf5_file.close()` does not guarantee.
with tables.open_file('pathtofile.h5', mode='r') as hdf5_file:
    # features; 0: age; 1 bee_id; rest: normal data
    # NOTE(review): these "test" rows are the first rows of the same
    # `root.train` node of the same file that the training cell reads from
    # row 0, so they overlap the data the mapper is fitted on. Confirm whether
    # a separate held-out node or row range should be used instead.
    train = hdf5_file.root.train
    print(train.shape)
    for i in range(0, n_samples):
        # Report progress every 1000 rows as a fraction of the rows actually
        # requested (previously a fixed 100000 regardless of the loop length).
        if i > 0 and i % 1000 == 0:
            clear_output()
            print('Progress report: ', i / n_samples)
        batch_data = train[i, :, :]
        label = batch_data[-1, :4]
        if p >= np.random.uniform(0.0, 1.0):
            # All time steps but the last, columns 10 onward, as a batch of one.
            batch_X = batch_data[:-1, 10:].astype(np.float32)[None, :, :]
            # Duplicate the sequence so the batch holds two identical rows;
            # only row 0 is read back below.
            batch_X = np.insert(batch_X,[1],batch_X[0],axis=0)
            batch_X = torch.from_numpy(batch_X).cuda()
            # Inference only: do not record an autograd graph.
            with torch.no_grad():
                Y_predicted, hidden = rnn(batch_X, verbose=False)
            # hidden[0] / hidden[1] are the LSTM's final hidden and cell states;
            # take index 1 on the first axis and batch row 0 of each.
            h_n, c_n = hidden
            hidds_test.append(np.concatenate((h_n.detach().cpu().numpy()[1, 0, :],
                                              c_n.detach().cpu().numpy()[1, 0, :])))
            active = np.where(label == 1)[0]
            if active.shape[0] > 1:
                # Several columns set: use the second one, shifted by one so that 0 means none.
                labels_test.append(active[1] + 1)
            elif active.shape[0] > 0:
                labels_test.append(active[0] + 1)
            else:
                labels_test.append(0)
            labels_wall_test.append(1 if np.sum(label) > 0 else 0)

In [None]:
# Matrix to be projected with the fitted mapper, and its 0/1 labels.
# `labels_test` is left as a Python list here.
test_data = np.asarray(hidds_test)
labels_wall_test = np.asarray(labels_wall_test)
print(test_data.shape)
print(labels_wall_test.shape)

In [None]:
#mapper = umap.UMAP(min_dist=1, n_neighbors=10).fit(data, y=labels_neighs_digitzized_test)
# Fit UMAP on `data` with `labels_wall` passed as the target `y`, using 19 neighbours.
# NOTE(review): no random seed is passed, so the embedding may differ between runs.
mapper = umap.UMAP(n_neighbors=19).fit(data, y=labels_wall)

In [None]:
# Project `test_data` with the already fitted mapper (no refitting).
test_embedding = mapper.transform(test_data)

In [None]:
# Two-entry palette for the 0/1 wall labels: 0 -> blue, 1 -> red.
colors = ['blue', 'red']

In [None]:
# Scatter of the embedding stored on the fitted mapper, coloured by `labels_wall`.
# Relies on `mpatches` (matplotlib.patches) imported in the earlier t-SNE plotting cell.
fig, ax = plt.subplots(1, figsize=(14, 10))
plt.scatter(*mapper.embedding_.T, s=3, c=labels_wall, cmap=matplotlib.colors.ListedColormap(colors), alpha=1.0)
# Hide the axis ticks.
plt.setp(ax, xticks=[], yticks=[])
# Manual legend entries, since a single scatter call provides no per-class handles.
red_patch = mpatches.Patch(color='red', label='avoid wall')
blue_patch = mpatches.Patch(color='blue', label='normal walk')
plt.legend(handles=[red_patch, blue_patch])
plt.title('Train Data Embedded via UMAP');

In [None]:
# Scatter of the projected `test_embedding`, coloured by `labels_wall_test`.
# Relies on `mpatches` (matplotlib.patches) imported in the earlier t-SNE plotting cell.
fig, ax = plt.subplots(1, figsize=(14, 10))
plt.scatter(*test_embedding.T, s=3, c=labels_wall_test, cmap=matplotlib.colors.ListedColormap(colors), alpha=1.0)
# Hide the axis ticks.
plt.setp(ax, xticks=[], yticks=[])
# Manual legend entries, since a single scatter call provides no per-class handles.
red_patch = mpatches.Patch(color='red', label='avoid wall')
blue_patch = mpatches.Patch(color='blue', label='normal walk')
plt.legend(handles=[red_patch, blue_patch])
plt.title('Data Embedded via UMAP');