In [None]:
import tables 
import numpy as np
import torch
from IPython.display import clear_output

# Read per-sample labels from the HDF5 dataset.
# The try/finally guarantees the file handle is released even if the loop
# raises (e.g. on an interrupted kernel or an indexing error).
hdf5_file = tables.open_file('pathtofile', mode='r')
try:
    # features; 0: age; 1 bee_id; rest: normal data
    # NOTE(review): the comment above is from the original cell, but the code
    # below reads the age label from column 6 -- confirm the column layout.
    train = hdf5_file.root.train
    print(train.shape)

    # Never index past the end of the dataset.
    n_samples = min(100000, train.shape[0])

    hidds = []
    labels_age = []
    labels_velo = []
    for i in range(n_samples):
        # Refresh the progress line every 1000 samples.
        if i > 0 and i % 1000 == 0:
            clear_output()
            print('Progress report: ', i / n_samples)
        batch_data = train[i, :, :]
        # Labels are taken from the last row of each sample.
        labels_age.append(batch_data[-1, 6])
        # Euclidean norm of columns 11 and 12 (presumably a 2-D velocity).
        labels_velo.append(np.linalg.norm(batch_data[-1, 11:13]))
finally:
    hdf5_file.close()

In [None]:
# pyplot is imported here because this cell runs before the notebook's
# plotting-import cell; without it a fresh-kernel "Run All" stops with
# NameError: name 'plt' is not defined.
import matplotlib.pyplot as plt

# Histogram of the collected age labels using 60 bins.
plt.hist(labels_age, 60)
#plt.yscale('log')


In [None]:
# Histogram of the collected `labels_velo` values using 20 bins.
plt.hist(labels_velo, bins=20)

In [None]:
%matplotlib inline

import matplotlib
import numpy as np
import matplotlib.pyplot as plt

In [None]:
def iterate_data(data, batchsize, shuffle=False):
    """Yield full mini-batches taken along the first axis of ``data``.

    Arguments:
        data: 3-D array-like exposing ``shape`` and ``data[rows, :, :]``
            indexing.
        batchsize (int): number of rows per yielded batch.
        shuffle (bool): when true, rows are visited in a random order drawn
            once (via ``np.random.shuffle``) when iteration starts.

    Yields:
        ``data[rows, :, :]`` for each consecutive group of ``batchsize`` rows.
        A trailing group smaller than ``batchsize`` is not yielded.
    """
    n_rows = data.shape[0]
    order = None
    if shuffle:
        order = np.arange(n_rows)
        np.random.shuffle(order)

    for lo in range(0, n_rows - batchsize + 1, batchsize):
        hi = lo + batchsize
        # Shuffled mode picks rows through the permutation; otherwise a plain
        # contiguous slice is used.
        rows = order[lo:hi] if shuffle else slice(lo, hi)
        yield data[rows, :, :]

In [None]:
import torch
import torch.nn as nn
from torch.nn.utils import weight_norm
from torch.autograd import Variable
from torch.distributions import Categorical
import numpy as np
import math

def log_sum_exp(x, dim=1):
    """Compute ``log(sum(exp(x)))`` along ``dim``, keeping that dimension.

    The per-slice maximum is subtracted before exponentiating and added back
    afterwards, so large entries do not overflow.

    Arguments:
        x (Tensor): input tensor.
        dim (int): dimension to reduce over (kept with size 1 in the result).
    Returns:
        Tensor shaped like ``x`` except that ``dim`` has size 1.
    """
    peak = x.max(dim, keepdim=True)[0]
    shifted = x - peak.expand(*x.size())
    return peak + torch.exp(shifted).sum(dim=dim, keepdim=True).log()

class MDN(nn.Module):
    """Mixture density network head.

    Maps an input vector to the parameters of a mixture of Gaussians with
    diagonal covariance. All quantities are kept in log space where noted.

    Arguments:
        in_features (int): the number of dimensions in the input
        out_features (int): the number of dimensions in the output (O)
        num_gaussians (int): the number of mixture components (G)
    Input:
        minibatch (BxD): B is the batch size and D is the number of input
            dimensions.
    Output:
        (pi, sigma, mu) (BxG, BxGxO, BxGxO):
            pi    -- LOG mixture weights (output of a LogSoftmax over G).
            sigma -- LOG standard deviations, clamped to
                     [log(sqrt(1e-3)), 50]; consumers apply ``exp``.
            mu    -- component means.
    """

    def __init__(self, in_features, out_features, num_gaussians):
        super(MDN, self).__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.num_gaussians = num_gaussians
        n_component_params = out_features * num_gaussians
        self.pi = nn.Sequential(
            nn.Linear(in_features, num_gaussians), nn.LogSoftmax(dim=1))
        self.sigma = nn.Linear(in_features, n_component_params)
        self.mu = nn.Linear(in_features, n_component_params)

    def forward(self, minibatch):
        """Return ``(log_pi, log_sigma, mu)`` for a BxD minibatch."""
        component_shape = (-1, self.num_gaussians, self.out_features)
        log_pi = self.pi(minibatch)
        # Clamp history kept from the original notebook:
        #   original: clamp(sigma, log(sqrt(1e-4)), 1e8)
        #   try 3:    clamp(sigma, log(sqrt(1e-3)), 1e4)
        # The bounds below are the ones marked as working.
        log_sigma = torch.clamp(
            self.sigma(minibatch), np.log(np.sqrt(1e-3)), 5e1)
        means = self.mu(minibatch)
        return (log_pi,
                log_sigma.view(*component_shape),
                means.view(*component_shape))

    @staticmethod
    def gaussian_probability(sigma, x_mu, x):
        """Per-dimension Gaussian LOG-density of ``x`` under every component.

        Arguments:
            sigma (BxGxO): log standard deviations of the components.
            x_mu (BxGxO): means of the components.
            x (BxO): batch of targets; broadcast across the G components.
        Returns:
            log-densities (BxGxO): one value per sample, component and output
                dimension. A constant 1e-4 is added to the denominator
                ``2 * var``.
        """
        target = x.unsqueeze(1).expand_as(sigma)
        variance = torch.exp(sigma) ** 2
        squared_error = (target - x_mu) ** 2
        log_normaliser = math.log(math.sqrt(2 * math.pi))
        return -squared_error / (2 * variance + 1e-4) - sigma - log_normaliser

    @staticmethod
    def mdn_loss(pi, sigma, mu, target):
        """Mean negative log-likelihood of ``target`` under the mixture.

        The log mixture weights are added to the per-dimension component
        log-densities, reduced with ``log_sum_exp``, summed over the last
        dimension, negated and averaged.
        """
        component_ll = MDN.gaussian_probability(sigma, mu, target)
        mixture_ll = log_sum_exp(pi[:, :, None] + component_ll)
        per_sample_nll = -torch.sum(mixture_ll, dim=-1)
        return torch.mean(per_sample_nll)

    @staticmethod
    def sample(pi, sigma, mu):
        """Draw one sample per batch row from the mixture.

        A component index is drawn per row from ``exp(pi)``; standard normal
        noise is then scaled by that component's ``exp(sigma)`` and shifted by
        its ``mu``. Returns a BxO tensor.
        """
        chosen = Categorical(torch.exp(pi)).sample().data
        std = torch.exp(sigma)
        draw = Variable(std.data.new(std.size(0), std.size(2)).normal_())
        for row, component in enumerate(chosen):
            draw[row] = draw[row].mul(std[row, component]).add(mu[row, component])
        return draw

In [None]:
def initialize_weights(model):
    """Apply Xavier-normal initialisation to the weight matrices of ``model``.

    * ``nn.Linear``: the ``weight`` matrix is re-initialised.
    * ``nn.LSTM`` / ``nn.RNN`` / ``nn.GRU``: every input-hidden and
      hidden-hidden weight matrix of every layer (and direction) is
      re-initialised, not just layer 0.
    * Any other ``nn.Module``: the function recurses into its children, so
      passing a container (e.g. a module that holds several ``nn.Linear``
      layers) initialises the layers inside it instead of silently doing
      nothing.

    Biases are left untouched. Objects that are not modules are ignored.

    Arguments:
        model: the module to initialise in place.
    """
    if isinstance(model, nn.Linear):
        nn.init.xavier_normal_(model.weight.data)
    elif isinstance(model, (nn.LSTM, nn.RNN, nn.GRU)):
        # Recurrent weights are named weight_ih_l<k> / weight_hh_l<k>
        # (with a _reverse suffix for the backward direction).
        for name, param in model.named_parameters():
            if name.startswith(('weight_ih', 'weight_hh')):
                nn.init.xavier_normal_(param.data)
    elif isinstance(model, nn.Module):
        for child in model.children():
            initialize_weights(child)

In [None]:
import torch.autograd as autograd

class SimpleRNN(torch.nn.Module):
    """LSTM encoder followed by a mixture-density (MDN) output head.

    Arguments:
        n_features (int): number of input features per time step.
        n_outputs (int): number of output dimensions of the MDN head.
        hidden_dim (int): LSTM hidden size. Defaults to 128 (the original
            notebook notes that 32 was used for all the simulated data).
        num_layers (int): number of stacked LSTM layers. Defaults to 2.
        num_gaussians (int): number of mixture components. Defaults to 5.
    """

    def __init__(self, n_features, n_outputs, hidden_dim=128, num_layers=2,
                 num_gaussians=5):
        super(SimpleRNN, self).__init__()
        self.rnn = torch.nn.LSTM(n_features, hidden_dim, num_layers=num_layers)
        self.mdn = MDN(hidden_dim, n_outputs, num_gaussians)

        initialize_weights(self.rnn)
        # Module.apply visits every sub-module, so the Linear layers inside
        # the MDN head are actually reached (a direct call on the container
        # would skip them when initialize_weights only matches leaf types).
        self.mdn.apply(initialize_weights)

    def step(self, inputs, hidden=None, verbose=False):
        """Run the LSTM over a batch-first sequence and apply the MDN head.

        Arguments:
            inputs: tensor laid out as (batch, time, features); it is permuted
                to the time-first layout the LSTM is constructed with.
            hidden: optional initial LSTM state passed through to ``nn.LSTM``.
            verbose (bool): print intermediate shapes.
        Returns:
            (mdn_output, hidden): the MDN head's output for the last time step
            and the LSTM's final state.
        """
        if verbose:
            print("Step 0:")
            print(inputs.shape)
        inputs = inputs.permute([1, 0, 2])
        if verbose:
            print("Step 1:")
            print(inputs.shape)
        output, hidden = self.rnn(inputs, hidden)
        # Keep only the last time step -> (batch, hidden_dim). No squeeze():
        # it would also drop the batch axis for a batch of one and break the
        # head's LogSoftmax(dim=1).
        output = output[-1, :, :]
        if verbose:
            print("Step 3:")
            print(output.shape)
            print("Step 5:")
            print(output.shape)
            print(output)
        output = self.mdn(output)
        return output, hidden

    def forward(self, inputs, hidden=None, verbose=False):
        """Forward pass; see :meth:`step` for arguments and return value."""
        if verbose:
            print("inputs size: ", inputs.size())
        output, hidden = self.step(inputs, hidden, verbose=verbose)
        return output, hidden

In [None]:
vision_bins = 16
# The checkpoint below was saved as a whole pickled model, so no fresh
# SimpleRNN needs to be built first (the previous cell content constructed
# SimpleRNN(n_features=5 + vision_bins * 8, n_outputs=4).cuda() and then
# immediately overwrote it with the loaded object).
#
# SECURITY NOTE: torch.load unpickles arbitrary Python objects; only load
# checkpoint files from a trusted source.
# NOTE(review): recent PyTorch releases default to weights_only=True, which
# rejects whole-model pickles -- confirm the PyTorch version this runs on.
#rnn = torch.load('asd.pt')
rnn = torch.load('bee40-rnn.pt')
rnn.eval()

# T-SNE Simulation

In [None]:
import tables 
import numpy as np
import torch
from IPython.display import clear_output

# Run the trained network over the dataset and collect, per sample, the final
# LSTM state together with the labels used later to colour the embeddings.
# try/finally guarantees the HDF5 handle is released if the loop raises.
hdf5_file = tables.open_file('pathtofile.h5', mode='r')
try:
    # features; 0: age; 1 bee_id; rest: normal data
    # NOTE(review): the comment above is from the original cell, but the code
    # below reads the age label from column 6 -- confirm the column layout.
    train = hdf5_file.root.train
    print(train.shape)

    # Never index past the end of the dataset.
    n_samples = min(100000, train.shape[0])

    hidds = []
    labels_age = []
    labels_velo = []
    # Keep-probability per sample; with p = 1 every sample is kept because
    # np.random.uniform(0.0, 1.0) draws from [0, 1).
    p = 1
    # Inference only: no autograd graph is needed for the forward passes.
    with torch.no_grad():
        for i in range(n_samples):
            # Refresh the progress line every 1000 samples.
            if i > 0 and i % 1000 == 0:
                clear_output()
                print('Progress report: ', i / n_samples)
            batch_data = train[i, :, :]
            if p >= np.random.uniform(0.0, 1.0):
                # All rows but the last, columns 10 onward, as a batch of one.
                batch_X = batch_data[:-1, 10:].astype(np.float32)[None, :, :]
                # Duplicate the sample so the batch holds two entries; the
                # network's step() squeezes its output, which would drop the
                # batch axis for a batch of one.
                batch_X = np.insert(batch_X, [1], batch_X[0], axis=0)
                batch_X = torch.from_numpy(batch_X).cuda()
                Y_predicted, hidden = rnn(batch_X, verbose=False)
                h_n, c_n = hidden
                # Layer index 1, batch entry 0 of both state tensors.
                hidds.append(np.concatenate((
                    h_n.detach().cpu().numpy()[1, 0, :],
                    c_n.detach().cpu().numpy()[1, 0, :])))
                labels_age.append(batch_data[-1, 6])
                # Euclidean norm of columns 11 and 12 (presumably a 2-D velocity).
                labels_velo.append(np.linalg.norm(batch_data[-1, 11:13]))
finally:
    hdf5_file.close()

In [None]:
# Show the first 15 columns of the last row of `batch_data`, i.e. of the
# sample most recently read by the extraction loop in the previous cell.
print(batch_data[-1, :15])

In [None]:
# Histogram of `labels_age` gathered by the extraction loop, using 60 bins.
plt.hist(labels_age, bins=60)
#plt.yscale('log')


In [None]:
# Histogram of `labels_velo` gathered by the extraction loop, using 20 bins.
plt.hist(labels_velo, bins=20)

In [None]:
# Turn the accumulated Python lists into NumPy arrays for the embedding steps.
hidds, labels_age, labels_velo = (
    np.asarray(values) for values in (hidds, labels_age, labels_velo))
for collected in (hidds, labels_age):
    print(collected.shape)

In [None]:
# Multicore t-SNE implementation (drop-in for sklearn.manifold.TSNE):
# https://github.com/DmitryUlyanov/Multicore-TSNE
#from sklearn.manifold import TSNE
from MulticoreTSNE import MulticoreTSNE as TSNE

# Embed the collected hidden states into two dimensions.
tsne_model = TSNE(n_components=2, perplexity=50)
X_embedded = tsne_model.fit_transform(hidds)
X_embedded.shape

In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np





# t-SNE embedding coloured by `labels_age` rounded to the nearest integer
# (marker size 5).
fig = plt.figure(figsize=(8, 8))
tsne_x, tsne_y = X_embedded[:, 0], X_embedded[:, 1]
points = plt.scatter(tsne_x, tsne_y, s=5, c=np.round(labels_age),
                     cmap='nipy_spectral')
cb = plt.colorbar(points)


In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np





# Same t-SNE plot coloured by rounded `labels_age`, with very small markers
# (size 0.03).
fig = plt.figure(figsize=(8, 8))
tsne_x, tsne_y = X_embedded[:, 0], X_embedded[:, 1]
points = plt.scatter(tsne_x, tsne_y, s=0.03, c=np.round(labels_age),
                     cmap='nipy_spectral')
cb = plt.colorbar(points)


In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np


# Palette kept from the original cell; it is not used by the plot below.
colors = ['black', 'blue', 'red', 'green']

# t-SNE embedding coloured by `labels_velo` rounded to the nearest integer
# (marker size 5).
fig = plt.figure(figsize=(8, 8))
tsne_x, tsne_y = X_embedded[:, 0], X_embedded[:, 1]
points = plt.scatter(tsne_x, tsne_y, s=5, c=np.round(labels_velo),
                     cmap='nipy_spectral')
cb = plt.colorbar(points)


In [None]:
import matplotlib
import matplotlib.pyplot as plt
import numpy as np


# Palette kept from the original cell; it is not used by the plot below.
colors = ['black', 'blue', 'red', 'green']

# Same t-SNE plot coloured by rounded `labels_velo`, with very small markers
# (size 0.03).
fig = plt.figure(figsize=(8, 8))
tsne_x, tsne_y = X_embedded[:, 0], X_embedded[:, 1]
points = plt.scatter(tsne_x, tsne_y, s=0.03, c=np.round(labels_velo),
                     cmap='nipy_spectral')
cb = plt.colorbar(points)


In [None]:
# Two side-by-side scatter plots; the right one is colour-mapped and gets a
# small horizontal colorbar placed inside its axes.
#
# NOTE(review): this cell cannot run in notebook order as written. `gridspec`,
# `x`, `y` and `z` are not defined by any cell visible in this notebook, and
# `inset_axes` is only imported by a later cell. It looks like a pasted
# template for the inset-colorbar figures further down -- confirm whether it
# should be bound to real data or removed.
fig = plt.figure()
gs = gridspec.GridSpec(1, 2)

# Left panel: plain scatter.
ax0 = plt.subplot(gs[0, 0])
plt.scatter(x, y, s=20)

# Right panel: scatter coloured by `z`.
ax1 = plt.subplot(gs[0, 1])
cm = plt.cm.get_cmap('RdYlBu_r')
plt.scatter(x, y, s=20 ,c=z, cmap=cm)

fig.tight_layout()

# Inset axes (30% wide, 3% high, loc=3) used as the colorbar's axes.
cbaxes = inset_axes(ax1, width="30%", height="3%", loc=3) 
plt.colorbar(cax=cbaxes, ticks=[0.,1], orientation='horizontal')



In [None]:
from matplotlib.ticker import NullFormatter
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
#perplexities = [0, 5, 30, 50, 100]
# Side-by-side t-SNE plots without tick labels:
# left coloured by rounded `labels_age`, right by rounded `labels_velo`.
fig, subplots = plt.subplots(1, 2, figsize=(21, 7))
fig.subplots_adjust(wspace=0.02, hspace=0.02)
tsne_x, tsne_y = X_embedded[:, 0], X_embedded[:, 1]

# Left panel.
ax = subplots[0]
ax.scatter(tsne_x, tsne_y, s=0.03, c=np.round(labels_age), cmap='nipy_spectral')
#cbaxes = inset_axes(ax, width="30%", height="3%", loc=3)
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_formatter(NullFormatter())
ax.axis('tight')

# Right panel.
ax = subplots[1]
ax.scatter(tsne_x, tsne_y, s=0.03, c=np.round(labels_velo), cmap='nipy_spectral')
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_formatter(NullFormatter())
ax.axis('tight')





In [None]:
from matplotlib.ticker import NullFormatter
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
#perplexities = [0, 5, 30, 50, 100]
# Side-by-side t-SNE plots, each with a small horizontal colorbar drawn in an
# inset axes: left coloured by rounded `labels_age`, right by rounded
# `labels_velo`.
#
# The scatter artists are passed to fig.colorbar explicitly: Axes-level
# scatter calls do not register a pyplot "current image", so a bare
# plt.colorbar(cax=...) has no mappable to draw. The right panel previously
# created its inset axes but never drew a colorbar into it.
(fig, subplots) = plt.subplots(1, 2, figsize=(21, 7))

fig.subplots_adjust(wspace=0.02, hspace=0.02)

ax = subplots[0]
points = ax.scatter(X_embedded[:, 0], X_embedded[:, 1], s=0.03,
                    c=np.round(labels_age), cmap='nipy_spectral')
cbaxes = inset_axes(ax, width="30%", height="3%", loc=3)
fig.colorbar(points, cax=cbaxes, orientation='horizontal')
ax.xaxis.set_major_formatter(NullFormatter())
ax.yaxis.set_major_formatter(NullFormatter())
ax.axis('tight')

ax = subplots[1]
points = ax.scatter(X_embedded[:, 0], X_embedded[:, 1], s=0.03,
                    c=np.round(labels_velo), cmap='nipy_spectral')
cbaxes = inset_axes(ax, width="30%", height="3%", loc=3)
fig.colorbar(points, cax=cbaxes, orientation='horizontal')
ax.xaxis.set_major_formatter(NullFormatter())
ax.yaxis.set_major_formatter(NullFormatter())
ax.axis('tight')





# UMAP

In [None]:
import tables 
import numpy as np
import torch
from IPython.display import clear_output

# Collect hidden states and labels for the set that is later projected with
# the fitted UMAP model. try/finally guarantees the HDF5 handle is released.
# NOTE(review): this reads `root.train` from the same path string as the
# t-SNE extraction cell, so these "test" samples come from the same node --
# confirm whether a separate held-out node/file was intended.
hdf5_file = tables.open_file('pathtofile.h5', mode='r')
try:
    # features; 0: age; 1 bee_id; rest: normal data
    # NOTE(review): the comment above is from the original cell, but the code
    # below reads the age label from column 6 -- confirm the column layout.
    train = hdf5_file.root.train
    print(train.shape)

    # Never index past the end of the dataset.
    n_samples = min(70000, train.shape[0])

    hidds_test = []
    labels_age_test = []
    labels_velo_test = []
    # Keep-probability per sample; with p = 1 every sample is kept because
    # np.random.uniform(0.0, 1.0) draws from [0, 1).
    p = 1
    # Inference only: no autograd graph is needed for the forward passes.
    with torch.no_grad():
        for i in range(n_samples):
            # Refresh the progress lines every 1000 samples; the fraction is
            # relative to the number of samples actually processed.
            if i > 0 and i % 1000 == 0:
                clear_output()
                print(train.shape)
                print('Progress report: ', i / n_samples)
            batch_data = train[i, :, :]
            if p >= np.random.uniform(0.0, 1.0):
                # All rows but the last, columns 10 onward, as a batch of one.
                batch_X = batch_data[:-1, 10:].astype(np.float32)[None, :, :]
                # Duplicate the sample so the batch holds two entries; the
                # network's step() squeezes its output, which would drop the
                # batch axis for a batch of one.
                batch_X = np.insert(batch_X, [1], batch_X[0], axis=0)
                batch_X = torch.from_numpy(batch_X).cuda()
                Y_predicted, hidden = rnn(batch_X, verbose=False)
                h_n, c_n = hidden
                # Layer index 1, batch entry 0 of both state tensors.
                hidds_test.append(np.concatenate((
                    h_n.detach().cpu().numpy()[1, 0, :],
                    c_n.detach().cpu().numpy()[1, 0, :])))
                labels_age_test.append(batch_data[-1, 6])
                # Euclidean norm of columns 11 and 12 (presumably a 2-D velocity).
                labels_velo_test.append(np.linalg.norm(batch_data[-1, 11:13]))
finally:
    hdf5_file.close()

In [None]:
# Turn the accumulated test lists into NumPy arrays.
test_data, labels_age_test, labels_velo_test = (
    np.asarray(values)
    for values in (hidds_test, labels_age_test, labels_velo_test))
for collected in (test_data, labels_age_test):
    print(collected.shape)

In [None]:
# Histogram of `labels_age_test` using 60 bins.
plt.hist(labels_age_test, bins=60)
#plt.yscale('log')


In [None]:
# Histogram of `labels_velo_test` using 20 bins.
plt.hist(labels_velo_test, bins=20)

In [None]:
import warnings
warnings.filterwarnings('ignore')

In [None]:
import umap

In [None]:
# Fit a supervised UMAP model on the collected hidden states, passing
# `labels_age` as the target.
# NOTE(review): `labels_age` is passed as-is; confirm UMAP's target metric
# setting matches how these labels should be treated.
#mapper = umap.UMAP(min_dist=1, n_neighbors=10).fit(data, y=labels_neighs_digitzized_test)
mapper = umap.UMAP(n_neighbors=89, min_dist=0.5)
mapper = mapper.fit(hidds, y=labels_age)

In [None]:
# Project the test hidden states with the UMAP model fitted in the previous cell.
test_embedding = mapper.transform(test_data)

In [None]:
# Fitted (train) UMAP embedding coloured by `labels_age`; ticks removed.
fig, ax = plt.subplots(1, figsize=(10, 10))
umap_x, umap_y = mapper.embedding_.T
ax.scatter(umap_x, umap_y, s=1, c=labels_age, cmap='nipy_spectral', alpha=1.0)
ax.set(xticks=[], yticks=[])
#cb = plt.colorbar()
ax.set_title('Train data embedded via UMAP');

In [None]:
# Transformed test embedding coloured by `labels_age_test`; ticks removed.
fig, ax = plt.subplots(1, figsize=(10, 10))
umap_x, umap_y = test_embedding.T
ax.scatter(umap_x, umap_y, s=1, c=labels_age_test, cmap='nipy_spectral', alpha=1.0)
ax.set(xticks=[], yticks=[])
#cb = plt.colorbar()
ax.set_title('Test data embedded via UMAP');

In [None]:
from matplotlib.ticker import NullFormatter

# Kept from the original cell; not used by the plots below.
perplexities = [0, 5, 30, 50, 100]

# Train (left) and test (right) UMAP embeddings coloured by age, marker size 1,
# tick labels hidden.
fig, subplots = plt.subplots(1, 2, figsize=(20, 10))
fig.subplots_adjust(wspace=0.02, hspace=0.02)

# Left panel: fitted embedding.
ax = subplots[0]
umap_x, umap_y = mapper.embedding_.T
ax.scatter(umap_x, umap_y, s=1, c=labels_age, cmap='nipy_spectral', alpha=1.0)
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_formatter(NullFormatter())
ax.axis('tight')

# Right panel: transformed test data.
ax = subplots[1]
umap_x, umap_y = test_embedding.T
ax.scatter(umap_x, umap_y, s=1, c=labels_age_test, cmap='nipy_spectral', alpha=1.0)
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_formatter(NullFormatter())
ax.axis('tight')



In [None]:
from matplotlib.ticker import NullFormatter

# Kept from the original cell; not used by the plots below.
perplexities = [0, 5, 30, 50, 100]

# Train (left) and test (right) UMAP embeddings coloured by age, marker size
# 0.03, tick labels hidden.
fig, subplots = plt.subplots(1, 2, figsize=(20, 10))
fig.subplots_adjust(wspace=0.02, hspace=0.02)

# Left panel: fitted embedding.
ax = subplots[0]
umap_x, umap_y = mapper.embedding_.T
ax.scatter(umap_x, umap_y, s=0.03, c=labels_age, cmap='nipy_spectral', alpha=1.0)
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_formatter(NullFormatter())
ax.axis('tight')

# Right panel: transformed test data.
ax = subplots[1]
umap_x, umap_y = test_embedding.T
ax.scatter(umap_x, umap_y, s=0.03, c=labels_age_test, cmap='nipy_spectral', alpha=1.0)
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_formatter(NullFormatter())
ax.axis('tight')



In [None]:
# Second supervised UMAP fit on the hidden states with `labels_age` as target.
# NOTE(review): the arguments are identical to the earlier `mapper` fit --
# confirm whether a different target (e.g. `labels_velo`) was intended.
#mapper = umap.UMAP(n_neighbors=10).fit(data, y=labels_neighs_digitzized_test)
mapper_cool = umap.UMAP(n_neighbors=89, min_dist=0.5)
mapper_cool = mapper_cool.fit(hidds, y=labels_age)

In [None]:
# Project the test hidden states with the second fitted UMAP model (`mapper_cool`).
test_embedding_cool = mapper_cool.transform(test_data)

In [None]:
# `mapper_cool` fitted embedding coloured by `labels_velo`, with a colorbar.
fig, ax = plt.subplots(1, figsize=(14, 10))
umap_x, umap_y = mapper_cool.embedding_.T
points = ax.scatter(umap_x, umap_y, s=1, c=labels_velo, cmap='nipy_spectral',
                    alpha=1.0)
ax.set(xticks=[], yticks=[])
cbar = fig.colorbar(points, ax=ax)
#cbar = plt.colorbar(boundaries=np.arange(11)-0.5)
#cbar.set_ticks(np.arange(10))
ax.set_title('Train Data Embedded via UMAP');

In [None]:
# `mapper_cool` test embedding coloured by `labels_velo_test`, with a colorbar.
fig, ax = plt.subplots(1, figsize=(14, 10))
umap_x, umap_y = test_embedding_cool.T
points = ax.scatter(umap_x, umap_y, s=1, c=labels_velo_test,
                    cmap='nipy_spectral', alpha=1.0)
ax.set(xticks=[], yticks=[])
cbar = fig.colorbar(points, ax=ax)
ax.set_title('Data Embedded via UMAP');

In [None]:
from matplotlib.ticker import NullFormatter

# `mapper_cool` train (left) and test (right) embeddings coloured by the
# velocity labels, marker size 0.03, tick labels hidden.
fig, subplots = plt.subplots(1, 2, figsize=(20, 10))
fig.subplots_adjust(wspace=0.02, hspace=0.02)

# Left panel: fitted embedding.
ax = subplots[0]
umap_x, umap_y = mapper_cool.embedding_.T
ax.scatter(umap_x, umap_y, s=0.03, c=labels_velo, cmap='nipy_spectral', alpha=1.0)
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_formatter(NullFormatter())
ax.axis('tight')

# Right panel: transformed test data.
ax = subplots[1]
umap_x, umap_y = test_embedding_cool.T
ax.scatter(umap_x, umap_y, s=0.03, c=labels_velo_test, cmap='nipy_spectral', alpha=1.0)
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_formatter(NullFormatter())
ax.axis('tight')



In [None]:
# `mapper_cool` fitted embedding coloured by `labels_age`, with a colorbar.
fig, ax = plt.subplots(1, figsize=(14, 10))
umap_x, umap_y = mapper_cool.embedding_.T
points = ax.scatter(umap_x, umap_y, s=0.03, c=labels_age, cmap='nipy_spectral',
                    alpha=1.0)
ax.set(xticks=[], yticks=[])
cbar = fig.colorbar(points, ax=ax)
ax.set_title('Train Data Embedded via UMAP');

In [None]:
# `mapper_cool` test embedding coloured by `labels_age_test`, with a colorbar.
fig, ax = plt.subplots(1, figsize=(14, 10))
umap_x, umap_y = test_embedding_cool.T
points = ax.scatter(umap_x, umap_y, s=0.03, c=labels_age_test,
                    cmap='nipy_spectral', alpha=1.0)
ax.set(xticks=[], yticks=[])
cbar = fig.colorbar(points, ax=ax)
ax.set_title('Test Data Embedded via UMAP');

In [None]:
from matplotlib.ticker import NullFormatter

# Test embeddings side by side, tick labels hidden:
# left  -- `test_embedding` coloured by `labels_age_test`
# right -- `test_embedding_cool` coloured by `labels_velo_test`
fig, subplots = plt.subplots(1, 2, figsize=(20, 10))
fig.subplots_adjust(wspace=0.02, hspace=0.02)

# Left panel.
ax = subplots[0]
umap_x, umap_y = test_embedding.T
ax.scatter(umap_x, umap_y, s=0.03, c=labels_age_test, cmap='nipy_spectral', alpha=1.0)
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_formatter(NullFormatter())
ax.axis('tight')

# Right panel.
ax = subplots[1]
umap_x, umap_y = test_embedding_cool.T
ax.scatter(umap_x, umap_y, s=0.03, c=labels_velo_test, cmap='nipy_spectral', alpha=1.0)
for axis in (ax.xaxis, ax.yaxis):
    axis.set_major_formatter(NullFormatter())
ax.axis('tight')

