In [1]:
from bhtsne import tsne
import numpy as np
import plotly.plotly as plt
import plotly.graph_objs as go
import gc

In [2]:
# Memory-map every split read-only so nothing is copied into RAM until the
# concatenation below materialises the combined arrays.
X_train = np.load('../data/X_train.npy', mmap_mode='r')
X_val = np.load('../data/X_val.npy', mmap_mode='r')
X_test = np.load('../data/X_test.npy', mmap_mode='r')

y_train = np.load('../data/y_train.npy', mmap_mode='r')
y_val = np.load('../data/y_val.npy', mmap_mode='r')
y_test = np.load('../data/y_test.npy', mmap_mode='r')

# Stack train / val / test along the first axis, in the same order for the
# features and the labels so row i of X still corresponds to y[i].
feature_splits = (X_train, X_val, X_test)
label_splits = (y_train, y_val, y_test)
X = np.concatenate(feature_splits, axis=0)
y = np.concatenate(label_splits, axis=0)

In [3]:
# Draw the subset of rows that will be embedded.
#
# The original call used np.random.choice's default replace=True, so the
# sample could contain the same row several times, and it was unseeded, so the
# set of labels in the sample changed on every run. Sampling without
# replacement from a seeded generator gives distinct rows and a repeatable
# figure.
SAMPLE_SIZE = 10000
SAMPLE_SEED = 0

n_rows = X.shape[0]
rng = np.random.RandomState(SAMPLE_SEED)
# Cap at n_rows: choice(..., replace=False) raises if asked for more rows than
# exist. The sample is therefore smaller than SAMPLE_SIZE on small inputs.
mask = rng.choice(n_rows, size=min(SAMPLE_SIZE, n_rows), replace=False)

X_sub = X[mask, :]
y_sub = y[mask]

# The full concatenated arrays are no longer needed; release them.
del X, y
gc.collect();

In [4]:
# Run bhtsne's tsne on the sampled rows. Later cells read columns 0 and 1 of
# the result as the x / y plot coordinates.
# NOTE(review): no seed is passed here, so the layout presumably differs
# between runs -- confirm against the bhtsne API if repeatability matters.
embedded = tsne(X_sub)

In [5]:
# Sorted, de-duplicated class ids that actually occur in the sample; the
# position of an id in this array is later used to pick its plot colour.
labels = np.unique(y_sub)

In [6]:
# Display the class ids present in the sample.
labels

array([ 0,  5,  9, 10, 11, 15, 17, 18, 20, 21])

In [7]:
# Human-readable legend text for each class id, written as
# "<attack name> (<attack category>)". Insertion order is kept because the
# mapping is later exported row by row.
names = {
    0: 'back (dos)',
    5: 'ipsweep (probe)',
    9: 'neptune (dos)',
    10: 'nmap (probe)',
    11: 'normal connection',
    14: 'pod (dos)',
    15: 'portsweep (probe)',
    17: 'satan (probe)',
    18: 'smurf (dos)',
    20: 'teardrop (dos)',
    21: 'warezclient (remote to user)',
}

In [8]:
def get_trace(label):
    """Build the scatter trace holding every embedded point of one class.

    Reads the notebook-level ``embedded``, ``y_sub``, ``labels`` and ``names``.

    Parameters
    ----------
    label
        Class id to plot. It must be one of the values in ``labels``;
        otherwise the position lookup below raises ``IndexError``.

    Returns
    -------
    plotly.graph_objs.Scatter
        Marker-only trace whose legend entry is ``names[label]``, or a generic
        ``'label <id>'`` when the id has no entry in ``names``.
    """
    # Select the rows of this class once and reuse the mask for both axes.
    in_class = y_sub == label

    # Position of the label among the present labels drives the colour, so
    # each class gets its own evenly spaced step on every channel.
    ix = np.where(labels == label)[0][0]
    margin = 255 // len(labels)
    # The channels are offset by 0, 4 and 8 steps so r, g and b differ.
    red, green, blue = [margin * (ix + shift) % 255 for shift in (0, 4, 8)]
    color = 'rgba(' + str(red) + ',' + str(green) + ',' + str(blue) + ',.9)'

    # `names` is a hand-written mapping that does not cover every possible
    # class id; fall back to a generic legend entry instead of raising KeyError
    # when the sample contains an unmapped class.
    legend_name = names.get(label, 'label ' + str(label))

    return go.Scatter(
        x = embedded[in_class, 0],
        y = embedded[in_class, 1],
        name = legend_name,
        mode = 'markers',
        marker = dict(size = 5, color = color)
    )

In [9]:
# One scatter trace per class present in the sample.
data = list(map(get_trace, labels))

# Hide the zero lines on both axes (t-SNE coordinates have no meaningful
# origin) and make hover pick the nearest point.
layout = go.Layout(
    title='tSNE of 10000 samples',
    xaxis={'zeroline': False},
    yaxis={'zeroline': False},
    hovermode='closest',
)

figure = go.Figure(data=data, layout=layout)

# Uploads the figure to the plotly account under this filename and renders it
# inline in the notebook.
plt.iplot(figure, filename='tsne-scatter')

High five! You successfully sent some data to your account on plotly. View your plot in your browser at https://plot.ly/~jvmancuso/0 or inside your plot.ly account where it is named 'tsne-scatter'


In [11]:
import pandas as pd

In [14]:
# Wrap each artefact of the analysis in a DataFrame so it can be exported.
# Embedding coordinates, one row per sampled point.
tsne_df = pd.DataFrame(data=embedded, columns=['x', 'y'])
# Class id of every sampled point (same row order as tsne_df).
y_sub_df = pd.DataFrame(data=y_sub)
# Distinct class ids present in the sample.
labels_df = pd.DataFrame(data=labels)
# Class id -> display name lookup table.
name_rows = [(label_id, label_name) for label_id, label_name in names.items()]
names_df = pd.DataFrame(data=name_rows, columns=['label', 'name'])

In [16]:
# Persist every table next to the input data; the default to_csv settings are
# used, so the row index is written as the first column of each file.
tsne_df.to_csv(path_or_buf='../data/tsne_data.csv')
labels_df.to_csv(path_or_buf='../data/labels.csv')
names_df.to_csv(path_or_buf='../data/names.csv')
y_sub_df.to_csv(path_or_buf='../data/y_sub.csv')