In [36]:
import tensorflow as tf 
import tensorflow.keras.layers as layers
import tensorflow.keras.models as models
import tensorflow.keras.optimizers as optim

import pandas as pd 
import numpy as np 
import os

In [37]:
# Location of the training table (CSV).
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
PATH = r'/Users/suyashsachdeva/Desktop/GyanBhandar/tox21_dense_train.csv'

# Parse the file once and derive both views from the same frame; the CSV was
# previously read and parsed twice, once for the values and once for the header.
raw_frame = pd.read_csv(PATH)
data = raw_frame.values      # 2-D array of cell values, one row per CSV record
labels = raw_frame.columns   # column names, aligned with the columns of `data`
labels

Index(['Unnamed: 0', 'AW', 'AWeight', 'Arto', 'BertzCT', 'Chi0', 'Chi1',
       'Chi10', 'Chi2', 'Chi3',
       ...
       'W3D', 'W3DH', 'WNSA1', 'WNSA2', 'WNSA3', 'WPSA1', 'WPSA2', 'WPSA3',
       'grav', 'rygr'],
      dtype='object', length=802)

In [38]:
# Rescale every column of `data` from index 3 onwards, in place.
# The column count is taken from the array itself rather than hard-coded, and
# each column's min/max are computed once instead of on every use.
# NOTE(review): the statistics are computed over all rows, including the rows
# that are later split off as the validation set.
first_feature_col = 3  # columns 0-2 are not rescaled
for col in range(first_feature_col, data.shape[1]):
    values = data[:, col]
    lo, hi = min(values), max(values)
    if hi != lo:
        # Mean-centre, then divide by the column's range.
        data[:, col] = (values - np.mean(values)) / (hi - lo)
    else:
        # Constant column (hi == lo == v): v / (2v + 1e-10) + 0.5, i.e. 0.5 for
        # an all-zero column and roughly 1.0 for values well away from zero.
        data[:, col] = values / (hi + lo + 1e-10) + 0.5

In [39]:
# Keep only the columns from index 3 onwards (the ones rescaled earlier).
# Guarded on the original column count so that re-running this cell does not
# strip three more columns on every execution.
if data.shape[1] == len(labels):
    data = data[:, 3:]

In [40]:
# Rows [0, 10000) become the training matrix and rows [10000, 12000) the
# validation matrix; both are copied into float32 arrays.
xtrain, xvalid = (
    np.array(chunk, dtype="float32")
    for chunk in (data[:10000], data[10000:12000])
)

In [41]:
def DenseBlock(x, ndim):
    """Apply Dense -> Dropout -> BatchNormalization -> LeakyReLU to a tensor.

    Args:
        x: Input Keras tensor.
        ndim: Number of units of the Dense layer (width of the block output).

    Returns:
        The tensor produced after the four layers have been applied in order.
    """
    stages = (
        layers.Dense(ndim),
        layers.Dropout(0.1),
        layers.BatchNormalization(momentum=0.9),
        layers.LeakyReLU(0.2),
    )
    for stage in stages:
        x = stage(x)
    return x

def autoencoder(inp, hidden_dim=100, latent_dim=12):
    """Build a symmetric dense autoencoder and the encoder that shares its layers.

    The network is input -> hidden_dim -> latent_dim -> hidden_dim -> input width,
    each stage being one ``DenseBlock``.

    Args:
        inp: Input shape without the batch axis, either a single integer
            (number of features) or a tuple/list whose last entry is the
            feature count.
        hidden_dim: Width of the hidden block on each side of the bottleneck.
        latent_dim: Width of the bottleneck (the encoder output).

    Returns:
        ``(encoder, model)``: ``encoder`` maps the input to the bottleneck and
        ``model`` maps the input to its reconstruction. Both are built on the
        same layer graph, so they share weights.
    """
    # Accept a bare integer (e.g. ``autoencoder((799))``) as well as a tuple.
    input_shape = (inp,) if isinstance(inp, (int, np.integer)) else tuple(inp)
    # The reconstruction must be as wide as the input; this was previously
    # hard-coded to 799 regardless of `inp`.
    feature_dim = input_shape[-1]

    inputs = layers.Input(shape=input_shape)
    x = DenseBlock(inputs, hidden_dim)
    enc_out = DenseBlock(x, latent_dim)
    x = DenseBlock(enc_out, hidden_dim)
    # NOTE(review): the output stage is a full DenseBlock, so dropout, batch
    # normalisation and LeakyReLU are also applied to the reconstruction.
    dec_out = DenseBlock(x, feature_dim)

    encoder = models.Model(inputs=inputs, outputs=enc_out, name="Encoder")
    model = models.Model(inputs=inputs, outputs=dec_out, name="Autoencoder")

    return encoder, model

# Size the network from the training matrix instead of a literal; note that
# `(799)` is just the integer 799, not a one-element tuple.
encoder, model = autoencoder(xtrain.shape[1:])
# `summary()` prints the table itself and returns None, so wrapping it in
# print() only appends a stray "None" line to the output.
encoder.summary()
model.summary()

Model: "Encoder"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_3 (InputLayer)        [(None, 799)]             0         
                                                                 
 dense_8 (Dense)             (None, 100)               80000     
                                                                 
 dropout_8 (Dropout)         (None, 100)               0         
                                                                 
 batch_normalization_8 (Batc  (None, 100)              400       
 hNormalization)                                                 
                                                                 
 leaky_re_lu_8 (LeakyReLU)   (None, 100)               0         
                                                                 
 dense_9 (Dense)             (None, 12)                1212      
                                                           

In [42]:
# Train the autoencoder to reproduce its own input (target == input).
# - Classification accuracy carries no meaning for a real-valued reconstruction,
#   so mean absolute error is tracked next to the MSE loss instead.
# - `metrics` is passed as a list and `validation_data` as an (x, y) tuple, the
#   forms documented by Keras.
model.compile(loss='mse', optimizer=optim.Adam(learning_rate=1e-4), metrics=["mae"])
# Keep the History object so the loss curves can be inspected afterwards,
# instead of letting the cell echo its repr.
history = model.fit(
    xtrain,
    xtrain,
    batch_size=100,
    epochs=200,
    verbose=1,
    validation_data=(xvalid, xvalid),
    validation_batch_size=100,
)

Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200
Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78

<keras.callbacks.History at 0x2c4bf0610>

In [43]:
# model.save("./model50.h5")
# encoder.save("./encoder50.h5")

In [81]:
from minisom import MiniSom

In [82]:
# Hyper-parameters of the self-organising map.
r, c = 4, 3    # grid size, passed to MiniSom as x and y
# Number of training iterations.
# NOTE(review): this name shadows the builtin `iter`.
iter = 50000
sigma, lr = 1, 0.5    # MiniSom `sigma` and `learning_rate` arguments

In [83]:
# Encode the training set into the bottleneck space as a NumPy array.
# Calling `encoder(xtrain)` directly returns a TensorFlow tensor, which rejects
# boolean-mask indexing such as `xsom[cluster_index == c, 0]` (the TypeError
# recorded at the end of this notebook); `predict` returns an ndarray.
xsom = encoder.predict(xtrain, verbose=0)

# One SOM node per grid cell; the input length follows the encoder output width
# rather than a hard-coded 12.
som = MiniSom(x=r, y=c, input_len=xsom.shape[1], sigma=sigma, learning_rate=lr)
# Seed the node weights with samples picked from the encoded data.
som.random_weights_init(xsom)

In [84]:
# Train the map on the encoded samples for `iter` iterations, with progress output.
som.train_random(data=xsom, num_iteration=iter, verbose=1)

 [ 50000 / 50000 ] 100% - 0:00:00 left 
 quantization error: 1.1984481


In [85]:
# Every SOM node stands for one cluster: look up the winning node of each
# encoded sample and arrange the coordinates as a (2, n_samples) array.
winner_coordinates = np.transpose(
    np.array([som.winner(sample) for sample in xsom])
)
# Flatten each (row, column) grid position into a single cluster id
# for the r x c grid.
cluster_index = np.ravel_multi_index(winner_coordinates, (r, c))

In [86]:
# Display the flat cluster id assigned to each encoded training sample.
cluster_index

array([5, 8, 2, ..., 9, 9, 7])

In [91]:
# import matplotlib.pyplot as plt
# %matplotlib inline

# # plotting the clusters using the first 2 dimensions of the data
# for c in np.unique(cluster_index):
#     plt.scatter(xsom[cluster_index == c, 0],
#                 xsom[cluster_index == c, 1], label='cluster='+str(c), alpha=.7)

# # plotting centroids
# for centroid in som.get_weights():
#     plt.scatter(centroid[:, 0], centroid[:, 1], marker='x', 
#                 s=80, linewidths=5, color='k', label='centroid')
# plt.legend();

TypeError: Only integers, slices (`:`), ellipsis (`...`), tf.newaxis (`None`) and scalar tf.int32/tf.int64 tensors are valid indices, got array([False, False, False, ..., False, False, False])