In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from matplotlib import animation  
from mpl_toolkits.mplot3d import Axes3D

from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn import manifold

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers.experimental import preprocessing

In [3]:
# Load the token export and discard every row that has at least one missing
# value. dropna() already returns a new frame, so no explicit copy is needed.
dao_data = pd.read_csv("tokens.csv").dropna()

# Preview the first four remaining rows.
dao_data.head(4)

Unnamed: 0,_id/$oid,name,symbol,contractAddress,chain/$oid,logo,decimal,financialData/price,financialData/volume24h,financialData/volumeChange24h,...,financialData/percentChange60d,financialData/percentChange90d,financialData/marketCap,financialData/marketCapDominance,financialData/fullyDilutedMarketCap,financialData/lastUpdated/$date,live,createdAt/$date,updatedAt/$date,__v
1,623e1d5fd02aba1aed362f99,Alchemix,ALCX,0xdbdb4d16eda451d0503b854cf79d55697f90c8df,623c7bc416dc4a608cd24b00,https://s2.coinmarketcap.com/static/img/coins/...,18,107.118305,23941030.0,98.6279,...,-29.946043,-47.405427,128138600.0,0.0064,167420000.0,2022-03-25T19:50:00Z,True,2022-03-25T19:51:59.218Z,2022-03-25T19:51:59.218Z,0
2,623e30db29f0a800b9b76b4b,Aavegotchi,GHST,0x3F382DbD960E3a9bbCeaE22651E88158d2791550,623c7bc416dc4a608cd24b00,https://s2.coinmarketcap.com/static/img/coins/...,18,2.012771,22229640.0,-26.2485,...,0.515132,-15.602256,125211400.0,0.0063,128762300.0,2022-03-25T21:13:00Z,True,2022-03-25T21:15:07.758Z,2022-03-25T21:15:07.758Z,0
3,623e30dd29f0a800b9b76b4d,Decentraland,MANA,0x0f5d2fb29fb7d3cfee444a200298f468908cc942,623c7bc416dc4a608cd24b00,https://s2.coinmarketcap.com/static/img/coins/...,18,2.604602,462027500.0,-35.9028,...,25.014858,-30.904646,4794600000.0,0.2399,5713784000.0,2022-03-25T21:13:00Z,True,2022-03-25T21:15:09.820Z,2022-03-25T21:15:09.820Z,0
4,623e30df29f0a800b9b76b4f,Magic Ethereum Money,MEM,0x343e59d9d835e35b07fe80f5bb544f8ed1cd3b11,623c7bc416dc4a608cd24b00,https://s2.coinmarketcap.com/static/img/coins/...,18,5e-06,97510.28,102.6711,...,37.165415,37.165415,0.0,0.0,4969091.0,2022-03-25T21:13:00Z,True,2022-03-25T21:15:11.304Z,2022-03-25T21:15:11.304Z,0


In [4]:
# Dimensions (rows, columns) of the frame once incomplete rows have been dropped.
dao_data.shape

(126, 24)

In [5]:
# Identifier, display-metadata and timestamp columns. They are removed so that
# only the financialData/* columns are left in the frame as model features.
non_feature_columns = (
    "_id/$oid",
    "name",
    "symbol",
    "contractAddress",
    "chain/$oid",
    "logo",
    "decimal",
    "financialData/lastUpdated/$date",
    "live",
    "createdAt/$date",
    "updatedAt/$date",
)
for column_name in non_feature_columns:
    dao_data.pop(column_name)

# The `__v` column is split off and kept as the label vector.
# NOTE: pop() mutates dao_data in place, so running this cell a second time
# without reloading the data raises KeyError.
dao_labels = dao_data.pop("__v")

In [6]:
# Dimensions (rows, columns) after the non-feature columns and `__v` were popped.
dao_data.shape

(126, 12)

In [7]:
# One symbolic Keras input of shape (None, 1) per remaining column, keyed by
# the column name. Object-typed columns become tf.string inputs; every other
# column becomes a tf.float32 input.
input_x = {
    name: tf.keras.Input(
        shape=(1,),
        name=name,
        dtype=tf.string if column.dtype == object else tf.float32,
    )
    for name, column in dao_data.items()
}

input_x

{'financialData/price': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'financialData/price')>,
 'financialData/volume24h': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'financialData/volume24h')>,
 'financialData/volumeChange24h': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'financialData/volumeChange24h')>,
 'financialData/percentChange1h': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'financialData/percentChange1h')>,
 'financialData/percentChange24h': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'financialData/percentChange24h')>,
 'financialData/percentChange7d': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'financialData/percentChange7d')>,
 'financialData/percentChange30d': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'financialData/percentChange30d')>,
 'financialData/percentChange60d': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'financialData

In [8]:
# Keep only the float32 inputs; string inputs are not part of this step.
num_inputs = {
    column_name: keras_input
    for column_name, keras_input in input_x.items()
    if keras_input.dtype == tf.float32
}

# Join the per-column (None, 1) inputs into one feature tensor.
x = tf.keras.layers.Concatenate()(list(num_inputs.values()))

# Fit the normalization layer on the numeric columns of the data frame, then
# apply the fitted layer to the concatenated symbolic inputs.
norm = preprocessing.Normalization()
numeric_values = np.array(dao_data[list(num_inputs)])
norm.adapt(numeric_values)
all_num_inputs = norm(x)

# Running list of preprocessing outputs; it starts with the numeric block.
preprocessed_inputs = [all_num_inputs]

all_num_inputs


<KerasTensor: shape=(None, 12) dtype=float32 (created by layer 'normalization')>

In [9]:
# Encode every string-typed input and add the result to the list of
# preprocessing outputs. Non-string inputs are skipped here.
# The loop variable is deliberately not called `input`: at notebook scope that
# would overwrite the builtin input() for the rest of the session.
for name, string_input in input_x.items():
    if string_input.dtype != tf.string:
        continue

    # StringLookup and CategoryEncoding are preprocessing *layers*. They are
    # taken from the `preprocessing` module imported at the top of the
    # notebook; `tf.keras.preprocessing` has no such attributes, so the
    # previous spelling raised AttributeError on the first string column.
    # NOTE(review): `max_tokens` / `vocab_size()` follow the experimental API
    # this notebook was written against; later TF releases appear to have
    # renamed them -- confirm against the installed version.
    lookup = preprocessing.StringLookup(vocabulary=np.unique(dao_data[name]))
    one_hot = preprocessing.CategoryEncoding(max_tokens=lookup.vocab_size())

    x = lookup(string_input)  # string -> integer vocabulary index
    x = one_hot(x)  # integer index -> indicator vector
    preprocessed_inputs.append(x)

# Last expression of the cell, so the list itself is what gets displayed.
preprocessed_inputs

[<KerasTensor: shape=(None, 12) dtype=float32 (created by layer 'normalization')>]

In [10]:
# Build the preprocessing model with exactly ONE output tensor.
# Later cells read `.shape` and call `.numpy()` directly on the model's result,
# which only works when the result is a single tensor rather than a list.
# Passing the raw list relied on Keras unwrapping a one-element list and would
# break as soon as a second (e.g. one-hot encoded) block is appended.
if len(preprocessed_inputs) == 1:
    # Some Keras versions reject Concatenate on a one-element list, so the
    # lone block is used as-is.
    preprocessed_output = preprocessed_inputs[0]
else:
    preprocessed_output = tf.keras.layers.Concatenate()(preprocessed_inputs)

preprocessing_layer = tf.keras.Model(input_x, preprocessed_output, name="Processed DAO Data")

In [11]:
# Column name -> NumPy array holding every value of that column.
dao_features_dict = {column: np.array(series) for column, series in dao_data.items()}

# Positions 1 and 2 of every column: a two-sample batch used as a smoke test.
two_dao_sample_dict = {column: array[1:3] for column, array in dao_features_dict.items()}

# Run the two-sample batch through the preprocessing model.
two_sample_fitted = preprocessing_layer(two_dao_sample_dict)

two_dao_sample_dict

{'financialData/price': array([2.01277126, 2.60460218]),
 'financialData/volume24h': array([2.22296429e+07, 4.62027472e+08]),
 'financialData/volumeChange24h': array([-26.2485, -35.9028]),
 'financialData/percentChange1h': array([-0.0839251 ,  0.08114252]),
 'financialData/percentChange24h': array([-0.81904903, -1.60651805]),
 'financialData/percentChange7d': array([2.86166081, 6.57703982]),
 'financialData/percentChange30d': array([ 1.96809949, -0.74814829]),
 'financialData/percentChange60d': array([ 0.51513232, 25.01485821]),
 'financialData/percentChange90d': array([-15.60225555, -30.90464572]),
 'financialData/marketCap': array([1.25211404e+08, 4.79460032e+09]),
 'financialData/marketCapDominance': array([0.0063, 0.2399]),
 'financialData/fullyDilutedMarketCap': array([1.28762262e+08, 5.71378412e+09])}

In [12]:
# Width of the preprocessed feature vector, read from the two-sample smoke test.
full_dim = two_sample_fitted.shape.as_list()[1]

# Layer widths of the encoder; the decoder mirrors them in reverse order.
encoding_dim1 = 128
encoding_dim2 = 16
encoding_dim3 = 3  # width of the bottleneck layer

encoder_input_data = keras.Input(shape=(full_dim,))

# Encoder: full_dim -> 128 -> 16 -> 3
# NOTE(review): the recorded output of this cell shows the names "Encoder" and
# "ClusteringLayer", so the name strings below were edited after that run.
# Names containing spaces may be rejected as TensorFlow name scopes -- confirm
# they are accepted by the installed TF version.
encoded_layer1 = keras.layers.Dense(encoding_dim1, activation='relu')(encoder_input_data)
encoded_layer2 = keras.layers.Dense(encoding_dim2, activation='relu')(encoded_layer1)
encoded_layer3 = keras.layers.Dense(encoding_dim3, activation='relu', name="Decoder - Clustering Layer")(encoded_layer2)

# Stand-alone encoder: maps the input features to the 3-unit bottleneck.
encoder_model = keras.Model(encoder_input_data, encoded_layer3)

# Decoder: 3 -> 16 -> 128 -> full_dim
decoded_layer3 = keras.layers.Dense(encoding_dim2, activation='relu')(encoded_layer3)
decoded_layer2 = keras.layers.Dense(encoding_dim1, activation='relu')(decoded_layer3)
# The reconstruction target is the output of the Normalization layer, which is
# centred on zero and therefore contains negative values and values above 1.
# A sigmoid output is confined to (0, 1) and can never reproduce those, so the
# reconstruction layer uses a linear activation instead.
decoded_layer1 = keras.layers.Dense(full_dim, activation='linear')(decoded_layer2)

# Full autoencoder: shares the input and encoder layers with encoder_model.
autoencoder_model = keras.Model(encoder_input_data, outputs=decoded_layer1, name="Encoder - Declustering Layer")

autoencoder_model.compile(optimizer="RMSprop", loss=tf.keras.losses.mean_squared_error)

autoencoder_model.summary()
# get_weights / get_config are methods and have to be called; printing the
# bare attributes only showed "<bound method ...>". Weight arrays are reduced
# to their shapes to keep the output readable.
print([weights.shape for weights in autoencoder_model.get_weights()])
print(autoencoder_model.get_config())

Model: "Encoder"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         [(None, 12)]              0         
_________________________________________________________________
dense (Dense)                (None, 128)               1664      
_________________________________________________________________
dense_1 (Dense)              (None, 16)                2064      
_________________________________________________________________
ClusteringLayer (Dense)      (None, 3)                 51        
_________________________________________________________________
dense_2 (Dense)              (None, 16)                64        
_________________________________________________________________
dense_3 (Dense)              (None, 128)               2176      
_________________________________________________________________
dense_4 (Dense)              (None, 12)                1548

In [14]:
# Run the complete data set through the preprocessing model.
p_items = preprocessing_layer(dao_features_dict)

# 80 / 20 split of features and labels with a fixed seed.
train_data, test_data, train_labels, test_labels = train_test_split(
    p_items.numpy(),
    dao_labels,
    train_size=0.8,
    random_state=5,
)

# Autoencoder training: input and target are the same array, and the held-out
# split doubles as validation data.
history = autoencoder_model.fit(
    x=train_data,
    y=train_data,
    epochs=1000,
    batch_size=256,
    shuffle=True,
    validation_data=(test_data, test_data),
)

Epoch 1/1000
Epoch 2/1000
Epoch 3/1000
Epoch 4/1000
Epoch 5/1000
Epoch 6/1000
Epoch 7/1000
Epoch 8/1000
Epoch 9/1000
Epoch 10/1000
Epoch 11/1000
Epoch 12/1000
Epoch 13/1000
Epoch 14/1000
Epoch 15/1000
Epoch 16/1000
Epoch 17/1000
Epoch 18/1000
Epoch 19/1000
Epoch 20/1000
Epoch 21/1000
Epoch 22/1000
Epoch 23/1000
Epoch 24/1000
Epoch 25/1000
Epoch 26/1000
Epoch 27/1000
Epoch 28/1000
Epoch 29/1000
Epoch 30/1000
Epoch 31/1000
Epoch 32/1000
Epoch 33/1000
Epoch 34/1000
Epoch 35/1000
Epoch 36/1000
Epoch 37/1000
Epoch 38/1000
Epoch 39/1000
Epoch 40/1000
Epoch 41/1000
Epoch 42/1000
Epoch 43/1000
Epoch 44/1000
Epoch 45/1000
Epoch 46/1000
Epoch 47/1000
Epoch 48/1000
Epoch 49/1000
Epoch 50/1000
Epoch 51/1000
Epoch 52/1000
Epoch 53/1000
Epoch 54/1000
Epoch 55/1000
Epoch 56/1000
Epoch 57/1000
Epoch 58/1000
Epoch 59/1000
Epoch 60/1000
Epoch 61/1000
Epoch 62/1000
Epoch 63/1000
Epoch 64/1000
Epoch 65/1000
Epoch 66/1000
Epoch 67/1000
Epoch 68/1000
Epoch 69/1000
Epoch 70/1000
Epoch 71/1000
Epoch 72/1000
E