In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from matplotlib import animation  
from mpl_toolkits.mplot3d import Axes3D

from sklearn.model_selection import train_test_split
from sklearn.cluster import KMeans
from sklearn import manifold

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers.experimental import preprocessing

In [2]:
# Load the token export and reduce it to the feature columns in one pipeline.
# Rows are filtered by dropna() while the columns listed below are still
# present, so a missing value in any of them also removes the row.
dao_data = (
    pd.read_csv("tokens.csv")
    .dropna()
    .drop(
        columns = [
            "_id/$oid",
            "name",
            "symbol",
            "contractAddress",
            "chain/$oid",
            "logo",
            "decimal",
            "financialData/lastUpdated/$date",
            "live",
            "createdAt/$date",
            "updatedAt/$date",
        ]
    )
)

# "__v" is removed from the feature frame and kept separately as dao_labels.
dao_labels = dao_data.pop("__v")

# Preview the first five rows of the remaining feature columns.
dao_data.head(n = 5)

Unnamed: 0,financialData/price,financialData/volume24h,financialData/volumeChange24h,financialData/percentChange1h,financialData/percentChange24h,financialData/percentChange7d,financialData/percentChange30d,financialData/percentChange60d,financialData/percentChange90d,financialData/marketCap,financialData/marketCapDominance,financialData/fullyDilutedMarketCap
1,107.118305,23941030.0,98.6279,-2.590717,2.321428,9.309774,-20.503689,-29.946043,-47.405427,128138600.0,0.0064,167420000.0
2,2.012771,22229640.0,-26.2485,-0.083925,-0.819049,2.861661,1.968099,0.515132,-15.602256,125211400.0,0.0063,128762300.0
3,2.604602,462027500.0,-35.9028,0.081143,-1.606518,6.57704,-0.748148,25.014858,-30.904646,4794600000.0,0.2399,5713784000.0
4,5e-06,97510.28,102.6711,-3.107993,-1.08908,-20.801173,23.767579,37.165415,37.165415,0.0,0.0,4969091.0
5,4.283644,736507.2,3.7986,-0.345016,-2.939907,8.500397,-31.746554,-32.329757,-40.635534,8173324.0,0.0,8191968.0


In [3]:
# One symbolic Keras input per column of dao_data, keyed by column name.
x_inputs = {}

for col_name, column in dao_data.items():
    # object-dtype columns are declared as string inputs; every other
    # column is declared as a float32 input.
    dtype = tf.string if column.dtype == object else tf.float32
    x_inputs[col_name] = keras.Input(name = col_name, shape = (1,), dtype = dtype)

x_inputs

{'financialData/price': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'financialData/price')>,
 'financialData/volume24h': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'financialData/volume24h')>,
 'financialData/volumeChange24h': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'financialData/volumeChange24h')>,
 'financialData/percentChange1h': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'financialData/percentChange1h')>,
 'financialData/percentChange24h': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'financialData/percentChange24h')>,
 'financialData/percentChange7d': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'financialData/percentChange7d')>,
 'financialData/percentChange30d': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'financialData/percentChange30d')>,
 'financialData/percentChange60d': <KerasTensor: shape=(None, 1) dtype=float32 (created by layer 'financialData

In [5]:
# Select the inputs that were declared with dtype float32.
num_inputs = {
    feature: tensor
    for feature, tensor in x_inputs.items()
    if tensor.dtype == tf.float32
}

# Fit the normalization layer on the matching dao_data columns, taken in the
# same order as the inputs that are concatenated below.
normalization = preprocessing.Normalization()
normalization.adapt(np.array(dao_data[list(num_inputs)]))

# Join the selected inputs into a single tensor and normalize it.
x = tf.keras.layers.Concatenate()(list(num_inputs.values()))
all_num_inputs = normalization(x)

# Collected preprocessing outputs; starts with the normalized numeric tensor.
preprocessed_inputs = [all_num_inputs]

preprocessed_inputs

[<KerasTensor: shape=(None, 12) dtype=float32 (created by layer 'normalization')>]

In [7]:
# Rebuild the output list from the normalized numeric tensor so that
# re-running this cell does not append the string encodings a second time.
preprocessed_inputs = [all_num_inputs]

# `feature_input` (not `input`) keeps the builtin input() usable after the
# loop, since loop variables stay bound in the notebook namespace.
for name, feature_input in x_inputs.items():
    # Only string inputs are one-hot encoded here; everything else is skipped.
    if feature_input.dtype != tf.string:
        continue

    # The vocabulary must come from the column being encoded (`name`).
    # `col_name` is a leftover loop variable from the cell that builds
    # x_inputs and always refers to the last column of dao_data.
    str_lookup = tf.keras.layers.experimental.preprocessing.StringLookup(vocabulary = np.unique(dao_data[name]))
    one_hot_enc = tf.keras.layers.experimental.preprocessing.CategoryEncoding(max_tokens = str_lookup.vocab_size())

    # string -> vocabulary index -> encoded vector
    x = str_lookup(feature_input)
    x = one_hot_enc(x)
    preprocessed_inputs.append(x)

# Model that maps the dict of raw inputs to the list of preprocessed tensors.
preprocessing_layer = tf.keras.Model(x_inputs, preprocessed_inputs)
preprocessed_inputs

[<KerasTensor: shape=(None, 12) dtype=float32 (created by layer 'normalization')>]