In [1]:
!pip install transformers
#!pip install tensorflow-addons
#!pip install tf-transformers

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting transformers
  Downloading transformers-4.28.1-py3-none-any.whl (7.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.0/7.0 MB[0m [31m48.8 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m25.8 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting huggingface-hub<1.0,>=0.11.0
  Downloading huggingface_hub-0.14.1-py3-none-any.whl (224 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m224.5/224.5 kB[0m [31m14.0 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: tokenizers, huggingface-hub, transformers
Successfully installed huggingface-hub-0.14.1 tokenizers-0.13.3 transformers-4.28.1


In [2]:
import tensorflow
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.layers import DepthwiseConv1D, SeparableConv1D
#from tensorflow.keras.utils import Transformer, MultiHeadAttention, LayerNormalization, Dropout
import transformers
from transformers import BertConfig
from google.colab import drive
from sklearn.preprocessing import OneHotEncoder
from transformers.models.bert.modeling_tf_bert import TFBertEncoder, TFBertEmbeddings
from sklearn.preprocessing import StandardScaler
#from tensorflow_addons.layers import TransformerEncoder
#import tf_transformers as tft
#from tf_transformers.layers import TransformerEncoder
#import tensorflow_addons as tfa

In [3]:
# Folder on the mounted Drive that holds the per-tempo CSV recordings read later on.
data_path = '/content/drive/MyDrive/Columbia Spring 2023/Embedded AI/Conductor_wand/'
# Attach Google Drive to the Colab filesystem so that data_path is readable.
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
# HELPER CLASSES
## VECTOR MAGNITUDE LAYER FOR IMU FUSION
class vectorMagFusion(tf.keras.layers.Layer):
  """Fuse the last axis of the input into a single vector magnitude.

  The layer has no weights; it returns the Euclidean (L2) norm taken over the
  last axis, so the output has one dimension fewer than the input.

  Args:
    **kwargs: standard Keras layer arguments (name, dtype, ...).
  """
  def __init__(self, **kwargs):
    # Forwarding kwargs lets Keras pass name/dtype/trainable when it rebuilds
    # the layer from its config.
    super(vectorMagFusion, self).__init__(**kwargs)

  def call(self, inputs):
    """Return the L2 norm of `inputs` over its last axis."""
    return tf.norm(inputs,axis=-1)

  def compute_output_shape(self, input_shape):
    """Return the input shape with its last axis removed.

    tf.norm(..., axis=-1) is called without keepdims, so it drops exactly the
    last axis and keeps every leading axis.
    """
    return tuple(input_shape)[:-1]

def scaled_dot_product_attention(q, k, v, mask):
  """Scaled dot-product attention: softmax(q @ k^T / sqrt(d_k)) @ v.

  Args:
    q: query tensor.
    k: key tensor; the size of its last axis is used as d_k.
    v: value tensor.
    mask: optional tensor; where it is 1 the corresponding logit is pushed
      towards -1e9 before the softmax. Pass None to skip masking.

  Returns:
    Tuple (output, attention_weights): the weighted sum of `v`, and the
    softmax weights taken over the last axis of the logits.
  """
  raw_scores = tf.matmul(q, k, transpose_b=True)
  # Scale by sqrt(d_k), where d_k is the size of the keys' last axis.
  key_depth = tf.cast(tf.shape(k)[-1], tf.float32)
  logits = raw_scores / tf.math.sqrt(key_depth)
  if mask is not None:
    # A large negative offset makes masked positions vanish in the softmax.
    logits = logits + (mask * -1e9)
  attention_weights = tf.nn.softmax(logits, axis=-1)
  return tf.matmul(attention_weights, v), attention_weights

class MultiHeadAttention(tf.keras.layers.Layer):
  """Multi-head scaled dot-product attention.

  Queries, keys and values are each projected by a Dense(d_model) layer, split
  into `num_heads` heads of size `d_model // num_heads`, attended per head with
  `scaled_dot_product_attention`, merged back to `d_model` features and passed
  through a final Dense(d_model) layer.

  Args:
    d_model: width of the q/k/v projections and of the output; must be
      divisible by `num_heads`.
    num_heads: number of attention heads.
    **kwargs: standard Keras layer arguments (name, dtype, ...).
  """
  def __init__(self, d_model, num_heads, **kwargs):
    super(MultiHeadAttention, self).__init__(**kwargs)
    self.num_heads = num_heads
    self.d_model = d_model
    # The feature axis is split evenly across heads, so it must divide exactly.
    assert d_model % num_heads == 0, 'd_model must be divisible by num_heads'
    # Per-head feature size.
    self.depth = d_model//self.num_heads
    # Learnable linear projections for queries, keys and values.
    self.wq = tf.keras.layers.Dense(d_model)
    self.wk = tf.keras.layers.Dense(d_model)
    self.wv = tf.keras.layers.Dense(d_model)
    # Output projection applied after the heads are merged.
    self.dense = tf.keras.layers.Dense(d_model)

  def split_heads(self, x, batch_size):
    """Reshape (batch, seq_len, d_model) into (batch, num_heads, seq_len, depth)."""
    x = tf.reshape(x, (batch_size, -1, self.num_heads, self.depth)) # split last dimension into num heads and depth
    return tf.transpose(x,perm=[0,2,1,3]) # (batch_size, num_heads, seq_len, depth)

  def call(self, v, k, q, mask=None):
    """Attend `q` over `k`/`v`.

    Args:
      v, k, q: value, key and query tensors (note the argument order).
      mask: optional mask forwarded to `scaled_dot_product_attention`.

    Returns:
      Tuple (output, attention_weights). `output` has `d_model` features on
      its last axis; `attention_weights` are the per-head softmax weights.
    """
    batch_size = tf.shape(q)[0]
    # Project, then split each of q/k/v into heads.
    q = self.wq(q)
    k = self.wk(k)
    v = self.wv(v)
    q = self.split_heads(q, batch_size) # (batchSize, numHeads, seqLen, depth)
    k = self.split_heads(k, batch_size) # (batchSize, numHeads, seqLen, depth)
    v = self.split_heads(v, batch_size) # (batchSize, numHeads, seqLen, depth)
    # Per-head scaled dot-product attention.
    scaled_attention, attention_weights = scaled_dot_product_attention(q, k, v, mask)
    # Move the head axis next to depth and merge the two back into d_model.
    scaled_attention = tf.transpose(scaled_attention, perm=[0,2,1,3]) # (batchSize, seqLen, numHeads, depth)
    concat_attention = tf.reshape(scaled_attention, (batch_size, -1, self.d_model)) # (batchSize, seqLen, d_model)
    # Final linear projection.
    output = self.dense(concat_attention) # (batch_size, seq_len, d_model)
    return output, attention_weights

  def get_config(self):
    """Return the constructor arguments so Keras can rebuild the layer.

    Only plain values are stored: the sub-layers are recreated by __init__,
    and `depth` is derived from d_model and num_heads.
    """
    config = super().get_config().copy()
    config.update({
        'd_model' : self.d_model,
        'num_heads' : self.num_heads,
    })
    return config
    
def point_wise_feed_forward_network(out_size, dff):
  """Build a two-layer feed-forward network.

  Args:
    out_size: number of units of the final (linear) Dense layer.
    dff: number of units of the hidden ReLU Dense layer.

  Returns:
    A tf.keras.Sequential of Dense(dff, relu) followed by Dense(out_size).
  """
  hidden = tf.keras.layers.Dense(dff, activation='relu')
  projection = tf.keras.layers.Dense(out_size)
  return tf.keras.Sequential([hidden, projection])
  
class PositionalEncoder(tf.keras.layers.Layer):
  """Add a fixed (non-trainable) sinusoidal positional encoding to the input.

  Args:
    position_len: number of positions to precompute; inputs longer than this
      along axis 1 cannot be encoded.
    d_model: size of the feature axis of the encoding.
    **kwargs: standard Keras layer arguments (name, dtype, ...).
  """
  def __init__(self, position_len, d_model, **kwargs):
    super().__init__(**kwargs)
    # Kept so get_config() can report how to rebuild the layer.
    self.position_len = position_len
    self.d_model = d_model
    self.pos_encoding = self.positional_encoding(position_len, d_model)

  def positional_encoding(self, position_len, d_model):
    """Return a float32 tensor of shape (1, position_len, d_model).

    The angle for position p and feature index i is
    p / 10000 ** (2 * (i // 2) / d_model). Sines of the even-indexed columns
    and cosines of the odd-indexed columns are concatenated (all sines first,
    then all cosines) rather than interleaved.
    """
    positions = np.arange(position_len)[:,np.newaxis]
    d_idxs = np.arange(d_model)[np.newaxis,:]
    theta_arr = positions * (1 / (np.power(10000,(2*(d_idxs//2))/np.float32(d_model))))
    sines = np.sin(theta_arr[:, 0::2])
    cosines = np.cos(theta_arr[:, 1::2])

    # concatenate sines and cosines into an embedding sized matrix
    pos_encoding = np.concatenate([sines, cosines],axis=-1)
    # leading axis of size 1 so the table broadcasts over the batch
    pos_encoding = pos_encoding[np.newaxis,...]
    return tf.cast(pos_encoding, dtype=tf.float32)

  def call(self, x):
    """Add the encoding, truncated to the length of x along axis 1, to x."""
    return x + self.pos_encoding[:, :tf.shape(x)[1],:]

  def get_config(self):
    """Return the constructor arguments so Keras can rebuild the layer."""
    config = super().get_config().copy()
    config.update({
        'position_len' : self.position_len,
        'd_model' : self.d_model,
    })
    return config

class TransformerEncoder(tf.keras.layers.Layer):
  """One transformer encoder block with post-layer-norm residual connections.

  Self-attention -> dropout -> add & norm -> feed-forward -> dropout ->
  add & norm. The feed-forward network projects back to `d_model`, so the
  output has the same feature size as the input.

  Args:
    d_model: feature size of the input/output; must be divisible by num_heads.
    num_heads: number of attention heads.
    dff: hidden units of the feed-forward network.
    out_size: stored for signature parity with TransformerEncoderOut; it is
      not used by this block.
    rate: dropout rate applied after attention and after the feed-forward net.
    **kwargs: standard Keras layer arguments (name, dtype, ...).
  """
  def __init__(self, d_model, num_heads, dff, out_size, rate=0.1, **kwargs):
    super(TransformerEncoder, self).__init__(**kwargs)
    self.d_model = d_model
    self.num_heads = num_heads
    self.dff = dff
    self.rate = rate
    self.out_size = out_size

    self.mha = MultiHeadAttention(d_model, num_heads)
    # Projects back to d_model so the second residual connection is valid.
    self.ffn = point_wise_feed_forward_network(d_model, dff)

    self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

    self.dropout1 = tf.keras.layers.Dropout(rate)
    self.dropout2 = tf.keras.layers.Dropout(rate)

  def call(self, x, training=None):
    """Apply the block to `x`; `training` toggles the dropout layers."""
    # Self-attention: x serves as value, key and query; no mask is used.
    attn_output, _ = self.mha(x, x, x, None)
    attn_output = self.dropout1(attn_output, training=training)
    out1 = self.layernorm1(x + attn_output)
    ffn_output = self.ffn(out1)
    ffn_output = self.dropout2(ffn_output, training=training)
    out2 = self.layernorm2(out1 + ffn_output)
    return out2

  def get_config(self):
    """Return the constructor arguments so Keras can rebuild the layer.

    Sub-layers are recreated by __init__ and are deliberately not stored.
    """
    config = super().get_config().copy()
    config.update({
        'd_model' : self.d_model,
        'num_heads' : self.num_heads,
        'dff' : self.dff,
        'out_size' : self.out_size,
        'rate' : self.rate,
    })
    return config

class TransformerEncoderOut(tf.keras.layers.Layer):
  """Transformer encoder block whose feed-forward net projects to `out_size`.

  Self-attention -> dropout -> add & norm -> feed-forward -> dropout -> norm.
  The last normalization has no residual connection: the feed-forward output
  has `out_size` features, which need not match the `d_model` features of the
  attention branch.

  Args:
    d_model: feature size of the input; must be divisible by num_heads.
    num_heads: number of attention heads.
    dff: hidden units of the feed-forward network.
    out_size: feature size of the block's output.
    rate: dropout rate applied after attention and after the feed-forward net.
    **kwargs: standard Keras layer arguments (name, dtype, ...).
  """
  def __init__(self, d_model, num_heads, dff, out_size, rate=0.1, **kwargs):
    super(TransformerEncoderOut, self).__init__(**kwargs)
    self.d_model = d_model
    self.num_heads = num_heads
    self.dff = dff
    self.rate = rate
    self.out_size = out_size

    self.mha = MultiHeadAttention(d_model, num_heads)
    self.ffn = point_wise_feed_forward_network(out_size, dff)

    self.layernorm1 = tf.keras.layers.LayerNormalization(epsilon=1e-6)
    self.layernorm2 = tf.keras.layers.LayerNormalization(epsilon=1e-6)

    self.dropout1 = tf.keras.layers.Dropout(rate)
    self.dropout2 = tf.keras.layers.Dropout(rate)

  def call(self, x, training=None):
    """Apply the block to `x`; `training` toggles the dropout layers."""
    # Self-attention: x serves as value, key and query; no mask is used.
    attn_output, _ = self.mha(x, x, x, None)
    attn_output = self.dropout1(attn_output, training=training)
    out1 = self.layernorm1(x + attn_output)
    ffn_output = self.ffn(out1)
    ffn_output = self.dropout2(ffn_output, training=training)
    # No residual here: ffn_output has out_size features, out1 has d_model.
    out2 = self.layernorm2(ffn_output)
    return out2

  def get_config(self):
    """Return the constructor arguments so Keras can rebuild the layer.

    Only plain values are stored; the sub-layers (attention, feed-forward,
    norms, dropouts) are recreated by __init__ from these arguments.
    """
    config = super().get_config().copy()
    config.update({
        'd_model' : self.d_model,
        'num_heads' : self.num_heads,
        'dff' : self.dff,
        'out_size' : self.out_size,
        'rate' : self.rate,
    })
    return config

In [56]:
# TRANSFORMER MODEL
## HYPER PARAMETERS
num_layers = 3 # three encoder layers stacked
d_model = 6 # base width; the conv + encoder stack below runs at d_model*2 = 12 features
dff = 15 # hidden units of each encoder block's feed-forward network
num_heads = 3 # attention heads per layer ((d_model*2) / num_heads = 4 features per head)
dropout_rate = 0.5
rate = dropout_rate
Fs = 119 # samples per second (seq_len below is Fs * num_seconds)
num_seconds = 2
seq_len = Fs * num_seconds
num_axes = 6
vocab_size= len(range(60,125,5))
input_shape = (238,32) # NOTE(review): not referenced anywhere in this cell
num_filters = 6 # NOTE(review): not referenced anywhere in this cell
out_size = 13 # width of the final softmax layer
## ARCHITECTURE
# NOTE(review): `config` is built but never used by the model below; kept in
# case another cell relies on it.
config = BertConfig(vocab_size=vocab_size,hidden_size=d_model,num_hidden_layers=num_layers, num_attention_heads=num_heads, max_position_embeddings=seq_len, output_attentions=False, output_hidden_states=False)
# NOTE(review): the input is declared float16 - confirm the reduced precision is intended.
inputs = tf.keras.layers.Input(shape=(seq_len,num_axes), dtype=tf.float16)
# Static shapes are enough for a sanity check; calling tf.shape() on a symbolic
# Keras tensor would add throw-away op layers just to print.
print(inputs.shape)

# CONVOLUTION LAYER
# Lifts the num_axes input channels to d_model*2 features per time step
# ('same' padding with stride 1 keeps the sequence length).
convLayer1 = SeparableConv1D(
    filters = d_model*2,
    kernel_size = 3,
    strides=1,
    padding='same',
    activation='relu'
)(inputs)

# TRANSFORMER ENCODER
posEncodeLayer = PositionalEncoder(seq_len, d_model*2)(convLayer1)
encodeLayer = posEncodeLayer
for layer_idx in range(num_layers):
  encodeLayer = TransformerEncoder(d_model*2, num_heads, dff, out_size, rate)(encodeLayer)

# LayerNorm
layerNorm = tf.keras.layers.LayerNormalization(axis=-1, center=True, scale=True)(encodeLayer)
# FLATTEN
flat1 = tf.keras.layers.Flatten()(layerNorm)
print('flat1: ',flat1.shape)
print('layerNorm: ',layerNorm.shape)

# CLASSIFICATION HEAD
dense1 = tf.keras.layers.Dense(714, activation='gelu')(flat1)
dense2 = tf.keras.layers.Dense(357, activation='gelu')(dense1)
dense3 = tf.keras.layers.Dense(out_size, activation='softmax')(dense2)

TFmodel = tf.keras.Model(inputs=inputs, outputs=dense3)


KerasTensor(type_spec=TensorSpec(shape=(3,), dtype=tf.int32, name=None), inferred_value=[None, 238, 6], name='tf.compat.v1.shape_40/Shape:0', description="created by layer 'tf.compat.v1.shape_40'")
flat1:  KerasTensor(type_spec=TensorSpec(shape=(2,), dtype=tf.int32, name=None), inferred_value=[None, 2856], name='tf.compat.v1.shape_41/Shape:0', description="created by layer 'tf.compat.v1.shape_41'")
layerNorm:  KerasTensor(type_spec=TensorSpec(shape=(3,), dtype=tf.int32, name=None), inferred_value=[None, 238, 12], name='tf.compat.v1.shape_42/Shape:0', description="created by layer 'tf.compat.v1.shape_42'")


In [24]:
# Settings and empty containers consumed by the data-loading cell.
Fs = 119
data_len = 60 # 60 seconds
split = 47  # window indices below this value go to the training set
NUM_CLASSES = 13

# Raw recordings, keyed by the loop value used when loading them.
data = dict()

# Window / label accumulators, one independent list each.
x_train = []
y_train = []
x_test = []
y_test = []

In [25]:
#uncomment if you've uploaded a ZIP file, and change the folder
#!unzip condSamples.zip
scaler = StandardScaler()
# Start from empty lists so this cell can be re-run without first re-running
# the cell that declares them (they become ndarrays at the end of this cell).
x_train, y_train, x_test, y_test = [], [], [], []

# One CSV per tempo (60..120 bpm in steps of 5). The header row is skipped, the
# first column is dropped (presumably a timestamp/index - confirm) and only the
# first data_len seconds are kept.
for bpm in range(60,125,5):
    data[bpm] = np.loadtxt(data_path + str(bpm)+"bpm3.csv",skiprows=1,delimiter=',')[0:Fs*data_len,1:]

for bpm, array in data.items():
    # Standardize each recording column-wise.
    # NOTE(review): the scaler is fit on the whole recording, including the
    # seconds that end up in the test split - consider fitting on the training
    # portion only.
    normArray = scaler.fit_transform(array)
    # Two-second windows with a one-second hop: window j covers seconds [j, j+2).
    # NOTE(review): because windows overlap, the last training window shares
    # one second with the first test window.
    for j in range(0, data_len - 1):
        window = normArray[j*Fs:(j+2)*Fs]
        if j < split:
            x_train.append(window)
            y_train.append(bpm)
        else:
            # Test windows must come from the normalized data as well, so that
            # train and test inputs are on the same scale.
            x_test.append(window)
            y_test.append(bpm)

x_train = np.array(x_train)
y_train = np.array(y_train)
x_test = np.array(x_test)
y_test = np.array(y_test)
# Every window must have the same length, otherwise stacking does not give a 3-D array.
assert x_train.ndim == 3 and x_test.ndim == 3, "ragged windows: a recording is shorter than expected"
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(611, 238, 6)
(611,)
(156, 238, 6)
(156,)


In [57]:
from tensorflow.keras.optimizers import Adam
# One-hot encode the tempo labels. The encoder is fit on the training labels
# only and then reused for the test labels, so both splits are guaranteed to
# share the same column order (and a class absent from the test split cannot
# shift the columns).
# The encoder's default sparse output is densified with .toarray() instead of
# passing `sparse=False`, which newer scikit-learn releases renamed to
# `sparse_output`.
enc = OneHotEncoder(handle_unknown='ignore')
enc = enc.fit(y_train.reshape(-1,1))
y_train_enc = enc.transform(y_train.reshape(-1,1)).toarray()
y_test_enc = enc.transform(y_test.reshape(-1,1)).toarray()

lr = 0.001
optimizer = Adam(learning_rate=lr)
TFmodel.compile(loss='categorical_crossentropy',
              optimizer=optimizer, metrics=['acc'])



In [27]:
# Sanity check: inputs and one-hot labels of each split, printed in the order
# train inputs, train labels, test inputs, test labels.
for arr in (x_train, y_train_enc, x_test, y_test_enc):
    print(arr.shape)

(611, 238, 6)
(611, 13)
(156, 238, 6)
(156, 13)


In [28]:
# Show the first training window (all of its rows) as a quick look at the scaled values.
first_window = x_train[0]
print(first_window)

[[ 1.78133623  1.08477004  0.10670362 -2.19204676 -0.04775503  1.30778738]
 [ 1.62530535  1.16372788 -0.12255043 -2.35370403 -0.03546806  1.41973908]
 [ 1.43583929  1.18984926 -0.32729305 -2.56414732  0.01895181  1.50816376]
 ...
 [ 1.52269392 -1.14979597  0.52628179 -0.51233384  0.50346156  1.7977756 ]
 [ 1.36397285 -1.14801497  0.39471304 -0.62807781  0.49644085  1.97219265]
 [ 1.29133779 -0.96279058 -0.1477828  -0.75721211  0.34722553  2.12146114]]


In [58]:
# Keras carves the validation_split fraction off the END of the arrays, before
# any shuffling. x_train is ordered by tempo class, so without this permutation
# the validation set would contain only the highest tempos and the model would
# hardly train on them. A fixed seed keeps the split reproducible.
shuffle_idx = np.random.default_rng(0).permutation(len(x_train))
history = TFmodel.fit(
    x_train[shuffle_idx], y_train_enc[shuffle_idx],
    epochs=40,
    batch_size=64,
    validation_split=0.1,
    shuffle=True
)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


In [22]:
# One-hot encode the labels in place for the LSTM baseline.
# NOTE: this overwrites y_train / y_test, so cells that expect the raw bpm
# labels must run before this one. The ndim guard makes the cell safe to
# re-run: labels that are already one-hot (2-D) are left untouched.
if y_train.ndim == 1:
    # The encoder's default sparse output is densified with .toarray() instead
    # of passing `sparse=False`, which newer scikit-learn releases renamed.
    enc = OneHotEncoder(handle_unknown='ignore')
    enc = enc.fit(y_train.reshape(-1, 1))
    y_train = enc.transform(y_train.reshape(-1, 1)).toarray()
    y_test = enc.transform(y_test.reshape(-1, 1)).toarray()

# Bidirectional LSTM baseline classifier.
model = keras.Sequential()
# Declare the input shape on the model itself: an `input_shape` passed to the
# LSTM wrapped inside Bidirectional is not seen by Sequential, which leaves the
# model unbuilt until the first fit/predict call.
model.add(keras.Input(shape=(x_train.shape[1], x_train.shape[2])))
model.add(
    keras.layers.Bidirectional(
        keras.layers.LSTM(units=128)
    )
)
model.add(keras.layers.Dropout(rate=0.5))
model.add(keras.layers.Dense(units=128, activation='relu'))
# One output unit per one-hot label column.
model.add(keras.layers.Dense(y_train.shape[1], activation='softmax'))
model.compile(loss='categorical_crossentropy',
              optimizer='adam', metrics=['acc'])

