In [1]:
!pip install transformers

Collecting transformers
  Downloading transformers-4.30.2-py3-none-any.whl (7.2 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.2/7.2 MB[0m [31m62.1 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub<1.0,>=0.14.1 (from transformers)
  Downloading huggingface_hub-0.15.1-py3-none-any.whl (236 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m236.8/236.8 kB[0m [31m23.5 MB/s[0m eta [36m0:00:00[0m
Collecting tokenizers!=0.11.3,<0.14,>=0.11.1 (from transformers)
  Downloading tokenizers-0.13.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (7.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m7.8/7.8 MB[0m [31m69.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting safetensors>=0.3.1 (from transformers)
  Downloading safetensors-0.3.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.3/1.3 MB[0m [31m79.5 MB/s[0m eta [36m0:00:0

In [2]:
import numpy as np
import tensorflow as tf
import keras
import tensorflow_probability as tfp
from tensorflow.keras.layers import Input, Dense, Reshape, UpSampling2D, Conv2D
from tensorflow.keras.models import Model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras import layers
from transformers import LongformerModel
from sklearn.preprocessing import MinMaxScaler

In [3]:
# Per-sample tensor shapes (batch axis excluded), laid out as (rows, cols, channels).
INPUT_SHAPE = (128, 128, 4)  # model input; the last axis matches the 4 input frames built by generate_data
OUTPUT_SHAPE = (128, 128, 1)  # model output; the last axis matches the single target frame built by generate_data


In [4]:
# Mount Google Drive; the data files read by later cells live under /content/drive.
# NOTE(review): Colab-only dependency, imported here rather than in the import cell.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [5]:
# Load the two constants that generate_data uses for min-max scaling.
# NOTE(review): the names `min` and `max` shadow Python's built-in functions for the
# rest of the session. A later cell passes them to generate_data, so a rename has to
# be applied there as well.
min_max = '/content/drive/MyDrive/example1/min_max_scale_new.npy'
min, max = np.load(min_max, encoding='bytes')
print(min, max)

-236.2857142857143 295.5714285714286


In [6]:
# Load and preprocess the data
def load_data(file_path):
    """Read a single ``.npy`` file and hand back its contents.

    Args:
        file_path: Location of the file; passed unchanged to ``np.load``.

    Returns:
        The object ``np.load`` produces for that file.
    """
    return np.load(file_path)

def preprocess_data(file_list, data_dir='/content/drive/MyDrive/example1/'):
    """Load every listed ``.npy`` file and stack the results into one array.

    Args:
        file_list: File names relative to ``data_dir``. A single name (for
            example the 0-d array ``np.loadtxt`` returns for a one-line list
            file, or a plain string) is treated as a list of one.
        data_dir: Directory that contains the files. Defaults to the Drive
            folder this notebook was written against.

    Returns:
        ``np.ndarray`` built from the loaded arrays; its first axis follows the
        order of ``file_list``.
    """
    import os  # local import so this cell does not depend on the import cell

    # np.atleast_1d turns a lone file name into a one-element array; without it
    # a 0-d array raises "iteration over a 0-d array" and a plain string would
    # be iterated character by character.
    file_names = np.atleast_1d(file_list)
    loaded_data = [
        load_data(os.path.join(data_dir, str(file_name)))
        for file_name in file_names
    ]
    return np.array(loaded_data)


def generate_data(data,min_train,max_train):
    """Build min-max-scaled model inputs and targets from raw radar sequences.

    The first four timesteps of every sample become the input frames and the
    last timestep becomes the target frame. (The original notebook comment
    describes these as t-30, t-20, t-10, t and t+10; the code itself only
    fixes "first four" and "last".)

    Args:
        data: Array of shape (n_samples, row, col, timesteps). It is left
            unmodified; any grid size is accepted.
        min_train: Lower bound used for the min-max scaling.
        max_train: Upper bound used for the min-max scaling.

    Returns:
        Tuple ``(movie_in, movie_out)`` of float64 arrays with shapes
        (n_samples, row, col, 4) and (n_samples, row, col, 1).

    Raises:
        ValueError: If ``data`` is not 4-D or has fewer than four timesteps.
    """
    n_frames = 4  # number of leading timesteps used as model input

    if data.ndim != 4 or data.shape[3] < n_frames:
        raise ValueError(
            "data must have shape (n_samples, row, col, timesteps) with at "
            "least %d timesteps, got shape %r" % (n_frames, data.shape)
        )

    # Work on a copy so the caller's array is not altered as a side effect.
    cleaned = data.copy()
    # replace the pixel of "no echo" (values <= -127) with 0
    cleaned[cleaned <= -127] = 0

    def _to_pixel(frames):
        # Affine rescaling of the raw values: -10 maps to 0.5 and 60 to 255.5.
        return (255. * ((frames + 10.) / 70.)) + 0.5

    # Computed in the dtype of `data`, then stored as float64.
    movie_in = np.asarray(_to_pixel(cleaned[:, :, :, :n_frames]), dtype=np.float64)
    movie_out = np.asarray(_to_pixel(cleaned[:, :, :, -1:]), dtype=np.float64)

    # Min-max scaling
    scale = max_train - min_train
    movie_in = (movie_in - min_train) / scale
    movie_out = (movie_out - min_train) / scale

    return movie_in, movie_out


In [7]:
# Load the training and testing data
# ndmin=1 keeps each list one-dimensional even when a text file holds a single
# file name, so preprocess_data can always iterate over it.
train_file_list = np.loadtxt('/content/drive/MyDrive/example1/radar_events_train.txt', dtype=str, ndmin=1)
test_file_list = np.loadtxt('/content/drive/MyDrive/example1/radar_events_test.txt', dtype=str, ndmin=1)

data_train = preprocess_data(train_file_list)
data_test = preprocess_data(test_file_list)

# `min` and `max` are the scaling constants loaded from min_max_scale_new.npy in an
# earlier cell (they shadow the built-ins); both splits are scaled with the same pair.
x_train, y_train = generate_data(data_train, min, max)
x_test, y_test = generate_data(data_test, min, max)

In [8]:
# Vision Transformer (ViT) hyperparameters

image_size = 128  # assumed side length of the square input frames; only used to derive num_patches
patch_size = 16  # side length of each square patch cut out by the Patches layer
num_patches = (image_size//patch_size) **2  # patches per frame: (128 // 16) ** 2 = 64

# Hyperparameters for the ViT module
projection_dim = 64 # width of the feature vector each patch is projected to
                     # before it enters the Transformer blocks; also used as
                     # key_dim of the attention layers in create_vit

num_heads = 8  # number of heads in each multi-head attention layer

# Each Transformer block in create_vit has two residual (skip) connections:
# one adds the block input to the self-attention output, the other adds that
# sum to the output of the feed-forward stack built by mlp(). The last entry of
# transformer_units is projection_dim so that this second addition has
# matching widths.

# Dense layer widths of the per-block feed-forward stack
transformer_units = [projection_dim*2,projection_dim]
# Number of stacked Transformer blocks in each ViT
transformer_layers = 8
mlp_head_units = [4096,2048]  # NOTE(review): not referenced by any code visible in this notebook
# Feed-forward stack used inside every Transformer block; the skip connection
# around it is added by the caller, not here.
def mlp(x, hidden_units, dropout_rate):
    """Run ``x`` through a chain of GELU Dense layers, each followed by Dropout.

    Args:
        x: Input tensor.
        hidden_units: Iterable with the width of each Dense layer, in order.
        dropout_rate: Rate passed to every Dropout layer.

    Returns:
        The tensor produced by the last Dropout layer, or ``x`` itself when
        ``hidden_units`` is empty.
    """
    features = x
    for width in hidden_units:
        features = layers.Dense(width, activation=tf.nn.gelu)(features)
        features = layers.Dropout(dropout_rate)(features)
    return features


# overriding keras layer object to implement the patches layer
class Patches(layers.Layer):
    """Keras layer that cuts images into non-overlapping square patches.

    Args:
        patch_size: Side length of each square patch; also used as the stride,
            so patches do not overlap.
        **kwargs: Standard ``Layer`` arguments (``name``, ``dtype``, ...),
            forwarded to the base class.
    """

    def __init__(self, patch_size, **kwargs):
        # Forward kwargs to the base class: the config produced by
        # get_config() contains base-layer entries such as `name`, and
        # dropping them would make from_config() lose that information.
        super().__init__(**kwargs)
        self.patch_size = patch_size

    def get_config(self):
        """Return the base config extended with the constructor argument."""
        config = super().get_config()
        config.update({
            'patch_size': self.patch_size
        })
        return config

    def call(self, images):
        """Extract patches and flatten each one into a vector.

        Args:
            images: Batch of images, 4-D as required by
                ``tf.image.extract_patches``.

        Returns:
            Tensor reshaped to (batch, -1, patch_dims), where ``patch_dims`` is
            the static size of the last axis produced by the patch extraction.
        """
        batch_size = tf.shape(images)[0]
        patches = tf.image.extract_patches(
            images=images,
            sizes=[1, self.patch_size, self.patch_size, 1],
            strides=[1, self.patch_size, self.patch_size, 1],
            rates=[1, 1, 1, 1],
            padding="VALID",
        )
        patch_dims = patches.shape[-1]
        patches = tf.reshape(patches, [batch_size, -1, patch_dims])
        return patches


# The PatchEncoder layer will linearly transform a patch by projecting it into a
# vector of size projection_dim. In addition, it adds a learnable position
# embedding to the projected vector.
class PatchEncoder(layers.Layer):
    """Project flattened patches and add a learnable position embedding.

    Args:
        num_patches: Number of patches per image; size of the position
            embedding table.
        projection_dim: Width of the projected patch vectors. When omitted,
            the notebook-level ``projection_dim`` hyperparameter is used,
            which is what earlier versions of this class always did.
        **kwargs: Standard ``Layer`` arguments (``name``, ``dtype``, ...),
            forwarded to the base class.
    """

    def __init__(self, num_patches, projection_dim=None, **kwargs):
        # Configs written by the earlier get_config() carried these two
        # entries; discard them so such configs can still be loaded.
        kwargs.pop('projection', None)
        kwargs.pop('position_embedding', None)
        super().__init__(**kwargs)

        if projection_dim is None:
            # Backward compatibility: fall back to the module-level
            # hyperparameter (the parameter name shadows it, hence globals()).
            projection_dim = globals()['projection_dim']

        self.num_patches = num_patches
        self.projection_dim = projection_dim
        self.projection = layers.Dense(units=projection_dim)
        self.position_embedding = layers.Embedding(
            input_dim=num_patches, output_dim=projection_dim
        )

    def get_config(self):
        """Return the base config plus the constructor arguments.

        Only plain values are stored: the sub-layers are rebuilt by
        ``__init__`` and must not be placed in the config themselves.
        """
        config = super().get_config()
        config.update({
            'num_patches': self.num_patches,
            'projection_dim': self.projection_dim,
        })
        return config

    def call(self, patch):
        """Return the projected patches with position embeddings added."""
        positions = tf.range(start=0, limit=self.num_patches, delta=1)
        encoded = self.projection(patch) + self.position_embedding(positions)
        return encoded
def create_vit(inputs):
    """Encode a batch of frames with a patch embedding and Transformer blocks.

    All layers are constructed inside this function, so every call builds a
    fresh set of layers. The notebook-level hyperparameters ``patch_size``,
    ``num_patches``, ``projection_dim``, ``num_heads``, ``transformer_units``
    and ``transformer_layers`` control the architecture.

    Args:
        inputs: Image tensor handed to the ``Patches`` layer.

    Returns:
        The sequence of encoded patches after the last Transformer block.
    """
    # Patch extraction, then linear projection + position embedding.
    raw_patches = Patches(patch_size)(inputs)
    sequence = PatchEncoder(num_patches=num_patches, projection_dim=projection_dim)(raw_patches)

    for _ in range(transformer_layers):
        # Pre-norm self-attention with a residual connection.
        normed_sequence = layers.LayerNormalization(epsilon=1e-6)(sequence)
        attended = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=projection_dim, dropout=0.1
        )(normed_sequence, normed_sequence)
        after_attention = layers.Add()([attended, sequence])

        # Pre-norm feed-forward stack with a residual connection.
        normed_attention = layers.LayerNormalization(epsilon=1e-6)(after_attention)
        feed_forward = mlp(normed_attention, hidden_units=transformer_units, dropout_rate=0.1)
        sequence = layers.Add()([feed_forward, after_attention])

    return sequence

def create_model(input_shape):
    """Build the per-frame ViT -> LSTM -> upsampling decoder model.

    Args:
        input_shape: ``(height, width, n_frames)`` of one sample. The frame
            count and frame size are taken from this argument rather than
            being hard-coded.

    Returns:
        An uncompiled Keras ``Model`` mapping the input to a single-channel
        map produced by the final Conv2D layer.
    """
    height, width, n_frames = input_shape
    inputs = Input(shape=input_shape)

    # Split the input tensor into individual timestamps
    timestamps = tf.split(inputs, num_or_size_splits=n_frames, axis=-1)

    # One ViT per timestamp; create_vit builds new layers on every call.
    llayers = []
    for timestamp in timestamps:
        timestamp = tf.reshape(timestamp, [-1, height, width, 1])
        llayers.append(create_vit(timestamp))

    # The 64 x 64 grid below is the ViT output per frame. With the
    # hyperparameters in this notebook that is num_patches x projection_dim;
    # it has to be adjusted if those change.
    vit = tf.stack(llayers, axis=-1)
    vit = tf.reshape(vit, (-1, 64, 64, n_frames))  # Reshape the ViT output to (None, 64, 64, n_frames)

    # Reshape the data to match LSTM input requirements
    # NOTE(review): the LSTM iterates over the 64*64 flattened positions with
    # the frames as features, not over the frames themselves. Confirm that
    # this is the intended sequence axis.
    reshaped_vit = Reshape(target_shape=(64*64, n_frames))(vit)

    lstm = layers.LSTM(128, return_sequences=True)(reshaped_vit)
    # Upsampling decoder
    decoded = Reshape(target_shape=(64, 64, 128))(lstm)
    decoded = UpSampling2D(size=(2, 2))(decoded)
    output = Conv2D(1, (3, 3), activation='relu', padding='same')(decoded)

    model = Model(inputs=inputs, outputs=output)
    return model

# Reuse the shape constant defined at the top of the notebook instead of
# repeating the literal.
input_shape = INPUT_SHAPE
model = create_model(input_shape)
# summary() prints the table itself and returns None, so it is not wrapped in
# print() (which would add a stray "None" line).
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 input_1 (InputLayer)           [(None, 128, 128, 4  0           []                               
                                )]                                                                
                                                                                                  
 tf.split (TFOpLambda)          [(None, 128, 128, 1  0           ['input_1[0][0]']                
                                ),                                                                
                                 (None, 128, 128, 1                                               
                                ),                                                                
                                 (None, 128, 128, 1                                           

In [9]:
# `learning_rate` is the supported argument name; the `lr` alias is deprecated
# and triggers a warning.
optimizer = keras.optimizers.Adam(learning_rate=0.0001)
# NOTE(review): binary cross-entropy is paired here with a ReLU output layer and
# continuous min-max-scaled targets. Confirm this loss is intended.
model.compile(loss='binary_crossentropy',
              optimizer=optimizer)

  super().__init__(name, **kwargs)


In [10]:
# Train the model
# Keep the returned History object so the per-epoch loss can be inspected or
# plotted afterwards instead of being discarded.
history = model.fit(x_train, y_train, batch_size=8, epochs=600)

Epoch 1/600
Epoch 2/600
Epoch 3/600
Epoch 4/600
Epoch 5/600
Epoch 6/600
Epoch 7/600
Epoch 8/600
Epoch 9/600
Epoch 10/600
Epoch 11/600
Epoch 12/600
Epoch 13/600
Epoch 14/600
Epoch 15/600
Epoch 16/600
Epoch 17/600
Epoch 18/600
Epoch 19/600
Epoch 20/600
Epoch 21/600
Epoch 22/600
Epoch 23/600
Epoch 24/600
Epoch 25/600
Epoch 26/600
Epoch 27/600
Epoch 28/600
Epoch 29/600
Epoch 30/600
Epoch 31/600
Epoch 32/600
Epoch 33/600
Epoch 34/600
Epoch 35/600
Epoch 36/600
Epoch 37/600
Epoch 38/600
Epoch 39/600
Epoch 40/600
Epoch 41/600
Epoch 42/600
Epoch 43/600
Epoch 44/600
Epoch 45/600
Epoch 46/600
Epoch 47/600
Epoch 48/600
Epoch 49/600
Epoch 50/600
Epoch 51/600
Epoch 52/600
Epoch 53/600
Epoch 54/600
Epoch 55/600
Epoch 56/600
Epoch 57/600
Epoch 58/600
Epoch 59/600
Epoch 60/600
Epoch 61/600
Epoch 62/600
Epoch 63/600
Epoch 64/600
Epoch 65/600
Epoch 66/600
Epoch 67/600
Epoch 68/600
Epoch 69/600
Epoch 70/600
Epoch 71/600
Epoch 72/600
Epoch 73/600
Epoch 74/600
Epoch 75/600
Epoch 76/600
Epoch 77/600
Epoch 78

<keras.callbacks.History at 0x7f35b2c99330>

In [12]:
# Compute the compiled loss on the test arrays (verbose=0 silences the progress bar)
# and show it.
loss = model.evaluate(
    x_test,
    y_test,
    verbose=0,
)
print(loss)

0.6484830975532532


In [13]:
# Run the trained model on the test inputs. The predictions are on the same
# min-max-scaled scale as y_test, because generate_data scales inputs and targets alike.
predictions_transformer = model.predict(x_test)



In [11]:
# Save the trained model
# NOTE(review): the relative path writes to the current working directory, not to the
# mounted Drive folder used for the data. Confirm this is where the file should go.
# Reloading presumably needs custom_objects for the Patches and PatchEncoder layers.
model.save('transformer_model.h5')