In [1]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os
# Walk the Kaggle input directory and print the full path of every file found.
for root, _, files in os.walk('/kaggle/input'):
    for name in files:
        print(os.path.join(root, name))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

/kaggle/input/ace-dataset/ACE_dataset.csv
/kaggle/input/ace-dataset/ACE_dataset.fasta
/kaggle/input/ace-dataset/features/ACE_ASDC.csv
/kaggle/input/ace-dataset/features/opf_7bit_type_1_features.csv
/kaggle/input/ace-dataset/features/opf_7bit_type_2_features.csv
/kaggle/input/ace-dataset/features/esmv1_feat_ACE.csv
/kaggle/input/ace-dataset/features/ACE_embeddings_prot_t5_xl_bfd.csv
/kaggle/input/ace-dataset/features/esm2_t6_8M_feat_ACE.csv
/kaggle/input/ace-dataset/features/opf_7bit_type_3_features.csv
/kaggle/input/ace-dataset/features/opf_10bit_features.csv
/kaggle/input/ace-dataset/features/ACE_AAC.csv
/kaggle/input/ace-dataset/auto_enco_feat/AEDN-500.csv
/kaggle/input/ace-dataset/auto_enco_feat/AEDN-300.csv
/kaggle/input/ace-dataset/auto_enco_feat/AEDN-50.csv
/kaggle/input/ace-dataset/auto_enco_feat/AEDN-400.csv
/kaggle/input/ace-dataset/auto_enco_feat/AEDN-450.csv
/kaggle/input/ace-dataset/auto_enco_feat/AEDN-350.csv
/kaggle/input/ace-dataset/auto_enco_feat/AEDN-250.csv
/kaggle/in

In [3]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import scale
from keras.saving import register_keras_serializable

# Read the AEDN-100 feature table from the Kaggle input directory.
path = "/kaggle/input/ace-dataset/auto_enco_feat/"
data_ = pd.read_csv(path + 'AEDN-100.csv')

# Column 0 is dropped; the remaining columns go through sklearn's `scale`
# with its default settings.
data_np = data_.to_numpy()
data = scale(data_np[:, 1:])

# 394 ones followed by 626 zeros, flattened into a single 1-D label vector.
label1 = np.ones((394, 1))  # Value can be changed
label2 = np.zeros((626, 1))
labels = np.concatenate([label1.ravel(), label2.ravel()])

# Define the positional encoding function
def positional_encoding(positions, d):
    """Build a sinusoidal positional-encoding table.

    Args:
        positions: number of positions (rows of the table).
        d: encoding width (columns of the table).

    Returns:
        A float32 tensor of shape (1, positions, d). Even columns hold the
        sine and odd columns the cosine of ``pos / 10000 ** (2 * (col // 2) / d)``.
    """
    position_idx = np.arange(positions).reshape(-1, 1)
    column_idx = np.arange(d).reshape(1, -1)

    # Columns 2j and 2j + 1 share the same frequency exponent 2j / d.
    table = position_idx / np.power(10000, 2 * (column_idx // 2) / d)

    table[:, ::2] = np.sin(table[:, ::2])
    table[:, 1::2] = np.cos(table[:, 1::2])

    # Prepend a broadcastable batch axis before casting.
    return tf.cast(table[np.newaxis, ...], dtype=tf.float32)

@register_keras_serializable()
class TransformerModel(keras.Model):
    """Embedding -> encoder block -> decoder block -> dense head ending in one sigmoid unit.

    Args:
        input_vocab_size: number of rows in the embedding table.
        d_model: embedding width; also forwarded to both transformer blocks.
        num_heads: attention heads used by each transformer block.
        ff_dim: hidden width of each block's feed-forward network.
        rate: dropout rate forwarded to both transformer blocks.
        maxlen: number of positions in the precomputed positional-encoding table.
        **kwargs: forwarded to ``keras.Model`` (e.g. ``name``) so that a config
            carrying base-class entries can be passed straight back in.
    """

    def __init__(self, input_vocab_size, d_model, num_heads, ff_dim, rate=0.1, maxlen=50, **kwargs):
        super().__init__(**kwargs)

        # Keep the constructor arguments so get_config() can round-trip them.
        self.input_vocab_size = input_vocab_size
        self.d_model = d_model
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.maxlen = maxlen

        # NOTE(review): the notebook feeds standardised float32 features into
        # this Embedding, which is an index-lookup layer - confirm this is intended.
        self.embedding = layers.Embedding(input_vocab_size, d_model)
        # NOTE(review): the table is built here but call() never adds it to the
        # embeddings - confirm whether positional information should be applied.
        self.PE = positional_encoding(maxlen, d_model)
        self.transformer_block = TransformerBlock_Encode(d_model, num_heads, ff_dim, rate)
        self.transformer_block2 = TransformerBlock_decode(d_model, num_heads, ff_dim, rate)
        self.flatten = layers.Flatten()
        self.fc1 = layers.Dense(512, activation="relu")
        self.fc3 = layers.Dense(256, activation="relu")
        self.fc2 = layers.Dense(1, activation="sigmoid")

    def call(self, inputs, training=None):
        """Run the forward pass and return the output of the sigmoid unit.

        The encoder output is passed to the decoder block as both its ``q``
        and ``k`` arguments; the decoder's ``inputs`` are the raw embeddings.
        """
        x = self.embedding(inputs)
        y = self.transformer_block(x, training=training)
        x = self.transformer_block2(x, y, y, training=training)
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.fc3(x)  # fc3 (256 units) runs before fc2 (the output unit)
        return self.fc2(x)

    def get_config(self):
        """Return the constructor arguments so the model can be re-created."""
        config = super().get_config()
        config.update({
            "input_vocab_size": self.input_vocab_size,
            "d_model": self.d_model,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
            "maxlen": self.maxlen,
        })
        return config

    @classmethod
    def from_config(cls, config):
        """Re-create the model from a config dict.

        Accepts the flat dict produced by ``get_config()`` as well as the
        wrapped ``{'config': {...}}`` shape this method originally required,
        which raised ``KeyError`` when handed a flat dict.
        """
        params = config['config'] if 'config' in config else config
        return cls(**params)

class TransformerBlock_decode(layers.Layer):
    """Decoder-style block: self-attention, a second attention over external tensors, then a feed-forward network.

    Each of the three sub-layers is followed by dropout, a residual sum and
    layer normalisation.

    Args:
        d_model: ``key_dim`` of both attention layers and output width of the
            feed-forward network.
        num_heads: number of heads in both attention layers.
        ff_dim: hidden width of the feed-forward network.
        rate: dropout rate applied to each sub-layer output.
    """

    def __init__(self, d_model, num_heads, ff_dim, rate=0.1):
        super().__init__()

        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
        self.att1 = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
        self.ffn = keras.Sequential([
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(d_model),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm3 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)
        self.dropout3 = layers.Dropout(rate)

    def call(self, inputs, q, k, training=None):
        """Apply the three sub-layers to ``inputs`` using ``q`` and ``k`` in the second attention."""
        # Sub-layer 1: self-attention over `inputs`.
        self_attended = self.att(inputs, inputs, inputs)
        residual1 = self.layernorm1(inputs + self.dropout1(self_attended, training=training))

        # Sub-layer 2: MultiHeadAttention takes (query, value, key), so `q` is
        # the query, `k` is the value and the sub-layer-1 output is the key.
        # NOTE(review): this is an unusual mapping for a parameter named `k` -
        # confirm it is the intended wiring.
        cross_attended = self.att1(q, value=k, key=residual1)
        residual2 = self.layernorm2(residual1 + self.dropout2(cross_attended, training=training))

        # Sub-layer 3: position-wise feed-forward network.
        return self.layernorm3(residual2 + self.dropout3(self.ffn(residual2), training=training))


class TransformerBlock_Encode(layers.Layer):
    """Conv1D front-end followed by a self-attention + feed-forward block.

    Args:
        d_model: number of convolution filters, ``key_dim`` of the attention
            layer and output width of the feed-forward network.
        num_heads: number of attention heads.
        ff_dim: hidden width of the feed-forward network.
        rate: dropout rate applied to the attention and feed-forward outputs.
    """

    def __init__(self, d_model, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock_Encode, self).__init__()
        # The convolution must emit d_model channels: the second residual sum
        # in call() adds the normalised attention output (conv width) to the
        # feed-forward output (d_model wide). The previous fixed 256 filters
        # only worked when d_model == 256.
        self.con = layers.Conv1D(d_model, 5, padding='same')
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(d_model)]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Convolve ``inputs``, then apply self-attention and the feed-forward network.

        Both sub-layers are followed by dropout, a residual sum and layer
        normalisation.
        """
        inputs = self.con(inputs)
        attn_output = self.att(inputs, inputs, inputs)
        out1 = self.layernorm1(inputs + self.dropout1(attn_output, training=training))

        ffn_output = self.ffn(out1)
        return self.layernorm2(out1 + self.dropout2(ffn_output, training=training))

# Define the model parameters
input_vocab_size = 100  # Replace with the actual vocabulary size
d_model = 256
num_heads = 4
ff_dim = 128

# The model is used straight after construction (this cell has no training
# step), so its output depends entirely on the random weight initialisation.
# Seed Python, NumPy and TensorFlow so the exported CSV is reproducible.
keras.utils.set_random_seed(42)

model = TransformerModel(input_vocab_size, d_model, num_heads, ff_dim)

# Convert data to tensor
data_tensor = tf.convert_to_tensor(data, dtype=tf.float32)

# Forward pass in inference mode (dropout explicitly disabled)
transformed_features = model(data_tensor, training=False)

# Save the model output, one CSV column per output unit
transformed_features = transformed_features.numpy()
transformed_df = pd.DataFrame(transformed_features, columns=[f'feature_{i}' for i in range(transformed_features.shape[1])])
transformed_df.to_csv('transformed_data.csv', index=False)


In [4]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import scale
from keras.saving import register_keras_serializable

# Read the AEDN-100 feature table from the Kaggle input directory.
path = "/kaggle/input/ace-dataset/auto_enco_feat/"
data_ = pd.read_csv(path + 'AEDN-100.csv')

# Column 0 is dropped; the remaining columns go through sklearn's `scale`
# with its default settings.
data_np = data_.to_numpy()
data = scale(data_np[:, 1:])

# 394 ones followed by 626 zeros, flattened into a single 1-D label vector.
label1 = np.ones((394, 1))  # Value can be changed
label2 = np.zeros((626, 1))
labels = np.concatenate([label1.ravel(), label2.ravel()])

# Define the positional encoding function
def positional_encoding(positions, d):
    """Build a sinusoidal positional-encoding table.

    Args:
        positions: number of positions (rows of the table).
        d: encoding width (columns of the table).

    Returns:
        A float32 tensor of shape (1, positions, d). Even columns hold the
        sine and odd columns the cosine of ``pos / 10000 ** (2 * (col // 2) / d)``.
    """
    position_idx = np.arange(positions).reshape(-1, 1)
    column_idx = np.arange(d).reshape(1, -1)

    # Columns 2j and 2j + 1 share the same frequency exponent 2j / d.
    table = position_idx / np.power(10000, 2 * (column_idx // 2) / d)

    table[:, ::2] = np.sin(table[:, ::2])
    table[:, 1::2] = np.cos(table[:, 1::2])

    # Prepend a broadcastable batch axis before casting.
    return tf.cast(table[np.newaxis, ...], dtype=tf.float32)

@register_keras_serializable()
class TransformerModel(keras.Model):
    """Embedding -> encoder block -> decoder block -> two dense layers, returning a 256-wide vector.

    Args:
        input_vocab_size: number of rows in the embedding table.
        d_model: embedding width; also forwarded to both transformer blocks.
        num_heads: attention heads used by each transformer block.
        ff_dim: hidden width of each block's feed-forward network.
        rate: dropout rate forwarded to both transformer blocks.
        maxlen: number of positions in the precomputed positional-encoding table.
        **kwargs: forwarded to ``keras.Model`` (e.g. ``name``) so that a config
            carrying base-class entries can be passed straight back in.
    """

    def __init__(self, input_vocab_size, d_model, num_heads, ff_dim, rate=0.1, maxlen=100, **kwargs):
        super().__init__(**kwargs)

        # Keep the constructor arguments so get_config() can round-trip them.
        self.input_vocab_size = input_vocab_size
        self.d_model = d_model
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.maxlen = maxlen

        # NOTE(review): the notebook feeds standardised float32 features into
        # this Embedding, which is an index-lookup layer - confirm this is intended.
        self.embedding = layers.Embedding(input_vocab_size, d_model)
        # NOTE(review): the table is built here but call() never adds it to the
        # embeddings - confirm whether positional information should be applied.
        self.PE = positional_encoding(maxlen, d_model)
        self.transformer_block = TransformerBlock_Encode(d_model, num_heads, ff_dim, rate)
        self.transformer_block2 = TransformerBlock_decode(d_model, num_heads, ff_dim, rate)
        self.flatten = layers.Flatten()
        self.fc1 = layers.Dense(512, activation="relu")
        self.fc2 = layers.Dense(256, activation="relu")
        # Removed the final sigmoid layer to return the feature vector instead

    def call(self, inputs, training=None):
        """Run the forward pass and return the output of the 256-unit dense layer.

        The encoder output is passed to the decoder block as both its ``q``
        and ``k`` arguments; the decoder's ``inputs`` are the raw embeddings.
        """
        x = self.embedding(inputs)
        y = self.transformer_block(x, training=training)
        x = self.transformer_block2(x, y, y, training=training)
        x = self.flatten(x)
        x = self.fc1(x)
        return self.fc2(x)

    def get_config(self):
        """Return the constructor arguments so the model can be re-created."""
        config = super().get_config()
        config.update({
            "input_vocab_size": self.input_vocab_size,
            "d_model": self.d_model,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
            "maxlen": self.maxlen,
        })
        return config

    @classmethod
    def from_config(cls, config):
        """Re-create the model from a config dict.

        Accepts the flat dict produced by ``get_config()`` as well as the
        wrapped ``{'config': {...}}`` shape this method originally required,
        which raised ``KeyError`` when handed a flat dict.
        """
        params = config['config'] if 'config' in config else config
        return cls(**params)

class TransformerBlock_decode(layers.Layer):
    """Decoder-style block: self-attention, a second attention over external tensors, then a feed-forward network.

    Each of the three sub-layers is followed by dropout, a residual sum and
    layer normalisation.

    Args:
        d_model: ``key_dim`` of both attention layers and output width of the
            feed-forward network.
        num_heads: number of heads in both attention layers.
        ff_dim: hidden width of the feed-forward network.
        rate: dropout rate applied to each sub-layer output.
    """

    def __init__(self, d_model, num_heads, ff_dim, rate=0.1):
        super().__init__()

        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
        self.att1 = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
        self.ffn = keras.Sequential([
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(d_model),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm3 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)
        self.dropout3 = layers.Dropout(rate)

    def call(self, inputs, q, k, training=None):
        """Apply the three sub-layers to ``inputs`` using ``q`` and ``k`` in the second attention."""
        # Sub-layer 1: self-attention over `inputs`.
        self_attended = self.att(inputs, inputs, inputs)
        residual1 = self.layernorm1(inputs + self.dropout1(self_attended, training=training))

        # Sub-layer 2: MultiHeadAttention takes (query, value, key), so `q` is
        # the query, `k` is the value and the sub-layer-1 output is the key.
        # NOTE(review): this is an unusual mapping for a parameter named `k` -
        # confirm it is the intended wiring.
        cross_attended = self.att1(q, value=k, key=residual1)
        residual2 = self.layernorm2(residual1 + self.dropout2(cross_attended, training=training))

        # Sub-layer 3: position-wise feed-forward network.
        return self.layernorm3(residual2 + self.dropout3(self.ffn(residual2), training=training))

class TransformerBlock_Encode(layers.Layer):
    """Conv1D front-end followed by a self-attention + feed-forward block.

    Args:
        d_model: number of convolution filters, ``key_dim`` of the attention
            layer and output width of the feed-forward network.
        num_heads: number of attention heads.
        ff_dim: hidden width of the feed-forward network.
        rate: dropout rate applied to the attention and feed-forward outputs.
    """

    def __init__(self, d_model, num_heads, ff_dim, rate=0.1):
        super(TransformerBlock_Encode, self).__init__()
        # The convolution must emit d_model channels: the second residual sum
        # in call() adds the normalised attention output (conv width) to the
        # feed-forward output (d_model wide). The previous fixed 256 filters
        # only worked when d_model == 256.
        self.con = layers.Conv1D(d_model, 5, padding='same')
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
        self.ffn = keras.Sequential(
            [layers.Dense(ff_dim, activation="relu"), layers.Dense(d_model)]
        )
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Convolve ``inputs``, then apply self-attention and the feed-forward network.

        Both sub-layers are followed by dropout, a residual sum and layer
        normalisation.
        """
        inputs = self.con(inputs)
        attn_output = self.att(inputs, inputs, inputs)
        out1 = self.layernorm1(inputs + self.dropout1(attn_output, training=training))

        ffn_output = self.ffn(out1)
        return self.layernorm2(out1 + self.dropout2(ffn_output, training=training))

# Define the model parameters
input_vocab_size = data.shape[1]  # Use the actual number of features as vocab size
d_model = 256
num_heads = 4
ff_dim = 128

# The model is used straight after construction (this cell has no training
# step), so its output depends entirely on the random weight initialisation.
# Seed Python, NumPy and TensorFlow so the exported CSV is reproducible.
keras.utils.set_random_seed(42)

model = TransformerModel(input_vocab_size, d_model, num_heads, ff_dim)

# Convert data to tensor
data_tensor = tf.convert_to_tensor(data, dtype=tf.float32)

# Forward pass in inference mode (dropout explicitly disabled)
transformed_features = model(data_tensor, training=False)

# Save the model output, one CSV column per output unit.
# NOTE: the previous cell also writes 'transformed_data.csv'; running both
# leaves only this cell's output on disk.
transformed_features = transformed_features.numpy()
transformed_df = pd.DataFrame(transformed_features, columns=[f'feature_{i}' for i in range(transformed_features.shape[1])])
transformed_df.to_csv('transformed_data.csv', index=False)


In [5]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.preprocessing import scale
from keras.saving import register_keras_serializable

# Read the AEDN-100 feature table from the Kaggle input directory.
path = "/kaggle/input/ace-dataset/auto_enco_feat/"
data_ = pd.read_csv(path + 'AEDN-100.csv')

# Column 0 is dropped; the remaining columns go through sklearn's `scale`
# with its default settings.
data_np = data_.to_numpy()
data = scale(data_np[:, 1:])

# 394 ones followed by 626 zeros, flattened into a single 1-D label vector.
label1 = np.ones((394, 1))  # Value can be changed
label2 = np.zeros((626, 1))
labels = np.concatenate([label1.ravel(), label2.ravel()])

# Define the positional encoding function
def positional_encoding(positions, d):
    """Build a sinusoidal positional-encoding table.

    Args:
        positions: number of positions (rows of the table).
        d: encoding width (columns of the table).

    Returns:
        A float32 tensor of shape (1, positions, d). Even columns hold the
        sine and odd columns the cosine of ``pos / 10000 ** (2 * (col // 2) / d)``.
    """
    position_idx = np.arange(positions).reshape(-1, 1)
    column_idx = np.arange(d).reshape(1, -1)

    # Columns 2j and 2j + 1 share the same frequency exponent 2j / d.
    table = position_idx / np.power(10000, 2 * (column_idx // 2) / d)

    table[:, ::2] = np.sin(table[:, ::2])
    table[:, 1::2] = np.cos(table[:, 1::2])

    # Prepend a broadcastable batch axis before casting.
    return tf.cast(table[np.newaxis, ...], dtype=tf.float32)

@register_keras_serializable()
class TransformerModel(keras.Model):
    """Embedding -> two stacked encoder blocks -> dense layer -> one sigmoid unit.

    Args:
        input_vocab_size: number of rows in the embedding table.
        d_model: embedding width; also forwarded to every encoder block.
        num_heads: attention heads used by each encoder block.
        ff_dim: hidden width of each block's feed-forward network.
        rate: dropout rate forwarded to every encoder block.
        maxlen: number of positions in the precomputed positional-encoding table.
        **kwargs: forwarded to ``keras.Model`` (e.g. ``name``) so that a config
            carrying base-class entries can be passed straight back in.
    """

    def __init__(self, input_vocab_size, d_model, num_heads, ff_dim, rate=0.1, maxlen=100, **kwargs):
        super().__init__(**kwargs)

        # Keep the constructor arguments so get_config() can round-trip them.
        self.input_vocab_size = input_vocab_size
        self.d_model = d_model
        self.num_heads = num_heads
        self.ff_dim = ff_dim
        self.rate = rate
        self.maxlen = maxlen

        # NOTE(review): the notebook feeds standardised float32 features into
        # this Embedding, which is an index-lookup layer - confirm this is intended.
        self.embedding = layers.Embedding(input_vocab_size, d_model)
        # NOTE(review): the table is built here but call() never adds it to the
        # embeddings - confirm whether positional information should be applied.
        self.PE = positional_encoding(maxlen, d_model)
        self.encoder_layers = [
            TransformerBlock_Encode(d_model, num_heads, ff_dim, rate) for _ in range(2)
        ]
        self.flatten = layers.Flatten()
        self.linear = layers.Dense(1024, activation="relu")
        self.output_layer = layers.Dense(1, activation="sigmoid")  # Assuming binary classification

    def call(self, inputs, training=None):
        """Run the forward pass and return the output of the sigmoid unit."""
        x = self.embedding(inputs)
        for encoder_layer in self.encoder_layers:
            x = encoder_layer(x, training=training)
        x = self.flatten(x)
        x = self.linear(x)
        return self.output_layer(x)

    def get_config(self):
        """Return the constructor arguments so the model can be re-created."""
        config = super().get_config()
        config.update({
            "input_vocab_size": self.input_vocab_size,
            "d_model": self.d_model,
            "num_heads": self.num_heads,
            "ff_dim": self.ff_dim,
            "rate": self.rate,
            "maxlen": self.maxlen,
        })
        return config

    @classmethod
    def from_config(cls, config):
        """Re-create the model from a config dict.

        Accepts the flat dict produced by ``get_config()`` as well as the
        wrapped ``{'config': {...}}`` shape this method originally required,
        which raised ``KeyError`` when handed a flat dict.
        """
        params = config['config'] if 'config' in config else config
        return cls(**params)

class TransformerBlock_Encode(layers.Layer):
    """Encoder block: self-attention followed by a feed-forward network.

    Both sub-layers are followed by dropout, a residual sum and layer
    normalisation.

    Args:
        d_model: ``key_dim`` of the attention layer and output width of the
            feed-forward network.
        num_heads: number of attention heads.
        ff_dim: hidden width of the feed-forward network.
        rate: dropout rate applied to each sub-layer output.
    """

    def __init__(self, d_model, num_heads, ff_dim, rate=0.1):
        super().__init__()
        self.att = layers.MultiHeadAttention(num_heads=num_heads, key_dim=d_model)
        self.ffn = keras.Sequential([
            layers.Dense(ff_dim, activation="relu"),
            layers.Dense(d_model),
        ])
        self.layernorm1 = layers.LayerNormalization(epsilon=1e-6)
        self.layernorm2 = layers.LayerNormalization(epsilon=1e-6)
        self.dropout1 = layers.Dropout(rate)
        self.dropout2 = layers.Dropout(rate)

    def call(self, inputs, training=None):
        """Apply self-attention and the feed-forward network to ``inputs``."""
        # Sub-layer 1: self-attention with residual connection.
        attended = self.att(inputs, inputs, inputs)
        residual = self.layernorm1(inputs + self.dropout1(attended, training=training))

        # Sub-layer 2: feed-forward network with residual connection.
        return self.layernorm2(residual + self.dropout2(self.ffn(residual), training=training))

# Define the model parameters
input_vocab_size = data.shape[1]  # Use the actual number of features as vocab size
d_model = 256
num_heads = 4
ff_dim = 128

# The model is used straight after construction (this cell has no training
# step), so its output depends entirely on the random weight initialisation.
# Seed Python, NumPy and TensorFlow so the exported CSV is reproducible.
keras.utils.set_random_seed(42)

model = TransformerModel(input_vocab_size, d_model, num_heads, ff_dim)

# Convert data to tensor
data_tensor = tf.convert_to_tensor(data, dtype=tf.float32)

# Forward pass in inference mode (dropout explicitly disabled)
transformed_features = model(data_tensor, training=False)

# Save the model output, one CSV column per output unit
transformed_features = transformed_features.numpy()
transformed_df = pd.DataFrame(transformed_features, columns=[f'feature_{i}' for i in range(transformed_features.shape[1])])
transformed_df.to_csv('transformed_data2.csv', index=False)
