In [12]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, Embedding, Flatten, MultiHeadAttention, LayerNormalization, Dropout
from tensorflow.keras.models import Model
from sklearn.model_selection import train_test_split

import pandas as pd

In [2]:
class MultiHeadSelfAttention(Model):
    def __init__(self, embed_size, num_heads):
        super(MultiHeadSelfAttention, self).__init__()
        self.embed_size = embed_size
        self.num_heads = num_heads
        self.attention = MultiHeadAttention(num_heads=num_heads, key_dim=embed_size)
        
    def call(self, x):
        # x.shape = (batch_size, seq_len, embed_size)
        attn_output = self.attention(x, x, x)
        # attn_output.shape = (batch_size, seq_len, embed_size)
        return attn_output

In [3]:
class MyModel(Model):
    def __init__(self, embed_size, num_heads, num_features):
        super(MyModel, self).__init__()
        self.embed_size = embed_size
        self.num_heads = num_heads
        self.num_features = num_features
        
        # Define the layers
        self.dense_proj = Dense(embed_size, activation="relu")
        self.attention = MultiHeadSelfAttention(embed_size, num_heads)
        self.norm = LayerNormalization(epsilon=1e-6)
        self.flatten = Flatten()
        self.final_dense = Dense(1, activation="sigmoid")  # Assuming binary classification
        
    def call(self, inputs):
        # inputs.shape = (batch_size, num_features)
        x = self.dense_proj(inputs)
        # x.shape = (batch_size, num_features, embed_size)
        attn_output = self.attention(x)
        attn_output = self.norm(attn_output + x)
        out = self.flatten(attn_output)
        return self.final_dense(out)


In [4]:
def create_model(input_shape, embed_size, num_heads, ff_dim, num_layers, num_classes):
    # 创建模型
    inputs = Input(shape=(input_shape,))
    x = Dense(embed_size)(inputs)
    for _ in range(num_layers):
        x = TransformerBlock(embed_size, num_heads, ff_dim)(x)
    x = Dense(128, activation="relu")(x)
    x = Dropout(0.1)(x)
    outputs = Dense(num_classes, activation="softmax")(x)
    model = Model(inputs=inputs, outputs=outputs)
    return model

In [5]:
# # Example parameters (should be tuned for your specific task)
# input_shape = data.shape[1] - 1 # Excluding target variable
# embed_size = 256
# num_heads = 8
# ff_dim = 512
# num_layers = 2
# num_classes = 2 # Adjust based on your task, e.g., binary classification

In [6]:
# Example model parameters
embed_size = 32
num_heads = 4
num_features = 10  # Number of input features

In [7]:
# Create the model
# model = create_model(input_shape, embed_size, num_heads, ff_dim, num_layers, num_classes)


In [8]:
# 计算类别权重（根据数据集进行调整）
# 加权交叉熵损失函数
class_weights = {0: 84.21,  # 多数类别的权重
                 1: 1600  # 少数类别的权重
                 }

In [9]:
# Create and compile the model
model = MyModel(embed_size, num_heads, num_features)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=[tf.keras.metrics.AUC()])

In [10]:
# Model summary
model.summary()

In [13]:
data = pd.read_csv('/kaggle/input/fraud-detection-2/data/process_data.csv')


In [16]:
# 分离特征和目标变量
X = data.drop('RES', axis=1)
y = data['RES']
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.3,random_state=123)

In [17]:
# 训练模型时传递类别权重
model.fit(X_train, y_train, class_weight=class_weights, epochs=1, batch_size=32)

IndexError: Exception encountered when calling Softmax.call().

[1mtuple index out of range[0m

Arguments received by Softmax.call():
  • inputs=tf.Tensor(shape=(32, 4), dtype=float32)
  • mask=None