In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from sklearn import preprocessing
import matplotlib.pyplot as plt

In [2]:
# Step 1: Data Preprocessing and Label Encoding
def df_label_encoder(df, columns):
    le = preprocessing.LabelEncoder()
    for col in columns:
        df[col] = le.fit_transform(df[col].astype(str))
    return df

In [3]:
# Preprocess the dataset and extract features for node creation
def preprocess(df):
    df = df_label_encoder(df, ['merchant', 'category', 'city', 'state', 'job'])
    df['amt'] = (df['amt'] - df['amt'].min()) / (df['amt'].max() - df['amt'].min())
    df['node_from'] = df['cc_num'].astype(str)
    df['node_to'] = df['merchant'].astype(str)
    df = df.sort_values(by=['node_from'])
    node_list = pd.concat([df['node_from'], df['node_to']]).unique()
    return df, node_list

In [4]:
def create_graph_data(df, node_list):
    node_map = {node: idx for idx, node in enumerate(node_list)}
    edge_index = np.array([
        [node_map[from_node], node_map[to_node]] for from_node, to_node in zip(df['node_from'], df['node_to'])
    ], dtype=np.int64).T

    node_features = np.array(df[['amt', 'category', 'city', 'state']].values, dtype=np.float32)
    labels = np.array(df['is_fraud'].values, dtype=np.int64)
    return node_features, edge_index, labels

In [5]:
# Load dataset and preprocess
df = pd.read_csv('creditcard/fraudTrain.csv')  # Update with your .csv file path
df, node_list = preprocess(df)

In [6]:
# Create the graph dataset
node_features, edge_index, labels = create_graph_data(df, node_list)

In [7]:
# Step 2: Create a GAT Layer using TensorFlow
class GATLayer(tf.keras.layers.Layer):
    def __init__(self, output_dim, num_heads=1, dropout_rate=0.6):
        super(GATLayer, self).__init__()
        self.output_dim = output_dim
        self.num_heads = num_heads
        self.dropout_rate = dropout_rate

    def build(self, input_shape):
        self.W = self.add_weight(shape=(input_shape[-1], self.output_dim),
                                 initializer='glorot_uniform',
                                 trainable=True)
        self.attn_kernel = self.add_weight(shape=(2 * self.output_dim, 1),
                                           initializer='glorot_uniform',
                                           trainable=True)
        self.leaky_relu = tf.keras.layers.LeakyReLU(alpha=0.2)

    def call(self, node_features, adj_matrix, training=True):
        h = tf.matmul(node_features, self.W)

        num_nodes = h.shape[0]
        h_expanded = tf.tile(tf.expand_dims(h, axis=0), [num_nodes, 1, 1])
        h_expanded_transposed = tf.transpose(h_expanded, [1, 0, 2])

        a_input = tf.concat([h_expanded, h_expanded_transposed], axis=-1)
        e = self.leaky_relu(tf.matmul(a_input, self.attn_kernel))

        attention = tf.where(adj_matrix > 0, e, tf.zeros_like(e))
        attention = tf.nn.softmax(attention, axis=1)

        h_prime = tf.matmul(attention, h)

        if training:
            h_prime = tf.nn.dropout(h_prime, rate=self.dropout_rate)

        return tf.nn.elu(h_prime)


In [8]:
# Step 3: Create adjacency matrix
def create_adjacency_matrix(edge_index, num_nodes):
    adj_matrix = np.zeros((num_nodes, num_nodes), dtype=np.float32)
    for i, j in zip(edge_index[0], edge_index[1]):
        if i < num_nodes and j < num_nodes:
            adj_matrix[i, j] = 1
    return adj_matrix
# create sparse adj matrix

In [9]:
# Step 4: Define the GAT model using TensorFlow
class GATModel(tf.keras.Model):
    def __init__(self, num_features, hidden_size):
        super(GATModel, self).__init__()
        self.gat_layer = GATLayer(output_dim=hidden_size)

    def call(self, node_features, adj_matrix, training=False):
        return self.gat_layer(node_features, adj_matrix, training)

In [10]:
# Step 5: Prepare the graph input data
num_nodes = len(node_features)
adj_matrix = create_adjacency_matrix(edge_index, num_nodes)

In [11]:
# Step 6: Initialize and run the GAT model
gat_model = GATModel(num_features=node_features.shape[1], hidden_size=128)
gat_output = gat_model(node_features, adj_matrix, training=True)

ResourceExhaustedError: OOM when allocating tensor with shape[1296675,1296675,128] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc [Op:Tile]

In [None]:
# Step 7: Print the output of GAT
print("GAT output:", gat_output)