In [13]:
import tensorflow as tf
import pandas as pd
import numpy as np
from spektral.utils import preprocess
from spektral.layers import GCNConv
from sklearn.model_selection import train_test_split

# Step 1: Load and preprocess the dataset
file_path = '/Users/mac/Desktop/abm.csv'  # Location of abm.csv on disk
df = pd.read_csv(file_path)

# The CSV is assumed to carry 'customer_id', 'transaction_id' and the feature
# columns below; the graph is later derived from those relationships.
# One feature row per CSV record, with missing values replaced by 0.
feature_columns = ['amount_cad', 'cash_indicator', 'debit_credit']
node_features = df[feature_columns].fillna(0).values

# Labels come from 'is_anomaly' when that column is present; otherwise every
# record is labelled 0.
if 'is_anomaly' in df.columns:
    node_labels = df['is_anomaly'].astype(int)
else:
    node_labels = np.zeros(len(df)).astype(int)

# Map every customer ID to a dense integer index.
node_mapping = {id_: idx for idx, id_ in enumerate(df['customer_id'].unique())}
df['mapped_customer_id'] = df['customer_id'].map(node_mapping)

# Step 2: Build the graph.
# Each row of `df` (one transaction) is a node, so the adjacency matrix must be
# N x N with N == len(df) to line up with `node_features` / `node_labels`.
# The previous version paired customer indices with raw transaction IDs (or
# random integers), which produced a 2 x E array whose entries did not index
# the feature rows at all.
# NOTE(review): `preprocess.adjacency_matrix` does not appear in Spektral's
# documented utilities, so the normalisation is done by hand below -- confirm
# and drop the unused import if so.
num_nodes = len(df)
customer_idx = df['mapped_customer_id'].to_numpy()

# Link consecutive transactions of the same customer. A stable sort groups the
# rows by customer; neighbouring rows with equal customer index get an edge.
# This keeps each customer's transactions connected with O(N) edges instead of
# the O(N^2) a full clique per customer could need.
order = np.argsort(customer_idx, kind='stable')
sorted_customers = customer_idx[order]
same_customer = sorted_customers[1:] == sorted_customers[:-1]
src = order[:-1][same_customer]
dst = order[1:][same_customer]

# Symmetrise and add self-loops: A_hat = A + A^T + I.
self_loops = np.arange(num_nodes)
rows = np.concatenate([src, dst, self_loops])
cols = np.concatenate([dst, src, self_loops])

# GCN normalisation D^-1/2 A_hat D^-1/2. Every node has degree >= 1 because of
# its self-loop, so the division is always defined.
degree = np.bincount(rows, minlength=num_nodes)
weights = 1.0 / np.sqrt(degree[rows] * degree[cols])

# Convert to TensorFlow compatible format.
x = tf.convert_to_tensor(node_features, dtype=tf.float32)
y = tf.convert_to_tensor(node_labels, dtype=tf.int32)
# The adjacency stays sparse and float32: an int32 cast would truncate the
# normalised weights to zero and cannot be multiplied with float features.
# The name `edge_index` is kept because the training code refers to it.
edge_index = tf.sparse.reorder(
    tf.SparseTensor(
        indices=np.stack([rows, cols], axis=1).astype(np.int64),
        values=weights.astype(np.float32),
        dense_shape=(num_nodes, num_nodes),
    )
)

# Step 3: Define the Graph CNN model (TensorFlow version)
class GCN(tf.keras.Model):
    """Two stacked ``GCNConv`` layers with a ReLU in between.

    Args:
        input_dim: Accepted for interface compatibility; not used when the
            layers are created.
        hidden_dim: Channel count passed to the first ``GCNConv``.
        output_dim: Channel count passed to the second ``GCNConv``; its raw
            output is returned without any further activation.
    """

    def __init__(self, input_dim, hidden_dim, output_dim):
        super().__init__()
        self.conv1 = GCNConv(hidden_dim)
        self.conv2 = GCNConv(output_dim)

    def call(self, inputs):
        """Apply both convolutions to ``inputs`` given as ``(x, edge_index)``."""
        features, adjacency = inputs
        hidden = tf.nn.relu(self.conv1([features, adjacency]))
        return self.conv2([hidden, adjacency])

# Step 4: Initialize model, optimizer, and loss
# The output width equals the number of distinct label values found in `y`.
num_classes = len(np.unique(y))
model = GCN(input_dim=x.shape[1], hidden_dim=16, output_dim=num_classes)
# The model returns raw scores, hence from_logits=True.
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = tf.keras.optimizers.Adam(learning_rate=0.01)

# Step 5: Train the model
def train_step(x, edge_index, y):
    """Run one gradient update of the module-level ``model`` and return the loss.

    The forward pass and loss are recorded on a gradient tape; the resulting
    gradients are applied with the module-level ``optimizer``.
    """
    with tf.GradientTape() as tape:
        loss = loss_fn(y, model([x, edge_index]))
    # The variable list is read only after the forward pass has run, matching
    # the original statement order.
    weights = model.trainable_variables
    grads = tape.gradient(loss, weights)
    optimizer.apply_gradients(zip(grads, weights))
    return loss

# Step 6: Test the model
def test_step(x, edge_index, y):
    """Return the fraction of nodes whose predicted class equals its label.

    Args:
        x: Node feature tensor passed to the module-level ``model``.
        edge_index: Graph structure tensor passed alongside ``x``.
        y: Integer class labels, one per node.

    Returns:
        A scalar float32 tensor holding the accuracy.
    """
    logits = model([x, edge_index])
    pred = tf.argmax(logits, axis=-1)
    # tf.argmax yields int64 by default while the labels are created as int32;
    # tf.equal requires matching dtypes, so bring the labels to the prediction
    # dtype before comparing.
    labels = tf.cast(y, pred.dtype)
    accuracy = tf.reduce_mean(tf.cast(tf.equal(pred, labels), tf.float32))
    return accuracy

# Training loop: 100 epochs, with a progress line on every 10th epoch.
for epoch in range(1, 101):
    loss = train_step(x, edge_index, y)
    if epoch % 10 != 0:
        continue
    # Accuracy is measured on the same tensors that were used for training.
    accuracy = test_step(x, edge_index, y)
    print(f"Epoch {epoch:03d}, Loss: {loss:.4f}, Accuracy: {accuracy:.4f}")


ValueError: numpy.dtype size changed, may indicate binary incompatibility. Expected 96 from C header, got 88 from PyObject